diff --git a/lib/kokkos/.gitignore b/lib/kokkos/.gitignore
new file mode 100644
index 000000000..f9d16be15
--- /dev/null
+++ b/lib/kokkos/.gitignore
@@ -0,0 +1,8 @@
+# Standard ignores
+*~
+*.pyc
+\#*#
+.#*
+.*.swp
+.cproject
+.project
diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt
new file mode 100644
index 000000000..0d437c0f8
--- /dev/null
+++ b/lib/kokkos/CMakeLists.txt
@@ -0,0 +1,123 @@
+
+#
+# A) Forward declare the package so that certain options are also defined for
+# subpackages
+#
+
+TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS)
+
+#------------------------------------------------------------------------------
+#
+# B) Define the common options for Kokkos first so they can be used by
+# subpackages as well.
+#
+
+TRIBITS_ADD_DEBUG_OPTION()
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_SIERRA_BUILD
+ KOKKOS_FOR_SIERRA
+ "Configure Kokkos for building within the Sierra build system."
+ OFF
+ )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_Cuda
+ KOKKOS_HAVE_CUDA
+ "Enable CUDA support in Kokkos."
+ "${TPL_ENABLE_CUDA}"
+ )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_Cuda_UVM
+ KOKKOS_USE_CUDA_UVM
+ "Enable CUDA Unified Virtual Memory support in Kokkos."
+ OFF
+ )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_Pthread
+ KOKKOS_HAVE_PTHREAD
+ "Enable Pthread support in Kokkos."
+ "${TPL_ENABLE_Pthread}"
+ )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_OpenMP
+ KOKKOS_HAVE_OPENMP
+ "Enable OpenMP support in Kokkos."
+ "${${PROJECT_NAME}_ENABLE_OpenMP}"
+ )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_QTHREAD
+ KOKKOS_HAVE_QTHREAD
+ "Enable QTHREAD support in Kokkos."
+ "${TPL_ENABLE_QTHREAD}"
+ )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_CXX11
+ KOKKOS_HAVE_CXX11
+ "Enable C++11 support in Kokkos."
+ "${${PROJECT_NAME}_ENABLE_CXX11}"
+ )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_HWLOC
+ KOKKOS_HAVE_HWLOC
+ "Enable HWLOC support in Kokkos."
+ "${TPL_ENABLE_HWLOC}"
+ )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_MPI
+ KOKKOS_HAVE_MPI
+ "Enable MPI support in Kokkos."
+ "${TPL_ENABLE_MPI}"
+ )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_Debug_Bounds_Check
+ KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
+ "Enable bounds checking support in Kokkos."
+ OFF
+ )
+
+#TRIBITS_ADD_OPTION_AND_DEFINE(
+# Kokkos_ENABLE_Profiling_Collect_Kernel_Data
+# KOKKOS_ENABLE_PROFILING_COLLECT_KERNEL_DATA
+# "Enable profiling support for kernel data collections in Kokkos."
+# "${${PROJECT_NAME}_ENABLE_KokkosProfiler}"
+# )
+
+# placeholder for future device...
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_Winthread
+ KOKKOS_HAVE_WINTHREAD
+ "Enable Winthread support in Kokkos."
+ "${TPL_ENABLE_Winthread}"
+ )
+
+#------------------------------------------------------------------------------
+#
+# C) Process the subpackages for Kokkos
+#
+
+TRIBITS_PROCESS_SUBPACKAGES()
+
+#
+# D) If Kokkos itself is enabled, process the Kokkos package
+#
+
+TRIBITS_PACKAGE_DEF()
+
+TRIBITS_EXCLUDE_AUTOTOOLS_FILES()
+
+TRIBITS_EXCLUDE_FILES(
+ classic/doc
+ classic/LinAlg/doc/CrsRefactorNotesMay2012
+ )
+
+TRIBITS_PACKAGE_POSTPROCESS()
+
diff --git a/lib/kokkos/HOW_TO_SNAPSHOT b/lib/kokkos/HOW_TO_SNAPSHOT
new file mode 100644
index 000000000..46bfb4167
--- /dev/null
+++ b/lib/kokkos/HOW_TO_SNAPSHOT
@@ -0,0 +1,73 @@
+
+Developers of Kokkos (those who commit modifications to Kokkos)
+must maintain the snapshot of Kokkos in the Trilinos repository.
+
+This file contains instructions for how to
+snapshot Kokkos from github.com/kokkos to Trilinos.
+
+------------------------------------------------------------------------
+*** EVERYTHING GOES RIGHT WORKFLOW ***
+
+1) Given a 'git clone' of Kokkos and of Trilinos repositories.
+1.1) Let ${KOKKOS} be the absolute path to the Kokkos clone.
+ This path *must* terminate with the directory name 'kokkos';
+ e.g., ${HOME}/kokkos .
+1.2) Let ${TRILINOS} be the absolute path to the Trilinos directory.
+
+2) Given that the Kokkos build & test is clean and
+ changes are committed to the Kokkos clone.
+
+3) Snapshot the current commit in the Kokkos clone into the Trilinos clone.
+ This overwrites ${TRILINOS}/packages/kokkos with the content of ${KOKKOS}:
+ ${KOKKOS}/config/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages
+
+4) Verify the snapshot commit happened as expected
+ cd ${TRILINOS}/packages/kokkos
+ git log -1 --name-only
+
+5) Modify, build, and test Trilinos with the Kokkos snapshot.
+
+6) Given that the Trilinos build & test is clean and
+ changes are committed to the Trilinos clone.
+
+7) Attempt push to the Kokkos repository.
+ If push fails then you must 'remove the Kokkos snapshot'
+ from your Trilinos clone.
+ See below.
+
+8) Attempt to push to the Trilinos repository.
+ If updating for a failed push requires you to change Kokkos you must
+ 'remove the Kokkos snapshot' from your Trilinos clone.
+ See below.
+
+------------------------------------------------------------------------
+*** WHEN SOMETHING GOES WRONG AND YOU MUST ***
+*** REMOVE THE KOKKOS SNAPSHOT FROM YOUR TRILINOS CLONE ***
+
+1) Query the Trilinos clone commit log.
+ git log --oneline
+
+2) Note the <SHA1> of the commit to the Trilinos clone
+ immediately BEFORE the Kokkos snapshot commit.
+ Copy this <SHA1> for use in the next command.
+
+3) IF there is more than one outstanding commit, then you can remove just the
+ Kokkos snapshot commit with 'git rebase -i'. Edit the rebase file.
+ Remove or comment out the Kokkos snapshot commit entry.
+ git rebase -i <SHA1>
+
+4) IF the Kokkos snapshot commit is the one and only
+ outstanding commit, then remove just that commit.
+ git reset --hard HEAD~1
+
+------------------------------------------------------------------------
+*** REGARDING 'snapshot.py' TOOL ***
+
+The 'snapshot.py' tool is developed and maintained by the
+Center for Computing Research (CCR)
+Software Engineering, Maintenance, and Support (SEMS) team.
+
+Contact Brent Perschbacher <bmpersc@sandia.gov> for questions.
+
+------------------------------------------------------------------------
+
diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos
index 30ecec336..1034b1542 100644
--- a/lib/kokkos/Makefile.kokkos
+++ b/lib/kokkos/Makefile.kokkos
@@ -1,377 +1,431 @@
# Default settings common options
+#LAMMPS specific settings:
KOKKOS_PATH=../../lib/kokkos
+CXXFLAGS=$(CCFLAGS)
#Options: OpenMP,Serial,Pthreads,Cuda
-KOKKOS_DEVICES ?= "OpenMP"
-#KOKKOS_DEVICES ?= "Pthreads"
-#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,ARMv8,BGQ,Power7,Power8
+#KOKKOS_DEVICES ?= "OpenMP"
+KOKKOS_DEVICES ?= "Pthreads"
+#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,ARMv8,BGQ,Power7,Power8,KNL
KOKKOS_ARCH ?= ""
#Options: yes,no
KOKKOS_DEBUG ?= "no"
-#Options: hwloc,librt
+#Options: hwloc,librt,experimental_memkind
KOKKOS_USE_TPLS ?= ""
#Options: c++11
KOKKOS_CXX_STANDARD ?= "c++11"
-#Options: kernel_times,aggregate_mpi
-KOKKOS_PROFILING ?= ""
+#Options: aggressive_vectorization
+KOKKOS_OPTIONS ?= "aggressive_vectorization"
#Default settings specific options
#Options: force_uvm,use_ldg,rdc,enable_lambda
KOKKOS_CUDA_OPTIONS ?= ""
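#For illustration (editor's note, not part of the original Makefile): these
#variables are usually overridden on the make command line, e.g.
#  make KOKKOS_DEVICES="OpenMP" KOKKOS_ARCH="SNB" KOKKOS_USE_TPLS="hwloc"
#Each option is detected below by grepping the variable's value, so several
#comma-separated options can be combined in a single variable.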
# Check for general settings
KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
KOKKOS_INTERNAL_ENABLE_PROFILING_COLLECT_KERNEL_DATA := $(strip $(shell echo $(KOKKOS_PROFILING) | grep "kernel_times" | wc -l))
KOKKOS_INTERNAL_ENABLE_PROFILING_AGGREGATE_MPI := $(strip $(shell echo $(KOKKOS_PROFILING) | grep "aggregate_mpi" | wc -l))
KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
# Check for external libraries
KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l))
+KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l))
# Check for advanced settings
+KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))
# Check for Kokkos Host Execution Spaces one of which must be on
KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l))
KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))
KOKKOS_INTERNAL_USE_QTHREAD := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthread | wc -l))
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
KOKKOS_INTERNAL_USE_SERIAL := 1
endif
endif
-KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l)
-KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)
-KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)
-KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname | grep CYGWIN | wc -l)
+KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l)
+KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l)
+KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)
+KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)
+KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname | grep CYGWIN | wc -l)
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -mp
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
# OpenMP is turned on by default in Cray compiler environment
KOKKOS_INTERNAL_OPENMP_FLAG :=
else
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_INTERNAL_CXX11_FLAG := --c++11
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
else
KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
endif
endif
endif
# Check for other Execution Spaces
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
# Check for Kokkos Architecture settings
#Intel based
KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l))
KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))
#NVIDIA based
+NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
endif
#ARM based
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l))
#IBM based
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc))
#AMD based
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
#Any AVX?
-KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
-KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
#Incompatible flags?
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc ))
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
$(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1)
$(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
endif
#Generating the list of Flags
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
# No warnings:
KOKKOS_CXXFLAGS =
# INTEL and CLANG warnings:
#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
# GCC warnings:
#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized -Wignored-qualifiers -Wempty-body -Wclobbered
KOKKOS_LIBS = -lkokkos
KOKKOS_LDFLAGS = -L$(shell pwd)
KOKKOS_SRC =
KOKKOS_HEADERS =
#Generating the KokkosCore_config.h file
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp)
tmp := $(shell date >> KokkosCore_config.tmp)
tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp)
tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
KOKKOS_CPPFLAGS += -I$(QTHREAD_PATH)/include
KOKKOS_LDFLAGS += -L$(QTHREAD_PATH)/lib
tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREAD 1" >> KokkosCore_config.tmp )
endif
tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_CXXFLAGS += -G
endif
KOKKOS_CXXFLAGS += -g
KOKKOS_LDFLAGS += -g -ldl
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
KOKKOS_LIBS += -lhwloc
tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp )
KOKKOS_LIBS += -lrt
endif
+ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
+ KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
+ KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
+ KOKKOS_LIBS += -lmemkind
+ tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp )
+endif
+
+tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp)
+
+ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
+ tmp := $(shell echo "\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION 1" >> KokkosCore_config.tmp )
+endif
+
tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += --relocatable-device-code=true
KOKKOS_LDFLAGS += --relocatable-device-code=true
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -expt-extended-lambda
endif
#Add Architecture flags
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
KOKKOS_LDFLAGS +=
else
KOKKOS_CXXFLAGS += -mavx
KOKKOS_LDFLAGS += -mavx
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
KOKKOS_CXXFLAGS += -mcpu=power8
KOKKOS_LDFLAGS += -mcpu=power8
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
- KOKKOS_CXXFLAGS += -march=core-avx2
- KOKKOS_LDFLAGS += -march=core-avx2
+ ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
+ KOKKOS_CXXFLAGS += -xCORE-AVX2
+ KOKKOS_LDFLAGS += -xCORE-AVX2
+ else
+ ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
+
+ else
+ ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
+
+ else
+ # Assume that this is really a GNU compiler
+ KOKKOS_CXXFLAGS += -march=core-avx2
+ KOKKOS_LDFLAGS += -march=core-avx2
+ endif
+ endif
+ endif
+endif
+
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
+ ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
+ KOKKOS_CXXFLAGS += -xMIC-AVX512
+ KOKKOS_LDFLAGS += -xMIC-AVX512
+ else
+ ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
+
+ else
+ ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
+
+ else
+ # Assume that this is really a GNU compiler
+ KOKKOS_CXXFLAGS += -march=knl
+ KOKKOS_LDFLAGS += -march=knl
+ endif
+ endif
+ endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
KOKKOS_CXXFLAGS += -mmic
KOKKOS_LDFLAGS += -mmic
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
KOKKOS_CXXFLAGS += -arch=sm_30
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
KOKKOS_CXXFLAGS += -arch=sm_32
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
KOKKOS_CXXFLAGS += -arch=sm_35
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
KOKKOS_CXXFLAGS += -arch=sm_37
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
KOKKOS_CXXFLAGS += -arch=sm_50
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
KOKKOS_CXXFLAGS += -arch=sm_52
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
KOKKOS_CXXFLAGS += -arch=sm_53
endif
endif
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h)
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
else
KOKKOS_INTERNAL_NEW_CONFIG := 1
endif
ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
KOKKOS_LIBS += -lcudart -lcuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_LIBS += -lpthread
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
KOKKOS_LIBS += -lqthread
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
else
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
endif
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
endif
#With Cygwin, functions such as fdopen and fileno are not defined
#when strict ANSI is enabled, and strict ANSI gets enabled with --std=c++11.
#So we hard-undefine it here. Not sure if that has any bad side effects.
#This is needed for gtest actually, not for Kokkos itself!
ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1)
KOKKOS_CXXFLAGS += -U__STRICT_ANSI__
endif
# Setting up dependencies
KokkosCore_config.h:
KOKKOS_CPP_DEPENDS := KokkosCore_config.h $(KOKKOS_HEADERS)
KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o)
KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))
include $(KOKKOS_PATH)/Makefile.targets
kokkos-clean:
rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
ranlib libkokkos.a
KOKKOS_LINK_DEPENDS=libkokkos.a
diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets
index 009adb42c..7574aeb33 100644
--- a/lib/kokkos/Makefile.targets
+++ b/lib/kokkos/Makefile.targets
@@ -1,57 +1,62 @@
Kokkos_UnorderedMap_impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
Kokkos_AllocationTracker.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp
Kokkos_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp
Kokkos_Core.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
Kokkos_Error.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
Kokkos_Cuda_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
Kokkos_QthreadExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp
Kokkos_Qthread_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
endif
+Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
+ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
+Kokkos_HBWAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp
+ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp
+
diff --git a/lib/kokkos/README b/lib/kokkos/README
index 85bd0142b..904e39abf 100644
--- a/lib/kokkos/README
+++ b/lib/kokkos/README
@@ -1,126 +1,134 @@
Kokkos implements a programming model in C++ for writing performance portable
applications targeting all major HPC platforms. For that purpose it provides
abstractions for both parallel execution of code and data management.
Kokkos is designed to target complex node architectures with N-level memory
hierarchies and multiple types of execution resources. It currently can use
OpenMP, Pthreads and CUDA as backend programming models.
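As a minimal illustration of these two abstractions (an editor's sketch, not
part of the original distribution), a Kokkos program allocates a View and
dispatches a parallel kernel over it; the same source runs on any enabled
backend:

  #include <Kokkos_Core.hpp>

  int main(int argc, char* argv[]) {
    Kokkos::initialize(argc, argv);
    {
      // Data management: a View owns memory in the default memory space.
      Kokkos::View<double*> x("x", 100);
      // Parallel execution: the body runs on the configured backend.
      // (On CUDA, KOKKOS_LAMBDA requires the enable_lambda option; a
      // functor works on every backend.)
      Kokkos::parallel_for(100, KOKKOS_LAMBDA(const int i) {
        x(i) = 2.0 * i;
      });
      Kokkos::fence();
    }
    Kokkos::finalize();
    return 0;
  }
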
The core developers of Kokkos are Carter Edwards and Christian Trott
at the Computer Science Research Institute of the Sandia National
Laboratories.
The KokkosP interface and associated tools are developed by the Application
Performance Team and Kokkos core developers at Sandia National Laboratories.
To learn more about Kokkos consider watching one of our presentations:
GTC 2015:
http://on-demand.gputechconf.com/gtc/2015/video/S5166.html
http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf
A programming guide can be found under doc/Kokkos_PG.pdf. This is an initial version
and feedback is greatly appreciated.
+A separate repository with extensive tutorial material can be found under
+https://github.com/kokkos/kokkos-tutorials.
+
+If you have a patch to contribute, please feel free to issue a pull request against
+the develop branch. For major contributions it is better to contact us first
+for guidance.
+
For questions please send an email to
kokkos-users@software.sandia.gov
For non-public questions send an email to
hcedwar(at)sandia.gov and crtrott(at)sandia.gov
============================================================================
====Requirements============================================================
============================================================================
Primary tested compilers are:
GCC 4.7.2
GCC 4.8.4
GCC 4.9.2
GCC 5.1.0
Intel 14.0.4
Intel 15.0.2
Clang 3.5.2
Clang 3.6.1
Secondary tested compilers are:
CUDA 6.5 (with gcc 4.7.2)
CUDA 7.0 (with gcc 4.7.2)
+ CUDA 7.5 (with gcc 4.7.2)
Other compilers working:
PGI 15.4
IBM XL 13.1.2
Cygwin 2.1.0 64bit with gcc 4.9.3
Primary tested compilers are passing in release mode
with warnings as errors. We are using the following set
of flags:
GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits
-Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
Secondary compilers are passing without -Werror.
Other compilers are tested occasionally.
============================================================================
====Getting started=========================================================
============================================================================
In the 'example/tutorial' directory you will find step-by-step tutorial
examples which explain many of the features of Kokkos. They work with
simple Makefiles. To build with g++ and OpenMP simply type 'make openmp'
in the 'example/tutorial' directory. This will build all examples in the
subfolders.
============================================================================
====Running Unit Tests======================================================
============================================================================
To run the unit tests, create a build directory and run the following commands:
KOKKOS_PATH/generate_makefile.bash
make build-test
make test
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
changing the device type for which to build.
============================================================================
====Install the library=====================================================
============================================================================
To install Kokkos as a library, create a build directory and run the following:
KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH
make lib
make install
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
changing the device type for which to build.
============================================================================
====CMakeFiles==============================================================
============================================================================
The CMake files contained in this repository require Tribits and are used
for integration with Trilinos. They do not currently support a standalone
CMake build.
===========================================================================
====Kokkos and CUDA UVM====================================================
===========================================================================
Kokkos does support UVM as a specific memory space called CudaUVMSpace.
Allocations made with that space are accessible from host and device.
You can tell Kokkos to use that as the default space for Cuda allocations.
In either case UVM comes with a number of restrictions:
(i) You can't access allocations on the host while a kernel is potentially
running. This will lead to segfaults. To avoid that you either need to
call Kokkos::Cuda::fence() (or just Kokkos::fence()) after kernels, or
you can set the environment variable CUDA_LAUNCH_BLOCKING=1.
(ii) Furthermore, in multi-socket multi-GPU machines, UVM defaults to using
zero-copy allocations for technical reasons related to using multiple
GPUs from the same process. If an executable doesn't do that (e.g. each
MPI rank of an application uses a single GPU [can be the same GPU for
multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
This will enforce proper UVM allocations, but can lead to errors if
more than a single GPU is used by a single process.
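As a concrete sketch of restriction (i) (an editor's addition, not part of
the original README):

  const int n = 100;
  // Allocation accessible from both host and device.
  Kokkos::View<double*, Kokkos::CudaUVMSpace> v("v", n);
  Kokkos::parallel_for(n, KOKKOS_LAMBDA(const int i) { v(i) = i; });
  Kokkos::fence();        // without this, the host access below may segfault
  double first = v(0);    // safe only after the fence
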
diff --git a/lib/kokkos/algorithms/CMakeLists.txt b/lib/kokkos/algorithms/CMakeLists.txt
new file mode 100644
index 000000000..7853184a5
--- /dev/null
+++ b/lib/kokkos/algorithms/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+
+TRIBITS_SUBPACKAGE(Algorithms)
+
+ADD_SUBDIRECTORY(src)
+
+TRIBITS_ADD_TEST_DIRECTORIES(unit_tests)
+#TRIBITS_ADD_TEST_DIRECTORIES(performance_tests)
+
+TRIBITS_SUBPACKAGE_POSTPROCESS()
diff --git a/lib/kokkos/algorithms/cmake/Dependencies.cmake b/lib/kokkos/algorithms/cmake/Dependencies.cmake
new file mode 100644
index 000000000..1d71d8af3
--- /dev/null
+++ b/lib/kokkos/algorithms/cmake/Dependencies.cmake
@@ -0,0 +1,5 @@
+TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
+ LIB_REQUIRED_PACKAGES KokkosCore
+ LIB_OPTIONAL_TPLS Pthread CUDA HWLOC
+ TEST_OPTIONAL_TPLS CUSPARSE
+ )
diff --git a/lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in b/lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in
new file mode 100644
index 000000000..67334b70f
--- /dev/null
+++ b/lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in
@@ -0,0 +1,4 @@
+#ifndef KOKKOS_ALGORITHMS_CONFIG_H
+#define KOKKOS_ALGORITHMS_CONFIG_H
+
+#endif
diff --git a/lib/kokkos/algorithms/src/CMakeLists.txt b/lib/kokkos/algorithms/src/CMakeLists.txt
new file mode 100644
index 000000000..dfbf3323c
--- /dev/null
+++ b/lib/kokkos/algorithms/src/CMakeLists.txt
@@ -0,0 +1,21 @@
+
+TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h)
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+#-----------------------------------------------------------------------------
+
+FILE(GLOB HEADERS *.hpp)
+FILE(GLOB SOURCES *.cpp)
+LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
+
+#-----------------------------------------------------------------------------
+
+TRIBITS_ADD_LIBRARY(
+ kokkosalgorithms
+ HEADERS ${HEADERS}
+ SOURCES ${SOURCES}
+ DEPLIBS
+ )
+
diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp
index 17f5e073c..192b1d64f 100644
--- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp
@@ -1,1692 +1,1744 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_RANDOM_HPP
#define KOKKOS_RANDOM_HPP
#include <Kokkos_Core.hpp>
-//#include <Kokkos_Complex.hpp>
+#include <Kokkos_Complex.hpp>
#include <cstdio>
#include <cstdlib>
#include <cmath>
/// \file Kokkos_Random.hpp
/// \brief Pseudorandom number generators
///
/// These generators are based on Vigna, Sebastiano (2014). "An
/// experimental exploration of Marsaglia's xorshift generators,
/// scrambled." See: http://arxiv.org/abs/1402.6246
namespace Kokkos {
/*Template functions to get equidistributed random numbers from a generator for a specific Scalar type
template<class Generator,Scalar>
struct rand{
//Max value returned by draw(Generator& gen)
KOKKOS_INLINE_FUNCTION
static Scalar max();
//Returns a value between zero and max()
KOKKOS_INLINE_FUNCTION
static Scalar draw(Generator& gen);
//Returns a value between zero and range()
//Note: for floating point values range can be larger than max()
KOKKOS_INLINE_FUNCTION
static Scalar draw(Generator& gen, const Scalar& range){}
//Return value between start and end
KOKKOS_INLINE_FUNCTION
static Scalar draw(Generator& gen, const Scalar& start, const Scalar& end);
};
The Random number generators themselves have two components: a state-pool and the actual generator.
A state-pool manages a number of generators, so that each active thread is able to grab its own.
This allows the generation of random numbers which are independent between threads. Note that
in contrast to CuRand none of the functions of the pool (or the generator) are collectives,
i.e. all functions can be called inside conditionals.
template<class Device>
class Pool {
public:
//The Kokkos device type
typedef Device device_type;
//The actual generator type
typedef Generator<Device> generator_type;
//Default constructor: does not initialize a pool
Pool();
//Initializing constructor: calls init(seed,Device_Specific_Number);
Pool(unsigned int seed);
//Initialize the Pool with seed as a starting seed and a pool_size of num_states
//The Random_XorShift64 generator is used in serial to initialize all states,
//thus the initialization process is platform independent and deterministic.
void init(unsigned int seed, int num_states);
//Get a generator. This will lock one of the states, guaranteeing that each thread
//will have its private generator. Note: on Cuda getting a state involves atomics,
//and is thus not deterministic!
generator_type get_state();
//Give a state back to the pool. This unlocks the state, and writes the modified
//state of the generator back to the pool.
void free_state(generator_type gen);
}
template<class Device>
class Generator {
public:
//The Kokkos device type
typedef DeviceType device_type;
//Max return values of respective [X]rand[S]() functions
enum {MAX_URAND = 0xffffffffU};
enum {MAX_URAND64 = 0xffffffffffffffffULL-1};
enum {MAX_RAND = static_cast<int>(0xffffffffU/2)};
enum {MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL/2-1)};
//Init with a state and the idx with respect to the pool. Note: in serial the
//Generator can be used by just giving it the necessary state arguments
KOKKOS_INLINE_FUNCTION
Generator (STATE_ARGUMENTS, int state_idx = 0);
//Draw an equidistributed uint32_t in the range (0,MAX_URAND]
KOKKOS_INLINE_FUNCTION
uint32_t urand();
//Draw an equidistributed uint64_t in the range (0,MAX_URAND64]
KOKKOS_INLINE_FUNCTION
uint64_t urand64();
//Draw an equidistributed uint32_t in the range (0,range]
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& range);
//Draw an equidistributed uint32_t in the range (start,end]
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& start, const uint32_t& end );
//Draw an equidistributed uint64_t in the range (0,range]
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& range);
//Draw an equidistributed uint64_t in the range (start,end]
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& start, const uint64_t& end );
//Draw an equidistributed int in the range (0,MAX_RAND]
KOKKOS_INLINE_FUNCTION
int rand();
//Draw an equidistributed int in the range (0,range]
KOKKOS_INLINE_FUNCTION
int rand(const int& range);
//Draw an equidistributed int in the range (start,end]
KOKKOS_INLINE_FUNCTION
int rand(const int& start, const int& end );
//Draw an equidistributed int64_t in the range (0,MAX_RAND64]
KOKKOS_INLINE_FUNCTION
int64_t rand64();
//Draw an equidistributed int64_t in the range (0,range]
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& range);
//Draw an equidistributed int64_t in the range (start,end]
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& start, const int64_t& end );
//Draw an equidistributed float in the range (0,1.0]
KOKKOS_INLINE_FUNCTION
float frand();
//Draw an equidistributed float in the range (0,range]
KOKKOS_INLINE_FUNCTION
float frand(const float& range);
//Draw an equidistributed float in the range (start,end]
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end );
//Draw an equidistributed double in the range (0,1.0]
KOKKOS_INLINE_FUNCTION
double drand();
//Draw an equidistributed double in the range (0,range]
KOKKOS_INLINE_FUNCTION
double drand(const double& range);
//Draw an equidistributed double in the range (start,end]
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end );
//Draw a standard normally distributed double
KOKKOS_INLINE_FUNCTION
double normal();
//Draw a normally distributed double with given mean and standard deviation
KOKKOS_INLINE_FUNCTION
double normal(const double& mean, const double& std_dev=1.0);
}
//Additional Functions:
//Fills view with random numbers in the range (0,range]
template<class ViewType, class PoolType>
void fill_random(ViewType view, PoolType pool, ViewType::value_type range);
//Fills view with random numbers in the range (start,end]
template<class ViewType, class PoolType>
void fill_random(ViewType view, PoolType pool,
ViewType::value_type start, ViewType::value_type end);
*/
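//For illustration only (editor's sketch, not part of this header): typical
//use of the pool/generator API documented above, assuming a View 'results',
//a problem size 'n', and a range 'range':
//
//  Kokkos::Random_XorShift64_Pool<> pool(12345);
//  Kokkos::parallel_for(n, KOKKOS_LAMBDA(const int i) {
//    auto gen = pool.get_state();   // lock a per-thread generator
//    results(i) = gen.drand();      // uniform double in (0,1]
//    pool.free_state(gen);          // return the state to the pool
//  });
//  //Or fill a View directly with values in (0,range]:
//  Kokkos::fill_random(results, pool, range);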
template<class Generator, class Scalar>
struct rand;
template<class Generator>
struct rand<Generator,char> {
KOKKOS_INLINE_FUNCTION
static short max(){return 127;}
KOKKOS_INLINE_FUNCTION
static short draw(Generator& gen)
{return short((gen.rand()&0xff+256)%256);}
KOKKOS_INLINE_FUNCTION
static short draw(Generator& gen, const char& range)
{return char(gen.rand(range));}
KOKKOS_INLINE_FUNCTION
static short draw(Generator& gen, const char& start, const char& end)
{return char(gen.rand(start,end));}
};
template<class Generator>
struct rand<Generator,short> {
KOKKOS_INLINE_FUNCTION
static short max(){return 32767;}
KOKKOS_INLINE_FUNCTION
static short draw(Generator& gen)
{return short((gen.rand()&0xffff+65536)%32768);}
KOKKOS_INLINE_FUNCTION
static short draw(Generator& gen, const short& range)
{return short(gen.rand(range));}
KOKKOS_INLINE_FUNCTION
static short draw(Generator& gen, const short& start, const short& end)
{return short(gen.rand(start,end));}
};
template<class Generator>
struct rand<Generator,int> {
KOKKOS_INLINE_FUNCTION
static int max(){return Generator::MAX_RAND;}
KOKKOS_INLINE_FUNCTION
static int draw(Generator& gen)
{return gen.rand();}
KOKKOS_INLINE_FUNCTION
static int draw(Generator& gen, const int& range)
{return gen.rand(range);}
KOKKOS_INLINE_FUNCTION
static int draw(Generator& gen, const int& start, const int& end)
{return gen.rand(start,end);}
};
template<class Generator>
struct rand<Generator,unsigned int> {
KOKKOS_INLINE_FUNCTION
static unsigned int max () {
return Generator::MAX_URAND;
}
KOKKOS_INLINE_FUNCTION
static unsigned int draw (Generator& gen) {
return gen.urand ();
}
KOKKOS_INLINE_FUNCTION
static unsigned int draw(Generator& gen, const unsigned int& range) {
return gen.urand (range);
}
KOKKOS_INLINE_FUNCTION
static unsigned int
draw (Generator& gen, const unsigned int& start, const unsigned int& end) {
return gen.urand (start, end);
}
};
template<class Generator>
struct rand<Generator,long> {
KOKKOS_INLINE_FUNCTION
static long max () {
// FIXME (mfh 26 Oct 2014) It would be better to select the
// return value at compile time, using something like enable_if.
return sizeof (long) == 4 ?
static_cast<long> (Generator::MAX_RAND) :
static_cast<long> (Generator::MAX_RAND64);
}
KOKKOS_INLINE_FUNCTION
static long draw (Generator& gen) {
// FIXME (mfh 26 Oct 2014) It would be better to select the
// return value at compile time, using something like enable_if.
return sizeof (long) == 4 ?
static_cast<long> (gen.rand ()) :
static_cast<long> (gen.rand64 ());
}
KOKKOS_INLINE_FUNCTION
static long draw (Generator& gen, const long& range) {
// FIXME (mfh 26 Oct 2014) It would be better to select the
// return value at compile time, using something like enable_if.
return sizeof (long) == 4 ?
static_cast<long> (gen.rand (static_cast<int> (range))) :
static_cast<long> (gen.rand64 (range));
}
KOKKOS_INLINE_FUNCTION
static long draw (Generator& gen, const long& start, const long& end) {
// FIXME (mfh 26 Oct 2014) It would be better to select the
// return value at compile time, using something like enable_if.
return sizeof (long) == 4 ?
static_cast<long> (gen.rand (static_cast<int> (start),
static_cast<int> (end))) :
static_cast<long> (gen.rand64 (start, end));
}
};
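// NOTE (editor's sketch, not part of this header): the compile-time selection
// asked for in the FIXMEs above could look roughly like this with C++11
// <type_traits>, using SFINAE on the return type:
//
//   template<bool Is64Bit = (sizeof(long) == 8)>
//   KOKKOS_INLINE_FUNCTION
//   static typename std::enable_if<Is64Bit, long>::type max ()
//   { return static_cast<long> (Generator::MAX_RAND64); }
//
//   template<bool Is64Bit = (sizeof(long) == 8)>
//   KOKKOS_INLINE_FUNCTION
//   static typename std::enable_if<!Is64Bit, long>::type max ()
//   { return static_cast<long> (Generator::MAX_RAND); }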
template<class Generator>
struct rand<Generator,unsigned long> {
KOKKOS_INLINE_FUNCTION
static unsigned long max () {
// FIXME (mfh 26 Oct 2014) It would be better to select the
// return value at compile time, using something like enable_if.
return sizeof (unsigned long) == 4 ?
static_cast<unsigned long> (Generator::MAX_URAND) :
static_cast<unsigned long> (Generator::MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION
static unsigned long draw (Generator& gen) {
// FIXME (mfh 26 Oct 2014) It would be better to select the
// return value at compile time, using something like enable_if.
return sizeof (unsigned long) == 4 ?
static_cast<unsigned long> (gen.urand ()) :
static_cast<unsigned long> (gen.urand64 ());
}
KOKKOS_INLINE_FUNCTION
static unsigned long draw(Generator& gen, const unsigned long& range) {
// FIXME (mfh 26 Oct 2014) It would be better to select the
// return value at compile time, using something like enable_if.
return sizeof (unsigned long) == 4 ?
static_cast<unsigned long> (gen.urand (static_cast<unsigned int> (range))) :
static_cast<unsigned long> (gen.urand64 (range));
}
KOKKOS_INLINE_FUNCTION
static unsigned long
draw (Generator& gen, const unsigned long& start, const unsigned long& end) {
// FIXME (mfh 26 Oct 2014) It would be better to select the
// return value at compile time, using something like enable_if.
return sizeof (unsigned long) == 4 ?
static_cast<unsigned long> (gen.urand (static_cast<unsigned int> (start),
static_cast<unsigned int> (end))) :
static_cast<unsigned long> (gen.urand64 (start, end));
}
};
// NOTE (mfh 26 Oct 2014) This is a partial specialization for long
// long, a C99 / C++11 signed type which is guaranteed to be at
// least 64 bits. Do NOT write a partial specialization for
// int64_t!!! This is just a typedef! It could be either long or
// long long. We don't know which a priori, and I've seen both.
// The types long and long long are guaranteed to differ, so it's
// always safe to specialize for both.
template<class Generator>
struct rand<Generator, long long> {
KOKKOS_INLINE_FUNCTION
static long long max () {
// FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits.
return Generator::MAX_RAND64;
}
KOKKOS_INLINE_FUNCTION
static long long draw (Generator& gen) {
// FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits.
return gen.rand64 ();
}
KOKKOS_INLINE_FUNCTION
static long long draw (Generator& gen, const long long& range) {
// FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits.
return gen.rand64 (range);
}
KOKKOS_INLINE_FUNCTION
static long long draw (Generator& gen, const long long& start, const long long& end) {
// FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits.
return gen.rand64 (start, end);
}
};
// NOTE (mfh 26 Oct 2014) This is a partial specialization for
// unsigned long long, a C99 / C++11 unsigned type which is
// guaranteed to be at least 64 bits. Do NOT write a partial
// specialization for uint64_t!!! This is just a typedef! It could
// be either unsigned long or unsigned long long. We don't know
// which a priori, and I've seen both. The types unsigned long and
// unsigned long long are guaranteed to differ, so it's always safe
// to specialize for both.
template<class Generator>
struct rand<Generator,unsigned long long> {
KOKKOS_INLINE_FUNCTION
static unsigned long long max () {
// FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits.
return Generator::MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
static unsigned long long draw (Generator& gen) {
// FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits.
return gen.urand64 ();
}
KOKKOS_INLINE_FUNCTION
static unsigned long long draw (Generator& gen, const unsigned long long& range) {
// FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits.
return gen.urand64 (range);
}
KOKKOS_INLINE_FUNCTION
static unsigned long long
draw (Generator& gen, const unsigned long long& start, const unsigned long long& end) {
// FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits.
return gen.urand64 (start, end);
}
};
template<class Generator>
struct rand<Generator,float> {
KOKKOS_INLINE_FUNCTION
static float max(){return 1.0f;}
KOKKOS_INLINE_FUNCTION
static float draw(Generator& gen)
{return gen.frand();}
KOKKOS_INLINE_FUNCTION
static float draw(Generator& gen, const float& range)
{return gen.frand(range);}
KOKKOS_INLINE_FUNCTION
static float draw(Generator& gen, const float& start, const float& end)
{return gen.frand(start,end);}
};
template<class Generator>
struct rand<Generator,double> {
KOKKOS_INLINE_FUNCTION
static double max(){return 1.0;}
KOKKOS_INLINE_FUNCTION
static double draw(Generator& gen)
{return gen.drand();}
KOKKOS_INLINE_FUNCTION
static double draw(Generator& gen, const double& range)
{return gen.drand(range);}
KOKKOS_INLINE_FUNCTION
static double draw(Generator& gen, const double& start, const double& end)
{return gen.drand(start,end);}
};
+ template<class Generator>
+ struct rand<Generator, ::Kokkos::complex<float> > {
+ KOKKOS_INLINE_FUNCTION
+ static ::Kokkos::complex<float> max () {
+ return ::Kokkos::complex<float> (1.0, 1.0);
+ }
+ KOKKOS_INLINE_FUNCTION
+ static ::Kokkos::complex<float> draw (Generator& gen) {
+ const float re = gen.frand ();
+ const float im = gen.frand ();
+ return ::Kokkos::complex<float> (re, im);
+ }
+ KOKKOS_INLINE_FUNCTION
+ static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& range) {
+ const float re = gen.frand (real (range));
+ const float im = gen.frand (imag (range));
+ return ::Kokkos::complex<float> (re, im);
+ }
+ KOKKOS_INLINE_FUNCTION
+ static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& start, const ::Kokkos::complex<float>& end) {
+ const float re = gen.frand (real (start), real (end));
+ const float im = gen.frand (imag (start), imag (end));
+ return ::Kokkos::complex<float> (re, im);
+ }
+ };
+
+ template<class Generator>
+ struct rand<Generator, ::Kokkos::complex<double> > {
+ KOKKOS_INLINE_FUNCTION
+ static ::Kokkos::complex<double> max () {
+ return ::Kokkos::complex<double> (1.0, 1.0);
+ }
+ KOKKOS_INLINE_FUNCTION
+ static ::Kokkos::complex<double> draw (Generator& gen) {
+ const double re = gen.drand ();
+ const double im = gen.drand ();
+ return ::Kokkos::complex<double> (re, im);
+ }
+ KOKKOS_INLINE_FUNCTION
+ static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& range) {
+ const double re = gen.drand (real (range));
+ const double im = gen.drand (imag (range));
+ return ::Kokkos::complex<double> (re, im);
+ }
+ KOKKOS_INLINE_FUNCTION
+ static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& start, const ::Kokkos::complex<double>& end) {
+ const double re = gen.drand (real (start), real (end));
+ const double im = gen.drand (imag (start), imag (end));
+ return ::Kokkos::complex<double> (re, im);
+ }
+ };
+
template<class DeviceType>
class Random_XorShift64_Pool;
template<class DeviceType>
class Random_XorShift64 {
private:
uint64_t state_;
const int state_idx_;
friend class Random_XorShift64_Pool<DeviceType>;
public:
typedef DeviceType device_type;
enum {MAX_URAND = 0xffffffffU};
enum {MAX_URAND64 = 0xffffffffffffffffULL-1};
enum {MAX_RAND = static_cast<int>(0xffffffff/2)};
enum {MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffLL/2-1)};
KOKKOS_INLINE_FUNCTION
Random_XorShift64 (uint64_t state, int state_idx = 0)
: state_(state),state_idx_(state_idx){}
KOKKOS_INLINE_FUNCTION
uint32_t urand() {
state_ ^= state_ >> 12;
state_ ^= state_ << 25;
state_ ^= state_ >> 27;
uint64_t tmp = state_ * 2685821657736338717ULL;
tmp = tmp>>16;
return static_cast<uint32_t>(tmp&MAX_URAND);
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64() {
state_ ^= state_ >> 12;
state_ ^= state_ << 25;
state_ ^= state_ >> 27;
return (state_ * 2685821657736338717ULL) - 1;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& range) {
const uint32_t max_val = (MAX_URAND/range)*range;
uint32_t tmp = urand();
while(tmp>=max_val)
tmp = urand();
return tmp%range;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& start, const uint32_t& end ) {
return urand(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& range) {
const uint64_t max_val = (MAX_URAND64/range)*range;
uint64_t tmp = urand64();
while(tmp>=max_val)
tmp = urand64();
return tmp%range;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& start, const uint64_t& end ) {
return urand64(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
int rand() {
return static_cast<int>(urand()/2);
}
KOKKOS_INLINE_FUNCTION
int rand(const int& range) {
const int max_val = (MAX_RAND/range)*range;
int tmp = rand();
while(tmp>=max_val)
tmp = rand();
return tmp%range;
}
KOKKOS_INLINE_FUNCTION
int rand(const int& start, const int& end ) {
return rand(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64() {
return static_cast<int64_t>(urand64()/2);
}
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& range) {
const int64_t max_val = (MAX_RAND64/range)*range;
int64_t tmp = rand64();
while(tmp>=max_val)
tmp = rand64();
return tmp%range;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& start, const int64_t& end ) {
return rand64(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
float frand() {
return 1.0f * urand64()/MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
float frand(const float& range) {
return range * urand64()/MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end ) {
return frand(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
double drand() {
return 1.0 * urand64()/MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
double drand(const double& range) {
return range * urand64()/MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end ) {
return drand(end-start)+start;
}
//Marsaglia polar method for drawing a standard normally distributed random number
KOKKOS_INLINE_FUNCTION
double normal() {
double S = 2.0;
double U;
while(S>=1.0 || S==0.0) {
// Draw (U,V) uniformly from (-1,1) so the pair covers the whole unit disk;
// S==0 is excluded to avoid log(0) below.
U = 2.0*drand()-1.0;
const double V = 2.0*drand()-1.0;
S = U*U+V*V;
}
return U*sqrt(-2.0*log(S)/S);
}
KOKKOS_INLINE_FUNCTION
double normal(const double& mean, const double& std_dev=1.0) {
return mean + normal()*std_dev;
}
};
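// Usage sketch (illustrative only): generators are normally obtained from a
// pool rather than constructed directly, but the draw interface looks like
// this; the seed and ranges below are arbitrary.
//
// Random_XorShift64<Kokkos::DefaultExecutionSpace> gen(12345);
// uint32_t u = gen.urand(100); // uniform integer in [0,100)
// double d = gen.drand(-1.0,1.0); // uniform double in [-1.0,1.0)
// double n = gen.normal(0.0,2.0); // normal, mean 0.0, std dev 2.0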
template<class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift64_Pool {
private:
typedef View<int*,DeviceType> lock_type;
typedef View<uint64_t*,DeviceType> state_data_type;
lock_type locks_;
state_data_type state_;
int num_states_;
public:
typedef Random_XorShift64<DeviceType> generator_type;
typedef DeviceType device_type;
Random_XorShift64_Pool() {
num_states_ = 0;
}
Random_XorShift64_Pool(uint64_t seed) {
num_states_ = 0;
init(seed,DeviceType::max_hardware_threads());
}
Random_XorShift64_Pool(const Random_XorShift64_Pool& src):
locks_(src.locks_),
state_(src.state_),
num_states_(src.num_states_)
{}
Random_XorShift64_Pool operator = (const Random_XorShift64_Pool& src) {
locks_ = src.locks_;
state_ = src.state_;
num_states_ = src.num_states_;
return *this;
}
void init(uint64_t seed, int num_states) {
num_states_ = num_states;
locks_ = lock_type("Kokkos::Random_XorShift64::locks",num_states_);
state_ = state_data_type("Kokkos::Random_XorShift64::state",num_states_);
typename state_data_type::HostMirror h_state = create_mirror_view(state_);
typename lock_type::HostMirror h_lock = create_mirror_view(locks_);
// Execute on the HostMirror's default execution space.
Random_XorShift64<typename state_data_type::HostMirror::execution_space> gen(seed,0);
for(int i = 0; i < 17; i++)
gen.rand();
for(int i = 0; i < num_states_; i++) {
int n1 = gen.rand();
int n2 = gen.rand();
int n3 = gen.rand();
int n4 = gen.rand();
h_state(i) = (((static_cast<uint64_t>(n1)) & 0xffff)<<00) |
(((static_cast<uint64_t>(n2)) & 0xffff)<<16) |
(((static_cast<uint64_t>(n3)) & 0xffff)<<32) |
(((static_cast<uint64_t>(n4)) & 0xffff)<<48);
h_lock(i) = 0;
}
deep_copy(state_,h_state);
deep_copy(locks_,h_lock);
}
KOKKOS_INLINE_FUNCTION
Random_XorShift64<DeviceType> get_state() const {
const int i = DeviceType::hardware_thread_id();
return Random_XorShift64<DeviceType>(state_(i),i);
}
KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift64<DeviceType>& state) const {
state_(state.state_idx_) = state.state_;
}
};
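// Usage sketch (illustrative only): the pool hands one generator state to
// each thread; every get_state() must be paired with a free_state() so the
// advanced state is written back. Seed and body below are assumed examples.
//
// Random_XorShift64_Pool<> pool(12345);
// // inside a parallel kernel body:
// Random_XorShift64_Pool<>::generator_type gen = pool.get_state();
// double x = gen.drand();
// pool.free_state(gen);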
template<class DeviceType>
class Random_XorShift1024_Pool;
template<class DeviceType>
class Random_XorShift1024 {
private:
int p_;
const int state_idx_;
uint64_t state_[16];
friend class Random_XorShift1024_Pool<DeviceType>;
public:
typedef DeviceType device_type;
enum {MAX_URAND = 0xffffffffU};
enum {MAX_URAND64 = 0xffffffffffffffffULL-1};
enum {MAX_RAND = static_cast<int>(0xffffffffU/2)};
enum {MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL/2-1)};
KOKKOS_INLINE_FUNCTION
Random_XorShift1024 (uint64_t* state, int p, int state_idx = 0):
p_(p),state_idx_(state_idx){
for(int i=0 ; i<16; i++)
state_[i] = state[i];
}
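// The state update in urand()/urand64() below follows the "xorshift1024*"
// scheme: sixteen 64-bit words of state, an xorshift step combining two of
// them, and a final multiplication to scramble the output.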
KOKKOS_INLINE_FUNCTION
uint32_t urand() {
uint64_t state_0 = state_[ p_ ];
uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ];
state_1 ^= state_1 << 31;
state_1 ^= state_1 >> 11;
state_0 ^= state_0 >> 30;
uint64_t tmp = ( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL;
tmp = tmp>>16;
return static_cast<uint32_t>(tmp&MAX_URAND);
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64() {
uint64_t state_0 = state_[ p_ ];
uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ];
state_1 ^= state_1 << 31;
state_1 ^= state_1 >> 11;
state_0 ^= state_0 >> 30;
return (( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL) - 1;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& range) {
const uint32_t max_val = (MAX_URAND/range)*range;
uint32_t tmp = urand();
while(tmp>=max_val)
tmp = urand();
return tmp%range;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& start, const uint32_t& end ) {
return urand(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& range) {
const uint64_t max_val = (MAX_URAND64/range)*range;
uint64_t tmp = urand64();
while(tmp>=max_val)
tmp = urand64();
return tmp%range;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& start, const uint64_t& end ) {
return urand64(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
int rand() {
return static_cast<int>(urand()/2);
}
KOKKOS_INLINE_FUNCTION
int rand(const int& range) {
const int max_val = (MAX_RAND/range)*range;
int tmp = rand();
while(tmp>=max_val)
tmp = rand();
return tmp%range;
}
KOKKOS_INLINE_FUNCTION
int rand(const int& start, const int& end ) {
return rand(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64() {
return static_cast<int64_t>(urand64()/2);
}
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& range) {
const int64_t max_val = (MAX_RAND64/range)*range;
int64_t tmp = rand64();
while(tmp>=max_val)
tmp = rand64();
return tmp%range;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& start, const int64_t& end ) {
return rand64(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
float frand() {
return 1.0f * urand64()/MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
float frand(const float& range) {
return range * urand64()/MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end ) {
return frand(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
double drand() {
return 1.0 * urand64()/MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
double drand(const double& range) {
return range * urand64()/MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end ) {
return drand(end-start)+start;
}
//Marsaglia polar method for drawing a standard normally distributed random number
KOKKOS_INLINE_FUNCTION
double normal() {
double S = 2.0;
double U;
while(S>=1.0 || S==0.0) {
// Draw (U,V) uniformly from (-1,1) so the pair covers the whole unit disk;
// S==0 is excluded to avoid log(0) below.
U = 2.0*drand()-1.0;
const double V = 2.0*drand()-1.0;
S = U*U+V*V;
}
return U*sqrt(-2.0*log(S)/S);
}
KOKKOS_INLINE_FUNCTION
double normal(const double& mean, const double& std_dev=1.0) {
return mean + normal()*std_dev;
}
};
template<class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift1024_Pool {
private:
typedef View<int*,DeviceType> int_view_type;
typedef View<uint64_t*[16],DeviceType> state_data_type;
int_view_type locks_;
state_data_type state_;
int_view_type p_;
int num_states_;
public:
typedef Random_XorShift1024<DeviceType> generator_type;
typedef DeviceType device_type;
Random_XorShift1024_Pool() {
num_states_ = 0;
}
inline
Random_XorShift1024_Pool(uint64_t seed){
num_states_ = 0;
init(seed,DeviceType::max_hardware_threads());
}
Random_XorShift1024_Pool(const Random_XorShift1024_Pool& src):
locks_(src.locks_),
state_(src.state_),
p_(src.p_),
num_states_(src.num_states_)
{}
Random_XorShift1024_Pool operator = (const Random_XorShift1024_Pool& src) {
locks_ = src.locks_;
state_ = src.state_;
p_ = src.p_;
num_states_ = src.num_states_;
return *this;
}
inline
void init(uint64_t seed, int num_states) {
num_states_ = num_states;
locks_ = int_view_type("Kokkos::Random_XorShift1024::locks",num_states_);
state_ = state_data_type("Kokkos::Random_XorShift1024::state",num_states_);
p_ = int_view_type("Kokkos::Random_XorShift1024::p",num_states_);
typename state_data_type::HostMirror h_state = create_mirror_view(state_);
typename int_view_type::HostMirror h_lock = create_mirror_view(locks_);
typename int_view_type::HostMirror h_p = create_mirror_view(p_);
// Execute on the HostMirror's default execution space.
Random_XorShift64<typename state_data_type::HostMirror::execution_space> gen(seed,0);
for(int i = 0; i < 17; i++)
gen.rand();
for(int i = 0; i < num_states_; i++) {
for(int j = 0; j < 16 ; j++) {
int n1 = gen.rand();
int n2 = gen.rand();
int n3 = gen.rand();
int n4 = gen.rand();
h_state(i,j) = (((static_cast<uint64_t>(n1)) & 0xffff)<<00) |
(((static_cast<uint64_t>(n2)) & 0xffff)<<16) |
(((static_cast<uint64_t>(n3)) & 0xffff)<<32) |
(((static_cast<uint64_t>(n4)) & 0xffff)<<48);
}
h_p(i) = 0;
h_lock(i) = 0;
}
deep_copy(state_,h_state);
deep_copy(locks_,h_lock);
deep_copy(p_,h_p);
}
KOKKOS_INLINE_FUNCTION
Random_XorShift1024<DeviceType> get_state() const {
const int i = DeviceType::hardware_thread_id();
return Random_XorShift1024<DeviceType>(&state_(i,0),p_(i),i);
}
KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift1024<DeviceType>& state) const {
for(int i = 0; i<16; i++)
state_(state.state_idx_,i) = state.state_[i];
p_(state.state_idx_) = state.p_;
}
};
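// Random_XorShift1024 trades a larger per-thread state (16 x 64-bit words)
// for a much longer period than Random_XorShift64; the pool protocol is the
// same, e.g. (seed arbitrary):
//
// Random_XorShift1024_Pool<> pool(54321);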
#if defined(KOKKOS_HAVE_CUDA) && defined(__CUDACC__)
template<>
class Random_XorShift1024<Kokkos::Cuda> {
private:
int p_;
const int state_idx_;
uint64_t* state_;
friend class Random_XorShift1024_Pool<Kokkos::Cuda>;
public:
typedef Kokkos::Cuda device_type;
enum {MAX_URAND = 0xffffffffU};
enum {MAX_URAND64 = 0xffffffffffffffffULL-1};
enum {MAX_RAND = static_cast<int>(0xffffffffU/2)};
enum {MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL/2-1)};
KOKKOS_INLINE_FUNCTION
Random_XorShift1024 (uint64_t* state, int p, int state_idx = 0):
p_(p),state_idx_(state_idx),state_(state){
}
KOKKOS_INLINE_FUNCTION
uint32_t urand() {
uint64_t state_0 = state_[ p_ ];
uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ];
state_1 ^= state_1 << 31;
state_1 ^= state_1 >> 11;
state_0 ^= state_0 >> 30;
uint64_t tmp = ( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL;
tmp = tmp>>16;
return static_cast<uint32_t>(tmp&MAX_URAND);
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64() {
uint64_t state_0 = state_[ p_ ];
uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ];
state_1 ^= state_1 << 31;
state_1 ^= state_1 >> 11;
state_0 ^= state_0 >> 30;
return (( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL) - 1;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& range) {
const uint32_t max_val = (MAX_URAND/range)*range;
uint32_t tmp = urand();
while(tmp>=max_val)
tmp = urand();
return tmp%range;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& start, const uint32_t& end ) {
return urand(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& range) {
const uint64_t max_val = (MAX_URAND64/range)*range;
uint64_t tmp = urand64();
while(tmp>=max_val)
tmp = urand64();
return tmp%range;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& start, const uint64_t& end ) {
return urand64(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
int rand() {
return static_cast<int>(urand()/2);
}
KOKKOS_INLINE_FUNCTION
int rand(const int& range) {
const int max_val = (MAX_RAND/range)*range;
int tmp = rand();
while(tmp>=max_val)
tmp = rand();
return tmp%range;
}
KOKKOS_INLINE_FUNCTION
int rand(const int& start, const int& end ) {
return rand(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64() {
return static_cast<int64_t>(urand64()/2);
}
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& range) {
const int64_t max_val = (MAX_RAND64/range)*range;
int64_t tmp = rand64();
while(tmp>=max_val)
tmp = rand64();
return tmp%range;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& start, const int64_t& end ) {
return rand64(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
float frand() {
return 1.0f * urand64()/MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
float frand(const float& range) {
return range * urand64()/MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end ) {
return frand(end-start)+start;
}
KOKKOS_INLINE_FUNCTION
double drand() {
return 1.0 * urand64()/MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
double drand(const double& range) {
return range * urand64()/MAX_URAND64;
}
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end ) {
return drand(end-start)+start;
}
//Marsaglia polar method for drawing a standard normally distributed random number
KOKKOS_INLINE_FUNCTION
double normal() {
double S = 2.0;
double U;
while(S>=1.0 || S==0.0) {
// Draw (U,V) uniformly from (-1,1) so the pair covers the whole unit disk;
// S==0 is excluded to avoid log(0) below.
U = 2.0*drand()-1.0;
const double V = 2.0*drand()-1.0;
S = U*U+V*V;
}
return U*sqrt(-2.0*log(S)/S);
}
KOKKOS_INLINE_FUNCTION
double normal(const double& mean, const double& std_dev=1.0) {
return mean + normal()*std_dev;
}
};
template<>
inline
Random_XorShift64_Pool<Kokkos::Cuda>::Random_XorShift64_Pool(uint64_t seed) {
num_states_ = 0;
init(seed,4*32768);
}
template<>
KOKKOS_INLINE_FUNCTION
Random_XorShift64<Kokkos::Cuda> Random_XorShift64_Pool<Kokkos::Cuda>::get_state() const {
#ifdef __CUDA_ARCH__
const int i_offset = (threadIdx.x*blockDim.y + threadIdx.y)*blockDim.z+threadIdx.z;
int i = (((blockIdx.x*gridDim.y+blockIdx.y)*gridDim.z + blockIdx.z) *
blockDim.x*blockDim.y*blockDim.z + i_offset)%num_states_;
while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) {
i+=blockDim.x*blockDim.y*blockDim.z;
if(i>=num_states_) {i = i_offset;}
}
return Random_XorShift64<Kokkos::Cuda>(state_(i),i);
#else
return Random_XorShift64<Kokkos::Cuda>(state_(0),0);
#endif
}
template<>
KOKKOS_INLINE_FUNCTION
void Random_XorShift64_Pool<Kokkos::Cuda>::free_state(const Random_XorShift64<Kokkos::Cuda> &state) const {
#ifdef __CUDA_ARCH__
state_(state.state_idx_) = state.state_;
locks_(state.state_idx_) = 0;
return;
#endif
}
template<>
inline
Random_XorShift1024_Pool<Kokkos::Cuda>::Random_XorShift1024_Pool(uint64_t seed) {
num_states_ = 0;
init(seed,4*32768);
}
template<>
KOKKOS_INLINE_FUNCTION
Random_XorShift1024<Kokkos::Cuda> Random_XorShift1024_Pool<Kokkos::Cuda>::get_state() const {
#ifdef __CUDA_ARCH__
const int i_offset = (threadIdx.x*blockDim.y + threadIdx.y)*blockDim.z+threadIdx.z;
int i = (((blockIdx.x*gridDim.y+blockIdx.y)*gridDim.z + blockIdx.z) *
blockDim.x*blockDim.y*blockDim.z + i_offset)%num_states_;
while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) {
i+=blockDim.x*blockDim.y*blockDim.z;
if(i>=num_states_) {i = i_offset;}
}
return Random_XorShift1024<Kokkos::Cuda>(&state_(i,0), p_(i), i);
#else
return Random_XorShift1024<Kokkos::Cuda>(&state_(0,0), p_(0), 0);
#endif
}
template<>
KOKKOS_INLINE_FUNCTION
void Random_XorShift1024_Pool<Kokkos::Cuda>::free_state(const Random_XorShift1024<Kokkos::Cuda> &state) const {
#ifdef __CUDA_ARCH__
for(int i=0; i<16; i++)
state_(state.state_idx_,i) = state.state_[i];
locks_(state.state_idx_) = 0;
return;
#endif
}
#endif
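// On CUDA the pools are oversubscribed: get_state() hashes the thread and
// block coordinates to a state index and spins on an atomic compare-exchange
// until it owns that state's lock; free_state() writes the state back and
// releases the lock. A get_state() that is not paired with a free_state()
// in the same kernel leaves the lock taken and can hang later acquisitions.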
template<class ViewType, class RandomPool, int loops, int rank>
struct fill_random_functor_range;
template<class ViewType, class RandomPool, int loops, int rank>
struct fill_random_functor_begin_end;
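// Each functor below makes one thread fill up to `loops` consecutive rows of
// the view, so a single get_state()/free_state() pair is amortized over many
// draws instead of being paid once per element.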
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_range<ViewType,RandomPool,loops,1>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_):
a(a_),rand_pool(rand_pool_),range(range_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0())
a(idx) = Rand::draw(gen,range);
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_range<ViewType,RandomPool,loops,2>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_):
a(a_),rand_pool(rand_pool_),range(range_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
a(idx,k) = Rand::draw(gen,range);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_range<ViewType,RandomPool,loops,3>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_):
a(a_),rand_pool(rand_pool_),range(range_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
for(unsigned int l=0;l<a.dimension_2();l++)
a(idx,k,l) = Rand::draw(gen,range);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_range<ViewType,RandomPool,loops,4>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_):
a(a_),rand_pool(rand_pool_),range(range_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
for(unsigned int l=0;l<a.dimension_2();l++)
for(unsigned int m=0;m<a.dimension_3();m++)
a(idx,k,l,m) = Rand::draw(gen,range);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_range<ViewType,RandomPool,loops,5>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_):
a(a_),rand_pool(rand_pool_),range(range_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
for(unsigned int l=0;l<a.dimension_2();l++)
for(unsigned int m=0;m<a.dimension_3();m++)
for(unsigned int n=0;n<a.dimension_4();n++)
a(idx,k,l,m,n) = Rand::draw(gen,range);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_range<ViewType,RandomPool,loops,6>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_):
a(a_),rand_pool(rand_pool_),range(range_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
for(unsigned int l=0;l<a.dimension_2();l++)
for(unsigned int m=0;m<a.dimension_3();m++)
for(unsigned int n=0;n<a.dimension_4();n++)
for(unsigned int o=0;o<a.dimension_5();o++)
a(idx,k,l,m,n,o) = Rand::draw(gen,range);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_range<ViewType,RandomPool,loops,7>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_):
a(a_),rand_pool(rand_pool_),range(range_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
for(unsigned int l=0;l<a.dimension_2();l++)
for(unsigned int m=0;m<a.dimension_3();m++)
for(unsigned int n=0;n<a.dimension_4();n++)
for(unsigned int o=0;o<a.dimension_5();o++)
for(unsigned int p=0;p<a.dimension_6();p++)
a(idx,k,l,m,n,o,p) = Rand::draw(gen,range);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_range<ViewType,RandomPool,loops,8>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type range;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_range(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type range_):
a(a_),rand_pool(rand_pool_),range(range_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
for(unsigned int l=0;l<a.dimension_2();l++)
for(unsigned int m=0;m<a.dimension_3();m++)
for(unsigned int n=0;n<a.dimension_4();n++)
for(unsigned int o=0;o<a.dimension_5();o++)
for(unsigned int p=0;p<a.dimension_6();p++)
for(unsigned int q=0;q<a.dimension_7();q++)
a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,range);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_begin_end<ViewType,RandomPool,loops,1>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin,end;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_):
a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0())
a(idx) = Rand::draw(gen,begin,end);
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_begin_end<ViewType,RandomPool,loops,2>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin,end;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_):
a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
a(idx,k) = Rand::draw(gen,begin,end);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_begin_end<ViewType,RandomPool,loops,3>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin,end;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_):
a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
for(unsigned int l=0;l<a.dimension_2();l++)
a(idx,k,l) = Rand::draw(gen,begin,end);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_begin_end<ViewType,RandomPool,loops,4>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin,end;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_):
a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
for(unsigned int l=0;l<a.dimension_2();l++)
for(unsigned int m=0;m<a.dimension_3();m++)
a(idx,k,l,m) = Rand::draw(gen,begin,end);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_begin_end<ViewType,RandomPool,loops,5>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin,end;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_):
a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
for(unsigned int l=0;l<a.dimension_2();l++)
for(unsigned int m=0;m<a.dimension_3();m++)
for(unsigned int n=0;n<a.dimension_4();n++)
a(idx,k,l,m,n) = Rand::draw(gen,begin,end);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_begin_end<ViewType,RandomPool,loops,6>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin,end;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_):
a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
for(unsigned int l=0;l<a.dimension_2();l++)
for(unsigned int m=0;m<a.dimension_3();m++)
for(unsigned int n=0;n<a.dimension_4();n++)
for(unsigned int o=0;o<a.dimension_5();o++)
a(idx,k,l,m,n,o) = Rand::draw(gen,begin,end);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_begin_end<ViewType,RandomPool,loops,7>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin,end;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_):
a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
for(unsigned int l=0;l<a.dimension_2();l++)
for(unsigned int m=0;m<a.dimension_3();m++)
for(unsigned int n=0;n<a.dimension_4();n++)
for(unsigned int o=0;o<a.dimension_5();o++)
for(unsigned int p=0;p<a.dimension_6();p++)
a(idx,k,l,m,n,o,p) = Rand::draw(gen,begin,end);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool, int loops>
struct fill_random_functor_begin_end<ViewType,RandomPool,loops,8>{
typedef typename ViewType::execution_space execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin,end;
typedef rand<typename RandomPool::generator_type, typename ViewType::non_const_value_type> Rand;
fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_,
typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_):
a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {}
KOKKOS_INLINE_FUNCTION
void operator() (unsigned int i) const {
typename RandomPool::generator_type gen = rand_pool.get_state();
for(unsigned int j=0;j<loops;j++) {
const uint64_t idx = i*loops+j;
if(idx<a.dimension_0()) {
for(unsigned int k=0;k<a.dimension_1();k++)
for(unsigned int l=0;l<a.dimension_2();l++)
for(unsigned int m=0;m<a.dimension_3();m++)
for(unsigned int n=0;n<a.dimension_4();n++)
for(unsigned int o=0;o<a.dimension_5();o++)
for(unsigned int p=0;p<a.dimension_6();p++)
for(unsigned int q=0;q<a.dimension_7();q++)
a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,begin,end);
}
}
rand_pool.free_state(gen);
}
};
template<class ViewType, class RandomPool>
void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) {
int64_t LDA = a.dimension_0();
if(LDA>0)
parallel_for((LDA+127)/128,fill_random_functor_range<ViewType,RandomPool,128,ViewType::Rank>(a,g,range));
}
template<class ViewType, class RandomPool>
void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin,typename ViewType::const_value_type end ) {
int64_t LDA = a.dimension_0();
if(LDA>0)
parallel_for((LDA+127)/128,fill_random_functor_begin_end<ViewType,RandomPool,128,ViewType::Rank>(a,g,begin,end));
}
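// Usage sketch (illustrative only; view name, size and seed are arbitrary):
//
// Kokkos::View<double*> v("v",1000);
// Kokkos::Random_XorShift64_Pool<> pool(12345);
// Kokkos::fill_random(v,pool,1.0); // uniform in [0,1.0)
// Kokkos::fill_random(v,pool,-1.0,1.0); // uniform in [-1.0,1.0)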
}
#endif
diff --git a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt
new file mode 100644
index 000000000..654104b44
--- /dev/null
+++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt
@@ -0,0 +1,38 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
+
+SET(SOURCES
+ UnitTestMain.cpp
+ TestCuda.cpp
+ )
+
+SET(LIBRARIES kokkoscore)
+
+IF(Kokkos_ENABLE_OpenMP)
+ LIST( APPEND SOURCES
+ TestOpenMP.cpp
+ )
+ENDIF()
+
+IF(Kokkos_ENABLE_Serial)
+ LIST( APPEND SOURCES
+ TestSerial.cpp
+ )
+ENDIF()
+
+IF(Kokkos_ENABLE_Pthread)
+ LIST( APPEND SOURCES
+ TestThreads.cpp
+ )
+ENDIF()
+
+TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ UnitTest
+ SOURCES ${SOURCES}
+ COMM serial mpi
+ NUM_MPI_PROCS 1
+ FAIL_REGULAR_EXPRESSION " FAILED "
+ TESTONLYLIBS kokkos_gtest
+ )
diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile
index 5fc94ac0f..5d79364c5 100644
--- a/lib/kokkos/algorithms/unit_tests/Makefile
+++ b/lib/kokkos/algorithms/unit_tests/Makefile
@@ -1,92 +1,92 @@
KOKKOS_PATH = ../..
GTEST_PATH = ../../TPL/gtest
vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests
default: build_all
echo "End Build"
-
+
include $(KOKKOS_PATH)/Makefile.kokkos
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
- CXX = nvcc_wrapper
+ CXX = $(NVCC_WRAPPER)
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests
TEST_TARGETS =
TARGETS =
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_Cuda
TEST_TARGETS += test-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_Threads
TEST_TARGETS += test-threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_OpenMP
TEST_TARGETS += test-openmp
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_Serial
TEST_TARGETS += test-serial
endif
KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Cuda
KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Threads
-
+
KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_OpenMP
KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Serial
test-cuda: KokkosAlgorithms_UnitTest_Cuda
./KokkosAlgorithms_UnitTest_Cuda
test-threads: KokkosAlgorithms_UnitTest_Threads
./KokkosAlgorithms_UnitTest_Threads
test-openmp: KokkosAlgorithms_UnitTest_OpenMP
./KokkosAlgorithms_UnitTest_OpenMP
test-serial: KokkosAlgorithms_UnitTest_Serial
./KokkosAlgorithms_UnitTest_Serial
-
+
build_all: $(TARGETS)
test: $(TEST_TARGETS)
-
+
clean: kokkos-clean
rm -f *.o $(TARGETS)
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
diff --git a/lib/kokkos/cmake/Dependencies.cmake b/lib/kokkos/cmake/Dependencies.cmake
new file mode 100644
index 000000000..8c51eab4d
--- /dev/null
+++ b/lib/kokkos/cmake/Dependencies.cmake
@@ -0,0 +1,10 @@
+TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
+ SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS
+ #SubPackageName Directory Class Req/Opt
+ #
+ # New Kokkos subpackages:
+ Core core PS REQUIRED
+ Containers containers PS OPTIONAL
+ Algorithms algorithms PS OPTIONAL
+ Example example EX OPTIONAL
+ )
diff --git a/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake b/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake
new file mode 100644
index 000000000..aad1e2bad
--- /dev/null
+++ b/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake
@@ -0,0 +1,75 @@
+# @HEADER
+# ************************************************************************
+#
+# Trilinos: An Object-Oriented Solver Framework
+# Copyright (2001) Sandia Corporation
+#
+#
+# Copyright (2001) Sandia Corporation. Under the terms of Contract
+# DE-AC04-94AL85000, there is a non-exclusive license for use of this
+# work by or on behalf of the U.S. Government. Export of this program
+# may require a license from the United States Government.
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the Corporation nor the names of the
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# NOTICE: The United States Government is granted for itself and others
+# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
+# license in this data to reproduce, prepare derivative works, and
+# perform publicly and display publicly. Beginning five (5) years from
+# July 25, 2001, the United States Government is granted for itself and
+# others acting on its behalf a paid-up, nonexclusive, irrevocable
+# worldwide license in this data to reproduce, prepare derivative works,
+# distribute copies to the public, perform publicly and display
+# publicly, and to permit others to do so.
+#
+# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
+# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
+# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
+# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
+# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
+# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
+#
+# ************************************************************************
+# @HEADER
+
+# Check for CUDA support
+
+IF (NOT TPL_ENABLE_CUDA OR CUDA_VERSION VERSION_LESS "4.1")
+ MESSAGE(FATAL_ERROR "\nCUSPARSE: did not find acceptable version of CUDA libraries (4.1 or greater)")
+ELSE()
+ IF(CMAKE_VERSION VERSION_LESS "2.8.8")
+ # FindCUDA before CMake 2.8.8 does not find the cusparse library; therefore, we must find it ourselves.
+ find_library(CUDA_cusparse_LIBRARY
+ cusparse
+ HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib
+ )
+ IF(CUDA_cusparse_LIBRARY STREQUAL "CUDA_cusparse_LIBRARY-NOTFOUND")
+ MESSAGE(FATAL_ERROR "\nCUSPARSE: could not find cusparse library.")
+ ENDIF()
+ ENDIF(CMAKE_VERSION VERSION_LESS "2.8.8")
+ GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
+ GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
+ GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
+ENDIF()
+
diff --git a/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake b/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake
new file mode 100644
index 000000000..715b3e9bd
--- /dev/null
+++ b/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake
@@ -0,0 +1,71 @@
+# @HEADER
+# ************************************************************************
+#
+# Trilinos: An Object-Oriented Solver Framework
+# Copyright (2001) Sandia Corporation
+#
+#
+# Copyright (2001) Sandia Corporation. Under the terms of Contract
+# DE-AC04-94AL85000, there is a non-exclusive license for use of this
+# work by or on behalf of the U.S. Government. Export of this program
+# may require a license from the United States Government.
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the Corporation nor the names of the
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# NOTICE: The United States Government is granted for itself and others
+# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
+# license in this data to reproduce, prepare derivative works, and
+# perform publicly and display publicly. Beginning five (5) years from
+# July 25, 2001, the United States Government is granted for itself and
+# others acting on its behalf a paid-up, nonexclusive, irrevocable
+# worldwide license in this data to reproduce, prepare derivative works,
+# distribute copies to the public, perform publicly and display
+# publicly, and to permit others to do so.
+#
+# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
+# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
+# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
+# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
+# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
+# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
+#
+# ************************************************************************
+# @HEADER
+
+
+#-----------------------------------------------------------------------------
+# Hardware locality detection and control library.
+#
+# Acquisition information:
+# Date checked: November 2011
+# Checked by: H. Carter Edwards <hcedwar AT sandia.gov>
+# Source: http://www.open-mpi.org/projects/hwloc/
+# Version: 1.3
+#
+
+TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC
+ REQUIRED_HEADERS hwloc.h
+ REQUIRED_LIBS_NAMES "hwloc"
+ )
+
diff --git a/lib/kokkos/cmake/tpls/FindTPLPthread.cmake b/lib/kokkos/cmake/tpls/FindTPLPthread.cmake
new file mode 100644
index 000000000..fc401d754
--- /dev/null
+++ b/lib/kokkos/cmake/tpls/FindTPLPthread.cmake
@@ -0,0 +1,82 @@
+# @HEADER
+# ************************************************************************
+#
+# Trilinos: An Object-Oriented Solver Framework
+# Copyright (2001) Sandia Corporation
+#
+#
+# Copyright (2001) Sandia Corporation. Under the terms of Contract
+# DE-AC04-94AL85000, there is a non-exclusive license for use of this
+# work by or on behalf of the U.S. Government. Export of this program
+# may require a license from the United States Government.
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the Corporation nor the names of the
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# NOTICE: The United States Government is granted for itself and others
+# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
+# license in this data to reproduce, prepare derivative works, and
+# perform publicly and display publicly. Beginning five (5) years from
+# July 25, 2001, the United States Government is granted for itself and
+# others acting on its behalf a paid-up, nonexclusive, irrevocable
+# worldwide license in this data to reproduce, prepare derivative works,
+# distribute copies to the public, perform publicly and display
+# publicly, and to permit others to do so.
+#
+# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
+# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
+# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
+# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
+# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
+# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
+#
+# ************************************************************************
+# @HEADER
+
+
+SET(USE_THREADS FALSE)
+
+IF(NOT TPL_Pthread_INCLUDE_DIRS AND NOT TPL_Pthread_LIBRARY_DIRS AND NOT TPL_Pthread_LIBRARIES)
+ # Use CMake's Thread finder since it is a bit smarter in determining
+ # whether pthreads is already built into the compiler and doesn't need
+ # a library to link.
+ FIND_PACKAGE(Threads)
+ # If Threads found a copy of pthreads, make sure this is one of the cases the
+ # TriBITS TPL system cannot handle (pthreads built into the compiler).
+ IF(Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
+ IF(CMAKE_THREAD_LIBS_INIT STREQUAL "" OR CMAKE_THREAD_LIBS_INIT STREQUAL "-pthread")
+ SET(USE_THREADS TRUE)
+ ENDIF()
+ ENDIF()
+ENDIF()
+
+IF(USE_THREADS)
+ SET(TPL_Pthread_INCLUDE_DIRS "")
+ SET(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
+ SET(TPL_Pthread_LIBRARY_DIRS "")
+ELSE()
+ TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread
+ REQUIRED_HEADERS pthread.h
+ REQUIRED_LIBS_NAMES pthread
+ )
+ENDIF()
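+# Example (hypothetical paths): the FIND_PACKAGE(Threads) shortcut above is
+# skipped whenever any of the TPL_Pthread_* variables are pre-set, e.g. on the
+# cmake command line:
+#   cmake -DTPL_Pthread_LIBRARIES=/usr/lib/libpthread.so ...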
diff --git a/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake b/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake
new file mode 100644
index 000000000..994b72b20
--- /dev/null
+++ b/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake
@@ -0,0 +1,70 @@
+# @HEADER
+# ************************************************************************
+#
+# Trilinos: An Object-Oriented Solver Framework
+# Copyright (2001) Sandia Corporation
+#
+#
+# Copyright (2001) Sandia Corporation. Under the terms of Contract
+# DE-AC04-94AL85000, there is a non-exclusive license for use of this
+# work by or on behalf of the U.S. Government. Export of this program
+# may require a license from the United States Government.
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the Corporation nor the names of the
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# NOTICE: The United States Government is granted for itself and others
+# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
+# license in this data to reproduce, prepare derivative works, and
+# perform publicly and display publicly. Beginning five (5) years from
+# July 25, 2001, the United States Government is granted for itself and
+# others acting on its behalf a paid-up, nonexclusive, irrevocable
+# worldwide license in this data to reproduce, prepare derivative works,
+# distribute copies to the public, perform publicly and display
+# publicly, and to permit others to do so.
+#
+# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
+# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
+# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
+# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
+# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
+# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
+#
+# ************************************************************************
+# @HEADER
+
+
+#-----------------------------------------------------------------------------
+# Qthreads lightweight user-level threading library.
+#
+# Acquisition information:
+# Date checked: July 2014
+# Checked by: H. Carter Edwards <hcedwar AT sandia.gov>
+# Source: https://code.google.com/p/qthreads
+#
+
+TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD
+ REQUIRED_HEADERS qthread.h
+ REQUIRED_LIBS_NAMES "qthread"
+ )
+
diff --git a/lib/kokkos/config/nvcc_wrapper b/lib/kokkos/config/nvcc_wrapper
index 63e0ef50a..058911929 100755
--- a/lib/kokkos/config/nvcc_wrapper
+++ b/lib/kokkos/config/nvcc_wrapper
@@ -1,185 +1,257 @@
#!/bin/bash
#
# This shell script (nvcc_wrapper) wraps both the host compiler and
# NVCC, if you are building Trilinos with CUDA enabled. The script
# remedies some differences between the interface of NVCC and that of
# the host compiler, in particular for linking. It also means that
# Trilinos doesn't need separate .cu files; it can just use .cpp
# files.
#
# Hopefully, at some point, NVIDIA may fix NVCC so as to make this
# script obsolete. For now, this script exists and if you want to
# build Trilinos with CUDA enabled, you must use this script as your
# compiler.
# Default settings: change these according to your machine. For
# example, you may have two different wrappers with either icpc
# or g++ as their back-end compiler. The defaults can be overridden
# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
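# Example (hypothetical invocation): use this script wherever the compiler
# would otherwise be called, e.g.
#   nvcc_wrapper -O3 -arch=sm_35 -ccbin g++ -c foo.cpp -o foo.o
# or point a build system's CXX variable at nvcc_wrapper.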
default_arch="sm_35"
#default_arch="sm_50"
#
# The default C++ compiler.
#
-default_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
-#default_compiler="icpc"
-#default_compiler="/usr/local/gcc/4.8.3/bin/g++"
-#default_compiler="/usr/local/gcc/4.9.1/bin/g++"
+host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
+#host_compiler="icpc"
+#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
+#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
#
# Internal variables
#
+
+# C++ files
cpp_files=""
+
+# Host compiler arguments
xcompiler_args=""
-cuda_arg=""
+
+# Cuda (NVCC) only arguments
+cuda_args=""
+
+# Arguments for both NVCC and Host compiler
+shared_args=""
+
+# Linker arguments
xlinker_args=""
+
+# Object files passable to NVCC
object_files=""
+
+# Link objects for the host linker only
object_files_xlinker=""
-first_host_option=1
+
+# Did the user set the architecture?
arch_set=0
+
+# Did the user override the host compiler?
ccbin_set=0
-nvcc_error_code=0
+
+# Error code of the compilation
+error_code=0
+
+# Do a dry run without actually compiling
dry_run=0
+
+# Skip NVCC compilation and use host compiler directly
+host_only=0
+
+# Enable workaround for CUDA 6.5 for pragma ident
replace_pragma_ident=0
+# Mark first host compiler argument
+first_xcompiler_arg=1
+
+temp_dir=${TMPDIR:-/tmp}
+
#echo "Arguments: $# $@"
while [ $# -gt 0 ]
do
case $1 in
#show the executed command
- --show)
+ --show|--nvcc-wrapper-show)
dry_run=1
;;
+ #run host compilation only
+ --host-only)
+ host_only=1
+ ;;
#replace '#pragma ident' with '#ident'; this is needed to compile OpenMPI due to a configure script bug and the non-standardized behaviour of pragma with macros
--replace-pragma-ident)
replace_pragma_ident=1
;;
#handle source files to be compiled as cuda files
*.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
cpp_files="$cpp_files $1"
;;
+ #Handle shared args (valid for both nvcc and the host compiler)
+ -O*|-D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
+ shared_args="$shared_args $1"
+ ;;
+ #Handle shared args that have an argument
+ -o)
+ shared_args="$shared_args $1 $2"
+ shift
+ ;;
#Handle known nvcc args
- -O*|-D*|-gencode*|-c|-I*|-L*|-l*|-g|--help|--version|--dryrun|--verbose|--keep-dir|-E|-M|-G|--relocatable-device-code*|-shared|-lineinfo|-expt-extended-lambda|--resource-usage)
+ -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage)
cuda_args="$cuda_args $1"
;;
+ #Handle known nvcc args that have an argument
+ -rdc|-maxrregcount|--default-stream)
+ cuda_args="$cuda_args $1 $2"
+ shift
+ ;;
#Handle c++11 setting
--std=c++11|-std=c++11)
- cuda_args="$cuda_args $1"
+ shared_args="$shared_args $1"
;;
#strip off -std=c++98 due to nvcc warnings; Tribits will pass both -std=c++11 and -std=c++98
-std=c++98|--std=c++98)
;;
- #Handle known nvcc args that have an argument
- -o|-rdc|-maxrregcount|--default-stream)
- cuda_args="$cuda_args $1 $2"
- shift
- ;;
#strip off -pedantic because it produces endless warnings about #LINE added by the preprocessor
-pedantic|-Wpedantic|-ansi)
;;
#strip -Xcompiler because we add it
-Xcompiler)
;;
#strip of "-x cu" because we add that
-x)
if [[ $2 != "cu" ]]; then
- xcompiler_args="$xcompiler_args,-x,$2"
+ if [ $first_xcompiler_arg -eq 1 ]; then
+ xcompiler_args="-x,$2"
+ first_xcompiler_arg=0
+ else
+ xcompiler_args="$xcompiler_args,-x,$2"
+ fi
fi
shift
;;
#Handle -ccbin (if it's not set we can set it to a default value)
-ccbin)
cuda_args="$cuda_args $1 $2"
ccbin_set=1
+ host_compiler=$2
shift
;;
#Handle -arch argument (if it's not set, use a default)
-arch*)
cuda_args="$cuda_args $1"
arch_set=1
;;
#Handle -Xcudafe argument
-Xcudafe)
cuda_args="$cuda_args -Xcudafe $2"
shift
;;
#Handle args that should be sent to the linker
-Wl*)
xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
+ host_linker_args="$host_linker_args ${1:4:${#1}}"
;;
#Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
*.a|*.so|*.o|*.obj)
object_files="$object_files $1"
object_files_xlinker="$object_files_xlinker -Xlinker $1"
;;
- #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
+ #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
*.so.*|*.dylib)
- object_files_xlinker="$object_files_xlinker -Xlinker $1"
object_files="$object_files -Xlinker $1"
+ object_files_xlinker="$object_files_xlinker -Xlinker $1"
;;
#All other args are sent to the host compiler
*)
- if [ $first_host_option -eq 0 ]; then
+ if [ $first_xcompiler_arg -eq 1 ]; then
+ xcompiler_args=$1
+ first_xcompiler_arg=0
+ else
xcompiler_args="$xcompiler_args,$1"
- else
- xcompiler_args="-Xcompiler $1"
- first_host_option=0
fi
;;
esac
shift
done
#Add default host compiler if necessary
if [ $ccbin_set -ne 1 ]; then
- cuda_args="$cuda_args -ccbin $default_compiler"
+ cuda_args="$cuda_args -ccbin $host_compiler"
fi
#Add architecture command
if [ $arch_set -ne 1 ]; then
cuda_args="$cuda_args -arch=$default_arch"
fi
#Compose compilation command
-command="nvcc $cuda_args $xlinker_args $xcompiler_args"
+nvcc_command="nvcc $cuda_args $shared_args $xlinker_args"
+if [ $first_xcompiler_arg -eq 0 ]; then
+ nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
+fi
+
+#Compose host only command
+host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args"
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
if [ $replace_pragma_ident -eq 1 ]; then
cpp_files2=""
for file in $cpp_files
do
var=`grep pragma ${file} | grep ident | grep "#"`
if [ "${#var}" -gt 0 ]
then
- sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > /tmp/nvcc_wrapper_tmp_$file
- cpp_files2="$cpp_files2 /tmp/nvcc_wrapper_tmp_$file"
+ sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file
+ cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file"
else
cpp_files2="$cpp_files2 $file"
fi
done
cpp_files=$cpp_files2
- echo $cpp_files
+ #echo $cpp_files
+fi
+
+if [ "$cpp_files" ]; then
+ nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files"
+else
+ nvcc_command="$nvcc_command $object_files"
fi
if [ "$cpp_files" ]; then
- command="$command $object_files_xlinker -x cu $cpp_files"
+ host_command="$host_command $object_files $cpp_files"
else
- command="$command $object_files"
+ host_command="$host_command $object_files"
fi
#Print command for dryrun
if [ $dry_run -eq 1 ]; then
- echo $command
+ if [ $host_only -eq 1 ]; then
+ echo $host_command
+ else
+ echo $nvcc_command
+ fi
exit 0
fi
#Run compilation command
-$command
-nvcc_error_code=$?
+if [ $host_only -eq 1 ]; then
+ $host_command
+else
+ $nvcc_command
+fi
+error_code=$?
#Report error code
-exit $nvcc_error_code
+exit $error_code
diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia
index 7d52039be..659f14066 100755
--- a/lib/kokkos/config/test_all_sandia
+++ b/lib/kokkos/config/test_all_sandia
@@ -1,305 +1,437 @@
#!/bin/bash -e
#
# Global config
#
set -o pipefail
-COMPILER_ROOT="/home/projects/x86-64"
-
GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CUDA_WARNING_FLAGS=""
-# Format: (compiler module-list build-list exe-name warning-flag)
-COMPILERS=("gcc/4.7.2 gcc/4.7.2/base,hwloc/1.10.0/host/gnu/4.7.2 $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
- "gcc/4.8.4 gcc/4.9.2/base,hwloc/1.10.0/host/gnu/4.9.2 $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
- "gcc/4.9.2 gcc/4.9.2/base,hwloc/1.10.0/host/gnu/4.9.2 $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
- "gcc/5.1.0 gcc/5.1.0/base,hwloc/1.10.0/host/gnu/5.1.0 $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
- "intel/14.0.4 intel/14.0.4/base,hwloc/1.10.0/host/gnu/4.7.2 $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
- "intel/15.0.2 intel/15.0.2/base,hwloc/1.10.0/host/gnu/4.7.2 $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
- "clang/3.5.2 clang/3.5.2/base $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
- "clang/3.6.1 clang/3.6.1/base $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
- "cuda/6.5.14 cuda/6.5.14,nvcc-wrapper/gnu,gcc/4.7.2/base $CUDA_BUILD_LIST nvcc_wrapper $CUDA_WARNING_FLAGS"
- "cuda/7.0.28 cuda/7.0.18,nvcc-wrapper/gnu,gcc/4.7.2/base $CUDA_BUILD_LIST nvcc_wrapper $CUDA_WARNING_FLAGS"
- )
+BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>/base,hwloc/1.10.1/<COMPILER_NAME>/<COMPILER_VERSION>/base"
+CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.7.2/base"
export OMP_NUM_THREADS=4
-export SEMS_MODULE_ROOT=/projects/modulefiles
-module use /home/projects/modulefiles
-module use /projects/modulefiles/rhel6-x86_64/sems/compiler
+declare -i NUM_RESULTS_TO_KEEP=7
+
+RESULT_ROOT_PREFIX=TestAll
+
+source /projects/modulefiles/utils/sems-modules-init.sh
+source /projects/modulefiles/utils/kokkos-modules-init.sh
SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
#
# Handle arguments
#
DEBUG=False
ARGS=""
CUSTOM_BUILD_LIST=""
DRYRUN=False
+BUILD_ONLY=False
+declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3
+TEST_SCRIPT=False
while [[ $# > 0 ]]
do
key="$1"
case $key in
--kokkos-path*)
KOKKOS_PATH="${key#*=}"
;;
--build-list*)
CUSTOM_BUILD_LIST="${key#*=}"
;;
--debug*)
DEBUG=True
;;
+--build-only*)
+BUILD_ONLY=True
+;;
+--test-script*)
+TEST_SCRIPT=True
+;;
+--num*)
+NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}"
+;;
--dry-run*)
DRYRUN=True
;;
--help)
echo "test_all_sandia <ARGS> <OPTIONS>:"
echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory"
echo " Defaults to root repo containing this script"
echo "--debug: Run tests in debug. Defaults to False"
+echo "--test-script: Test this script, not Kokkos"
+echo "--num=N: Number of jobs to run in parallel "
echo "--dry-run: Just print what would be executed"
+echo "--build-only: Just do builds, don't run anything"
echo "--build-list=BUILD,BUILD,BUILD..."
echo " Provide a comma-separated list of builds instead of running all builds"
echo " Valid items:"
echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial"
echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial"
echo ""
echo "ARGS: list of expressions matching compilers to test"
+echo " supported compilers"
+echo " gcc/4.7.2"
+echo " gcc/4.8.4"
+echo " gcc/4.9.2"
+echo " gcc/5.1.0"
+echo " intel/14.0.4"
+echo " intel/15.0.2"
+echo " clang/3.5.2"
+echo " clang/3.6.1"
+echo " cuda/6.5.14"
+echo " cuda/7.0.28"
+echo " cuda/7.5.18"
echo ""
echo "Examples:"
echo " Run all tests"
echo " % test_all_sandia"
echo ""
echo " Run all gcc tests"
echo " % test_all_sandia gcc"
echo ""
echo " Run all gcc/4.7.2 and all intel tests"
echo " % test_all_sandia gcc/4.7.2 intel"
echo ""
echo " Run all tests in debug"
echo " % test_all_sandia --debug"
echo ""
echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds"
echo " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial"
+echo ""
+echo "If you want to kill the tests, do:"
+echo " hit ctrl-z"
+echo " % kill -9 %1"
echo
exit 0
;;
*)
# args, just append
ARGS="$ARGS $1"
;;
esac
shift
done
-
# set kokkos path
if [ -z "$KOKKOS_PATH" ]; then
KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
else
# Ensure KOKKOS_PATH is abs path
KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
fi
# set build type
if [ "$DEBUG" = "True" ]; then
BUILD_TYPE=debug
else
BUILD_TYPE=release
fi
# If no args provided, do all compilers
if [ -z "$ARGS" ]; then
ARGS='?'
fi
+# Format: (compiler module-list build-list exe-name warning-flag)
+COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+ "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+ "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+ "gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+ "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+ "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+ "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+ "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+ "cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+ "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+ "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+ )
+
# Process args to figure out which compilers to test
COMPILERS_TO_TEST=""
for ARG in $ARGS; do
for COMPILER_DATA in "${COMPILERS[@]}"; do
- arr=($COMPILER_DATA)
- COMPILER=${arr[0]}
+ ARR=($COMPILER_DATA)
+ COMPILER=${ARR[0]}
if [[ "$COMPILER" = $ARG* ]]; then
if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then
COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER"
else
echo "Tried to add $COMPILER twice"
fi
fi
done
done
#
# Functions
#
+# get_compiler_name <COMPILER>
+get_compiler_name() {
+ echo $1 | cut -d/ -f1
+}
+
+# get_compiler_version <COMPILER>
+get_compiler_version() {
+ echo $1 | cut -d/ -f2
+}
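+
+# e.g. get_compiler_name gcc/4.7.2 -> "gcc", get_compiler_version gcc/4.7.2 -> "4.7.2";
+# these values fill the <COMPILER_NAME>/<COMPILER_VERSION> placeholders in the module lists above.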
+
# Do not call directly
get_compiler_data() {
- compiler=$1
- item=$2
+ local compiler=$1
+ local item=$2
+ local compiler_name=$(get_compiler_name $compiler)
+ local compiler_vers=$(get_compiler_version $compiler)
+ local compiler_data
for compiler_data in "${COMPILERS[@]}" ; do
- arr=($compiler_data)
+ local arr=($compiler_data)
if [ "$compiler" = "${arr[0]}" ]; then
- echo "${arr[$item]}" | tr , ' '
+ echo "${arr[$item]}" | tr , ' ' | sed -e "s/<COMPILER_NAME>/$compiler_name/g" -e "s/<COMPILER_VERSION>/$compiler_vers/g"
return 0
fi
done
# Not found
echo "Unreconized compiler $compiler" >&2
exit 1
}
#
# For all getters, usage: <GETTER> <COMPILER>
#
get_compiler_modules() {
get_compiler_data $1 1
}
get_compiler_build_list() {
get_compiler_data $1 2
}
get_compiler_exe_name() {
get_compiler_data $1 3
}
get_compiler_warning_flags() {
get_compiler_data $1 4
}
run_cmd() {
echo "RUNNING: $*"
if [ "$DRYRUN" != "True" ]; then
- eval "$*"
+ eval "$* 2>&1"
fi
}
+# report_and_log_test_result <SUCCESS> <DESC> <PHASE>
report_and_log_test_result() {
- if [ "$1" = "0" ]; then
- echo "PASSED $2"
- TEST_RESULTS="${TEST_RESULTS}\nPASSED $2"
+ # Use sane var names
+ local success=$1; local desc=$2; local phase=$3;
+
+ if [ "$success" = "0" ]; then
+ echo " PASSED $desc"
+ touch $PASSED_DIR/$desc
else
- echo "FAILED $2" >&2
- TEST_RESULTS="${TEST_RESULTS}\nFAILED $2 ($3)"
- NUM_FAILED+=1
+ echo " FAILED $desc" >&2
+ echo $phase > $FAILED_DIR/$desc
+ cat ${desc}.${phase}.log
fi
}
+setup_env() {
+ local compiler=$1
+ local compiler_modules=$(get_compiler_modules $compiler)
+
+ module purge
+
+ local mod
+ for mod in $compiler_modules; do
+ module load $mod 2>&1
+ # It is ridiculously hard to check for the success of a loaded
+ # module. Module does not return error codes and piping to grep
+ # causes module to run in a subshell.
+ module list 2>&1 | grep "$mod" >& /dev/null || return 1
+ done
+
+ return 0
+}
+
# single_build_and_test <COMPILER> <BUILD> <BUILD_TYPE>
single_build_and_test() {
# Use sane var names
local compiler=$1; local build=$2; local build_type=$3;
- cd $ROOT_DIR/$compiler
+ # set up env
+ mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type"
+ cd $ROOT_DIR/$compiler/"${build}-$build_type"
+ local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g')
+ setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
+ # Set up flags
local compiler_warning_flags=$(get_compiler_warning_flags $compiler)
local compiler_exe=$(get_compiler_exe_name $compiler)
if [[ "$build_type" = hwloc* ]]; then
- local extra_args="--with-hwloc=$HWLOC_ROOT"
+ local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info)))
fi
if [[ "$build_type" = *debug* ]]; then
local extra_args="$extra_args --debug"
local cxxflags="-g $compiler_warning_flags"
else
local cxxflags="-O3 $compiler_warning_flags"
fi
- local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g')
- echo " Doing build: $desc"
-
- mkdir "${build}-$build_type"
- cd "${build}-$build_type"
+ if [[ "$compiler" == cuda* ]]; then
+ cxxflags="--keep --keep-dir=$(pwd) $cxxflags"
+ export TMPDIR=$(pwd)
+ fi
# cxxflags="-DKOKKOS_USING_EXPERIMENTAL_VIEW $cxxflags"
- run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" \"$extra_args\" 2>&1 | tee ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
- run_cmd make build-test 2>&1 | tee ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
- run_cmd make test 2>&1 | tee ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; }
+ echo " Starting job $desc"
+
+ if [ "$TEST_SCRIPT" = "True" ]; then
+    local rand=$(( 1 + RANDOM % 10 ))
+ sleep $rand
+ if [ $rand -gt 5 ]; then
+ run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; }
+ fi
+ else
+ run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
+ run_cmd make build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
+ if [[ "$BUILD_ONLY" == False ]]; then
+ run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; }
+ fi
+ fi
+
report_and_log_test_result 0 $desc
+
return 0
}
-setup_env() {
+# wait_for_jobs <NUM-JOBS>
+wait_for_jobs() {
+ local -i max_jobs=$1
+ local -i num_active_jobs=$(jobs | wc -l)
+ while [ $num_active_jobs -ge $max_jobs ]
+ do
+ sleep 1
+ num_active_jobs=$(jobs | wc -l)
+ jobs >& /dev/null
+ done
+}
+
+# run_in_background <COMPILER> <BUILD> <BUILD_TYPE>
+run_in_background() {
local compiler=$1
- local compiler_modules=$(get_compiler_modules $compiler)
- module purge
+ local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL
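+  # build-only mode allows more concurrent jobs; otherwise cuda jobs run one at a time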
+ if [[ "$BUILD_ONLY" == True ]]; then
+ num_jobs=8
+ else
+ if [[ "$compiler" == cuda* ]]; then
+ num_jobs=1
+ fi
+ fi
+ wait_for_jobs $num_jobs
- for mod in $compiler_modules; do
- module load $mod
- # It is ridiculously hard to check for the success of a loaded
- # module. Module does not return error codes and piping to grep
- # causes module to run in a subshell.
- module list 2>&1 | grep "$mod"
- done
+ single_build_and_test $* &
}
# build_and_test_all <COMPILER>
build_and_test_all() {
# Get compiler data
local compiler=$1
if [ -z "$CUSTOM_BUILD_LIST" ]; then
local compiler_build_list=$(get_compiler_build_list $compiler)
else
local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ')
fi
- # set up env
- cd $ROOT_DIR
- mkdir -p $compiler
- setup_env $compiler
-
# do builds
+ local build
for build in $compiler_build_list
do
- single_build_and_test $compiler $build $BUILD_TYPE
+ run_in_background $compiler $build $BUILD_TYPE
# If not cuda, do a hwloc test too
if [[ "$compiler" != cuda* ]]; then
- single_build_and_test $compiler $build "hwloc-$BUILD_TYPE"
+ run_in_background $compiler $build "hwloc-$BUILD_TYPE"
fi
done
return 0
}
+get_test_root_dir() {
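+  # Prune old results: timestamped TestAll_* names sort chronologically, so
+  # only the newest $NUM_RESULTS_TO_KEEP existing result dirs are kept.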
+ local existing_results=$(find . -maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort)
+ local -i num_existing_results=$(echo $existing_results | tr ' ' '\n' | wc -l)
+ local -i num_to_delete=${num_existing_results}-${NUM_RESULTS_TO_KEEP}
+
+ if [ $num_to_delete -gt 0 ]; then
+ /bin/rm -rf $(echo $existing_results | tr ' ' '\n' | head -n $num_to_delete)
+ fi
+
+ echo $(pwd)/${RESULT_ROOT_PREFIX}_$(date +"%Y-%m-%d_%H.%M.%S")
+}
+
+wait_summarize_and_exit() {
+ wait_for_jobs 1
+
+ echo "#######################################################"
+ echo "PASSED TESTS"
+ echo "#######################################################"
+
+ \ls -1 $PASSED_DIR | sort
+
+ echo "#######################################################"
+ echo "FAILED TESTS"
+ echo "#######################################################"
+
+ local failed_test
+ local -i rv=0
+ for failed_test in $(\ls -1 $FAILED_DIR)
+ do
+ echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)"
+ rv=$rv+1
+ done
+
+ exit $rv
+}
+
#
# Main
#
-/bin/rm -rf TestAll
-mkdir TestAll
-cd TestAll
+ROOT_DIR=$(get_test_root_dir)
+mkdir -p $ROOT_DIR
+cd $ROOT_DIR
-TEST_RESULTS=""
-declare -i NUM_FAILED=0
-ROOT_DIR=$(pwd)
+PASSED_DIR=$ROOT_DIR/results/passed
+FAILED_DIR=$ROOT_DIR/results/failed
+mkdir -p $PASSED_DIR
+mkdir -p $FAILED_DIR
+
+echo "Going to test compilers: " $COMPILERS_TO_TEST
for COMPILER in $COMPILERS_TO_TEST; do
echo "Testing compiler $COMPILER"
build_and_test_all $COMPILER
done
-echo "#######################################################"
-echo "RESULT SUMMARY"
-echo "#######################################################"
-echo -e $TEST_RESULTS
-
-exit $NUM_FAILED
+wait_summarize_and_exit
diff --git a/lib/kokkos/config/testing_scripts/obj_size_opt_check b/lib/kokkos/config/testing_scripts/obj_size_opt_check
new file mode 100755
index 000000000..47c84d1a9
--- /dev/null
+++ b/lib/kokkos/config/testing_scripts/obj_size_opt_check
@@ -0,0 +1,287 @@
+#! /usr/bin/env python
+
+"""
+Compute the size at which the current compiler will start to
+significantly scale back optimization.
+
+The CPP file being modified will need the following tags.
+// JGF_DUPLICATE_BEGIN - Put before start of function to duplicate
+// JGF_DUPLICATE_END - Put after end of function to duplicate
+// JGF_DUPE: function_name(args); - Put anywhere where it's legal to
+put a function call but not in your timing section.
+
+The program will need to output the string:
+FOM: <number>
+This will represent the program's performance
+"""
+
+import argparse, sys, os, doctest, subprocess, re, time
+
+VERBOSE = False
+
+###############################################################################
+def parse_command_line(args, description):
+###############################################################################
+ parser = argparse.ArgumentParser(
+ usage="""\n%s <cppfile> <build-command> <run-command> [--verbose]
+OR
+%s --help
+OR
+%s --test
+
+\033[1mEXAMPLES:\033[0m
+ > %s foo.cpp 'make -j4' foo
+""" % ((os.path.basename(args[0]), ) * 4),
+
+description=description,
+
+formatter_class=argparse.ArgumentDefaultsHelpFormatter
+)
+
+ parser.add_argument("cppfile", help="Name of file to modify.")
+
+ parser.add_argument("buildcmd", help="Build command")
+
+ parser.add_argument("execmd", help="Run command")
+
+ parser.add_argument("-v", "--verbose", action="store_true",
+ help="Print extra information")
+
+ parser.add_argument("-s", "--start", type=int, default=1,
+ help="Starting number of dupes")
+
+ parser.add_argument("-e", "--end", type=int, default=1000,
+ help="Ending number of dupes")
+
+ parser.add_argument("-n", "--repeat", type=int, default=10,
+ help="Number of times to repeat an individial execution. Best value will be taken.")
+
+ parser.add_argument("-t", "--template", action="store_true",
+ help="Use templating instead of source copying to increase object size")
+
+ parser.add_argument("-c", "--csv", action="store_true",
+ help="Print results as CSV")
+
+ args = parser.parse_args(args[1:])
+
+ if (args.verbose):
+ global VERBOSE
+ VERBOSE = True
+
+ return args.cppfile, args.buildcmd, args.execmd, args.start, args.end, args.repeat, args.template, args.csv
+
+###############################################################################
+def verbose_print(msg, override=None):
+###############################################################################
+ if ( (VERBOSE and not override is False) or override):
+ print msg
+
+###############################################################################
+def error_print(msg):
+###############################################################################
+ print >> sys.stderr, msg
+
+###############################################################################
+def expect(condition, error_msg):
+###############################################################################
+ """
+ Similar to assert except doesn't generate an ugly stacktrace. Useful for
+ checking user error, not programming error.
+ """
+ if (not condition):
+ raise SystemExit("FAIL: %s" % error_msg)
+
+###############################################################################
+def run_cmd(cmd, ok_to_fail=False, input_str=None, from_dir=None, verbose=None,
+ arg_stdout=subprocess.PIPE, arg_stderr=subprocess.PIPE):
+###############################################################################
+ verbose_print("RUN: %s" % cmd, verbose)
+
+ if (input_str is not None):
+ stdin = subprocess.PIPE
+ else:
+ stdin = None
+
+ proc = subprocess.Popen(cmd,
+ shell=True,
+ stdout=arg_stdout,
+ stderr=arg_stderr,
+ stdin=stdin,
+ cwd=from_dir)
+ output, errput = proc.communicate(input_str)
+ output = output.strip() if output is not None else output
+ stat = proc.wait()
+
+ if (ok_to_fail):
+ return stat, output, errput
+ else:
+ if (arg_stderr is not None):
+ errput = errput if errput is not None else open(arg_stderr.name, "r").read()
+ expect(stat == 0, "Command: '%s' failed with error '%s'" % (cmd, errput))
+ else:
+ expect(stat == 0, "Command: '%s' failed. See terminal output" % cmd)
+ return output
+
+###############################################################################
+def build_and_run(source, cppfile, buildcmd, execmd, repeat):
+###############################################################################
+ open(cppfile, 'w').writelines(source)
+
+ run_cmd(buildcmd)
+
+ best = None
+ for i in xrange(repeat):
+ wait_for_quiet_machine()
+ output = run_cmd(execmd)
+
+ current = None
+ fom_regex = re.compile(r'^FOM: ([0-9.]+)$')
+ for line in output.splitlines():
+ m = fom_regex.match(line)
+ if (m is not None):
+ current = float(m.groups()[0])
+ break
+
+ expect(current is not None, "No lines in output matched FOM regex")
+
+ if (best is None or best < current):
+ best = current
+
+ return best
+
+###############################################################################
+def wait_for_quiet_machine():
+###############################################################################
+ while(True):
+ time.sleep(2)
+
+ # The first iteration of top gives garbage results
+ idle_pct_raw = run_cmd("top -bn2 | grep 'Cpu(s)' | tr ',' ' ' | tail -n 1 | awk '{print $5}'")
+
+ idle_pct_re = re.compile(r'^([0-9.]+)%id$')
+ m = idle_pct_re.match(idle_pct_raw)
+
+ expect(m is not None, "top not returning output in expected form")
+
+ idle_pct = float(m.groups()[0])
+ if (idle_pct < 95):
+ error_print("Machine is too busy, waiting for it to become free")
+ else:
+ break
+
+###############################################################################
+def add_n_dupes(curr_lines, num_dupes, template):
+###############################################################################
+ function_name = None
+ function_invocation = None
+ function_lines = []
+
+ function_re = re.compile(r'^.* (\w+) *[(]')
+ function_inv_re = re.compile(r'^.*JGF_DUPE: +(.+)$')
+
+ # Get function lines
+ record = False
+ definition_insertion_point = None
+ invocation_insertion_point = None
+ for idx, line in enumerate(curr_lines):
+ if ("JGF_DUPLICATE_BEGIN" in line):
+ record = True
+ m = function_re.match(curr_lines[idx+1])
+ expect(m is not None, "Could not find function in line '%s'" % curr_lines[idx+1])
+ function_name = m.groups()[0]
+
+ elif ("JGF_DUPLICATE_END" in line):
+ record = False
+ definition_insertion_point = idx + 1
+
+ elif (record):
+ function_lines.append(line)
+
+ elif ("JGF_DUPE" in line):
+ m = function_inv_re.match(line)
+ expect(m is not None, "Could not find function invocation example in line '%s'" % line)
+ function_invocation = m.groups()[0]
+ invocation_insertion_point = idx + 1
+
+ expect(function_name is not None, "Could not find name of dupe function")
+ expect(function_invocation is not None, "Could not find function invocation point")
+
+ expect(definition_insertion_point < invocation_insertion_point, "fix me")
+
+ dupe_func_defs = []
+ dupe_invocations = ["int jgf_rand = std::rand();\n", "if (false) {}\n"]
+
+ for i in xrange(num_dupes):
+ if (not template):
+ dupe_func = list(function_lines)
+ dupe_func[0] = dupe_func[0].replace(function_name, "%s%d" % (function_name, i))
+ dupe_func_defs.extend(dupe_func)
+
+ dupe_invocations.append("else if (jgf_rand == %d) " % i)
+ if (template):
+ dupe_call = function_invocation.replace(function_name, "%s<%d>" % (function_name, i)) + "\n"
+ else:
+ dupe_call = function_invocation.replace(function_name, "%s%d" % (function_name, i)) + "\n"
+ dupe_invocations.append(dupe_call)
+
+ curr_lines[invocation_insertion_point:invocation_insertion_point] = dupe_invocations
+ curr_lines[definition_insertion_point:definition_insertion_point] = dupe_func_defs
+
+###############################################################################
+def report(num_dupes, curr_lines, object_file, orig_fom, curr_fom, csv=False, is_first_report=False):
+###############################################################################
+ fom_change = (curr_fom - orig_fom) / orig_fom
+
+ if (csv):
+ if (is_first_report):
+ print "num_dupes, obj_byte_size, loc, fom, pct_diff"
+
+ print "%s, %s, %s, %s, %s" % (num_dupes, os.path.getsize(object_file), len(curr_lines), curr_fom, fom_change*100)
+ else:
+ print "========================================================"
+ print "For number of dupes:", num_dupes
+ print "Object file size (bytes):", os.path.getsize(object_file)
+ print "Lines of code:", len(curr_lines)
+ print "Field of merit:", curr_fom
+ print "Change pct:", fom_change*100
+
+###############################################################################
+def obj_size_opt_check(cppfile, buildcmd, execmd, start, end, repeat, template, csv=False):
+###############################################################################
+ orig_source_lines = open(cppfile, 'r').readlines()
+
+ backup_file = "%s.orig" % cppfile
+ object_file = "%s.o" % os.path.splitext(cppfile)[0]
+ os.rename(cppfile, backup_file)
+
+ orig_fom = build_and_run(orig_source_lines, cppfile, buildcmd, execmd, repeat)
+ report(0, orig_source_lines, object_file, orig_fom, orig_fom, csv=csv, is_first_report=True)
+
+ i = start
+ while (i < end):
+ curr_lines = list(orig_source_lines)
+ add_n_dupes(curr_lines, i, template)
+
+ curr_fom = build_and_run(curr_lines, cppfile, buildcmd, execmd, repeat)
+
+ report(i, curr_lines, object_file, orig_fom, curr_fom, csv=csv)
+
+ i *= 2 # make growth function configurable?
+
+ os.remove(cppfile)
+ os.rename(backup_file, cppfile)
+
+###############################################################################
+def _main_func(description):
+###############################################################################
+ if ("--test" in sys.argv):
+ test_results = doctest.testmod(verbose=True)
+ sys.exit(1 if test_results.failed > 0 else 0)
+
+ cppfile, buildcmd, execmd, start, end, repeat, template, csv = parse_command_line(sys.argv, description)
+
+ obj_size_opt_check(cppfile, buildcmd, execmd, start, end, repeat, template, csv)
+
+###############################################################################
+if (__name__ == "__main__"):
+ _main_func(__doc__)
diff --git a/lib/kokkos/containers/CMakeLists.txt b/lib/kokkos/containers/CMakeLists.txt
new file mode 100644
index 000000000..894935fa0
--- /dev/null
+++ b/lib/kokkos/containers/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+
+TRIBITS_SUBPACKAGE(Containers)
+
+ADD_SUBDIRECTORY(src)
+
+TRIBITS_ADD_TEST_DIRECTORIES(unit_tests)
+TRIBITS_ADD_TEST_DIRECTORIES(performance_tests)
+
+TRIBITS_SUBPACKAGE_POSTPROCESS()
diff --git a/lib/kokkos/containers/cmake/Dependencies.cmake b/lib/kokkos/containers/cmake/Dependencies.cmake
new file mode 100644
index 000000000..1d71d8af3
--- /dev/null
+++ b/lib/kokkos/containers/cmake/Dependencies.cmake
@@ -0,0 +1,5 @@
+TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
+ LIB_REQUIRED_PACKAGES KokkosCore
+ LIB_OPTIONAL_TPLS Pthread CUDA HWLOC
+ TEST_OPTIONAL_TPLS CUSPARSE
+ )
diff --git a/lib/kokkos/containers/cmake/KokkosContainers_config.h.in b/lib/kokkos/containers/cmake/KokkosContainers_config.h.in
new file mode 100644
index 000000000..d91fdda1e
--- /dev/null
+++ b/lib/kokkos/containers/cmake/KokkosContainers_config.h.in
@@ -0,0 +1,4 @@
+#ifndef KOKKOS_CONTAINERS_CONFIG_H
+#define KOKKOS_CONTAINERS_CONFIG_H
+
+#endif
diff --git a/lib/kokkos/containers/performance_tests/CMakeLists.txt b/lib/kokkos/containers/performance_tests/CMakeLists.txt
new file mode 100644
index 000000000..6b5780293
--- /dev/null
+++ b/lib/kokkos/containers/performance_tests/CMakeLists.txt
@@ -0,0 +1,26 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
+
+SET(SOURCES
+ TestMain.cpp
+ TestCuda.cpp
+ )
+
+IF(Kokkos_ENABLE_Pthread)
+ LIST( APPEND SOURCES TestThreads.cpp)
+ENDIF()
+
+IF(Kokkos_ENABLE_OpenMP)
+ LIST( APPEND SOURCES TestOpenMP.cpp)
+ENDIF()
+
+TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ PerformanceTest
+ SOURCES ${SOURCES}
+ COMM serial mpi
+ NUM_MPI_PROCS 1
+ FAIL_REGULAR_EXPRESSION " FAILED "
+ TESTONLYLIBS kokkos_gtest
+ )
diff --git a/lib/kokkos/containers/performance_tests/Makefile b/lib/kokkos/containers/performance_tests/Makefile
index 7ced94528..e7abaf44c 100644
--- a/lib/kokkos/containers/performance_tests/Makefile
+++ b/lib/kokkos/containers/performance_tests/Makefile
@@ -1,81 +1,81 @@
KOKKOS_PATH = ../..
GTEST_PATH = ../../TPL/gtest
vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests
default: build_all
echo "End Build"
-
+
include $(KOKKOS_PATH)/Makefile.kokkos
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
- CXX = nvcc_wrapper
+ CXX = $(NVCC_WRAPPER)
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests
TEST_TARGETS =
TARGETS =
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = TestCuda.o TestMain.o gtest-all.o
TARGETS += KokkosContainers_PerformanceTest_Cuda
TEST_TARGETS += test-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = TestThreads.o TestMain.o gtest-all.o
TARGETS += KokkosContainers_PerformanceTest_Threads
TEST_TARGETS += test-threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o TestMain.o gtest-all.o
TARGETS += KokkosContainers_PerformanceTest_OpenMP
TEST_TARGETS += test-openmp
endif
KokkosContainers_PerformanceTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Cuda
KokkosContainers_PerformanceTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Threads
-
+
KokkosContainers_PerformanceTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_OpenMP
test-cuda: KokkosContainers_PerformanceTest_Cuda
./KokkosContainers_PerformanceTest_Cuda
test-threads: KokkosContainers_PerformanceTest_Threads
./KokkosContainers_PerformanceTest_Threads
test-openmp: KokkosContainers_PerformanceTest_OpenMP
./KokkosContainers_PerformanceTest_OpenMP
-
+
build_all: $(TARGETS)
test: $(TEST_TARGETS)
-
+
clean: kokkos-clean
rm -f *.o $(TARGETS)
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
diff --git a/lib/kokkos/containers/src/CMakeLists.txt b/lib/kokkos/containers/src/CMakeLists.txt
new file mode 100644
index 000000000..da5a79153
--- /dev/null
+++ b/lib/kokkos/containers/src/CMakeLists.txt
@@ -0,0 +1,31 @@
+
+TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h)
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+#-----------------------------------------------------------------------------
+
+SET(HEADERS "")
+SET(SOURCES "")
+
+SET(HEADERS_IMPL "")
+
+FILE(GLOB HEADERS *.hpp)
+FILE(GLOB HEADERS_IMPL impl/*.hpp)
+FILE(GLOB SOURCES impl/*.cpp)
+
+SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
+
+INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)
+
+TRIBITS_ADD_LIBRARY(
+ kokkoscontainers
+ HEADERS ${HEADERS}
+ NOINSTALLHEADERS ${HEADERS_IMPL}
+ SOURCES ${SOURCES}
+ DEPLIBS
+ )
+
+#-----------------------------------------------------------------------------
+
diff --git a/lib/kokkos/containers/src/Kokkos_Bitset.hpp b/lib/kokkos/containers/src/Kokkos_Bitset.hpp
index b51b1c2b2..74da5f61b 100644
--- a/lib/kokkos/containers/src/Kokkos_Bitset.hpp
+++ b/lib/kokkos/containers/src/Kokkos_Bitset.hpp
@@ -1,437 +1,437 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BITSET_HPP
#define KOKKOS_BITSET_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_Functional.hpp>
#include <impl/Kokkos_Bitset_impl.hpp>
#include <stdexcept>
namespace Kokkos {
template <typename Device = Kokkos::DefaultExecutionSpace >
class Bitset;
template <typename Device = Kokkos::DefaultExecutionSpace >
class ConstBitset;
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
/// A thread-safe view to a bitset
template <typename Device>
class Bitset
{
public:
typedef Device execution_space;
typedef unsigned size_type;
enum { BIT_SCAN_REVERSE = 1u };
enum { MOVE_HINT_BACKWARD = 2u };
enum {
BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u
, BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE
, BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD
, BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD
};
private:
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
enum { block_mask = block_size-1u };
- enum { block_shift = static_cast<int>(Impl::power_of_two<block_size>::value) };
+ enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
public:
/// constructor
  /// arg_size := number of bits in the set
Bitset(unsigned arg_size = 0u)
: m_size(arg_size)
, m_last_block_mask(0u)
, m_blocks("Bitset", ((m_size + block_mask) >> block_shift) )
{
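    // set m_last_block_mask to cover the used bits of a partial final block (stays 0 if the last block is full)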
for (int i=0, end = static_cast<int>(m_size & block_mask); i < end; ++i) {
m_last_block_mask |= 1u << i;
}
}
/// assignment
Bitset<Device> & operator = (Bitset<Device> const & rhs)
{
this->m_size = rhs.m_size;
this->m_last_block_mask = rhs.m_last_block_mask;
this->m_blocks = rhs.m_blocks;
return *this;
}
/// copy constructor
Bitset( Bitset<Device> const & rhs)
: m_size( rhs.m_size )
, m_last_block_mask( rhs.m_last_block_mask )
, m_blocks( rhs.m_blocks )
{}
/// number of bits in the set
  /// can be called from the host or the device
KOKKOS_FORCEINLINE_FUNCTION
unsigned size() const
{ return m_size; }
/// number of bits which are set to 1
/// can only be called from the host
unsigned count() const
{
Impl::BitsetCount< Bitset<Device> > f(*this);
return f.apply();
}
/// set all bits to 1
/// can only be called from the host
void set()
{
Kokkos::deep_copy(m_blocks, ~0u );
if (m_last_block_mask) {
//clear the unused bits in the last block
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned));
}
}
/// set all bits to 0
/// can only be called from the host
void reset()
{
Kokkos::deep_copy(m_blocks, 0u );
}
/// set all bits to 0
/// can only be called from the host
void clear()
{
Kokkos::deep_copy(m_blocks, 0u );
}
/// set i'th bit to 1
/// can only be called from the device
KOKKOS_FORCEINLINE_FUNCTION
bool set( unsigned i ) const
{
if ( i < m_size ) {
unsigned * block_ptr = &m_blocks[ i >> block_shift ];
const unsigned mask = 1u << static_cast<int>( i & block_mask );
return !( atomic_fetch_or( block_ptr, mask ) & mask );
}
return false;
}
/// set i'th bit to 0
/// can only be called from the device
KOKKOS_FORCEINLINE_FUNCTION
bool reset( unsigned i ) const
{
if ( i < m_size ) {
unsigned * block_ptr = &m_blocks[ i >> block_shift ];
const unsigned mask = 1u << static_cast<int>( i & block_mask );
return atomic_fetch_and( block_ptr, ~mask ) & mask;
}
return false;
}
  /// return true if the i'th bit is set to 1
/// can only be called from the device
KOKKOS_FORCEINLINE_FUNCTION
bool test( unsigned i ) const
{
if ( i < m_size ) {
const unsigned block = volatile_load(&m_blocks[ i >> block_shift ]);
const unsigned mask = 1u << static_cast<int>( i & block_mask );
return block & mask;
}
return false;
}
/// used with find_any_set_near or find_any_unset_near functions
  /// returns the max number of times those functions should be called
/// when searching for an available bit
KOKKOS_FORCEINLINE_FUNCTION
unsigned max_hint() const
{
return m_blocks.dimension_0();
}
/// find a bit set to 1 near the hint
  /// returns a pair<bool, unsigned>: if result.first is true then result.second is the bit found;
  /// if result.first is false then result.second is a new hint
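  /// Illustrative retry loop (variable names hypothetical):
  /// \code
  /// unsigned hint = 0;
  /// for (unsigned attempt = 0; attempt < bits.max_hint(); ++attempt) {
  ///   Kokkos::pair<bool, unsigned> r = bits.find_any_set_near(hint);
  ///   if (r.first) break; // r.second is the index of a set bit
  ///   hint = r.second; // otherwise r.second is an updated hint
  /// }
  /// \endcode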
KOKKOS_INLINE_FUNCTION
Kokkos::pair<bool, unsigned> find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
{
const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0;
const unsigned offset = hint & block_mask;
unsigned block = volatile_load(&m_blocks[ block_idx ]);
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1)) ? block : block & m_last_block_mask ;
return find_any_helper(block_idx, offset, block, scan_direction);
}
/// find a bit set to 0 near the hint
  /// returns a pair<bool, unsigned>: if result.first is true then result.second is the bit found;
  /// if result.first is false then result.second is a new hint
KOKKOS_INLINE_FUNCTION
Kokkos::pair<bool, unsigned> find_any_unset_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
{
const unsigned block_idx = hint >> block_shift;
const unsigned offset = hint & block_mask;
unsigned block = volatile_load(&m_blocks[ block_idx ]);
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? ~block : ~block & m_last_block_mask ;
return find_any_helper(block_idx, offset, block, scan_direction);
}
private:
KOKKOS_FORCEINLINE_FUNCTION
Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx, unsigned offset, unsigned block, unsigned scan_direction) const
{
Kokkos::pair<bool, unsigned> result( block > 0u, 0);
if (!result.first) {
result.second = update_hint( block_idx, offset, scan_direction );
}
else {
result.second = scan_block( (block_idx << block_shift)
, offset
, block
, scan_direction
);
}
return result;
}
KOKKOS_FORCEINLINE_FUNCTION
unsigned scan_block(unsigned block_start, int offset, unsigned block, unsigned scan_direction ) const
{
offset = !(scan_direction & BIT_SCAN_REVERSE) ? offset : (offset + block_mask) & block_mask;
block = Impl::rotate_right(block, offset);
return ((( !(scan_direction & BIT_SCAN_REVERSE) ?
Impl::bit_scan_forward(block) :
Impl::bit_scan_reverse(block)
) + offset
) & block_mask
) + block_start;
}
KOKKOS_FORCEINLINE_FUNCTION
unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const
{
block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
block_idx = block_idx >= 0 ? block_idx : m_blocks.dimension_0() - 1;
block_idx = block_idx < static_cast<long long>(m_blocks.dimension_0()) ? block_idx : 0;
return static_cast<unsigned>(block_idx)*block_size + offset;
}
private:
unsigned m_size;
unsigned m_last_block_mask;
View< unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
private:
template <typename DDevice>
friend class Bitset;
template <typename DDevice>
friend class ConstBitset;
template <typename Bitset>
friend struct Impl::BitsetCount;
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
};
/// A thread-safe view to a const bitset,
/// i.e. bits can only be tested, not changed
template <typename Device>
class ConstBitset
{
public:
typedef Device execution_space;
typedef unsigned size_type;
private:
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
enum { block_mask = block_size -1u };
- enum { block_shift = static_cast<int>(Impl::power_of_two<block_size>::value) };
+ enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
public:
ConstBitset()
: m_size (0)
{}
ConstBitset(Bitset<Device> const& rhs)
: m_size(rhs.m_size)
, m_blocks(rhs.m_blocks)
{}
ConstBitset(ConstBitset<Device> const& rhs)
: m_size( rhs.m_size )
, m_blocks( rhs.m_blocks )
{}
ConstBitset<Device> & operator = (Bitset<Device> const & rhs)
{
this->m_size = rhs.m_size;
this->m_blocks = rhs.m_blocks;
return *this;
}
ConstBitset<Device> & operator = (ConstBitset<Device> const & rhs)
{
this->m_size = rhs.m_size;
this->m_blocks = rhs.m_blocks;
return *this;
}
KOKKOS_FORCEINLINE_FUNCTION
unsigned size() const
{
return m_size;
}
unsigned count() const
{
Impl::BitsetCount< ConstBitset<Device> > f(*this);
return f.apply();
}
KOKKOS_FORCEINLINE_FUNCTION
bool test( unsigned i ) const
{
if ( i < m_size ) {
const unsigned block = m_blocks[ i >> block_shift ];
const unsigned mask = 1u << static_cast<int>( i & block_mask );
return block & mask;
}
return false;
}
private:
unsigned m_size;
View< const unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
private:
template <typename DDevice>
friend class ConstBitset;
template <typename Bitset>
friend struct Impl::BitsetCount;
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
};
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src)
{
if (dst.size() != src.size()) {
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
}
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
}
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
{
if (dst.size() != src.size()) {
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
}
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
}
template <typename DstDevice, typename SrcDevice>
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
{
if (dst.size() != src.size()) {
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
}
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
}
} // namespace Kokkos
#endif //KOKKOS_BITSET_HPP
diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp
index 5e70731bd..68d033641 100644
--- a/lib/kokkos/containers/src/Kokkos_DualView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp
@@ -1,880 +1,971 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_DualView.hpp
/// \brief Declaration and definition of Kokkos::DualView.
///
/// This header file declares and defines Kokkos::DualView and its
/// related nonmember functions.
#ifndef KOKKOS_DUALVIEW_HPP
#define KOKKOS_DUALVIEW_HPP
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
namespace Kokkos {
/* \class DualView
* \brief Container to manage mirroring a Kokkos::View that lives
* in device memory with a Kokkos::View that lives in host memory.
*
* This class provides capabilities to manage data which exists in two
* memory spaces at the same time. It keeps views of the same layout
* on two memory spaces as well as modified flags for both
* allocations. Users are responsible for setting the modified flags
* manually if they change the data in either memory space, by calling
 * the modify() method templated on the device where they modified the
 * data.  Users may synchronize data by calling the sync() function,
* templated on the device towards which they want to synchronize
* (i.e., the target of the one-way copy operation).
*
* The DualView class also provides convenience methods such as
* realloc, resize and capacity which call the appropriate methods of
* the underlying Kokkos::View objects.
*
* The four template arguments are the same as those of Kokkos::View.
* (Please refer to that class' documentation for a detailed
* description.)
*
* \tparam DataType The type of the entries stored in the container.
*
* \tparam Layout The array's layout in memory.
*
* \tparam Device The Kokkos Device type. If its memory space is
* not the same as the host's memory space, then DualView will
* contain two separate Views: one in device memory, and one in
* host memory. Otherwise, DualView will only store one View.
*
* \tparam MemoryTraits (optional) The user's intended memory access
* behavior. Please see the documentation of Kokkos::View for
* examples. The default suffices for most users.
*/
template< class DataType ,
class Arg1Type = void ,
class Arg2Type = void ,
class Arg3Type = void>
class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
{
public:
//! \name Typedefs for device types and various Kokkos::View specializations.
//@{
typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
  //! The Kokkos Host Device type.
typedef typename traits::host_mirror_space host_mirror_space ;
//! The type of a Kokkos::View on the device.
typedef View< typename traits::data_type ,
- typename traits::array_layout ,
- typename traits::device_type ,
- typename traits::memory_traits > t_dev ;
+ Arg1Type ,
+ Arg2Type ,
+ Arg3Type > t_dev ;
/// \typedef t_host
/// \brief The type of a Kokkos::View host mirror of \c t_dev.
typedef typename t_dev::HostMirror t_host ;
  //! The type of a const Kokkos::View on the device.
typedef View< typename traits::const_data_type ,
- typename traits::array_layout ,
- typename traits::device_type ,
- typename traits::memory_traits > t_dev_const ;
+ Arg1Type ,
+ Arg2Type ,
+ Arg3Type > t_dev_const ;
/// \typedef t_host_const
/// \brief The type of a const View host mirror of \c t_dev_const.
typedef typename t_dev_const::HostMirror t_host_const;
//! The type of a const, random-access View on the device.
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
MemoryRandomAccess > t_dev_const_randomread ;
/// \typedef t_host_const_randomread
/// \brief The type of a const, random-access View host mirror of
/// \c t_dev_const_randomread.
typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread;
//! The type of an unmanaged View on the device.
typedef View< typename traits::data_type ,
typename traits::array_layout ,
typename traits::device_type ,
MemoryUnmanaged> t_dev_um;
//! The type of an unmanaged View host mirror of \c t_dev_um.
typedef View< typename t_host::data_type ,
typename t_host::array_layout ,
typename t_host::device_type ,
MemoryUnmanaged> t_host_um;
//! The type of a const unmanaged View on the device.
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
MemoryUnmanaged> t_dev_const_um;
//! The type of a const unmanaged View host mirror of \c t_dev_const_um.
typedef View<typename t_host::const_data_type,
typename t_host::array_layout,
typename t_host::device_type,
MemoryUnmanaged> t_host_const_um;
//@}
//! \name The two View instances.
//@{
t_dev d_view;
t_host h_view;
//@}
//! \name Counters to keep track of changes ("modified" flags)
//@{
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_device;
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_host;
//@}
//! \name Constructors
//@{
/// \brief Empty constructor.
///
/// Both device and host View objects are constructed using their
/// default constructors. The "modified" flags are both initialized
/// to "unmodified."
DualView () :
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
{}
/// \brief Constructor that allocates View objects on both host and device.
///
/// This constructor works like the analogous constructor of View.
/// The first argument is a string label, which is entirely for your
/// benefit. (Different DualView objects may have the same label if
/// you like.) The arguments that follow are the dimensions of the
/// View objects. For example, if the View has three dimensions,
/// the first three integer arguments will be nonzero, and you may
/// omit the integer arguments that follow.
DualView (const std::string& label,
const size_t n0 = 0,
const size_t n1 = 0,
const size_t n2 = 0,
const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
const size_t n6 = 0,
const size_t n7 = 0)
: d_view (label, n0, n1, n2, n3, n4, n5, n6, n7)
, h_view (create_mirror_view (d_view)) // without UVM, host View mirrors
, modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device"))
, modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
{}
//! Copy constructor (shallow copy)
template<class SS, class LS, class DS, class MS>
DualView (const DualView<SS,LS,DS,MS>& src) :
d_view (src.d_view),
h_view (src.h_view),
modified_device (src.modified_device),
modified_host (src.modified_host)
{}
+ //! Subview constructor
+ template< class SD, class S1 , class S2 , class S3
+ , class Arg0 , class ... Args >
+ DualView( const DualView<SD,S1,S2,S3> & src
+ , const Arg0 & arg0
+ , Args ... args
+ )
+ : d_view( Kokkos::subview( src.d_view , arg0 , args ... ) )
+ , h_view( Kokkos::subview( src.h_view , arg0 , args ... ) )
+ , modified_device (src.modified_device)
+ , modified_host (src.modified_host)
+ {}
+
/// \brief Create DualView from existing device and host View objects.
///
/// This constructor assumes that the device and host View objects
/// are synchronized. You, the caller, are responsible for making
/// sure this is the case before calling this constructor. After
/// this constructor returns, you may use DualView's sync() and
/// modify() methods to ensure synchronization of the View objects.
///
/// \param d_view_ Device View
/// \param h_view_ Host View (must have type t_host = t_dev::HostMirror)
DualView (const t_dev& d_view_, const t_host& h_view_) :
d_view (d_view_),
h_view (h_view_),
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
{
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ());
+#else
+ if ( d_view.rank != h_view.rank ||
+ d_view.dimension_0() != h_view.dimension_0() ||
+ d_view.dimension_1() != h_view.dimension_1() ||
+ d_view.dimension_2() != h_view.dimension_2() ||
+ d_view.dimension_3() != h_view.dimension_3() ||
+ d_view.dimension_4() != h_view.dimension_4() ||
+ d_view.dimension_5() != h_view.dimension_5() ||
+ d_view.dimension_6() != h_view.dimension_6() ||
+ d_view.dimension_7() != h_view.dimension_7() ||
+ d_view.stride_0() != h_view.stride_0() ||
+ d_view.stride_1() != h_view.stride_1() ||
+ d_view.stride_2() != h_view.stride_2() ||
+ d_view.stride_3() != h_view.stride_3() ||
+ d_view.stride_4() != h_view.stride_4() ||
+ d_view.stride_5() != h_view.stride_5() ||
+ d_view.stride_6() != h_view.stride_6() ||
+ d_view.stride_7() != h_view.stride_7() ||
+ d_view.span() != h_view.span() ) {
+ Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views");
+ }
+#endif
}
//@}
//! \name Methods for synchronizing, marking as modified, and getting Views.
//@{
/// \brief Return a View on a specific device \c Device.
///
/// Please don't be afraid of the if_c expression in the return
/// value's type. That just tells the method what the return type
/// should be: t_dev if the \c Device template parameter matches
/// this DualView's device type, else t_host.
///
/// For example, suppose you create a DualView on Cuda, like this:
/// \code
/// typedef Kokkos::DualView<float*, Kokkos::LayoutRight, Kokkos::Cuda> dual_view_type;
/// dual_view_type DV ("my dual view", 100);
/// \endcode
/// If you want to get the CUDA device View, do this:
/// \code
/// typename dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> ();
/// \endcode
/// and if you want to get the host mirror of that View, do this:
/// \code
/// typedef typename Kokkos::HostSpace::execution_space host_device_type;
/// typename dual_view_type::t_host hostView = DV.view<host_device_type> ();
/// \endcode
template< class Device >
KOKKOS_INLINE_FUNCTION
const typename Impl::if_c<
Impl::is_same<typename t_dev::memory_space,
typename Device::memory_space>::value,
t_dev,
t_host>::type& view () const
{
return Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value,
t_dev,
t_host >::select (d_view , h_view);
}
/// \brief Update data on device or host only if data in the other
/// space has been marked as modified.
///
/// If \c Device is the same as this DualView's device type, then
/// copy data from host to device. Otherwise, copy data from device
/// to host. In either case, only copy if the source of the copy
/// has been modified.
///
/// This is a one-way synchronization only. If the target of the
/// copy has been modified, this operation will discard those
/// modifications. It will also reset both device and host modified
/// flags.
///
/// \note This method doesn't know on its own whether you modified
/// the data in either View. You must manually mark modified data
/// as modified, by calling the modify() method with the
/// appropriate template parameter.
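///
/// For example (a minimal sketch, reusing the \c dual_view_type and
/// \c host_device_type typedefs from the example above):
/// \code
/// DV.modify<host_device_type> ();  // mark the host data as modified
/// // ... write to DV.h_view here ...
/// DV.sync<Kokkos::Cuda> ();        // copy host -> device
/// \endcode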
template<class Device>
void sync( const typename Impl::enable_if<
( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) ||
( Impl::is_same< Device , int>::value)
, int >::type& = 0)
{
const unsigned int dev =
Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value ,
unsigned int,
unsigned int>::select (1, 0);
if (dev) { // if Device is the same as DualView's device type
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
deep_copy (d_view, h_view);
modified_host() = modified_device() = 0;
}
} else { // otherwise, assume Device is DualView's host type
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
deep_copy (h_view, d_view);
modified_host() = modified_device() = 0;
}
}
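// If host and device share a memory space (e.g., UVM), fence both
// execution spaces so that pending writes are visible.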
if(Impl::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) {
t_dev::execution_space::fence();
t_host::execution_space::fence();
}
}
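/// Overload of sync() for DualViews with a const data type. Since
/// syncing would require writing to one of the Views, this overload
/// throws if a copy would actually be needed.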
template<class Device>
void sync ( const typename Impl::enable_if<
( ! Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) ||
( Impl::is_same< Device , int>::value)
, int >::type& = 0 )
{
const unsigned int dev =
Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value,
unsigned int,
unsigned int>::select (1, 0);
if (dev) { // if Device is the same as DualView's device type
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
}
} else { // otherwise, assume Device is DualView's host type
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
}
}
}
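/// \brief Whether the data on device \c Device are out of date,
///   i.e., whether sync<Device>() would actually copy.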
template<class Device>
bool need_sync()
{
const unsigned int dev =
Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value ,
unsigned int,
unsigned int>::select (1, 0);
if (dev) { // if Device is the same as DualView's device type
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
return true;
}
} else { // otherwise, assume Device is DualView's host type
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
return true;
}
}
return false;
}
/// \brief Mark data as modified on the given device \c Device.
///
/// If \c Device is the same as this DualView's device type, then
/// mark the device's data as modified. Otherwise, mark the host's
/// data as modified.
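///
/// For example (a sketch), after a device kernel has written to the
/// device View:
/// \code
/// DV.modify<Kokkos::Cuda> (); // device data are now newer than host data
/// \endcode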
template<class Device>
void modify () {
const unsigned int dev =
Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value,
unsigned int,
unsigned int>::select (1, 0);
if (dev) { // if Device is the same as DualView's device type
// Increment the device's modified count.
modified_device () = (modified_device () > modified_host () ?
modified_device () : modified_host ()) + 1;
} else { // otherwise, assume Device is DualView's host type
// Increment the host's modified count.
modified_host () = (modified_device () > modified_host () ?
modified_device () : modified_host ()) + 1;
}
}
//@}
//! \name Methods for reallocating or resizing the View objects.
//@{
/// \brief Reallocate both View objects.
///
/// This discards any existing contents of the objects, and resets
/// their modified flags. It does <i>not</i> copy the old contents
/// of either View into the new View objects.
void realloc( const size_t n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 ) {
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
h_view = create_mirror_view( d_view );
/* Reset dirty flags */
modified_device() = modified_host() = 0;
}
/// \brief Resize both views, copying old contents into new if necessary.
///
/// This method only copies the old contents into the new View
/// objects for the device which was last marked as modified.
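///
/// For example (a sketch):
/// \code
/// DV.realloc (200); // fresh allocation; old contents are discarded
/// DV.resize (300);  // grows, preserving contents on the side most
///                   // recently marked as modified
/// \endcode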
void resize( const size_t n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 ) {
if(modified_device() >= modified_host()) {
/* Resize on Device */
::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
h_view = create_mirror_view( d_view );
/* Mark Device copy as modified */
modified_device() = modified_device()+1;
} else {
/* Realloc the device View; its data are stale */
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
t_host temp_view = create_mirror_view( d_view );
/* Copy the old host data into the new host mirror */
Kokkos::deep_copy( temp_view , h_view );
h_view = temp_view;
/* Mark Host copy as modified */
modified_host() = modified_host()+1;
}
}
//@}
//! \name Methods for getting capacity, stride, or dimension(s).
//@{
//! The allocation size (same as Kokkos::View::capacity).
size_t capacity() const {
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
return d_view.span();
#else
return d_view.capacity();
#endif
}
//! Get stride(s) for each dimension.
template< typename iType>
void stride(iType* stride_) const {
d_view.stride(stride_);
}
/* \brief return size of dimension 0 */
size_t dimension_0() const {return d_view.dimension_0();}
/* \brief return size of dimension 1 */
size_t dimension_1() const {return d_view.dimension_1();}
/* \brief return size of dimension 2 */
size_t dimension_2() const {return d_view.dimension_2();}
/* \brief return size of dimension 3 */
size_t dimension_3() const {return d_view.dimension_3();}
/* \brief return size of dimension 4 */
size_t dimension_4() const {return d_view.dimension_4();}
/* \brief return size of dimension 5 */
size_t dimension_5() const {return d_view.dimension_5();}
/* \brief return size of dimension 6 */
size_t dimension_6() const {return d_view.dimension_6();}
/* \brief return size of dimension 7 */
size_t dimension_7() const {return d_view.dimension_7();}
//@}
};
} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+//
+// Partial specializations of Kokkos::subview() for DualView objects.
+//
+
+#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
+namespace Kokkos {
+namespace Impl {
+
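+// Compute the DualView type that subview() returns: ask ViewMapping
+// for the traits of the corresponding device subview, then rebuild a
+// DualView from those traits.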
+template< class D, class A1, class A2, class A3, class ... Args >
+struct DualViewSubview {
+
+ typedef typename Kokkos::Experimental::Impl::ViewMapping
+ < void
+ , Kokkos::ViewTraits< D, A1, A2, A3 >
+ , Args ...
+ >::traits_type dst_traits ;
+
+ typedef Kokkos::DualView
+ < typename dst_traits::data_type
+ , typename dst_traits::array_layout
+ , typename dst_traits::device_type
+ , typename dst_traits::memory_traits
+ > type ;
+};
+
+} /* namespace Impl */
+
+
+template< class D , class A1 , class A2 , class A3 , class ... Args >
+typename Impl::DualViewSubview<D,A1,A2,A3,Args...>::type
+subview( const DualView<D,A1,A2,A3> & src , Args ... args )
+{
+ return typename
+ Impl::DualViewSubview<D,A1,A2,A3,Args...>::type( src , args ... );
+}
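+
+// Example (a sketch): for a rank-2 DualView DV,
+//   auto col0 = Kokkos::subview( DV , Kokkos::ALL() , 0 );
+// yields a rank-1 DualView sharing DV's data and modified flags.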
+
+} /* namespace Kokkos */
+
+#else
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
//
// Partial specializations of Kokkos::subview() for DualView objects.
//
namespace Kokkos {
namespace Impl {
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
>
struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type >
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
{
private:
typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > SrcViewType ;
enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
// The source view rank must be equal to the input argument rank
// Once a void argument is encountered all subsequent arguments must be void.
enum { InputRank =
Impl::StaticAssert<( SrcViewType::rank ==
( V0 ? 0 : (
V1 ? 1 : (
V2 ? 2 : (
V3 ? 3 : (
V4 ? 4 : (
V5 ? 5 : (
V6 ? 6 : (
V7 ? 7 : 8 ))))))) ))
&&
( SrcViewType::rank ==
( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
>::value ? SrcViewType::rank : 0 };
enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
// Reverse
enum { R0_rev = 0 == InputRank ? 0u : (
1 == InputRank ? unsigned(R0) : (
2 == InputRank ? unsigned(R1) : (
3 == InputRank ? unsigned(R2) : (
4 == InputRank ? unsigned(R3) : (
5 == InputRank ? unsigned(R4) : (
6 == InputRank ? unsigned(R5) : (
7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
typedef typename SrcViewType::array_layout SrcViewLayout ;
// Choose array layout, attempting to preserve original layout if at all possible.
typedef typename Impl::if_c<
( // Same Layout IF
// OutputRank 0
( OutputRank == 0 )
||
// OutputRank 1 or 2, InputLayout Left, Interval 0
// because the first index has unit stride and at most the second index is strided.
( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
// because the last index has unit stride and at most the next-to-last index is strided.
( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
// Choose data type as a purely dynamic rank array to accommodate a runtime range.
typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
typename SrcViewType::value_type ********
>::type >::type >::type >::type >::type >::type >::type >::type OutputData ;
// Choose space.
// If the source view's template arg1 or arg2 is a space then use it,
// otherwise use the source view's execution space.
typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space
>::type >::type OutputSpace ;
public:
// If keeping the layout then match non-data type arguments
// else keep execution space and memory traits.
typedef typename
Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
, Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type >
, Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace
, typename SrcViewType::memory_traits >
>::type type ;
};
} /* namespace Impl */
} /* namespace Kokkos */
namespace Kokkos {
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , void , void , void
, void , void , void , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , void , void , void
, void , void , void , void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0);
sub_view.h_view = subview(src.h_view,arg0);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , void , void
, void , void , void , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , void , void
, void , void , void , void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1);
sub_view.h_view = subview(src.h_view,arg0,arg1);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 , class ArgType2 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , void
, void , void , void , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , void
, void , void , void , void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2);
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, void , void , void , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, void , void , void , void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3);
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
class ArgType4 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , void , void , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 ,
const ArgType4 & arg4 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , void , void ,void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4);
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
class ArgType4 , class ArgType5 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , void , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 ,
const ArgType4 & arg4 ,
const ArgType5 & arg5 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , void , void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5);
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
class ArgType4 , class ArgType5 , class ArgType6 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , ArgType6 , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 ,
const ArgType4 & arg4 ,
const ArgType5 & arg5 ,
const ArgType6 & arg6 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , ArgType6 , void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , ArgType6 , ArgType7
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 ,
const ArgType4 & arg4 ,
const ArgType5 & arg5 ,
const ArgType6 & arg6 ,
const ArgType7 & arg7 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , ArgType6 , ArgType7
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
+} // namespace Kokkos
+
+#endif /* defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
//
// Partial specialization of Kokkos::deep_copy() for DualView objects.
//
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
void
deep_copy (DualView<DT,DL,DD,DM> dst, // passed by value: Views copy shallowly, and modify() below is non-const
const DualView<ST,SL,SD,SM>& src )
{
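// Copy whichever side of src was most recently modified, then mark
// the corresponding side of dst as modified.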
if (src.modified_device () >= src.modified_host ()) {
deep_copy (dst.d_view, src.d_view);
dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
} else {
deep_copy (dst.h_view, src.h_view);
dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> ();
}
}
template< class ExecutionSpace ,
class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
void
deep_copy (const ExecutionSpace& exec ,
DualView<DT,DL,DD,DM> dst, // passed by value: Views copy shallowly, and modify() below is non-const
const DualView<ST,SL,SD,SM>& src )
{
if (src.modified_device () >= src.modified_host ()) {
deep_copy (exec, dst.d_view, src.d_view);
dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
} else {
deep_copy (exec, dst.h_view, src.h_view);
dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> ();
}
}
} // namespace Kokkos
#endif
diff --git a/lib/kokkos/containers/src/Kokkos_Vector.hpp b/lib/kokkos/containers/src/Kokkos_Vector.hpp
index db54b0c35..6a360e8d1 100644
--- a/lib/kokkos/containers/src/Kokkos_Vector.hpp
+++ b/lib/kokkos/containers/src/Kokkos_Vector.hpp
@@ -1,287 +1,283 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VECTOR_HPP
#define KOKKOS_VECTOR_HPP
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_DualView.hpp>
/* Drop-in replacement for std::vector, based on Kokkos::DualView.
* Most member functions work only on the host (they will not compile
* if called from a device kernel).
*/
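/* Example (a minimal sketch):
*
*   Kokkos::vector<double> v (10, 1.0); // 10 entries, each 1.0
*   v.push_back (2.0);                  // host-side modification
*   v.host_to_device ();                // mirror the data to the device
*/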
namespace Kokkos {
-template <typename Scalar, class Space = Kokkos::DefaultExecutionSpace >
-class vector : public DualView<Scalar*,LayoutLeft,Space> {
-public:
- typedef typename Space::memory_space memory_space;
- typedef typename Space::execution_space execution_space;
- typedef typename Kokkos::Device<execution_space,memory_space> device_type;
+template< class Scalar, class Arg1Type = void>
+class vector : public DualView<Scalar*,LayoutLeft,Arg1Type> {
typedef Scalar value_type;
typedef Scalar* pointer;
typedef const Scalar* const_pointer;
typedef Scalar* reference;
typedef const Scalar* const_reference;
typedef Scalar* iterator;
typedef const Scalar* const_iterator;
private:
size_t _size;
typedef size_t size_type;
float _extra_storage;
- typedef DualView<Scalar*,LayoutLeft,Space> DV;
+ typedef DualView<Scalar*,LayoutLeft,Arg1Type> DV;
public:
#ifdef KOKKOS_CUDA_USE_UVM
KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);};
KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);};
#else
inline Scalar& operator() (int i) const {return DV::h_view(i);};
inline Scalar& operator[] (int i) const {return DV::h_view(i);};
#endif
/* Member functions which behave like std::vector functions */
vector():DV() {
_size = 0;
_extra_storage = 1.1;
DV::modified_host() = 1;
};
- vector(int n, Scalar val=Scalar()):DualView<Scalar*,LayoutLeft,Space>("Vector",size_t(n*(1.1))) {
+ vector(int n, Scalar val=Scalar()):DualView<Scalar*,LayoutLeft,Arg1Type>("Vector",size_t(n*(1.1))) {
_size = n;
_extra_storage = 1.1;
DV::modified_host() = 1;
assign(n,val);
}
void resize(size_t n) {
if(n>=capacity())
DV::resize(size_t (n*_extra_storage));
_size = n;
}
void resize(size_t n, const Scalar& val) {
assign(n,val);
}
void assign (size_t n, const Scalar& val) {
/* Resize if necessary (behaviour of std::vector) */
if(n>capacity())
DV::resize(size_t (n*_extra_storage));
_size = n;
/* Assign value either on host or on device */
if( DV::modified_host() >= DV::modified_device() ) {
set_functor_host f(DV::h_view,val);
parallel_for(n,f);
DV::t_host::execution_space::fence();
DV::modified_host()++;
} else {
set_functor f(DV::d_view,val);
parallel_for(n,f);
DV::t_dev::execution_space::fence();
DV::modified_device()++;
}
}
void reserve(size_t n) {
DV::resize(size_t (n*_extra_storage));
}
void push_back(Scalar val) {
DV::modified_host()++;
if(_size == capacity()) {
size_t new_size = _size*_extra_storage;
if(new_size == _size) new_size++;
DV::resize(new_size);
}
DV::h_view(_size) = val;
_size++;
};
void pop_back() {
_size--;
};
void clear() {
_size = 0;
}
size_type size() const {return _size;};
size_type max_size() const {return 2000000000;}
size_type capacity() const {return DV::capacity();};
bool empty() const {return _size==0;};
iterator begin() const {return &DV::h_view(0);};
iterator end() const {return &DV::h_view(_size);};
/* std:: algorithms that ordinarily work with iterators are implemented here as member functions; lower_bound and find assume the entries are sorted in ascending order */
size_t
lower_bound (const size_t& start,
const size_t& theEnd,
const Scalar& comp_val) const
{
int lower = start; // FIXME (mfh 24 Apr 2014) narrowing conversion
int upper = _size > theEnd? theEnd : _size-1; // FIXME (mfh 24 Apr 2014) narrowing conversion
if (upper <= lower) {
return theEnd;
}
Scalar lower_val = DV::h_view(lower);
Scalar upper_val = DV::h_view(upper);
size_t idx = (upper+lower)/2;
Scalar val = DV::h_view(idx);
if(val>upper_val) return upper;
if(val<lower_val) return start;
while(upper>lower) {
if(comp_val>val) {
lower = ++idx;
} else {
upper = idx;
}
idx = (upper+lower)/2;
val = DV::h_view(idx);
}
return idx;
}
bool is_sorted() {
for(size_t i=0; i+1<_size; i++) {
if(DV::h_view(i)>DV::h_view(i+1)) return false;
}
return true;
}
iterator find(Scalar val) const {
if(_size == 0) return end();
int upper,lower,current;
current = _size/2;
upper = _size-1;
lower = 0;
if((val<DV::h_view(0)) || (val>DV::h_view(_size-1)) ) return end();
while(upper>lower)
{
if(val>DV::h_view(current)) lower = current+1;
else upper = current;
current = (upper+lower)/2;
}
if(val==DV::h_view(current)) return &DV::h_view(current);
else return end();
}
/* Additional functions for data management */
void device_to_host(){
deep_copy(DV::h_view,DV::d_view);
}
void host_to_device() const {
deep_copy(DV::d_view,DV::h_view);
}
void on_host() {
DV::modified_host() = DV::modified_device() + 1;
}
void on_device() {
DV::modified_device() = DV::modified_host() + 1;
}
void set_overallocation(float extra) {
_extra_storage = 1.0 + extra;
}
public:
struct set_functor {
typedef typename DV::t_dev::execution_space execution_space;
typename DV::t_dev _data;
Scalar _val;
set_functor(typename DV::t_dev data, Scalar val) :
_data(data),_val(val) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
_data(i) = _val;
}
};
struct set_functor_host {
typedef typename DV::t_host::execution_space execution_space;
typename DV::t_host _data;
Scalar _val;
set_functor_host(typename DV::t_host data, Scalar val) :
_data(data),_val(val) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
_data(i) = _val;
}
};
};
}
#endif
diff --git a/lib/kokkos/containers/unit_tests/CMakeLists.txt b/lib/kokkos/containers/unit_tests/CMakeLists.txt
new file mode 100644
index 000000000..7fff0f835
--- /dev/null
+++ b/lib/kokkos/containers/unit_tests/CMakeLists.txt
@@ -0,0 +1,40 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
+
+SET(SOURCES
+ UnitTestMain.cpp
+ TestCuda.cpp
+ )
+
+SET(LIBRARIES kokkoscore)
+
+IF(Kokkos_ENABLE_Pthread)
+ LIST( APPEND SOURCES
+ TestThreads.cpp
+ )
+ENDIF()
+
+IF(Kokkos_ENABLE_Serial)
+ LIST( APPEND SOURCES
+ TestSerial.cpp
+ )
+ENDIF()
+
+IF(Kokkos_ENABLE_OpenMP)
+ LIST( APPEND SOURCES
+ TestOpenMP.cpp
+ )
+ENDIF()
+
+
+TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ UnitTest
+ SOURCES ${SOURCES}
+ COMM serial mpi
+ NUM_MPI_PROCS 1
+ FAIL_REGULAR_EXPRESSION " FAILED "
+ TESTONLYLIBS kokkos_gtest
+ )
+
diff --git a/lib/kokkos/containers/unit_tests/Makefile b/lib/kokkos/containers/unit_tests/Makefile
index 176bfa906..48e3ff61d 100644
--- a/lib/kokkos/containers/unit_tests/Makefile
+++ b/lib/kokkos/containers/unit_tests/Makefile
@@ -1,92 +1,92 @@
KOKKOS_PATH = ../..
GTEST_PATH = ../../TPL/gtest
vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests
default: build_all
echo "End Build"
-
+
include $(KOKKOS_PATH)/Makefile.kokkos
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
- CXX = nvcc_wrapper
+ CXX = $(NVCC_WRAPPER)
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests
TEST_TARGETS =
TARGETS =
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
TARGETS += KokkosContainers_UnitTest_Cuda
TEST_TARGETS += test-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
TARGETS += KokkosContainers_UnitTest_Threads
TEST_TARGETS += test-threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
TARGETS += KokkosContainers_UnitTest_OpenMP
TEST_TARGETS += test-openmp
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
TARGETS += KokkosContainers_UnitTest_Serial
TEST_TARGETS += test-serial
endif
KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Cuda
KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Threads
-
+
KokkosContainers_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_OpenMP
KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Serial
test-cuda: KokkosContainers_UnitTest_Cuda
./KokkosContainers_UnitTest_Cuda
test-threads: KokkosContainers_UnitTest_Threads
./KokkosContainers_UnitTest_Threads
test-openmp: KokkosContainers_UnitTest_OpenMP
./KokkosContainers_UnitTest_OpenMP
test-serial: KokkosContainers_UnitTest_Serial
./KokkosContainers_UnitTest_Serial
-
+
build_all: $(TARGETS)
test: $(TEST_TARGETS)
-
+
clean: kokkos-clean
rm -f *.o $(TARGETS)
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
diff --git a/lib/kokkos/containers/unit_tests/TestComplex.hpp b/lib/kokkos/containers/unit_tests/TestComplex.hpp
index 5065d7257..94c04b61f 100644
--- a/lib/kokkos/containers/unit_tests/TestComplex.hpp
+++ b/lib/kokkos/containers/unit_tests/TestComplex.hpp
@@ -1,264 +1,263 @@
//@HEADER
// ************************************************************************
-//
+//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
-//
+//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
-//
+//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
-//
+//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_COMPLEX_HPP
#define KOKKOS_TEST_COMPLEX_HPP
-//#include <Kokkos_Complex.hpp>
+#include <Kokkos_Complex.hpp>
#include <gtest/gtest.h>
#include <iostream>
namespace Test {
namespace Impl {
template <typename RealType>
void testComplexConstructors () {
typedef Kokkos::complex<RealType> complex_type;
complex_type z1;
complex_type z2 (0.0, 0.0);
complex_type z3 (1.0, 0.0);
complex_type z4 (0.0, 1.0);
complex_type z5 (-1.0, -2.0);
ASSERT_TRUE( z1 == z2 );
ASSERT_TRUE( z1 != z3 );
ASSERT_TRUE( z1 != z4 );
ASSERT_TRUE( z1 != z5 );
ASSERT_TRUE( z2 != z3 );
ASSERT_TRUE( z2 != z4 );
ASSERT_TRUE( z2 != z5 );
ASSERT_TRUE( z3 != z4 );
ASSERT_TRUE( z3 != z5 );
complex_type z6 (-1.0, -2.0);
ASSERT_TRUE( z5 == z6 );
// Make sure that complex has value semantics, in particular, that
// equality tests use values and not pointers, so that
// reassignment actually changes the value.
z1 = complex_type (-3.0, -4.0);
ASSERT_TRUE( z1.real () == -3.0 );
ASSERT_TRUE( z1.imag () == -4.0 );
ASSERT_TRUE( z1 != z2 );
complex_type z7 (1.0);
ASSERT_TRUE( z3 == z7 );
ASSERT_TRUE( z7 == 1.0 );
ASSERT_TRUE( z7 != -1.0 );
z7 = complex_type (5.0);
ASSERT_TRUE( z7.real () == 5.0 );
ASSERT_TRUE( z7.imag () == 0.0 );
}
template <typename RealType>
void testPlus () {
typedef Kokkos::complex<RealType> complex_type;
complex_type z1 (1.0, -1.0);
complex_type z2 (-1.0, 1.0);
complex_type z3 = z1 + z2;
ASSERT_TRUE( z3 == complex_type (0.0, 0.0) );
}
template <typename RealType>
void testMinus () {
typedef Kokkos::complex<RealType> complex_type;
// Test binary minus.
complex_type z1 (1.0, -1.0);
complex_type z2 (-1.0, 1.0);
complex_type z3 = z1 - z2;
ASSERT_TRUE( z3 == complex_type (2.0, -2.0) );
// Test unary minus.
complex_type z4 (3.0, -4.0);
ASSERT_TRUE( -z4 == complex_type (-3.0, 4.0) );
}
template <typename RealType>
void testTimes () {
typedef Kokkos::complex<RealType> complex_type;
complex_type z1 (1.0, -1.0);
complex_type z2 (-1.0, 1.0);
- complex_type z3 = z1 - z2;
- ASSERT_TRUE( z3 == complex_type (2.0, -2.0) );
+ complex_type z3 = z1 * z2;
+ ASSERT_TRUE( z3 == complex_type (0.0, 2.0) );
- // Test unary minus.
- complex_type z4 (3.0, -4.0);
- ASSERT_TRUE( z4 == complex_type (3.0, -4.0) );
- ASSERT_TRUE( -z4 == complex_type (-3.0, 4.0) );
- ASSERT_TRUE( z4 == -complex_type (-3.0, 4.0) );
+ // Make sure that std::complex * Kokkos::complex works too.
+ std::complex<RealType> z4 (-1.0, 1.0);
+ complex_type z5 = z4 * z1;
+ ASSERT_TRUE( z5 == complex_type (0.0, 2.0) );
}
template <typename RealType>
void testDivide () {
typedef Kokkos::complex<RealType> complex_type;
// Test division of a complex number by a real number.
complex_type z1 (1.0, -1.0);
complex_type z2 (1.0 / 2.0, -1.0 / 2.0);
ASSERT_TRUE( z1 / 2.0 == z2 );
// (-1+2i)/(1-i) == ((-1+2i)(1+i)) / ((1-i)(1+i))
// (-1+2i)(1+i) == -3 + i
complex_type z3 (-1.0, 2.0);
complex_type z4 (1.0, -1.0);
complex_type z5 (-3.0, 1.0);
ASSERT_TRUE(z3 * Kokkos::conj (z4) == z5 );
// Test division of a complex number by a complex number.
// This assumes that RealType is a floating-point type.
complex_type z6 (Kokkos::real (z5) / 2.0,
Kokkos::imag (z5) / 2.0);
complex_type z7 = z3 / z4;
ASSERT_TRUE( z7 == z6 );
}
template <typename RealType>
void testOutsideKernel () {
testComplexConstructors<RealType> ();
testPlus<RealType> ();
testMinus<RealType> ();
testTimes<RealType> ();
testDivide<RealType> ();
}
template<typename RealType, typename Device>
void testCreateView () {
typedef Kokkos::complex<RealType> complex_type;
Kokkos::View<complex_type*, Device> x ("x", 10);
ASSERT_TRUE( x.dimension_0 () == 10 );
// Test that View assignment works.
Kokkos::View<complex_type*, Device> x_nonconst = x;
Kokkos::View<const complex_type*, Device> x_const = x;
}
template<typename RealType, typename Device>
class Fill {
public:
typedef typename Device::execution_space execution_space;
typedef Kokkos::View<Kokkos::complex<RealType>*, Device> view_type;
typedef typename view_type::size_type size_type;
KOKKOS_INLINE_FUNCTION
void operator () (const size_type i) const {
x_(i) = val_;
}
Fill (const view_type& x, const Kokkos::complex<RealType>& val) :
x_ (x), val_ (val)
{}
private:
view_type x_;
const Kokkos::complex<RealType> val_;
};
template<typename RealType, typename Device>
class Sum {
public:
typedef typename Device::execution_space execution_space;
typedef Kokkos::View<const Kokkos::complex<RealType>*, Device> view_type;
typedef typename view_type::size_type size_type;
- typedef Kokkos::complex<RealType> value_type;
+ typedef Kokkos::complex<RealType> value_type;
KOKKOS_INLINE_FUNCTION
void operator () (const size_type i, Kokkos::complex<RealType>& sum) const {
sum += x_(i);
}
Sum (const view_type& x) : x_ (x) {}
private:
view_type x_;
};
template<typename RealType, typename Device>
void testInsideKernel () {
typedef Kokkos::complex<RealType> complex_type;
typedef Kokkos::View<complex_type*, Device> view_type;
typedef typename view_type::size_type size_type;
const size_type N = 1000;
view_type x ("x", N);
ASSERT_TRUE( x.dimension_0 () == N );
// Kokkos::parallel_reduce (N, [=] (const size_type i, complex_type& result) {
// result += x[i];
// });
Kokkos::parallel_for (N, Fill<RealType, Device> (x, complex_type (1.0, -1.0)));
complex_type sum;
Kokkos::parallel_reduce (N, Sum<RealType, Device> (x), sum);
ASSERT_TRUE( sum.real () == 1000.0 && sum.imag () == -1000.0 );
}
} // namespace Impl
template <typename Device>
void testComplex ()
{
Impl::testOutsideKernel<float> ();
Impl::testOutsideKernel<double> ();
Impl::testCreateView<float, Device> ();
Impl::testCreateView<double, Device> ();
Impl::testInsideKernel<float, Device> ();
Impl::testInsideKernel<double, Device> ();
}
} // namespace Test
#endif // KOKKOS_TEST_COMPLEX_HPP
diff --git a/lib/kokkos/core/CMakeLists.txt b/lib/kokkos/core/CMakeLists.txt
new file mode 100644
index 000000000..42fce6b2f
--- /dev/null
+++ b/lib/kokkos/core/CMakeLists.txt
@@ -0,0 +1,11 @@
+
+
+TRIBITS_SUBPACKAGE(Core)
+
+ADD_SUBDIRECTORY(src)
+
+TRIBITS_ADD_TEST_DIRECTORIES(unit_test)
+TRIBITS_ADD_TEST_DIRECTORIES(perf_test)
+
+TRIBITS_SUBPACKAGE_POSTPROCESS()
+
diff --git a/lib/kokkos/core/cmake/Dependencies.cmake b/lib/kokkos/core/cmake/Dependencies.cmake
new file mode 100644
index 000000000..13ade23a9
--- /dev/null
+++ b/lib/kokkos/core/cmake/Dependencies.cmake
@@ -0,0 +1,4 @@
+TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
+ LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREAD
+ TEST_OPTIONAL_TPLS CUSPARSE
+ )
diff --git a/lib/kokkos/core/cmake/KokkosCore_config.h.in b/lib/kokkos/core/cmake/KokkosCore_config.h.in
new file mode 100644
index 000000000..d381c59a2
--- /dev/null
+++ b/lib/kokkos/core/cmake/KokkosCore_config.h.in
@@ -0,0 +1,50 @@
+#ifndef KOKKOS_CORE_CONFIG_H
+#define KOKKOS_CORE_CONFIG_H
+
+/* The trivial 'src/build_common.sh' creates a config
+ * that must stay in sync with this file.
+ */
+#cmakedefine KOKKOS_FOR_SIERRA
+
+#if !defined( KOKKOS_FOR_SIERRA )
+
+#cmakedefine KOKKOS_HAVE_MPI
+#cmakedefine KOKKOS_HAVE_CUDA
+
+// mfh 16 Sep 2014: If passed in on the command line, that overrides
+// any value of KOKKOS_USE_CUDA_UVM here. Doing this should prevent build
+// warnings like this one:
+//
+// packages/kokkos/core/src/KokkosCore_config.h:13:1: warning: "KOKKOS_USE_CUDA_UVM" redefined
+//
+// At some point, we should edit the test-build scripts in
+// Trilinos/cmake/ctest/drivers/perseus/, and take
+// -DKOKKOS_USE_CUDA_UVM from the command-line arguments there. I
+// hesitate to do that now, because I'm not sure if all the files are
+// including KokkosCore_config.h (or a header file that includes it) like
+// they should.
+
+#if ! defined(KOKKOS_USE_CUDA_UVM)
+#cmakedefine KOKKOS_USE_CUDA_UVM
+#endif // ! defined(KOKKOS_USE_CUDA_UVM)
+
+#cmakedefine KOKKOS_HAVE_PTHREAD
+#cmakedefine KOKKOS_HAVE_SERIAL
+#cmakedefine KOKKOS_HAVE_QTHREAD
+#cmakedefine KOKKOS_HAVE_WINTHREAD
+#cmakedefine KOKKOS_HAVE_OPENMP
+#cmakedefine KOKKOS_HAVE_HWLOC
+#cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
+#cmakedefine KOKKOS_HAVE_CXX11
+#cmakedefine KOKKOS_HAVE_CUSPARSE
+#cmakedefine KOKKOS_ENABLE_PROFILING_COLLECT_KERNEL_DATA
+#cmakedefine KOKKOS_ENABLE_PROFILING_AGGREGATE_MPI
+
+// Don't forbid users from defining this macro on the command line,
+// but still make sure that CMake logic can control its definition.
+#if ! defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
+#cmakedefine KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
+#endif // KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
+
+#endif // KOKKOS_FOR_SIERRA
+#endif // KOKKOS_CORE_CONFIG_H
diff --git a/lib/kokkos/core/perf_test/CMakeLists.txt b/lib/kokkos/core/perf_test/CMakeLists.txt
new file mode 100644
index 000000000..34aa81e92
--- /dev/null
+++ b/lib/kokkos/core/perf_test/CMakeLists.txt
@@ -0,0 +1,18 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+SET(SOURCES
+ PerfTestMain.cpp
+ PerfTestHost.cpp
+ PerfTestCuda.cpp
+ )
+
+TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ PerfTest
+ SOURCES ${SOURCES}
+ COMM serial mpi
+ NUM_MPI_PROCS 1
+ FAIL_REGULAR_EXPRESSION " FAILED "
+ TESTONLYLIBS kokkos_gtest
+ )
diff --git a/lib/kokkos/core/perf_test/Makefile b/lib/kokkos/core/perf_test/Makefile
index 2bf189a22..8fa1fbfc3 100644
--- a/lib/kokkos/core/perf_test/Makefile
+++ b/lib/kokkos/core/perf_test/Makefile
@@ -1,66 +1,66 @@
KOKKOS_PATH = ../..
-GTEST_PATH = ../../TPL/gtest
+GTEST_PATH = ../../tpls/gtest
vpath %.cpp ${KOKKOS_PATH}/core/perf_test
default: build_all
echo "End Build"
-
+
include $(KOKKOS_PATH)/Makefile.kokkos
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
- CXX = nvcc_wrapper
+ CXX = $(NVCC_WRAPPER)
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/perf_test
TEST_TARGETS =
TARGETS =
OBJ_PERF = PerfTestHost.o PerfTestCuda.o PerfTestMain.o gtest-all.o
TARGETS += KokkosCore_PerformanceTest
TEST_TARGETS += test-performance
OBJ_ATOMICS = test_atomic.o
TARGETS += KokkosCore_PerformanceTest_Atomics
TEST_TARGETS += test-atomic
KokkosCore_PerformanceTest: $(OBJ_PERF) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_PERF) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest
KokkosCore_PerformanceTest_Atomics: $(OBJ_ATOMICS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_ATOMICS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_Atomics
test-performance: KokkosCore_PerformanceTest
./KokkosCore_PerformanceTest
test-atomic: KokkosCore_PerformanceTest_Atomics
./KokkosCore_PerformanceTest_Atomics
-
+
build_all: $(TARGETS)
test: $(TEST_TARGETS)
-
+
clean: kokkos-clean
rm -f *.o $(TARGETS)
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
diff --git a/lib/kokkos/core/perf_test/PerfTestCuda.cpp b/lib/kokkos/core/perf_test/PerfTestCuda.cpp
index 1263a7672..4a4bc13cd 100644
--- a/lib/kokkos/core/perf_test/PerfTestCuda.cpp
+++ b/lib/kokkos/core/perf_test/PerfTestCuda.cpp
@@ -1,189 +1,189 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <iostream>
#include <iomanip>
#include <algorithm>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#include <impl/Kokkos_Timer.hpp>
#include <PerfTestHexGrad.hpp>
#include <PerfTestBlasKernels.hpp>
#include <PerfTestGramSchmidt.hpp>
#include <PerfTestDriver.hpp>
namespace Test {
class cuda : public ::testing::Test {
protected:
static void SetUpTestCase() {
Kokkos::HostSpace::execution_space::initialize();
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
}
static void TearDownTestCase() {
Kokkos::Cuda::finalize();
Kokkos::HostSpace::execution_space::finalize();
}
};
TEST_F( cuda, hexgrad )
{
EXPECT_NO_THROW( run_test_hexgrad< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) );
}
TEST_F( cuda, gramschmidt )
{
EXPECT_NO_THROW( run_test_gramschmidt< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) );
}
namespace {
template <typename T>
struct TextureFetch
{
typedef Kokkos::View< T *, Kokkos::CudaSpace> array_type;
typedef Kokkos::View< const T *, Kokkos::CudaSpace, Kokkos::MemoryRandomAccess> const_array_type;
typedef Kokkos::View< int *, Kokkos::CudaSpace> index_array_type;
typedef Kokkos::View< const int *, Kokkos::CudaSpace> const_index_array_type;
struct FillArray
{
array_type m_array;
FillArray( const array_type & array )
: m_array(array)
{}
void apply() const
{
Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.dimension_0()), *this);
}
KOKKOS_INLINE_FUNCTION
void operator()(int i) const { m_array(i) = i; }
};
struct RandomIndexes
{
index_array_type m_indexes;
typename index_array_type::HostMirror m_host_indexes;
RandomIndexes( const index_array_type & indexes)
: m_indexes(indexes)
, m_host_indexes(Kokkos::create_mirror(m_indexes))
{}
void apply() const
{
Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::HostSpace::execution_space,int>(0,m_host_indexes.dimension_0()), *this);
//random shuffle
Kokkos::HostSpace::execution_space::fence();
std::random_shuffle(m_host_indexes.ptr_on_device(), m_host_indexes.ptr_on_device() + m_host_indexes.dimension_0());
Kokkos::deep_copy(m_indexes,m_host_indexes);
}
KOKKOS_INLINE_FUNCTION
void operator()(int i) const { m_host_indexes(i) = i; }
};
struct RandomReduce
{
const_array_type m_array;
const_index_array_type m_indexes;
RandomReduce( const const_array_type & array, const const_index_array_type & indexes)
: m_array(array)
, m_indexes(indexes)
{}
void apply(T & reduce) const
{
Kokkos::parallel_reduce( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.dimension_0()), *this, reduce);
}
KOKKOS_INLINE_FUNCTION
void operator()(int i, T & reduce) const
{ reduce += m_array(m_indexes(i)); }
};
static void run(int size, double & reduce_time, T &reduce)
{
array_type array("array",size);
index_array_type indexes("indexes",size);
{ FillArray f(array); f.apply(); }
{ RandomIndexes f(indexes); f.apply(); }
Kokkos::Cuda::fence();
Kokkos::Impl::Timer timer;
for (int j=0; j<10; ++j) {
RandomReduce f(array,indexes);
f.apply(reduce);
}
Kokkos::Cuda::fence();
reduce_time = timer.seconds();
}
};
} // unnamed namespace
TEST_F( cuda, texture_double )
{
printf("Random reduce of double through texture fetch\n");
- for (int i=1; i<=27; ++i) {
+ for (int i=1; i<=26; ++i) {
int size = 1<<i;
double time = 0;
double reduce = 0;
TextureFetch<double>::run(size,time,reduce);
printf(" time = %1.3e size = 2^%d\n", time, i);
}
}
} // namespace Test
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
diff --git a/lib/kokkos/core/perf_test/test_atomic.cpp b/lib/kokkos/core/perf_test/test_atomic.cpp
index f1e5c1b62..882a5c615 100644
--- a/lib/kokkos/core/perf_test/test_atomic.cpp
+++ b/lib/kokkos/core/perf_test/test_atomic.cpp
@@ -1,504 +1,504 @@
/*
//@HEADER
// ************************************************************************
-//
+//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
-//
+//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
-//
+//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
-//
+//
// ************************************************************************
//@HEADER
*/
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
typedef Kokkos::DefaultExecutionSpace exec_space;
#define RESET 0
#define BRIGHT 1
#define DIM 2
#define UNDERLINE 3
#define BLINK 4
#define REVERSE 7
#define HIDDEN 8
#define BLACK 0
#define RED 1
#define GREEN 2
#define YELLOW 3
#define BLUE 4
#define MAGENTA 5
#define CYAN 6
#define GREY 7
#define WHITE 8
void textcolor(int attr, int fg, int bg)
{
char command[13];
/* Build the ANSI escape sequence that sets the terminal's text attributes */
sprintf(command, "%c[%d;%d;%dm", 0x1B, attr, fg + 30, bg + 40);
printf("%s", command);
}
void textcolor_standard() {textcolor(RESET, BLACK, WHITE);}
template<class T,class DEVICE_TYPE>
struct ZeroFunctor{
typedef DEVICE_TYPE execution_space;
typedef typename Kokkos::View<T,execution_space> type;
typedef typename Kokkos::View<T,execution_space>::HostMirror h_type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
data() = 0;
}
};
//---------------------------------------------------
//--------------atomic_fetch_add---------------------
//---------------------------------------------------
template<class T,class DEVICE_TYPE>
struct AddFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
Kokkos::atomic_fetch_add(&data(),(T)1);
}
};
template<class T>
T AddLoop(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct AddFunctor<T,exec_space> f_add;
f_add.data = data;
Kokkos::parallel_for(loop,f_add);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
template<class T,class DEVICE_TYPE>
struct AddNonAtomicFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
data()+=(T)1;
}
};
template<class T>
T AddLoopNonAtomic(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct AddNonAtomicFunctor<T,exec_space> f_add;
f_add.data = data;
Kokkos::parallel_for(loop,f_add);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
template<class T>
T AddLoopSerial(int loop) {
T* data = new T[1];
data[0] = 0;
for(int i=0;i<loop;i++)
*data+=(T)1;
T val = *data;
- delete data;
+ delete [] data;
return val;
}
template<class T,class DEVICE_TYPE>
struct CASFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
T old = data();
T newval, assumed;
do {
assumed = old;
newval = assumed + (T)1;
old = Kokkos::atomic_compare_exchange(&data(), assumed, newval);
}
while( old != assumed );
}
};
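// Classic compare-and-swap retry loop: read the current value, compute the
// update, and try to publish it; atomic_compare_exchange returns the value it
// actually observed, so old == assumed signals success. The same pattern
// implements arbitrary read-modify-write updates, not just +1.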
template<class T>
T CASLoop(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct CASFunctor<T,exec_space> f_cas;
f_cas.data = data;
Kokkos::parallel_for(loop,f_cas);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
template<class T,class DEVICE_TYPE>
struct CASNonAtomicFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
volatile T assumed;
volatile T newval;
bool fail = true;
do {
assumed = data();
newval = assumed + (T)1;
if(data()==assumed) {
data() = newval;
fail = false;
}
}
while(fail);
}
};
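// Non-atomic CAS emulation (racy on purpose): the volatile read-check-write
// sequence mimics the retry loop's memory traffic without any atomicity.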
template<class T>
T CASLoopNonAtomic(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct CASNonAtomicFunctor<T,exec_space> f_cas;
f_cas.data = data;
Kokkos::parallel_for(loop,f_cas);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
template<class T>
T CASLoopSerial(int loop) {
T* data = new T[1];
data[0] = 0;
for(int i=0;i<loop;i++) {
T assumed;
T newval;
T old;
do {
assumed = *data;
newval = assumed + (T)1;
old = *data;
*data = newval;
}
while(!(assumed==old));
}
T val = *data;
- delete data;
+ delete [] data;
return val;
}
template<class T,class DEVICE_TYPE>
struct ExchFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data, data2;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
T old = Kokkos::atomic_exchange(&data(),(T)i);
Kokkos::atomic_fetch_add(&data2(),old);
}
};
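// atomic_exchange stores i and returns the previous contents in a single
// atomic step; summing the returned values into data2 lets the harness check
// that every overwritten value was also observed exactly once.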
template<class T>
T ExchLoop(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
typename ZeroFunctor<T,exec_space>::type data2("Data2");
typename ZeroFunctor<T,exec_space>::h_type h_data2("HData2");
f_zero.data = data2;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct ExchFunctor<T,exec_space> f_exch;
f_exch.data = data;
f_exch.data2 = data2;
Kokkos::parallel_for(loop,f_exch);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
Kokkos::deep_copy(h_data2,data2);
T val = h_data() + h_data2();
return val;
}
template<class T,class DEVICE_TYPE>
struct ExchNonAtomicFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data, data2;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
T old = data();
data()=(T) i;
data2()+=old;
}
};
template<class T>
T ExchLoopNonAtomic(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
typename ZeroFunctor<T,exec_space>::type data2("Data2");
typename ZeroFunctor<T,exec_space>::h_type h_data2("HData2");
f_zero.data = data2;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct ExchNonAtomicFunctor<T,exec_space> f_exch;
f_exch.data = data;
f_exch.data2 = data2;
Kokkos::parallel_for(loop,f_exch);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
Kokkos::deep_copy(h_data2,data2);
T val = h_data() + h_data2();
return val;
}
template<class T>
T ExchLoopSerial(int loop) {
T* data = new T[1];
T* data2 = new T[1];
data[0] = 0;
data2[0] = 0;
for(int i=0;i<loop;i++) {
T old = *data;
*data=(T) i;
*data2+=old;
}
T val = *data2 + *data;
- delete data;
- delete data2;
+ delete [] data;
+ delete [] data2;
return val;
}
template<class T>
T LoopVariant(int loop, int test) {
switch (test) {
case 1: return AddLoop<T>(loop);
case 2: return CASLoop<T>(loop);
case 3: return ExchLoop<T>(loop);
}
return 0;
}
template<class T>
T LoopVariantSerial(int loop, int test) {
switch (test) {
case 1: return AddLoopSerial<T>(loop);
case 2: return CASLoopSerial<T>(loop);
case 3: return ExchLoopSerial<T>(loop);
}
return 0;
}
template<class T>
T LoopVariantNonAtomic(int loop, int test) {
switch (test) {
case 1: return AddLoopNonAtomic<T>(loop);
case 2: return CASLoopNonAtomic<T>(loop);
case 3: return ExchLoopNonAtomic<T>(loop);
}
return 0;
}
template<class T>
void Loop(int loop, int test, const char* type_name) {
LoopVariant<T>(loop,test);
Kokkos::Impl::Timer timer;
T res = LoopVariant<T>(loop,test);
double time1 = timer.seconds();
timer.reset();
T resNonAtomic = LoopVariantNonAtomic<T>(loop,test);
double time2 = timer.seconds();
timer.reset();
T resSerial = LoopVariantSerial<T>(loop,test);
double time3 = timer.seconds();
time1*=1e6/loop;
time2*=1e6/loop;
time3*=1e6/loop;
//textcolor_standard();
bool passed = true;
if(resSerial!=res) passed = false;
//if(!passed) textcolor(RESET,BLACK,YELLOW);
printf("%s Test %i %s --- Loop: %i Value (S,A,NA): %e %e %e Time: %7.4e %7.4e %7.4e Size of Type %i)",type_name,test,passed?"PASSED":"FAILED",loop,1.0*resSerial,1.0*res,1.0*resNonAtomic,time1,time2,time3,(int)sizeof(T));
//if(!passed) textcolor_standard();
printf("\n");
}
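// Loop() makes an untimed warm-up call to LoopVariant before starting the
// timer: it absorbs one-time costs (kernel launch setup, lock-array
// initialization) so the three reported times are comparable. Times are
// scaled by 1e6/loop to microseconds per iteration.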
template<class T>
void Test(int loop, int test, const char* type_name) {
if(test==-1) {
Loop<T>(loop,1,type_name);
Loop<T>(loop,2,type_name);
Loop<T>(loop,3,type_name);
}
else
Loop<T>(loop,test,type_name);
}
int main(int argc, char* argv[])
{
int type = -1;
int loop = 1000000;
int test = -1;
for(int i=1;i<argc;i++)
{
if((strcmp(argv[i],"--test")==0) && (i+1<argc)) {test=atoi(argv[++i]); continue;}
if((strcmp(argv[i],"--type")==0) && (i+1<argc)) {type=atoi(argv[++i]); continue;}
if(((strcmp(argv[i],"-l")==0)||(strcmp(argv[i],"--loop")==0)) && (i+1<argc)) {loop=atoi(argv[++i]); continue;}
}
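// Example invocation (hypothetical binary name):
//   ./test_atomic --type 1 --test 2 --loop 500000
// runs the CAS benchmark for 'int' with 500000 iterations; leaving --type
// and --test unset (-1) runs every type and every test.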
Kokkos::initialize(argc,argv);
printf("Using %s\n",Kokkos::atomic_query_version());
bool all_tests = false;
if(type==-1) all_tests = true;
while(type<100) {
if(type==1) {
Test<int>(loop,test,"int ");
}
if(type==2) {
Test<long int>(loop,test,"long int ");
}
if(type==3) {
Test<long long int>(loop,test,"long long int ");
}
if(type==4) {
Test<unsigned int>(loop,test,"unsigned int ");
}
if(type==5) {
Test<unsigned long int>(loop,test,"unsigned long int ");
}
if(type==6) {
Test<unsigned long long int>(loop,test,"unsigned long long int ");
}
if(type==10) {
//Test<float>(loop,test,"float ");
}
if(type==11) {
Test<double>(loop,test,"double ");
}
if(!all_tests) type=100;
else type++;
}
Kokkos::finalize();
}
diff --git a/lib/kokkos/core/src/CMakeLists.txt b/lib/kokkos/core/src/CMakeLists.txt
new file mode 100644
index 000000000..807a01ed0
--- /dev/null
+++ b/lib/kokkos/core/src/CMakeLists.txt
@@ -0,0 +1,113 @@
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_Serial
+ KOKKOS_HAVE_SERIAL
+ "Whether to enable the Kokkos::Serial device. This device executes \"parallel\" kernels sequentially on a single CPU thread. It is enabled by default. If you disable this device, please enable at least one other CPU device, such as Kokkos::OpenMP or Kokkos::Threads."
+ ON
+ )
+
+ASSERT_DEFINED(${PROJECT_NAME}_ENABLE_CXX11)
+ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_CUDA)
+
+# Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA governs whether Kokkos allows
+# use of lambdas at the outer level of parallel dispatch (that is, as
+# the argument to an outer parallel_for, parallel_reduce, or
+# parallel_scan). This works with non-CUDA execution spaces if C++11
+# is enabled. It does not currently work with public releases of
+# CUDA. If that changes, please change the default here to ON if CUDA
+# and C++11 are ON.
+IF (${PROJECT_NAME}_ENABLE_CXX11)
+ IF (${PACKAGE_NAME}_ENABLE_CUDA)
+ SET(Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT OFF)
+ ELSE ()
+ SET(Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT ON)
+ ENDIF ()
+ELSE ()
+ SET(Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT OFF)
+ENDIF ()
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+ Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA
+ KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
+ "Whether Kokkos allows use of lambdas at the outer level of parallel dispatch (that is, as the argument to an outer parallel_for, parallel_reduce, or parallel_scan). This requires C++11. It also does not currently work with public releases of CUDA. As a result, even if C++11 is enabled, this will be OFF by default if CUDA is enabled. If this option is ON, the macro KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA will be defined. For compatibility with Kokkos' Makefile build system, it is also possible to define that macro on the command line."
+ ${Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT}
+ )
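+
+# Example (hypothetical configure invocation): enabling lambda dispatch
+# explicitly for a C++11, non-CUDA build:
+#   cmake -DKokkos_ENABLE_CXX11=ON \
+#         -DKokkos_ENABLE_CXX11_DISPATCH_LAMBDA=ON <source-dir>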
+
+TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h)
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+#-----------------------------------------------------------------------------
+
+SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
+
+#-----------------------------------------------------------------------------
+
+SET(HEADERS_PUBLIC "")
+SET(HEADERS_PRIVATE "")
+SET(SOURCES "")
+
+FILE(GLOB HEADERS_PUBLIC Kokkos*.hpp)
+LIST( APPEND HEADERS_PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h )
+
+#-----------------------------------------------------------------------------
+
+FILE(GLOB HEADERS_IMPL impl/*.hpp)
+FILE(GLOB SOURCES_IMPL impl/*.cpp)
+
+LIST(APPEND HEADERS_PRIVATE ${HEADERS_IMPL} )
+LIST(APPEND SOURCES ${SOURCES_IMPL} )
+
+INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)
+
+#-----------------------------------------------------------------------------
+
+FILE(GLOB HEADERS_THREADS Threads/*.hpp)
+FILE(GLOB SOURCES_THREADS Threads/*.cpp)
+
+LIST(APPEND HEADERS_PRIVATE ${HEADERS_THREADS} )
+LIST(APPEND SOURCES ${SOURCES_THREADS} )
+
+INSTALL(FILES ${HEADERS_THREADS} DESTINATION ${TRILINOS_INCDIR}/Threads/)
+
+#-----------------------------------------------------------------------------
+
+FILE(GLOB HEADERS_OPENMP OpenMP/*.hpp)
+FILE(GLOB SOURCES_OPENMP OpenMP/*.cpp)
+
+LIST(APPEND HEADERS_PRIVATE ${HEADERS_OPENMP} )
+LIST(APPEND SOURCES ${SOURCES_OPENMP} )
+
+INSTALL(FILES ${HEADERS_OPENMP} DESTINATION ${TRILINOS_INCDIR}/OpenMP/)
+
+#-----------------------------------------------------------------------------
+
+FILE(GLOB HEADERS_CUDA Cuda/*.hpp)
+FILE(GLOB SOURCES_CUDA Cuda/*.cpp)
+
+LIST(APPEND HEADERS_PRIVATE ${HEADERS_CUDA} )
+LIST(APPEND SOURCES ${SOURCES_CUDA} )
+
+INSTALL(FILES ${HEADERS_CUDA} DESTINATION ${TRILINOS_INCDIR}/Cuda/)
+
+#-----------------------------------------------------------------------------
+FILE(GLOB HEADERS_QTHREAD Qthread/*.hpp)
+FILE(GLOB SOURCES_QTHREAD Qthread/*.cpp)
+
+LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREAD} )
+LIST(APPEND SOURCES ${SOURCES_QTHREAD} )
+
+INSTALL(FILES ${HEADERS_QTHREAD} DESTINATION ${TRILINOS_INCDIR}/Qthread/)
+
+#-----------------------------------------------------------------------------
+
+TRIBITS_ADD_LIBRARY(
+ kokkoscore
+ HEADERS ${HEADERS_PUBLIC}
+ NOINSTALLHEADERS ${HEADERS_PRIVATE}
+ SOURCES ${SOURCES}
+ DEPLIBS
+ )
+
+
diff --git a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp
index 37c5e53e5..4ed7d8e2a 100644
--- a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp
+++ b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp
@@ -1,283 +1,334 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( KOKKOS_HAVE_CUDA )
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
+template<>
+struct ViewOperatorBoundsErrorAbort< Kokkos::CudaSpace > {
+ KOKKOS_INLINE_FUNCTION
+ static void apply( const size_t rank
+ , const size_t n0 , const size_t n1
+ , const size_t n2 , const size_t n3
+ , const size_t n4 , const size_t n5
+ , const size_t n6 , const size_t n7
+ , const size_t i0 , const size_t i1
+ , const size_t i2 , const size_t i3
+ , const size_t i4 , const size_t i5
+ , const size_t i6 , const size_t i7 )
+ {
+ const int r =
+ ( n0 <= i0 ? 0 :
+ ( n1 <= i1 ? 1 :
+ ( n2 <= i2 ? 2 :
+ ( n3 <= i3 ? 3 :
+ ( n4 <= i4 ? 4 :
+ ( n5 <= i5 ? 5 :
+ ( n6 <= i6 ? 6 : 7 )))))));
+ const size_t n =
+ ( n0 <= i0 ? n0 :
+ ( n1 <= i1 ? n1 :
+ ( n2 <= i2 ? n2 :
+ ( n3 <= i3 ? n3 :
+ ( n4 <= i4 ? n4 :
+ ( n5 <= i5 ? n5 :
+ ( n6 <= i6 ? n6 : n7 )))))));
+ const size_t i =
+ ( n0 <= i0 ? i0 :
+ ( n1 <= i1 ? i1 :
+ ( n2 <= i2 ? i2 :
+ ( n3 <= i3 ? i3 :
+ ( n4 <= i4 ? i4 :
+ ( n5 <= i5 ? i5 :
+ ( n6 <= i6 ? i6 : i7 )))))));
+ printf("Cuda view array bounds error index %d : FAILED %lu < %lu\n" , r , i , n );
+ Kokkos::Impl::cuda_abort("Cuda view array bounds error");
+ }
+};
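+
+// The nested ternaries above select the first rank whose index is out of
+// bounds, together with its extent/index pair, so the abort message reports
+// a single concrete violation even when several indices are bad.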
+
+} // namespace Impl
+} // namespace Experimental
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
// Cuda Texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4)
// Via reinterpret_cast this can be used to support all scalar types of those sizes.
// Any other scalar type falls back to normal reads out of global memory,
// or to the __ldg intrinsic on Kepler GPUs or newer (Compute Capability >= 3.0)
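// For example, a Kokkos::View<const double*> with the RandomAccess memory
// trait is fetched below through the 8-byte 'int2' alias and reinterpreted
// back to double; only the size of the value type matters, not its meaning.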
template< typename ValueType , typename AliasType >
struct CudaTextureFetch {
::cudaTextureObject_t m_obj ;
const ValueType * m_ptr ;
int m_offset ;
// Dereference operator pulls the value through the texture object and returns it by value
template< typename iType >
KOKKOS_INLINE_FUNCTION
ValueType operator[]( const iType & i ) const
{
#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
AliasType v = tex1Dfetch<AliasType>( m_obj , i + m_offset );
return *(reinterpret_cast<ValueType*> (&v));
#else
return m_ptr[ i ];
#endif
}
// Pointer to referenced memory
KOKKOS_INLINE_FUNCTION
operator const ValueType * () const { return m_ptr ; }
KOKKOS_INLINE_FUNCTION
CudaTextureFetch() : m_obj() , m_ptr() , m_offset() {}
KOKKOS_INLINE_FUNCTION
~CudaTextureFetch() {}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( const CudaTextureFetch & rhs )
: m_obj( rhs.m_obj )
, m_ptr( rhs.m_ptr )
, m_offset( rhs.m_offset )
{}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( CudaTextureFetch && rhs )
: m_obj( rhs.m_obj )
, m_ptr( rhs.m_ptr )
, m_offset( rhs.m_offset )
{}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = ( const CudaTextureFetch & rhs )
{
m_obj = rhs.m_obj ;
m_ptr = rhs.m_ptr ;
m_offset = rhs.m_offset ;
return *this ;
}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = ( CudaTextureFetch && rhs )
{
m_obj = rhs.m_obj ;
m_ptr = rhs.m_ptr ;
m_offset = rhs.m_offset ;
return *this ;
}
// Texture object spans the entire allocation.
// This handle may view a subset of the allocation, so an offset is required.
template< class CudaMemorySpace >
inline explicit
CudaTextureFetch( const ValueType * const arg_ptr
, Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > & record
)
- // 'attach_texture_object' returns 0 when __CUDA_ARCH__ < 300
: m_obj( record.template attach_texture_object< AliasType >() )
, m_ptr( arg_ptr )
, m_offset( record.attach_texture_object_offset( reinterpret_cast<const AliasType*>( arg_ptr ) ) )
{}
};
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
template< typename ValueType , typename AliasType >
struct CudaLDGFetch {
const ValueType * m_ptr ;
template< typename iType >
KOKKOS_INLINE_FUNCTION
ValueType operator[]( const iType & i ) const
{
AliasType v = __ldg(reinterpret_cast<AliasType*>(&m_ptr[i]));
return *(reinterpret_cast<ValueType*> (&v));
}
KOKKOS_INLINE_FUNCTION
operator const ValueType * () const { return m_ptr ; }
KOKKOS_INLINE_FUNCTION
CudaLDGFetch() : m_ptr() {}
KOKKOS_INLINE_FUNCTION
~CudaLDGFetch() {}
KOKKOS_INLINE_FUNCTION
CudaLDGFetch( const CudaLDGFetch & rhs )
: m_ptr( rhs.m_ptr )
{}
KOKKOS_INLINE_FUNCTION
CudaLDGFetch( CudaLDGFetch && rhs )
: m_ptr( rhs.m_ptr )
{}
KOKKOS_INLINE_FUNCTION
CudaLDGFetch & operator = ( const CudaLDGFetch & rhs )
{
m_ptr = rhs.m_ptr ;
return *this ;
}
KOKKOS_INLINE_FUNCTION
CudaLDGFetch & operator = ( CudaLDGFetch && rhs )
{
m_ptr = rhs.m_ptr ;
return *this ;
}
template< class CudaMemorySpace >
inline explicit
CudaLDGFetch( const ValueType * const arg_ptr
, Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > const &
)
: m_ptr( arg_ptr )
{}
};
#endif
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** \brief Replace Default ViewDataHandle with Cuda texture fetch specialization
* if 'const' value type, CudaSpace and random access.
*/
template< class Traits >
class ViewDataHandle< Traits ,
typename std::enable_if<(
// Is Cuda memory space
( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )
&&
// Is a trivial const value of 4, 8, or 16 bytes
std::is_trivial<typename Traits::const_value_type>::value
&&
std::is_same<typename Traits::const_value_type,typename Traits::value_type>::value
&&
( sizeof(typename Traits::const_value_type) == 4 ||
sizeof(typename Traits::const_value_type) == 8 ||
sizeof(typename Traits::const_value_type) == 16 )
&&
// Random access trait
( Traits::memory_traits::RandomAccess != 0 )
)>::type >
{
public:
using track_type = Kokkos::Experimental::Impl::SharedAllocationTracker ;
using value_type = typename Traits::const_value_type ;
using return_type = typename Traits::const_value_type ; // NOT a reference
using alias_type = typename std::conditional< ( sizeof(value_type) == 4 ) , int ,
typename std::conditional< ( sizeof(value_type) == 8 ) , ::int2 ,
typename std::conditional< ( sizeof(value_type) == 16 ) , ::int4 , void
>::type
>::type
>::type ;
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ;
#else
using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ;
#endif
KOKKOS_INLINE_FUNCTION
static handle_type const & assign( handle_type const & arg_handle , track_type const & /* arg_tracker */ )
{
return arg_handle ;
}
KOKKOS_INLINE_FUNCTION
static handle_type assign( value_type * arg_data_ptr, track_type const & arg_tracker )
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// Assignment of texture = non-texture requires creation of a texture object
// which can only occur on the host. In addition, 'get_record' is only valid
// if called in a host execution space
return handle_type( arg_data_ptr , arg_tracker.template get_record< typename Traits::memory_space >() );
#else
Kokkos::Impl::cuda_abort("Cannot create Cuda texture object from within a Cuda kernel");
return handle_type();
#endif
}
};
}
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP */
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
index c1b2d51c4..ca0399016 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
@@ -1,277 +1,277 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDAEXEC_HPP
#define KOKKOS_CUDAEXEC_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <string>
#include <Kokkos_Parallel.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Cuda/Kokkos_Cuda_abort.hpp>
#include <Cuda/Kokkos_Cuda_Error.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
struct CudaTraits {
enum { WarpSize = 32 /* 0x0020 */ };
enum { WarpIndexMask = 0x001f /* Mask for warpindex */ };
enum { WarpIndexShift = 5 /* WarpSize == 1 << WarpIndexShift */ };
enum { SharedMemoryBanks = 32 /* Compute device 2.0 */ };
enum { SharedMemoryCapacity = 0x0C000 /* 48k shared / 16k L1 Cache */ };
enum { SharedMemoryUsage = 0x04000 /* 16k shared / 48k L1 Cache */ };
enum { UpperBoundGridCount = 65535 /* Hard upper bound */ };
enum { ConstantMemoryCapacity = 0x010000 /* 64k bytes */ };
enum { ConstantMemoryUsage = 0x008000 /* 32k bytes */ };
enum { ConstantMemoryCache = 0x002000 /* 8k bytes */ };
typedef unsigned long
ConstantGlobalBufferType[ ConstantMemoryUsage / sizeof(unsigned long) ];
enum { ConstantMemoryUseThreshold = 0x000200 /* 512 bytes */ };
KOKKOS_INLINE_FUNCTION static
CudaSpace::size_type warp_count( CudaSpace::size_type i )
{ return ( i + WarpIndexMask ) >> WarpIndexShift ; }
KOKKOS_INLINE_FUNCTION static
CudaSpace::size_type warp_align( CudaSpace::size_type i )
{
enum { Mask = ~CudaSpace::size_type( WarpIndexMask ) };
return ( i + WarpIndexMask ) & Mask ;
}
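// For example, warp_count(33) == 2 and warp_align(33) == 64 : thread
// counts are rounded up to whole 32-thread warps.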
};
//----------------------------------------------------------------------------
CudaSpace::size_type cuda_internal_maximum_warp_count();
CudaSpace::size_type cuda_internal_maximum_grid_count();
CudaSpace::size_type cuda_internal_maximum_shared_words();
CudaSpace::size_type * cuda_internal_scratch_flags( const CudaSpace::size_type size );
CudaSpace::size_type * cuda_internal_scratch_space( const CudaSpace::size_type size );
CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type size );
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( __CUDACC__ )
/** \brief Access to constant memory on the device */
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
extern
#endif
__device__ __constant__
Kokkos::Impl::CudaTraits::ConstantGlobalBufferType
kokkos_impl_cuda_constant_memory_buffer ;
__device__ __constant__
int* kokkos_impl_cuda_atomic_lock_array ;
#define CUDA_SPACE_ATOMIC_MASK 0x1FFFF
#define CUDA_SPACE_ATOMIC_XOR_MASK 0x15A39
namespace Kokkos {
namespace Impl {
__device__ inline
bool lock_address_cuda_space(void* ptr) {
size_t offset = size_t(ptr);
offset = offset >> 2;
offset = offset & CUDA_SPACE_ATOMIC_MASK;
//offset = offset xor CUDA_SPACE_ATOMIC_XOR_MASK;
return (0 == atomicCAS(&kokkos_impl_cuda_atomic_lock_array[offset],0,1));
}
__device__ inline
void unlock_address_cuda_space(void* ptr) {
size_t offset = size_t(ptr);
offset = offset >> 2;
offset = offset & CUDA_SPACE_ATOMIC_MASK;
//offset = offset xor CUDA_SPACE_ATOMIC_XOR_MASK;
atomicExch( &kokkos_impl_cuda_atomic_lock_array[ offset ], 0);
}
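// Together these form a hashed spinlock table: a word-aligned address is
// mapped to one of 2^17 slots via CUDA_SPACE_ATOMIC_MASK, atomicCAS acquires
// the slot (0 -> 1) and atomicExch releases it. Distinct addresses may share
// a slot, which is still correct but can introduce false contention.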
}
}
template< typename T >
inline
__device__
T * kokkos_impl_cuda_shared_memory()
{ extern __shared__ Kokkos::CudaSpace::size_type sh[]; return (T*) sh ; }
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// See section B.17 of Cuda C Programming Guide Version 3.2
// for discussion of
// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor)
// function qualifier which could be used to improve performance.
//----------------------------------------------------------------------------
// Maximize L1 cache and minimize shared memory:
// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferL1 );
// For 2.0 capability: 48 KB L1 and 16 KB shared
//----------------------------------------------------------------------------
template< class DriverType >
__global__
static void cuda_parallel_launch_constant_memory()
{
const DriverType & driver =
*((const DriverType *) kokkos_impl_cuda_constant_memory_buffer );
driver();
}
template< class DriverType >
__global__
static void cuda_parallel_launch_local_memory( const DriverType driver )
{
driver();
}
template < class DriverType ,
bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) ) >
struct CudaParallelLaunch ;
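// The 'Large' flag selects the launch path: functors bigger than
// ConstantMemoryUseThreshold (512 bytes) are copied into the constant-memory
// buffer and run by the 'true' specialization below; smaller functors are
// passed by value as a kernel argument in the 'false' specialization.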
template < class DriverType >
struct CudaParallelLaunch< DriverType , true > {
inline
CudaParallelLaunch( const DriverType & driver
, const dim3 & grid
, const dim3 & block
, const int shmem
, const cudaStream_t stream = 0 )
{
if ( grid.x && ( block.x * block.y * block.z ) ) {
if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) <
sizeof( DriverType ) ) {
Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") );
}
if ( CudaTraits::SharedMemoryCapacity < shmem ) {
Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") );
}
else if ( shmem ) {
- cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferShared );
+ CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferShared ) );
} else {
- cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferL1 );
+ CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferL1 ) );
}
// Copy functor to constant memory on the device
cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) );
int* lock_array_ptr = lock_array_cuda_space_ptr();
cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) );
// Invoke the driver function on the device
cuda_parallel_launch_constant_memory< DriverType ><<< grid , block , shmem , stream >>>();
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
Kokkos::Cuda::fence();
CUDA_SAFE_CALL( cudaGetLastError() );
#endif
}
}
};
template < class DriverType >
struct CudaParallelLaunch< DriverType , false > {
inline
CudaParallelLaunch( const DriverType & driver
, const dim3 & grid
, const dim3 & block
, const int shmem
, const cudaStream_t stream = 0 )
{
if ( grid.x && ( block.x * block.y * block.z ) ) {
if ( CudaTraits::SharedMemoryCapacity < shmem ) {
Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") );
}
else if ( shmem ) {
- cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferShared );
+ CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferShared ) );
} else {
- cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferL1 );
+ CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferL1 ) );
}
int* lock_array_ptr = lock_array_cuda_space_ptr();
cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) );
cuda_parallel_launch_local_memory< DriverType ><<< grid , block , shmem , stream >>>( driver );
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
Kokkos::Cuda::fence();
CUDA_SAFE_CALL( cudaGetLastError() );
#endif
}
}
};
//----------------------------------------------------------------------------
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* defined( __CUDACC__ ) */
#endif /* defined( KOKKOS_HAVE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
index 13316cb63..829ad03a4 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
@@ -1,686 +1,860 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdlib.h>
#include <iostream>
#include <sstream>
#include <stdexcept>
+#include <algorithm>
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <Kokkos_Cuda.hpp>
#include <Kokkos_CudaSpace.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
#include <Cuda/Kokkos_Cuda_Internal.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
namespace {
cudaStream_t get_deep_copy_stream() {
static cudaStream_t s = 0;
if( s == 0) {
cudaStreamCreate ( &s );
}
return s;
}
}
DeepCopy<CudaSpace,CudaSpace,Cuda>::DeepCopy( void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); }
DeepCopy<HostSpace,CudaSpace,Cuda>::DeepCopy( void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); }
DeepCopy<CudaSpace,HostSpace,Cuda>::DeepCopy( void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); }
DeepCopy<CudaSpace,CudaSpace,Cuda>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); }
DeepCopy<HostSpace,CudaSpace,Cuda>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); }
DeepCopy<CudaSpace,HostSpace,Cuda>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); }
void DeepCopyAsyncCuda( void * dst , const void * src , size_t n) {
cudaStream_t s = get_deep_copy_stream();
CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , s ) );
cudaStreamSynchronize(s);
}
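// Although it uses cudaMemcpyAsync, this helper synchronizes its private
// stream before returning, giving blocking deep-copy semantics without
// interfering with work queued on the default stream.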
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
namespace {
void texture_object_attach_impl( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
)
{
enum { TEXTURE_BOUND_1D = 2u << 27 };
if ( tracker.attribute() == NULL ) {
// check for correct allocator
const bool ok_alloc = tracker.allocator()->support_texture_binding();
const bool ok_count = (tracker.alloc_size() / type_size) < TEXTURE_BOUND_1D;
if (ok_alloc && ok_count) {
Impl::TextureAttribute * attr = new Impl::TextureAttribute( tracker.alloc_ptr(), tracker.alloc_size(), desc );
tracker.set_attribute( attr );
}
else {
std::ostringstream oss;
oss << "Error: Cannot attach texture object";
if (!ok_alloc) {
oss << ", incompatabile allocator " << tracker.allocator()->name();
}
if (!ok_count) {
oss << ", array " << tracker.label() << " too large";
}
oss << ".";
Kokkos::Impl::throw_runtime_exception( oss.str() );
}
}
if ( NULL == dynamic_cast<Impl::TextureAttribute *>(tracker.attribute()) ) {
std::ostringstream oss;
oss << "Error: Allocation " << tracker.label() << " already has an attribute attached.";
Kokkos::Impl::throw_runtime_exception( oss.str() );
}
}
} // unnamed namespace
/*--------------------------------------------------------------------------*/
Impl::AllocationTracker CudaSpace::allocate_and_track( const std::string & label, const size_t size )
{
return Impl::AllocationTracker( allocator(), size, label);
}
void CudaSpace::texture_object_attach( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
)
{
texture_object_attach_impl( tracker, type_size, desc );
}
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
void CudaSpace::access_error()
{
const std::string msg("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" );
Kokkos::Impl::throw_runtime_exception( msg );
}
void CudaSpace::access_error( const void * const )
{
const std::string msg("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" );
Kokkos::Impl::throw_runtime_exception( msg );
}
/*--------------------------------------------------------------------------*/
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
Impl::AllocationTracker CudaUVMSpace::allocate_and_track( const std::string & label, const size_t size )
{
return Impl::AllocationTracker( allocator(), size, label);
}
void CudaUVMSpace::texture_object_attach( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
)
{
texture_object_attach_impl( tracker, type_size, desc );
}
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
bool CudaUVMSpace::available()
{
#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && !defined(__APPLE__)
enum { UVM_available = true };
#else
enum { UVM_available = false };
#endif
return UVM_available;
}
/*--------------------------------------------------------------------------*/
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
Impl::AllocationTracker CudaHostPinnedSpace::allocate_and_track( const std::string & label, const size_t size )
{
return Impl::AllocationTracker( allocator(), size, label);
}
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
CudaSpace::CudaSpace()
: m_device( Kokkos::Cuda().cuda_device() )
{
}
CudaUVMSpace::CudaUVMSpace()
: m_device( Kokkos::Cuda().cuda_device() )
{
}
CudaHostPinnedSpace::CudaHostPinnedSpace()
{
}
void * CudaSpace::allocate( const size_t arg_alloc_size ) const
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaMalloc( &ptr, arg_alloc_size ) );
return ptr ;
}
void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) );
return ptr ;
}
void * CudaHostPinnedSpace::allocate( const size_t arg_alloc_size ) const
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaHostAlloc( &ptr, arg_alloc_size , cudaHostAllocDefault ) );
return ptr ;
}
void CudaSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
try {
CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
} catch(...) {}
}
void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
try {
CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
} catch(...) {}
}
void CudaHostPinnedSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
try {
CUDA_SAFE_CALL( cudaFreeHost( arg_alloc_ptr ) );
} catch(...) {}
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::CudaSpace , void >::s_root_record ;
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::s_root_record ;
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::s_root_record ;
::cudaTextureObject_t
SharedAllocationRecord< Kokkos::CudaSpace , void >::
attach_texture_object( const unsigned sizeof_alias
, void * const alloc_ptr
, size_t const alloc_size )
{
- // Only valid for 300 <= __CUDA_ARCH__
- // otherwise return zero.
+ enum { TEXTURE_BOUND_1D = 1u << 27 };
+
+ if ( ( alloc_ptr == 0 ) || ( sizeof_alias * TEXTURE_BOUND_1D <= alloc_size ) ) {
+ std::ostringstream msg ;
+ msg << "Kokkos::CudaSpace ERROR: Cannot attach texture object to"
+ << " alloc_ptr(" << alloc_ptr << ")"
+ << " alloc_size(" << alloc_size << ")"
+ << " max_size(" << ( sizeof_alias * TEXTURE_BOUND_1D ) << ")" ;
+ std::cerr << msg.str() << std::endl ;
+ std::cerr.flush();
+ Kokkos::Impl::throw_runtime_exception( msg.str() );
+ }
::cudaTextureObject_t tex_obj ;
struct cudaResourceDesc resDesc ;
struct cudaTextureDesc texDesc ;
memset( & resDesc , 0 , sizeof(resDesc) );
memset( & texDesc , 0 , sizeof(texDesc) );
resDesc.resType = cudaResourceTypeLinear ;
resDesc.res.linear.desc = ( sizeof_alias == 4 ? cudaCreateChannelDesc< int >() :
( sizeof_alias == 8 ? cudaCreateChannelDesc< ::int2 >() :
/* sizeof_alias == 16 */ cudaCreateChannelDesc< ::int4 >() ) );
resDesc.res.linear.sizeInBytes = alloc_size ;
resDesc.res.linear.devPtr = alloc_ptr ;
CUDA_SAFE_CALL( cudaCreateTextureObject( & tex_obj , & resDesc, & texDesc, NULL ) );
return tex_obj ;
}
std::string
SharedAllocationRecord< Kokkos::CudaSpace , void >::get_label() const
{
SharedAllocationHeader header ;
Kokkos::Impl::DeepCopy< Kokkos::HostSpace , Kokkos::CudaSpace >( & header , RecordBase::head() , sizeof(SharedAllocationHeader) );
return std::string( header.m_label );
}
std::string
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_label() const
{
return std::string( RecordBase::head()->m_label );
}
std::string
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_label() const
{
return std::string( RecordBase::head()->m_label );
}
SharedAllocationRecord< Kokkos::CudaSpace , void > *
SharedAllocationRecord< Kokkos::CudaSpace , void >::
allocate( const Kokkos::CudaSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
)
{
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
}
SharedAllocationRecord< Kokkos::CudaUVMSpace , void > *
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
allocate( const Kokkos::CudaUVMSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
)
{
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
}
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > *
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
allocate( const Kokkos::CudaHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
)
{
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
}
void
SharedAllocationRecord< Kokkos::CudaSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
delete static_cast<SharedAllocationRecord*>(arg_rec);
}
void
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
delete static_cast<SharedAllocationRecord*>(arg_rec);
}
void
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
delete static_cast<SharedAllocationRecord*>(arg_rec);
}
SharedAllocationRecord< Kokkos::CudaSpace , void >::
~SharedAllocationRecord()
{
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
, SharedAllocationRecord< void , void >::m_alloc_size
);
}
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
~SharedAllocationRecord()
{
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
, SharedAllocationRecord< void , void >::m_alloc_size
);
}
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
~SharedAllocationRecord()
{
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
, SharedAllocationRecord< void , void >::m_alloc_size
);
}
SharedAllocationRecord< Kokkos::CudaSpace , void >::
SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const SharedAllocationRecord< void , void >::function_type arg_dealloc
)
// Pass through allocated [ SharedAllocationHeader , user_memory ]
// Pass through deallocation function
: SharedAllocationRecord< void , void >
( & SharedAllocationRecord< Kokkos::CudaSpace , void >::s_root_record
, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
, sizeof(SharedAllocationHeader) + arg_alloc_size
, arg_dealloc
)
, m_tex_obj( 0 )
, m_space( arg_space )
{
SharedAllocationHeader header ;
// Fill in the Header information
header.m_record = static_cast< SharedAllocationRecord< void , void > * >( this );
strncpy( header.m_label
, arg_label.c_str()
, SharedAllocationHeader::maximum_label_length
);
// Copy to device memory
Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>::DeepCopy( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
}
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const SharedAllocationRecord< void , void >::function_type arg_dealloc
)
// Pass through allocated [ SharedAllocationHeader , user_memory ]
// Pass through deallocation function
: SharedAllocationRecord< void , void >
( & SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::s_root_record
, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
, sizeof(SharedAllocationHeader) + arg_alloc_size
, arg_dealloc
)
, m_tex_obj( 0 )
, m_space( arg_space )
{
// Fill in the Header information, directly accessible via UVM
RecordBase::m_alloc_ptr->m_record = this ;
strncpy( RecordBase::m_alloc_ptr->m_label
, arg_label.c_str()
, SharedAllocationHeader::maximum_label_length
);
}
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const SharedAllocationRecord< void , void >::function_type arg_dealloc
)
// Pass through allocated [ SharedAllocationHeader , user_memory ]
// Pass through deallocation function
: SharedAllocationRecord< void , void >
( & SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::s_root_record
, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
, sizeof(SharedAllocationHeader) + arg_alloc_size
, arg_dealloc
)
, m_space( arg_space )
{
// Fill in the Header information, directly accessible via UVM
RecordBase::m_alloc_ptr->m_record = this ;
strncpy( RecordBase::m_alloc_ptr->m_label
, arg_label.c_str()
, SharedAllocationHeader::maximum_label_length
);
}
+//----------------------------------------------------------------------------
+
+void * SharedAllocationRecord< Kokkos::CudaSpace , void >::
+allocate_tracked( const Kokkos::CudaSpace & arg_space
+ , const std::string & arg_alloc_label
+ , const size_t arg_alloc_size )
+{
+ if ( ! arg_alloc_size ) return (void *) 0 ;
+
+ SharedAllocationRecord * const r =
+ allocate( arg_space , arg_alloc_label , arg_alloc_size );
+
+ RecordBase::increment( r );
+
+ return r->data();
+}
+
+void SharedAllocationRecord< Kokkos::CudaSpace , void >::
+deallocate_tracked( void * const arg_alloc_ptr )
+{
+ if ( arg_alloc_ptr != 0 ) {
+ SharedAllocationRecord * const r = get_record( arg_alloc_ptr );
+
+ RecordBase::decrement( r );
+ }
+}
+
+void * SharedAllocationRecord< Kokkos::CudaSpace , void >::
+reallocate_tracked( void * const arg_alloc_ptr
+ , const size_t arg_alloc_size )
+{
+ SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr );
+ SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size );
+
+ Kokkos::Impl::DeepCopy<CudaSpace,CudaSpace>( r_new->data() , r_old->data()
+ , std::min( r_old->size() , r_new->size() ) );
+
+ RecordBase::increment( r_new );
+ RecordBase::decrement( r_old );
+
+ return r_new->data();
+}
+
+void * SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
+allocate_tracked( const Kokkos::CudaUVMSpace & arg_space
+ , const std::string & arg_alloc_label
+ , const size_t arg_alloc_size )
+{
+ if ( ! arg_alloc_size ) return (void *) 0 ;
+
+ SharedAllocationRecord * const r =
+ allocate( arg_space , arg_alloc_label , arg_alloc_size );
+
+ RecordBase::increment( r );
+
+ return r->data();
+}
+
+void SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
+deallocate_tracked( void * const arg_alloc_ptr )
+{
+ if ( arg_alloc_ptr != 0 ) {
+ SharedAllocationRecord * const r = get_record( arg_alloc_ptr );
+
+ RecordBase::decrement( r );
+ }
+}
+
+void * SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
+reallocate_tracked( void * const arg_alloc_ptr
+ , const size_t arg_alloc_size )
+{
+ SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr );
+ SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size );
+
+ Kokkos::Impl::DeepCopy<CudaUVMSpace,CudaUVMSpace>( r_new->data() , r_old->data()
+ , std::min( r_old->size() , r_new->size() ) );
+
+ RecordBase::increment( r_new );
+ RecordBase::decrement( r_old );
+
+ return r_new->data();
+}
+
+void * SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
+allocate_tracked( const Kokkos::CudaHostPinnedSpace & arg_space
+ , const std::string & arg_alloc_label
+ , const size_t arg_alloc_size )
+{
+ if ( ! arg_alloc_size ) return (void *) 0 ;
+
+ SharedAllocationRecord * const r =
+ allocate( arg_space , arg_alloc_label , arg_alloc_size );
+
+ RecordBase::increment( r );
+
+ return r->data();
+}
+
+void SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
+deallocate_tracked( void * const arg_alloc_ptr )
+{
+ if ( arg_alloc_ptr != 0 ) {
+ SharedAllocationRecord * const r = get_record( arg_alloc_ptr );
+
+ RecordBase::decrement( r );
+ }
+}
+
+void * SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
+reallocate_tracked( void * const arg_alloc_ptr
+ , const size_t arg_alloc_size )
+{
+ SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr );
+ SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size );
+
+ Kokkos::Impl::DeepCopy<CudaHostPinnedSpace,CudaHostPinnedSpace>( r_new->data() , r_old->data()
+ , std::min( r_old->size() , r_new->size() ) );
+
+ RecordBase::increment( r_new );
+ RecordBase::decrement( r_old );
+
+ return r_new->data();
+}
+
+//----------------------------------------------------------------------------
+
SharedAllocationRecord< Kokkos::CudaSpace , void > *
SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr )
{
using Header = SharedAllocationHeader ;
using RecordBase = SharedAllocationRecord< void , void > ;
using RecordCuda = SharedAllocationRecord< Kokkos::CudaSpace , void > ;
#if 0
// Copy the header from the allocation
- SharedAllocationHeader head ;
+ Header head ;
- SharedAllocationHeader const * const head_cuda = Header::get_header( alloc_ptr );
+ Header const * const head_cuda = alloc_ptr ? Header::get_header( alloc_ptr ) : (Header*) 0 ;
- Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , head_cuda , sizeof(SharedAllocationHeader) );
+ if ( alloc_ptr ) {
+ Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , head_cuda , sizeof(SharedAllocationHeader) );
+ }
- RecordCuda * const record = static_cast< RecordCuda * >( head.m_record );
+ RecordCuda * const record = alloc_ptr ? static_cast< RecordCuda * >( head.m_record ) : (RecordCuda *) 0 ;
- if ( record->m_alloc_ptr != head_cuda ) {
+ if ( ! alloc_ptr || record->m_alloc_ptr != head_cuda ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
}
#else
// Iterate the list to search for the record among all allocations
// requires obtaining the root of the list and then locking the list.
RecordCuda * const record = static_cast< RecordCuda * >( RecordBase::find( & s_root_record , alloc_ptr ) );
if ( record == 0 ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
}
#endif
return record ;
}
SharedAllocationRecord< Kokkos::CudaUVMSpace , void > *
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record( void * alloc_ptr )
{
using Header = SharedAllocationHeader ;
using RecordCuda = SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;
- Header * const h = reinterpret_cast< Header * >( alloc_ptr ) - 1 ;
+ Header * const h = alloc_ptr ? reinterpret_cast< Header * >( alloc_ptr ) - 1 : (Header *) 0 ;
- if ( h->m_record->m_alloc_ptr != h ) {
+ if ( ! alloc_ptr || h->m_record->m_alloc_ptr != h ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record ERROR" ) );
}
return static_cast< RecordCuda * >( h->m_record );
}
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > *
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record( void * alloc_ptr )
{
using Header = SharedAllocationHeader ;
using RecordCuda = SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > ;
- Header * const h = reinterpret_cast< Header * >( alloc_ptr ) - 1 ;
+ Header * const h = alloc_ptr ? reinterpret_cast< Header * >( alloc_ptr ) - 1 : (Header *) 0 ;
- if ( h->m_record->m_alloc_ptr != h ) {
+ if ( ! alloc_ptr || h->m_record->m_alloc_ptr != h ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record ERROR" ) );
}
return static_cast< RecordCuda * >( h->m_record );
}
// Iterate records to print orphaned memory ...
void
SharedAllocationRecord< Kokkos::CudaSpace , void >::
print_records( std::ostream & s , const Kokkos::CudaSpace & space , bool detail )
{
SharedAllocationRecord< void , void > * r = & s_root_record ;
char buffer[256] ;
SharedAllocationHeader head ;
if ( detail ) {
do {
if ( r->m_alloc_ptr ) {
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
}
else {
head.m_label[0] = 0 ;
}
- snprintf( buffer , 256 , "Cuda addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"
- , reinterpret_cast<unsigned long>( r )
- , reinterpret_cast<unsigned long>( r->m_prev )
- , reinterpret_cast<unsigned long>( r->m_next )
- , reinterpret_cast<unsigned long>( r->m_alloc_ptr )
+ //Formatting dependent on sizeof(uintptr_t); default to the
+ //'unsigned long long' form so format_string is never left
+ //uninitialized on an unexpected platform.
+ const char * format_string = "Cuda addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ 0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n";
+
+ if (sizeof(uintptr_t) == sizeof(unsigned long)) {
+ format_string = "Cuda addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n";
+ }
+
+ snprintf( buffer , 256
+ , format_string
+ , reinterpret_cast<uintptr_t>( r )
+ , reinterpret_cast<uintptr_t>( r->m_prev )
+ , reinterpret_cast<uintptr_t>( r->m_next )
+ , reinterpret_cast<uintptr_t>( r->m_alloc_ptr )
, r->m_alloc_size
, r->m_count
- , reinterpret_cast<unsigned long>( r->m_dealloc )
+ , reinterpret_cast<uintptr_t>( r->m_dealloc )
, head.m_label
);
s << buffer ;
r = r->m_next ;
} while ( r != & s_root_record );
}
else {
do {
if ( r->m_alloc_ptr ) {
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
- snprintf( buffer , 256 , "Cuda [ 0x%.12lx + %ld ] %s\n"
- , reinterpret_cast< unsigned long >( r->data() )
+ //Formatting dependent on sizeof(uintptr_t); default to the
+ //'unsigned long long' form so format_string is never left uninitialized.
+ const char * format_string = "Cuda [ 0x%.12llx + %ld ] %s\n";
+
+ if (sizeof(uintptr_t) == sizeof(unsigned long)) {
+ format_string = "Cuda [ 0x%.12lx + %ld ] %s\n";
+ }
+
+ snprintf( buffer , 256
+ , format_string
+ , reinterpret_cast< uintptr_t >( r->data() )
, r->size()
, head.m_label
);
}
else {
snprintf( buffer , 256 , "Cuda [ 0 + 0 ]\n" );
}
s << buffer ;
r = r->m_next ;
} while ( r != & s_root_record );
}
}
void
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
print_records( std::ostream & s , const Kokkos::CudaUVMSpace & space , bool detail )
{
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "CudaUVM" , & s_root_record , detail );
}
void
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
print_records( std::ostream & s , const Kokkos::CudaHostPinnedSpace & space , bool detail )
{
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "CudaHostPinned" , & s_root_record , detail );
}
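// Usage sketch (illustrative): dumping all live CudaSpace records to
// hunt for leaked allocations at shutdown; 'space' is a hypothetical
// local instance and 'true' selects the detailed per-record listing.
// The UVM and HostPinned variants delegate to the host-accessible path.
//
//   Kokkos::CudaSpace space ;
//   Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >
//     ::print_records( std::cout , space , true );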
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace {
__global__ void init_lock_array_kernel() {
unsigned i = blockIdx.x*blockDim.x + threadIdx.x;
if(i<CUDA_SPACE_ATOMIC_MASK+1)
kokkos_impl_cuda_atomic_lock_array[i] = 0;
}
}
namespace Impl {
int* lock_array_cuda_space_ptr(bool deallocate) {
static int* ptr = NULL;
if(deallocate) {
cudaFree(ptr);
ptr = NULL;
}
if(ptr==NULL && !deallocate)
cudaMalloc(&ptr,sizeof(int)*(CUDA_SPACE_ATOMIC_MASK+1));
return ptr;
}
void init_lock_array_cuda_space() {
static int is_initialized = 0;
if(! is_initialized) {
is_initialized = 1;
int* lock_array_ptr = lock_array_cuda_space_ptr();
cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) );
init_lock_array_kernel<<<(CUDA_SPACE_ATOMIC_MASK+255)/256,256>>>();
}
}
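// The lock array holds one int per bucket (CUDA_SPACE_ATOMIC_MASK+1
// entries) so device code can implement atomics on arbitrary types by
// spin-locking on the bucket an address hashes to. Illustrative
// device-side sketch, assuming a hash of the form
// '(addr >> 2) & CUDA_SPACE_ATOMIC_MASK':
//
//   __device__ void locked_update( int * locks , void * addr ) {
//     unsigned i = ( (uintptr_t) addr >> 2 ) & CUDA_SPACE_ATOMIC_MASK ;
//     while ( atomicCAS( & locks[i] , 0 , 1 ) ) ; // acquire
//     /* read-modify-write the guarded value */
//     atomicExch( & locks[i] , 0 ) ;              // release
//   }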
}
}
#endif // KOKKOS_HAVE_CUDA
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp
index e1314c0e5..574617627 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp
@@ -1,183 +1,183 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
#define KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_AllocationTracker.hpp> // AllocatorAttributeBase
namespace Kokkos {
namespace Impl {
template< class DestructFunctor >
SharedAllocationRecord *
shared_allocation_record( Kokkos::CudaSpace const & arg_space
, void * const arg_alloc_ptr
, DestructFunctor const & arg_destruct )
{
SharedAllocationRecord * const record = SharedAllocationRecord::get_record( arg_alloc_ptr );
// assert: record != 0
// assert: sizeof(DestructFunctor) <= record->m_destruct_size
// assert: record->m_destruct_function == 0
DestructFunctor * const functor =
reinterpret_cast< DestructFunctor * >(
- reinterpret_cast< unsigned long >( record ) + sizeof(SharedAllocationRecord) );
+ reinterpret_cast< uintptr_t >( record ) + sizeof(SharedAllocationRecord) );
new( functor ) DestructFunctor( arg_destruct );
record->m_destruct_functor = & shared_allocation_destroy< DestructFunctor > ;
return record ;
}
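// Usage sketch (illustrative): attaching a destruct functor that runs
// when the record's reference count reaches zero. 'ReleaseTexture' is a
// hypothetical functor; the exact interface consumed by
// shared_allocation_destroy is assumed here to be the call operator.
//
//   struct ReleaseTexture {
//     void operator()() const { /* release the attribute */ }
//   };
//   shared_allocation_record( Kokkos::CudaSpace() , user_ptr , ReleaseTexture() );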
/// class CudaUnmanagedAllocator
/// does nothing when deallocate(ptr,size) is called
struct CudaUnmanagedAllocator
{
static const char * name()
{
return "Cuda Unmanaged Allocator";
}
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
static bool support_texture_binding() { return true; }
};
/// class CudaUnmanagedUVMAllocator
/// does nothing when deallocate(ptr,size) is called
struct CudaUnmanagedUVMAllocator
{
static const char * name()
{
return "Cuda Unmanaged UVM Allocator";
}
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
static bool support_texture_binding() { return true; }
};
/// class CudaUnmanagedHostAllocator
/// does nothing when deallocate(ptr,size) is called
class CudaUnmanagedHostAllocator
{
public:
static const char * name()
{
return "Cuda Unmanaged Host Allocator";
}
// Unmanaged deallocate does nothing
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
};
/// class CudaMallocAllocator
class CudaMallocAllocator
{
public:
static const char * name()
{
return "Cuda Malloc Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
static bool support_texture_binding() { return true; }
};
/// class CudaUVMAllocator
class CudaUVMAllocator
{
public:
static const char * name()
{
return "Cuda UVM Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
static bool support_texture_binding() { return true; }
};
/// class CudaHostAllocator
class CudaHostAllocator
{
public:
static const char * name()
{
return "Cuda Host Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
};
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif // #ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
index 8c8c5e47a..1f409dffa 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
@@ -1,192 +1,198 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <impl/Kokkos_Error.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
#include <Cuda/Kokkos_Cuda_Error.hpp>
#include <sstream>
namespace Kokkos { namespace Impl {
/*--------------------------------------------------------------------------*/
+
TextureAttribute::TextureAttribute( void * const alloc_ptr
, size_t alloc_size
, cudaChannelFormatDesc const & desc
)
: m_tex_obj(0)
{
cuda_device_synchronize();
struct cudaResourceDesc resDesc ;
struct cudaTextureDesc texDesc ;
memset( & resDesc , 0 , sizeof(resDesc) );
memset( & texDesc , 0 , sizeof(texDesc) );
resDesc.resType = cudaResourceTypeLinear ;
resDesc.res.linear.desc = desc ;
resDesc.res.linear.sizeInBytes = alloc_size ;
resDesc.res.linear.devPtr = alloc_ptr ;
CUDA_SAFE_CALL( cudaCreateTextureObject( & m_tex_obj , & resDesc, & texDesc, NULL) );
cuda_device_synchronize();
}
TextureAttribute::~TextureAttribute()
{
if (m_tex_obj) {
cudaDestroyTextureObject( m_tex_obj );
}
}
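// Usage sketch (illustrative): wrapping an existing device allocation
// in a texture object for cached read-only loads; 'dev_ptr' and 'bytes'
// are hypothetical, and the channel descriptor must match the element
// type of the allocation.
//
//   cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
//   TextureAttribute tex( dev_ptr , bytes , desc );
//   // device code may now read via tex1Dfetch<float>( tex.m_tex_obj , i )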
/*--------------------------------------------------------------------------*/
void * CudaMallocAllocator::allocate( size_t size )
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaMalloc( &ptr, size ) );
return ptr;
}
void CudaMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
{
try {
CUDA_SAFE_CALL( cudaFree( ptr ) );
} catch(...) {}
}
void * CudaMallocAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
void * ptr = old_ptr;
if (old_size != new_size) {
ptr = allocate( new_size );
size_t copy_size = old_size < new_size ? old_size : new_size;
CUDA_SAFE_CALL( cudaMemcpy( ptr , old_ptr , copy_size , cudaMemcpyDefault ) );
deallocate( old_ptr, old_size );
}
return ptr;
}
/*--------------------------------------------------------------------------*/
void * CudaUVMAllocator::allocate( size_t size )
{
#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION )
void * ptr = NULL;
CUDA_SAFE_CALL( cudaMallocManaged( &ptr, size, cudaMemAttachGlobal ) );
return ptr;
#else
throw_runtime_exception( "CUDA VERSION does not support UVM" );
return NULL;
#endif
}
void CudaUVMAllocator::deallocate( void * ptr, size_t /*size*/ )
{
try {
CUDA_SAFE_CALL( cudaFree( ptr ) );
} catch(...) {}
}
void * CudaUVMAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
void * ptr = old_ptr;
if (old_size != new_size) {
ptr = allocate( new_size );
size_t copy_size = old_size < new_size ? old_size : new_size;
CUDA_SAFE_CALL( cudaMemcpy( ptr , old_ptr , copy_size , cudaMemcpyDefault ) );
deallocate( old_ptr, old_size );
}
return ptr;
}
/*--------------------------------------------------------------------------*/
void * CudaHostAllocator::allocate( size_t size )
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaHostAlloc( &ptr , size , cudaHostAllocDefault ) );
return ptr;
}
void CudaHostAllocator::deallocate( void * ptr, size_t /*size*/ )
{
try {
CUDA_SAFE_CALL( cudaFreeHost( ptr ) );
} catch(...) {}
}
void * CudaHostAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
void * ptr = old_ptr;
if (old_size != new_size) {
ptr = allocate( new_size );
size_t copy_size = old_size < new_size ? old_size : new_size;
CUDA_SAFE_CALL( cudaMemcpy( ptr , old_ptr , copy_size , cudaMemcpyHostToHost ) );
deallocate( old_ptr, old_size );
}
return ptr;
}
/*--------------------------------------------------------------------------*/
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
+
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BasicAllocators.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BasicAllocators.hpp
index 86fe1c901..58445ab07 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BasicAllocators.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BasicAllocators.hpp
@@ -1,187 +1,190 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_BASIC_ALLOCATORS_HPP
#define KOKKOS_CUDA_BASIC_ALLOCATORS_HPP
#include <Kokkos_Macros.hpp>
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_AllocationTracker.hpp> // AllocatorAttributeBase
namespace Kokkos { namespace Impl {
// Cuda 5.0 <texture_types.h> defines 'cudaTextureObject_t'
// to be an 'unsigned long long'. This could change with
// future versions of Cuda, and this typedef would have to
// change accordingly.
#if defined( CUDA_VERSION ) && ( 5000 <= CUDA_VERSION )
typedef enable_if<
sizeof(::cudaTextureObject_t) == sizeof(const void *) ,
::cudaTextureObject_t >::type cuda_texture_object_type ;
#else
typedef const void * cuda_texture_object_type ;
#endif
struct TextureAttribute : public AllocatorAttributeBase
{
cuda_texture_object_type m_tex_obj ;
TextureAttribute( void * const alloc_ptr
, size_t alloc_size
, cudaChannelFormatDesc const & desc
);
~TextureAttribute();
};
-
/// class CudaUnmanagedAllocator
/// does nothing when deallocate(ptr,size) is called
struct CudaUnmanagedAllocator
{
static const char * name()
{
return "Cuda Unmanaged Allocator";
}
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
static bool support_texture_binding() { return true; }
};
/// class CudaUnmanagedUVMAllocator
/// does nothing when deallocate(ptr,size) is called
struct CudaUnmanagedUVMAllocator
{
static const char * name()
{
return "Cuda Unmanaged UVM Allocator";
}
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
static bool support_texture_binding() { return true; }
};
/// class CudaUnmanagedHostAllocator
/// does nothing when deallocate(ptr,size) is called
class CudaUnmanagedHostAllocator
{
public:
static const char * name()
{
return "Cuda Unmanaged Host Allocator";
}
// Unmanaged deallocate does nothing
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
};
/// class CudaMallocAllocator
class CudaMallocAllocator
{
public:
static const char * name()
{
return "Cuda Malloc Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
static bool support_texture_binding() { return true; }
};
/// class CudaUVMAllocator
class CudaUVMAllocator
{
public:
static const char * name()
{
return "Cuda UVM Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
static bool support_texture_binding() { return true; }
};
/// class CudaHostAllocator
class CudaHostAllocator
{
public:
static const char * name()
{
return "Cuda Host Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
};
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
#endif //KOKKOS_CUDA_BASIC_ALLOCATORS_HPP
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
index b7c3a62d3..de00b0415 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
@@ -1,678 +1,774 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/*--------------------------------------------------------------------------*/
/* Kokkos interfaces */
#include <Kokkos_Core.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <Cuda/Kokkos_Cuda_Error.hpp>
#include <Cuda/Kokkos_Cuda_Internal.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
/* Standard 'C' libraries */
#include <stdlib.h>
/* Standard 'C++' libraries */
#include <vector>
#include <iostream>
#include <sstream>
#include <string>
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
__device__ __constant__
Kokkos::Impl::CudaTraits::ConstantGlobalBufferType
kokkos_impl_cuda_constant_memory_buffer ;
#endif
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
namespace {
__global__
void query_cuda_kernel_arch( int * d_arch )
{
#if defined( __CUDA_ARCH__ )
*d_arch = __CUDA_ARCH__ ;
#else
*d_arch = 0 ;
#endif
}
/** Query what compute capability is actually launched to the device: */
int cuda_kernel_arch()
{
int * d_arch = 0 ;
cudaMalloc( (void **) & d_arch , sizeof(int) );
query_cuda_kernel_arch<<<1,1>>>( d_arch );
int arch = 0 ;
cudaMemcpy( & arch , d_arch , sizeof(int) , cudaMemcpyDefault );
cudaFree( d_arch );
return arch ;
}
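// The returned value encodes major*100 + minor*10 (e.g. 350 for compute
// capability 3.5), matching the cudaProp.major * 100 + cudaProp.minor * 10
// comparison made during initialization below.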
bool cuda_launch_blocking()
{
const char * env = getenv("CUDA_LAUNCH_BLOCKING");
if (env == 0) return false;
return atoi(env);
}
}
void cuda_device_synchronize()
{
// static const bool launch_blocking = cuda_launch_blocking();
// if (!launch_blocking) {
CUDA_SAFE_CALL( cudaDeviceSynchronize() );
// }
}
void cuda_internal_error_throw( cudaError e , const char * name, const char * file, const int line )
{
std::ostringstream out ;
out << name << " error( " << cudaGetErrorName(e) << "): " << cudaGetErrorString(e);
if (file) {
out << " " << file << ":" << line;
}
throw_runtime_exception( out.str() );
}
//----------------------------------------------------------------------------
// Some significant cuda device properties:
//
// cudaDeviceProp::name : Text label for device
// cudaDeviceProp::major : Device major number
// cudaDeviceProp::minor : Device minor number
// cudaDeviceProp::warpSize : number of threads per warp
// cudaDeviceProp::multiProcessorCount : number of multiprocessors
// cudaDeviceProp::sharedMemPerBlock : capacity of shared memory per block
// cudaDeviceProp::totalConstMem : capacity of constant memory
// cudaDeviceProp::totalGlobalMem : capacity of global memory
// cudaDeviceProp::maxGridSize[3] : maximum grid size
//
// Section 4.4.2.4 of the CUDA Toolkit Reference Manual
//
// struct cudaDeviceProp {
// char name[256];
// size_t totalGlobalMem;
// size_t sharedMemPerBlock;
// int regsPerBlock;
// int warpSize;
// size_t memPitch;
// int maxThreadsPerBlock;
// int maxThreadsDim[3];
// int maxGridSize[3];
// size_t totalConstMem;
// int major;
// int minor;
// int clockRate;
// size_t textureAlignment;
// int deviceOverlap;
// int multiProcessorCount;
// int kernelExecTimeoutEnabled;
// int integrated;
// int canMapHostMemory;
// int computeMode;
// int concurrentKernels;
// int ECCEnabled;
// int pciBusID;
// int pciDeviceID;
// int tccDriver;
// int asyncEngineCount;
// int unifiedAddressing;
// int memoryClockRate;
// int memoryBusWidth;
// int l2CacheSize;
// int maxThreadsPerMultiProcessor;
// };
namespace {
class CudaInternalDevices {
public:
enum { MAXIMUM_DEVICE_COUNT = 8 };
struct cudaDeviceProp m_cudaProp[ MAXIMUM_DEVICE_COUNT ] ;
int m_cudaDevCount ;
CudaInternalDevices();
static const CudaInternalDevices & singleton();
};
CudaInternalDevices::CudaInternalDevices()
{
// See 'cudaSetDeviceFlags' for host-device thread interaction
// Section 4.4.2.6 of the CUDA Toolkit Reference Manual
CUDA_SAFE_CALL (cudaGetDeviceCount( & m_cudaDevCount ) );
for ( int i = 0 ; i < m_cudaDevCount ; ++i ) {
CUDA_SAFE_CALL( cudaGetDeviceProperties( m_cudaProp + i , i ) );
}
}
const CudaInternalDevices & CudaInternalDevices::singleton()
{
static CudaInternalDevices self ; return self ;
}
}
//----------------------------------------------------------------------------
class CudaInternal {
private:
CudaInternal( const CudaInternal & );
CudaInternal & operator = ( const CudaInternal & );
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
AllocationTracker m_scratchFlagsTracker;
AllocationTracker m_scratchSpaceTracker;
AllocationTracker m_scratchUnifiedTracker;
+#endif
+
public:
typedef Cuda::size_type size_type ;
int m_cudaDev ;
int m_cudaArch ;
unsigned m_maxWarpCount ;
unsigned m_maxBlock ;
unsigned m_maxSharedWords ;
size_type m_scratchSpaceCount ;
size_type m_scratchFlagsCount ;
size_type m_scratchUnifiedCount ;
size_type m_scratchUnifiedSupported ;
size_type m_streamCount ;
size_type * m_scratchSpace ;
size_type * m_scratchFlags ;
size_type * m_scratchUnified ;
cudaStream_t * m_stream ;
static CudaInternal & singleton();
int verify_is_initialized( const char * const label ) const ;
int is_initialized() const
{ return 0 != m_scratchSpace && 0 != m_scratchFlags ; }
void initialize( int cuda_device_id , int stream_count );
void finalize();
void print_configuration( std::ostream & ) const ;
~CudaInternal();
CudaInternal()
: m_cudaDev( -1 )
, m_cudaArch( -1 )
, m_maxWarpCount( 0 )
, m_maxBlock( 0 )
, m_maxSharedWords( 0 )
, m_scratchSpaceCount( 0 )
, m_scratchFlagsCount( 0 )
, m_scratchUnifiedCount( 0 )
, m_scratchUnifiedSupported( 0 )
, m_streamCount( 0 )
, m_scratchSpace( 0 )
, m_scratchFlags( 0 )
, m_scratchUnified( 0 )
, m_stream( 0 )
{}
size_type * scratch_space( const size_type size );
size_type * scratch_flags( const size_type size );
size_type * scratch_unified( const size_type size );
};
//----------------------------------------------------------------------------
void CudaInternal::print_configuration( std::ostream & s ) const
{
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
#if defined( KOKKOS_HAVE_CUDA )
s << "macro KOKKOS_HAVE_CUDA : defined" << std::endl ;
#endif
#if defined( CUDA_VERSION )
s << "macro CUDA_VERSION = " << CUDA_VERSION
<< " = version " << CUDA_VERSION / 1000
<< "." << ( CUDA_VERSION % 1000 ) / 10
<< std::endl ;
#endif
for ( int i = 0 ; i < dev_info.m_cudaDevCount ; ++i ) {
s << "Kokkos::Cuda[ " << i << " ] "
<< dev_info.m_cudaProp[i].name
<< " capability " << dev_info.m_cudaProp[i].major << "." << dev_info.m_cudaProp[i].minor
<< ", Total Global Memory: " << human_memory_size(dev_info.m_cudaProp[i].totalGlobalMem)
<< ", Shared Memory per Block: " << human_memory_size(dev_info.m_cudaProp[i].sharedMemPerBlock);
if ( m_cudaDev == i ) s << " : Selected" ;
s << std::endl ;
}
}
//----------------------------------------------------------------------------
CudaInternal::~CudaInternal()
{
if ( m_stream ||
m_scratchSpace ||
m_scratchFlags ||
m_scratchUnified ) {
std::cerr << "Kokkos::Cuda ERROR: Failed to call Kokkos::Cuda::finalize()"
<< std::endl ;
std::cerr.flush();
}
m_cudaDev = -1 ;
m_cudaArch = -1 ;
m_maxWarpCount = 0 ;
m_maxBlock = 0 ;
m_maxSharedWords = 0 ;
m_scratchSpaceCount = 0 ;
m_scratchFlagsCount = 0 ;
m_scratchUnifiedCount = 0 ;
m_scratchUnifiedSupported = 0 ;
m_streamCount = 0 ;
m_scratchSpace = 0 ;
m_scratchFlags = 0 ;
m_scratchUnified = 0 ;
m_stream = 0 ;
}
int CudaInternal::verify_is_initialized( const char * const label ) const
{
if ( m_cudaDev < 0 ) {
std::cerr << "Kokkos::Cuda::" << label << " : ERROR device not initialized" << std::endl ;
}
return 0 <= m_cudaDev ;
}
CudaInternal & CudaInternal::singleton()
{
static CudaInternal self ;
return self ;
}
void CudaInternal::initialize( int cuda_device_id , int stream_count )
{
enum { WordSize = sizeof(size_type) };
if ( ! HostSpace::execution_space::is_initialized() ) {
const std::string msg("Cuda::initialize ERROR : HostSpace::execution_space is not initialized");
throw_runtime_exception( msg );
}
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
const bool ok_init = 0 == m_scratchSpace || 0 == m_scratchFlags ;
const bool ok_id = 0 <= cuda_device_id &&
cuda_device_id < dev_info.m_cudaDevCount ;
// Need device capability 2.0 or better
const bool ok_dev = ok_id &&
( 2 <= dev_info.m_cudaProp[ cuda_device_id ].major &&
0 <= dev_info.m_cudaProp[ cuda_device_id ].minor );
if ( ok_init && ok_dev ) {
const struct cudaDeviceProp & cudaProp =
dev_info.m_cudaProp[ cuda_device_id ];
m_cudaDev = cuda_device_id ;
CUDA_SAFE_CALL( cudaSetDevice( m_cudaDev ) );
CUDA_SAFE_CALL( cudaDeviceReset() );
Kokkos::Impl::cuda_device_synchronize();
// Query what compute capability architecture a kernel executes:
m_cudaArch = cuda_kernel_arch();
if ( m_cudaArch != cudaProp.major * 100 + cudaProp.minor * 10 ) {
std::cerr << "Kokkos::Cuda::initialize WARNING: running kernels compiled for compute capability "
<< ( m_cudaArch / 100 ) << "." << ( ( m_cudaArch % 100 ) / 10 )
<< " on device with compute capability "
<< cudaProp.major << "." << cudaProp.minor
<< " , this will likely reduce potential performance."
<< std::endl ;
}
//----------------------------------
// Maximum number of warps,
// at most one warp per thread in a warp for reduction.
// HCE 2012-February :
// Found bug in CUDA 4.1 that sometimes a kernel launch would fail
// if the thread count == 1024 and a functor is passed to the kernel.
// Copying the kernel to constant memory and then launching with
// thread count == 1024 would work fine.
//
// HCE 2012-October :
// All compute capabilities support at least 16 warps (512 threads).
// However, we have found that 8 warps typically gives better performance.
m_maxWarpCount = 8 ;
// m_maxWarpCount = cudaProp.maxThreadsPerBlock / Impl::CudaTraits::WarpSize ;
if ( Impl::CudaTraits::WarpSize < m_maxWarpCount ) {
m_maxWarpCount = Impl::CudaTraits::WarpSize ;
}
m_maxSharedWords = cudaProp.sharedMemPerBlock / WordSize ;
//----------------------------------
// Maximum number of blocks:
m_maxBlock = m_cudaArch < 300 ? 65535 : cudaProp.maxGridSize[0] ;
//----------------------------------
m_scratchUnifiedSupported = cudaProp.unifiedAddressing ;
if ( ! m_scratchUnifiedSupported ) {
std::cout << "Kokkos::Cuda device "
<< cudaProp.name << " capability "
<< cudaProp.major << "." << cudaProp.minor
<< " does not support unified virtual address space"
<< std::endl ;
}
//----------------------------------
// Multiblock reduction uses scratch flags for counters
// and scratch space for partial reduction values.
// Allocate some initial space. This will grow as needed.
{
const unsigned reduce_block_count = m_maxWarpCount * Impl::CudaTraits::WarpSize ;
(void) scratch_unified( 16 * sizeof(size_type) );
(void) scratch_flags( reduce_block_count * 2 * sizeof(size_type) );
(void) scratch_space( reduce_block_count * 16 * sizeof(size_type) );
}
//----------------------------------
if ( stream_count ) {
m_stream = (cudaStream_t*) ::malloc( stream_count * sizeof(cudaStream_t) );
m_streamCount = stream_count ;
for ( size_type i = 0 ; i < m_streamCount ; ++i ) m_stream[i] = 0 ;
}
}
else {
std::ostringstream msg ;
msg << "Kokkos::Cuda::initialize(" << cuda_device_id << ") FAILED" ;
if ( ! ok_init ) {
msg << " : Already initialized" ;
}
if ( ! ok_id ) {
msg << " : Device identifier out of range "
<< "[0.." << dev_info.m_cudaDevCount << "]" ;
}
else if ( ! ok_dev ) {
msg << " : Device " ;
msg << dev_info.m_cudaProp[ cuda_device_id ].major ;
msg << "." ;
msg << dev_info.m_cudaProp[ cuda_device_id ].minor ;
msg << " has insufficient capability, required 2.0 or better" ;
}
Kokkos::Impl::throw_runtime_exception( msg.str() );
}
+ #ifdef KOKKOS_CUDA_USE_UVM
+ if(!cuda_launch_blocking()) {
+ std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl;
+ std::cout << " without setting CUDA_LAUNCH_BLOCKING=1." << std::endl;
+ std::cout << " The code must call Cuda::fence() after each kernel" << std::endl;
+ std::cout << " or will likely crash when accessing data on the host." << std::endl;
+ }
+
+ const char * env_force_device_alloc = getenv("CUDA_MANAGED_FORCE_DEVICE_ALLOC");
+ bool force_device_alloc;
+ if (env_force_device_alloc == 0) force_device_alloc=false;
+ else force_device_alloc=atoi(env_force_device_alloc)!=0;
+
+ const char * env_visible_devices = getenv("CUDA_VISIBLE_DEVICES");
+ bool visible_devices_one=true;
+ if (env_visible_devices == 0) visible_devices_one=false;
+
+ if(!visible_devices_one && !force_device_alloc) {
+ std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl;
+ std::cout << " without setting CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 or " << std::endl;
+ std::cout << " setting CUDA_VISIBLE_DEVICES." << std::endl;
+ std::cout << " This could on multi GPU systems lead to severe performance" << std::endl;
+ std::cout << " penalties." << std::endl;
+ }
+ #endif
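+ // Illustrative launch environment satisfying both checks above
+ // ('./app' is a hypothetical executable):
+ //   CUDA_LAUNCH_BLOCKING=1 CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 ./app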
+
// Init the array used for arbitrarily sized atomics
Impl::init_lock_array_cuda_space();
}
//----------------------------------------------------------------------------
typedef Cuda::size_type ScratchGrain[ Impl::CudaTraits::WarpSize ] ;
enum { sizeScratchGrain = sizeof(ScratchGrain) };
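// Worked example of the grain rounding used below (illustrative,
// assuming 4-byte size_type and WarpSize == 32, so sizeScratchGrain == 128):
// a request of size == 100 bytes yields
//   count = ( 100 + 128 - 1 ) / 128 == 1 grain == 128 bytes allocated.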
Cuda::size_type *
CudaInternal::scratch_flags( const Cuda::size_type size )
{
if ( verify_is_initialized("scratch_flags") && m_scratchFlagsCount * sizeScratchGrain < size ) {
m_scratchFlagsCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
m_scratchFlagsTracker = CudaSpace::allocate_and_track( std::string("InternalScratchFlags") , sizeof( ScratchGrain ) * m_scratchFlagsCount );
+
m_scratchFlags = reinterpret_cast<size_type *>(m_scratchFlagsTracker.alloc_ptr());
+#else
+
+ typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > Record ;
+
+ Record * const r = Record::allocate( Kokkos::CudaSpace()
+ , "InternalScratchFlags"
+ , ( sizeof( ScratchGrain ) * m_scratchFlagsCount ) );
+
+ Record::increment( r );
+
+ m_scratchFlags = reinterpret_cast<size_type *>( r->data() );
+
+#endif
+
+
CUDA_SAFE_CALL( cudaMemset( m_scratchFlags , 0 , m_scratchFlagsCount * sizeScratchGrain ) );
}
return m_scratchFlags ;
}
Cuda::size_type *
CudaInternal::scratch_space( const Cuda::size_type size )
{
if ( verify_is_initialized("scratch_space") && m_scratchSpaceCount * sizeScratchGrain < size ) {
m_scratchSpaceCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
m_scratchSpaceTracker = CudaSpace::allocate_and_track( std::string("InternalScratchSpace") , sizeof( ScratchGrain ) * m_scratchSpaceCount );
+
m_scratchSpace = reinterpret_cast<size_type *>(m_scratchSpaceTracker.alloc_ptr());
+#else
+
+ typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > Record ;
+
+ Record * const r = Record::allocate( Kokkos::CudaSpace()
+ , "InternalScratchSpace"
+ , ( sizeof( ScratchGrain ) * m_scratchSpaceCount ) );
+
+ Record::increment( r );
+
+ m_scratchSpace = reinterpret_cast<size_type *>( r->data() );
+
+#endif
+
}
return m_scratchSpace ;
}
Cuda::size_type *
CudaInternal::scratch_unified( const Cuda::size_type size )
{
if ( verify_is_initialized("scratch_unified") &&
m_scratchUnifiedSupported && m_scratchUnifiedCount * sizeScratchGrain < size ) {
m_scratchUnifiedCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
m_scratchUnifiedTracker = CudaHostPinnedSpace::allocate_and_track( std::string("InternalScratchUnified") , sizeof( ScratchGrain ) * m_scratchUnifiedCount );
+
m_scratchUnified = reinterpret_cast<size_type *>( m_scratchUnifiedTracker.alloc_ptr() );
+
+#else
+
+ typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > Record ;
+
+ Record * const r = Record::allocate( Kokkos::CudaHostPinnedSpace()
+ , "InternalScratchUnified"
+ , ( sizeof( ScratchGrain ) * m_scratchUnifiedCount ) );
+
+ Record::increment( r );
+
+ m_scratchUnified = reinterpret_cast<size_type *>( r->data() );
+
+#endif
+
}
return m_scratchUnified ;
}
//----------------------------------------------------------------------------
void CudaInternal::finalize()
{
if ( 0 != m_scratchSpace || 0 != m_scratchFlags ) {
lock_array_cuda_space_ptr(true);
if ( m_stream ) {
for ( size_type i = 1 ; i < m_streamCount ; ++i ) {
cudaStreamDestroy( m_stream[i] );
m_stream[i] = 0 ;
}
::free( m_stream );
}
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
m_scratchSpaceTracker.clear();
m_scratchFlagsTracker.clear();
m_scratchUnifiedTracker.clear();
+#else
+
+ typedef Kokkos::Experimental::Impl::SharedAllocationRecord< CudaSpace > RecordCuda ;
+ typedef Kokkos::Experimental::Impl::SharedAllocationRecord< CudaHostPinnedSpace > RecordHost ;
+
+ RecordCuda::decrement( RecordCuda::get_record( m_scratchFlags ) );
+ RecordCuda::decrement( RecordCuda::get_record( m_scratchSpace ) );
+ RecordHost::decrement( RecordHost::get_record( m_scratchUnified ) );
+
+#endif
+
m_cudaDev = -1 ;
m_maxWarpCount = 0 ;
m_maxBlock = 0 ;
m_maxSharedWords = 0 ;
m_scratchSpaceCount = 0 ;
m_scratchFlagsCount = 0 ;
m_scratchUnifiedCount = 0 ;
m_streamCount = 0 ;
m_scratchSpace = 0 ;
m_scratchFlags = 0 ;
m_scratchUnified = 0 ;
m_stream = 0 ;
}
}
//----------------------------------------------------------------------------
Cuda::size_type cuda_internal_maximum_warp_count()
{ return CudaInternal::singleton().m_maxWarpCount ; }
Cuda::size_type cuda_internal_maximum_grid_count()
{ return CudaInternal::singleton().m_maxBlock ; }
Cuda::size_type cuda_internal_maximum_shared_words()
{ return CudaInternal::singleton().m_maxSharedWords ; }
Cuda::size_type * cuda_internal_scratch_space( const Cuda::size_type size )
{ return CudaInternal::singleton().scratch_space( size ); }
Cuda::size_type * cuda_internal_scratch_flags( const Cuda::size_type size )
{ return CudaInternal::singleton().scratch_flags( size ); }
Cuda::size_type * cuda_internal_scratch_unified( const Cuda::size_type size )
{ return CudaInternal::singleton().scratch_unified( size ); }
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
Cuda::size_type Cuda::detect_device_count()
{ return Impl::CudaInternalDevices::singleton().m_cudaDevCount ; }
int Cuda::is_initialized()
{ return Impl::CudaInternal::singleton().is_initialized(); }
void Cuda::initialize( const Cuda::SelectDevice config , size_t num_instances )
{ Impl::CudaInternal::singleton().initialize( config.cuda_device_id , num_instances ); }
std::vector<unsigned>
Cuda::detect_device_arch()
{
const Impl::CudaInternalDevices & s = Impl::CudaInternalDevices::singleton();
std::vector<unsigned> output( s.m_cudaDevCount );
for ( int i = 0 ; i < s.m_cudaDevCount ; ++i ) {
output[i] = s.m_cudaProp[i].major * 100 + s.m_cudaProp[i].minor ;
}
return output ;
}
Cuda::size_type Cuda::device_arch()
{
const int dev_id = Impl::CudaInternal::singleton().m_cudaDev ;
int dev_arch = 0 ;
if ( 0 <= dev_id ) {
const struct cudaDeviceProp & cudaProp =
Impl::CudaInternalDevices::singleton().m_cudaProp[ dev_id ] ;
dev_arch = cudaProp.major * 100 + cudaProp.minor ;
}
return dev_arch ;
}
void Cuda::finalize()
{ Impl::CudaInternal::singleton().finalize(); }
Cuda::Cuda()
: m_device( Impl::CudaInternal::singleton().m_cudaDev )
, m_stream( 0 )
{
Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" );
}
Cuda::Cuda( const int instance_id )
: m_device( Impl::CudaInternal::singleton().m_cudaDev )
, m_stream(
Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" )
? Impl::CudaInternal::singleton().m_stream[ instance_id % Impl::CudaInternal::singleton().m_streamCount ]
: 0 )
{}
void Cuda::print_configuration( std::ostream & s , const bool )
{ Impl::CudaInternal::singleton().print_configuration( s ); }
bool Cuda::sleep() { return false ; }
bool Cuda::wake() { return true ; }
void Cuda::fence()
{
Kokkos::Impl::cuda_device_synchronize();
}
} // namespace Kokkos
#endif // KOKKOS_HAVE_CUDA
//----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp
index dd8a08729..328857d99 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp
@@ -1,165 +1,190 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_INTERNAL_HPP
#define KOKKOS_CUDA_INTERNAL_HPP
-
+#include <iostream>
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <Cuda/Kokkos_Cuda_Error.hpp>
namespace Kokkos { namespace Impl {
+template<class DriverType, bool Large>
+struct CudaGetMaxBlockSize;
+
+template<class DriverType, bool Large = (CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType))>
+int cuda_get_max_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra) {
+ return CudaGetMaxBlockSize<DriverType,Large>::get_block_size(f,vector_length, shmem_extra);
+}
+
template<class DriverType>
-int cuda_get_max_block_size(const typename DriverType::functor_type & f) {
-#if ( CUDA_VERSION < 6050 )
- return 256;
-#else
- bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) );
-
- int numBlocks;
- if(Large) {
+struct CudaGetMaxBlockSize<DriverType,true> {
+ static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra) {
+ int numBlocks;
int blockSize=32;
- int sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize );
+ int sharedmem = shmem_extra + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length );
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&numBlocks,
cuda_parallel_launch_constant_memory<DriverType>,
blockSize,
sharedmem);
while (blockSize<1024 && numBlocks>0) {
blockSize*=2;
- sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize );
+ sharedmem = shmem_extra + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length);
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&numBlocks,
cuda_parallel_launch_constant_memory<DriverType>,
blockSize,
sharedmem);
}
if(numBlocks>0) return blockSize;
else return blockSize/2;
- } else {
+ }
+};
+
+template<class DriverType>
+struct CudaGetMaxBlockSize<DriverType,false> {
+ static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra) {
+ int numBlocks;
+
int blockSize=32;
- int sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize );
+ int sharedmem = shmem_extra + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length );
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&numBlocks,
cuda_parallel_launch_local_memory<DriverType>,
blockSize,
sharedmem);
while (blockSize<1024 && numBlocks>0) {
blockSize*=2;
- sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize );
+ sharedmem = shmem_extra + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length );
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&numBlocks,
cuda_parallel_launch_local_memory<DriverType>,
blockSize,
sharedmem);
}
if(numBlocks>0) return blockSize;
else return blockSize/2;
}
-#endif
+};
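+
+// Worked example of the search above (illustrative): starting at
+// blockSize = 32 and doubling while the occupancy query still reports
+// numBlocks > 0, a functor whose shared-memory footprint first fails at
+// blockSize = 512 returns 256 ( blockSize/2 ), while one that still
+// fits at blockSize = 1024 returns 1024.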
+
+
+
+template<class DriverType, bool Large>
+struct CudaGetOptBlockSize;
+
+template<class DriverType, bool Large = (CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType))>
+int cuda_get_opt_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra) {
+ return CudaGetOptBlockSize<DriverType,Large>::get_block_size(f,vector_length,shmem_extra);
}
template<class DriverType>
-int cuda_get_opt_block_size(const typename DriverType::functor_type & f) {
-#if ( CUDA_VERSION < 6050 )
- return 256;
-#else
- bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) );
-
- int blockSize=16;
- int numBlocks;
- int sharedmem;
- int maxOccupancy=0;
- int bestBlockSize=0;
-
- if(Large) {
+struct CudaGetOptBlockSize<DriverType,true> {
+ static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra) {
+ int blockSize=16;
+ int numBlocks;
+ int sharedmem;
+ int maxOccupancy=0;
+ int bestBlockSize=0;
+
while(blockSize<1024) {
blockSize*=2;
// Calculate the occupancy with that block size and check whether it is larger than the largest one found so far
- sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize );
+ sharedmem = shmem_extra + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length );
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&numBlocks,
cuda_parallel_launch_constant_memory<DriverType>,
blockSize,
sharedmem);
if(maxOccupancy < numBlocks*blockSize) {
- maxOccupancy = numBlocks*blockSize;
- bestBlockSize = blockSize;
+ maxOccupancy = numBlocks*blockSize;
+ bestBlockSize = blockSize;
}
}
- } else {
+ return bestBlockSize;
+ }
+};
+
+template<class DriverType>
+struct CudaGetOptBlockSize<DriverType,false> {
+ static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra) {
+ int blockSize=16;
+ int numBlocks;
+ int sharedmem;
+ int maxOccupancy=0;
+ int bestBlockSize=0;
+
while(blockSize<1024) {
blockSize*=2;
- sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize );
+ sharedmem = shmem_extra + FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize/vector_length );
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&numBlocks,
cuda_parallel_launch_local_memory<DriverType>,
blockSize,
sharedmem);
if(maxOccupancy < numBlocks*blockSize) {
maxOccupancy = numBlocks*blockSize;
bestBlockSize = blockSize;
}
}
+ return bestBlockSize;
}
- return bestBlockSize;
-#endif
-}
+};
}} // namespace Kokkos::Impl
#endif // KOKKOS_HAVE_CUDA
#endif /* #ifndef KOKKOS_CUDA_INTERNAL_HPP */
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
index 3aea9be1d..003aac277 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
@@ -1,1862 +1,2048 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_PARALLEL_HPP
#define KOKKOS_CUDA_PARALLEL_HPP
#include <iostream>
+#include <algorithm>
#include <stdio.h>
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#include <utility>
#include <Kokkos_Parallel.hpp>
#include <Cuda/Kokkos_CudaExec.hpp>
#include <Cuda/Kokkos_Cuda_ReduceScan.hpp>
#include <Cuda/Kokkos_Cuda_Internal.hpp>
#include <Kokkos_Vectorization.hpp>
#ifdef KOKKOSP_ENABLE_PROFILING
#include <impl/Kokkos_Profiling_Interface.hpp>
#include <typeinfo>
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< typename Type >
struct CudaJoinFunctor {
typedef Type value_type ;
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
volatile const value_type & input )
{ update += input ; }
};
class CudaTeamMember {
private:
typedef Kokkos::Cuda execution_space ;
typedef execution_space::scratch_memory_space scratch_memory_space ;
void * m_team_reduce ;
scratch_memory_space m_team_shared ;
int m_league_rank ;
int m_league_size ;
public:
#if defined( __CUDA_ARCH__ )
__device__ inline
const execution_space::scratch_memory_space & team_shmem() const
{ return m_team_shared ; }
__device__ inline int league_rank() const { return m_league_rank ; }
__device__ inline int league_size() const { return m_league_size ; }
__device__ inline int team_rank() const { return threadIdx.y ; }
__device__ inline int team_size() const { return blockDim.y ; }
__device__ inline void team_barrier() const { __syncthreads(); }
template<class ValueType>
__device__ inline void team_broadcast(ValueType& value, const int& thread_id) const {
__shared__ ValueType sh_val;
if(threadIdx.x == 0 && threadIdx.y == thread_id) {
sh_val = value;
}
team_barrier();
value = sh_val;
}
#ifdef KOKKOS_HAVE_CXX11
template< class ValueType, class JoinOp >
__device__ inline
typename JoinOp::value_type team_reduce( const ValueType & value
, const JoinOp & op_in ) const
{
typedef JoinLambdaAdapter<ValueType,JoinOp> JoinOpFunctor ;
const JoinOpFunctor op(op_in);
ValueType * const base_data = (ValueType *) m_team_reduce ;
#else
template< class JoinOp >
__device__ inline
typename JoinOp::value_type team_reduce( const typename JoinOp::value_type & value
, const JoinOp & op ) const
{
typedef JoinOp JoinOpFunctor ;
typename JoinOp::value_type * const base_data = (typename JoinOp::value_type *) m_team_reduce ;
#endif
__syncthreads(); // Don't write into shared data until all threads have entered this function
if ( 0 == threadIdx.y ) { base_data[0] = 0 ; }
base_data[ threadIdx.y ] = value ;
Impl::cuda_intra_block_reduce_scan<false,JoinOpFunctor,void>( op , base_data );
return base_data[ blockDim.y - 1 ];
}
/** \brief Intra-team exclusive prefix sum with team_rank() ordering
* with intra-team non-deterministic ordering accumulation.
*
* The global inter-team accumulation value will, at the end of the
* league's parallel execution, be the scan's total.
* Parallel execution ordering of the league's teams is non-deterministic.
* As such the base value for each team's scan operation is similarly
* non-deterministic.
*/
template< typename Type >
__device__ inline Type team_scan( const Type & value , Type * const global_accum ) const
{
Type * const base_data = (Type *) m_team_reduce ;
__syncthreads(); // Don't write into shared data until all threads have entered this function
if ( 0 == threadIdx.y ) { base_data[0] = 0 ; }
base_data[ threadIdx.y + 1 ] = value ;
Impl::cuda_intra_block_reduce_scan<true,Impl::CudaJoinFunctor<Type>,void>( Impl::CudaJoinFunctor<Type>() , base_data + 1 );
if ( global_accum ) {
if ( blockDim.y == threadIdx.y + 1 ) {
base_data[ blockDim.y ] = atomic_fetch_add( global_accum , base_data[ blockDim.y ] );
}
__syncthreads(); // Wait for atomic
base_data[ threadIdx.y ] += base_data[ blockDim.y ] ;
}
return base_data[ threadIdx.y ];
}
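// Usage sketch (illustrative): stream compaction with a cross-team
// offset; 'predicate', 'global_offset', 'in', 'out', and 'my_index'
// are hypothetical.
//
//   const int flag = predicate ? 1 : 0 ;
//   const int idx = member.team_scan( flag , & global_offset );
//   if ( flag ) { out[ idx ] = in[ my_index ]; }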
/** \brief Intra-team exclusive prefix sum with team_rank() ordering.
*
* The highest rank thread can compute the reduction total as
* reduction_total = dev.team_scan( value ) + value ;
*/
template< typename Type >
__device__ inline Type team_scan( const Type & value ) const
{ return this->template team_scan<Type>( value , 0 ); }
//----------------------------------------
// Private for the driver
__device__ inline
CudaTeamMember( void * shared
, const int shared_begin
, const int shared_size
, const int arg_league_rank
, const int arg_league_size )
: m_team_reduce( shared )
, m_team_shared( ((char *)shared) + shared_begin , shared_size )
, m_league_rank( arg_league_rank )
, m_league_size( arg_league_size )
{}
#else
const execution_space::scratch_memory_space & team_shmem() const {return m_team_shared;}
int league_rank() const {return 0;}
int league_size() const {return 1;}
int team_rank() const {return 0;}
int team_size() const {return 1;}
void team_barrier() const {}
template<class ValueType>
void team_broadcast(ValueType& value, const int& thread_id) const {}
template< class JoinOp >
typename JoinOp::value_type team_reduce( const typename JoinOp::value_type & value
, const JoinOp & op ) const {return typename JoinOp::value_type();}
template< typename Type >
Type team_scan( const Type & value , Type * const global_accum ) const {return Type();}
template< typename Type >
Type team_scan( const Type & value ) const {return Type();}
//----------------------------------------
// Private for the driver
CudaTeamMember( void * shared
, const int shared_begin
, const int shared_end
, const int arg_league_rank
, const int arg_league_size );
#endif /* #if ! defined( __CUDA_ARCH__ ) */
};
} // namespace Impl
template< class Arg0 , class Arg1 >
class TeamPolicy< Arg0 , Arg1 , Kokkos::Cuda >
{
private:
enum { MAX_WARP = 8 };
const int m_league_size ;
const int m_team_size ;
const int m_vector_length ;
+ const size_t m_scratch_size ;
public:
//! Tag this class as a kokkos execution policy
typedef TeamPolicy execution_policy ;
//! Execution space of this execution policy
typedef Kokkos::Cuda execution_space ;
typedef typename
Impl::if_c< ! Impl::is_same< Kokkos::Cuda , Arg0 >::value , Arg0 , Arg1 >::type
work_tag ;
//----------------------------------------
template< class FunctorType >
inline static
int team_size_max( const FunctorType & functor )
{
int n = MAX_WARP * Impl::CudaTraits::WarpSize ;
for ( ; n ; n >>= 1 ) {
const int shmem_size =
/* for global reduce */ Impl::cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,work_tag>( functor , n )
/* for team reduce */ + ( n + 2 ) * sizeof(double)
/* for team shared */ + Impl::FunctorTeamShmemSize< FunctorType >::value( functor , n );
if ( shmem_size < Impl::CudaTraits::SharedMemoryCapacity ) break ;
}
return n ;
}
template< class FunctorType >
static int team_size_recommended( const FunctorType & functor )
{ return team_size_max( functor ); }
template< class FunctorType >
static int team_size_recommended( const FunctorType & functor , const int vector_length)
{
int max = team_size_max( functor )/vector_length;
if(max<1) max = 1;
return max;
}
inline static
int vector_length_max()
{ return Impl::CudaTraits::WarpSize; }
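// Query sketch (illustrative only): bound the team size by the functor's
// shared-memory footprint before constructing a policy. 'MyFunctor' is a
// hypothetical functor type.
//
// const int max_team = Kokkos::TeamPolicy< Kokkos::Cuda >::team_size_max( MyFunctor() );
// const int team     = Kokkos::TeamPolicy< Kokkos::Cuda >::team_size_recommended( MyFunctor() );
// const int max_vec  = Kokkos::TeamPolicy< Kokkos::Cuda >::vector_length_max(); // == WarpSize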
//----------------------------------------
inline int vector_length() const { return m_vector_length ; }
inline int team_size() const { return m_team_size ; }
inline int league_size() const { return m_league_size ; }
+ inline size_t scratch_size() const { return m_scratch_size ; }
/** \brief Specify league size, request team size */
- TeamPolicy( execution_space & , int league_size_ , int team_size_request , int vector_length_request = 1 )
+ TeamPolicy( execution_space &
+ , int league_size_
+ , int team_size_request
+ , int vector_length_request = 1 )
: m_league_size( league_size_ )
, m_team_size( team_size_request )
- , m_vector_length ( vector_length_request )
+ , m_vector_length( vector_length_request )
+ , m_scratch_size ( 0 )
{
// Allow only power-of-two vector_length
- int check = 0;
- for(int k = 1; k <= vector_length_max(); k*=2)
- if(k == vector_length_request)
- check = 1;
- if(!check)
+ if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) {
Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy.");
+ }
// Make sure league size is permissible
if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count()))
Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space.");
+
+ // Make sure total block size is permissible
+ if ( m_team_size * m_vector_length > 1024 ) {
+ Impl::throw_runtime_exception(std::string("Kokkos::TeamPolicy< Cuda > requested too large team size. Team size times vector length must not exceed 1024."));
+ }
}
- TeamPolicy( int league_size_ , int team_size_request , int vector_length_request = 1 )
+ /** \brief Specify league size, request team size */
+ TeamPolicy( execution_space &
+ , int league_size_
+ , const Kokkos::AUTO_t & /* team_size_request */
+ , int vector_length_request = 1 )
+ : m_league_size( league_size_ )
+ , m_team_size( -1 )
+ , m_vector_length( vector_length_request )
+ , m_scratch_size ( 0 )
+ {
+ // Allow only power-of-two vector_length
+ if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) {
+ Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy.");
+ }
+
+ // Make sure league size is permissible
+ if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count()))
+ Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space.");
+ }
+
+ TeamPolicy( int league_size_
+ , int team_size_request
+ , int vector_length_request = 1 )
: m_league_size( league_size_ )
, m_team_size( team_size_request )
, m_vector_length ( vector_length_request )
+ , m_scratch_size ( 0 )
{
// Allow only power-of-two vector_length
- int check = 0;
- for(int k = 1; k <= vector_length_max(); k*=2)
- if(k == vector_length_request)
- check = 1;
- if(!check)
+ if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) {
Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy.");
+ }
// Make sure league size is permissible
if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count()))
Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space.");
+ // Make sure total block size is permissible
+ if ( m_team_size * m_vector_length > 1024 ) {
+ Impl::throw_runtime_exception(std::string("Kokkos::TeamPolicy< Cuda > requested too large team size. Team size times vector length must not exceed 1024."));
+ }
+ }
+
+ TeamPolicy( int league_size_
+ , const Kokkos::AUTO_t & /* team_size_request */
+ , int vector_length_request = 1 )
+ : m_league_size( league_size_ )
+ , m_team_size( -1 )
+ , m_vector_length ( vector_length_request )
+ , m_scratch_size ( 0 )
+ {
+ // Allow only power-of-two vector_length
+ if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) {
+ Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy.");
+ }
+
+ // Make sure league size is permissible
+ if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count()))
+ Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space.");
+ }
+
+ template<class MemorySpace>
+ TeamPolicy( int league_size_
+ , int team_size_request
+ , const Experimental::TeamScratchRequest<MemorySpace> & scratch_request )
+ : m_league_size( league_size_ )
+ , m_team_size( team_size_request )
+ , m_vector_length( 1 )
+ , m_scratch_size(scratch_request.total(team_size_request))
+ {
+ // Allow only power-of-two vector_length
+ if ( ! Kokkos::Impl::is_integral_power_of_two( m_vector_length ) ) {
+ Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy.");
+ }
+
+ // Make sure league size is permissible
+ if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count()))
+ Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space.");
+
+ // Make sure total block size is permissible
+ if ( m_team_size * m_vector_length > 1024 ) {
+ Impl::throw_runtime_exception(std::string("Kokkos::TeamPolicy< Cuda > requested too large team size. Team size times vector length must not exceed 1024."));
+ }
+ }
+
+ template<class MemorySpace>
+ TeamPolicy( int league_size_
+ , const Kokkos::AUTO_t & /* team_size_request */
+ , const Experimental::TeamScratchRequest<MemorySpace> & scratch_request )
+ : m_league_size( league_size_ )
+ , m_team_size( 256 )
+ , m_vector_length ( 1 )
+ , m_scratch_size(scratch_request.total(256)) // matches m_team_size above
+ {
+ // Allow only power-of-two vector_length
+ if ( ! Kokkos::Impl::is_integral_power_of_two( m_vector_length ) ) {
+ Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy.");
+ }
+
+ // Make sure league size is permissible
+ if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count()))
+ Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space.");
}
typedef Kokkos::Impl::CudaTeamMember member_type ;
};
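// Construction sketch (illustrative only) for the overloads above: explicit
// team size, Kokkos::AUTO, and the experimental per-team scratch request.
// 'league', 'functor', 'ScratchSpace', and the request's constructor
// arguments are hypothetical.
//
// Kokkos::TeamPolicy< Kokkos::Cuda > p1( league , 128 , /* vector_length = */ 4 );
// Kokkos::TeamPolicy< Kokkos::Cuda > p2( league , Kokkos::AUTO , 4 );
// Kokkos::Experimental::TeamScratchRequest< ScratchSpace > request( 1024 /* bytes, hypothetical */ );
// Kokkos::TeamPolicy< Kokkos::Cuda > p3( league , 128 , request );
// Kokkos::parallel_for( p1 , functor );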
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelFor< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Cuda > >
+class ParallelFor< FunctorType
+ , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Cuda >
+ >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Cuda > Policy ;
+ typedef typename Policy::member_type Member ;
+ typedef typename Policy::work_tag WorkTag ;
const FunctorType m_functor ;
const Policy m_policy ;
- ParallelFor();
- ParallelFor & operator = ( const ParallelFor & );
+ ParallelFor() = delete ;
+ ParallelFor & operator = ( const ParallelFor & ) = delete ;
- template< class Tag >
- inline static
- __device__
- void driver( const FunctorType & functor
- , typename Impl::enable_if< Impl::is_same< Tag , void >::value
- , typename Policy::member_type const & >::type iwork
- )
- { functor( iwork ); }
+ template< class TagType >
+ inline __device__
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_range( const Member i ) const
+ { m_functor( i ); }
- template< class Tag >
- inline static
- __device__
- void driver( const FunctorType & functor
- , typename Impl::enable_if< ! Impl::is_same< Tag , void >::value
- , typename Policy::member_type const & >::type iwork
- )
- { functor( Tag() , iwork ); }
+ template< class TagType >
+ inline __device__
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_range( const Member i ) const
+ { m_functor( TagType() , i ); }
public:
typedef FunctorType functor_type ;
inline
__device__
void operator()(void) const
{
- const typename Policy::member_type work_stride = blockDim.y * gridDim.x ;
- const typename Policy::member_type work_end = m_policy.end();
+ const Member work_stride = blockDim.y * gridDim.x ;
+ const Member work_end = m_policy.end();
- for ( typename Policy::member_type
+ for ( Member
iwork = m_policy.begin() + threadIdx.y + blockDim.y * blockIdx.x ;
iwork < work_end ;
iwork += work_stride ) {
- ParallelFor::template driver< typename Policy::work_tag >( m_functor, iwork );
+ this-> template exec_range< WorkTag >( iwork );
}
}
- ParallelFor( const FunctorType & functor ,
- const Policy & policy )
- : m_functor( functor )
- , m_policy( policy )
+ inline
+ void execute() const
{
+ const int nwork = m_policy.end() - m_policy.begin();
const dim3 block( 1 , CudaTraits::WarpSize * cuda_internal_maximum_warp_count(), 1);
- const dim3 grid( std::min( ( int( policy.end() - policy.begin() ) + block.y - 1 ) / block.y
- , cuda_internal_maximum_grid_count() )
- , 1 , 1);
+ const dim3 grid( std::min( ( nwork + block.y - 1 ) / block.y , cuda_internal_maximum_grid_count() ) , 1 , 1);
CudaParallelLaunch< ParallelFor >( *this , grid , block , 0 );
}
+
+ ParallelFor( const FunctorType & arg_functor ,
+ const Policy & arg_policy )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ { }
};
template< class FunctorType , class Arg0 , class Arg1 >
-class ParallelFor< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Cuda > >
+class ParallelFor< FunctorType
+ , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Cuda >
+ >
{
private:
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Cuda > Policy ;
+ typedef typename Policy::member_type Member ;
+ typedef typename Policy::work_tag WorkTag ;
public:
typedef FunctorType functor_type ;
typedef Cuda::size_type size_type ;
private:
// Algorithmic constraints: blockDim.y is a power of two AND blockDim.z == 1
// shared memory utilization:
//
// [ team reduce space ]
// [ team shared space ]
//
const FunctorType m_functor ;
- size_type m_shmem_begin ;
- size_type m_shmem_size ;
- size_type m_league_size ;
+ const size_type m_league_size ;
+ const size_type m_team_size ;
+ const size_type m_vector_size ;
+ const size_type m_shmem_begin ;
+ const size_type m_shmem_size ;
template< class TagType >
__device__ inline
- void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
- const typename Policy::member_type & >::type member ) const
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_team( const Member & member ) const
{ m_functor( member ); }
template< class TagType >
__device__ inline
- void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
- const typename Policy::member_type & >::type member ) const
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_team( const Member & member ) const
{ m_functor( TagType() , member ); }
public:
__device__ inline
void operator()(void) const
{
// Iterate this block through the league
for ( int league_rank = blockIdx.x ; league_rank < m_league_size ; league_rank += gridDim.x ) {
- ParallelFor::template driver< typename Policy::work_tag >(
+ this-> template exec_team< WorkTag >(
typename Policy::member_type( kokkos_impl_cuda_shared_memory<void>()
, m_shmem_begin
, m_shmem_size
, league_rank
, m_league_size ) );
}
}
+ inline
+ void execute() const
+ {
+ const int shmem_size_total = m_shmem_begin + m_shmem_size ;
+ const dim3 grid( int(m_league_size) , 1 , 1 );
+ const dim3 block( int(m_vector_size) , int(m_team_size) , 1 );
- ParallelFor( const FunctorType & functor
- , const Policy & policy
- )
- : m_functor( functor )
- , m_shmem_begin( sizeof(double) * ( policy.team_size() + 2 ) )
- , m_shmem_size( FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() ) )
- , m_league_size( policy.league_size() )
- {
- // Functor's reduce memory, team scan memory, and team shared memory depend upon team size.
-
- const int shmem_size_total = m_shmem_begin + m_shmem_size ;
+ CudaParallelLaunch< ParallelFor >( *this, grid, block, shmem_size_total ); // copy to device and execute
- if ( CudaTraits::SharedMemoryCapacity < shmem_size_total ) {
- Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelFor< Cuda > insufficient shared memory"));
}
- const dim3 grid( int(policy.league_size()) , 1 , 1 );
- const dim3 block( policy.vector_length() , policy.team_size() , 1 );
+ ParallelFor( const FunctorType & arg_functor
+ , const Policy & arg_policy
+ )
+ : m_functor( arg_functor )
+ , m_league_size( arg_policy.league_size() )
+ , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
+ Kokkos::Impl::cuda_get_opt_block_size< ParallelFor >( arg_functor , arg_policy.vector_length(), arg_policy.scratch_size() ) / arg_policy.vector_length() )
+ , m_vector_size( arg_policy.vector_length() )
+ , m_shmem_begin( sizeof(double) * ( m_team_size + 2 ) )
+ , m_shmem_size( arg_policy.scratch_size() + FunctorTeamShmemSize< FunctorType >::value( m_functor , m_team_size ) )
+ {
+ // Functor's reduce memory, team scan memory, and team shared memory depend upon team size.
- CudaParallelLaunch< ParallelFor >( *this, grid, block, shmem_size_total ); // copy to device and execute
- }
+ const int shmem_size_total = m_shmem_begin + m_shmem_size ;
+
+ if ( CudaTraits::SharedMemoryCapacity < shmem_size_total ) {
+ Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelFor< Cuda > insufficient shared memory"));
+ }
+
+ if ( m_team_size >
+ Kokkos::Impl::cuda_get_max_block_size< ParallelFor >
+ ( arg_functor , arg_policy.vector_length(), arg_policy.scratch_size() ) / arg_policy.vector_length()) {
+ Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelFor< Cuda > requested too large team size."));
+ }
+ }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelReduce< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Cuda > >
+class ParallelReduce< FunctorType
+ , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Cuda >
+ >
{
private:
typedef Kokkos::RangePolicy<Arg0,Arg1,Arg2, Kokkos::Cuda > Policy ;
- typedef typename Policy::WorkRange work_range ;
- typedef typename Policy::work_tag work_tag ;
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , work_tag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , work_tag > ValueInit ;
+
+ typedef typename Policy::WorkRange WorkRange ;
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::member_type Member ;
+
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ;
public:
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::value_type value_type ;
typedef typename ValueTraits::reference_type reference_type ;
typedef FunctorType functor_type ;
typedef Cuda::size_type size_type ;
// Algorithmic constraints: blockDim.y is a power of two AND blockDim.x == blockDim.z == 1
- const FunctorType m_functor ;
- const Policy m_policy ;
- size_type * m_scratch_space ;
- size_type * m_scratch_flags ;
- size_type * m_unified_space ;
+ const FunctorType m_functor ;
+ const Policy m_policy ;
+ const pointer_type m_result_ptr ;
+ size_type * m_scratch_space ;
+ size_type * m_scratch_flags ;
+ size_type * m_unified_space ;
- // Determine block size constrained by shared memory:
- static inline
- unsigned local_block_size( const FunctorType & f )
- {
- unsigned n = CudaTraits::WarpSize * 8 ;
- while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,work_tag>( f , n ) ) { n >>= 1 ; }
- return n ;
- }
+ template< class TagType >
+ __device__ inline
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_range( const Member & i , reference_type update ) const
+ { m_functor( i , update ); }
- template< class Tag >
- inline static
- __device__
- void driver( const FunctorType & functor
- , typename Impl::enable_if< Impl::is_same< Tag , void >::value
- , typename Policy::member_type const & >::type iwork
- , reference_type value )
- { functor( iwork , value ); }
+ template< class TagType >
+ __device__ inline
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_range( const Member & i , reference_type update ) const
+ { m_functor( TagType() , i , update ); }
- template< class Tag >
- inline static
- __device__
- void driver( const FunctorType & functor
- , typename Impl::enable_if< ! Impl::is_same< Tag , void >::value
- , typename Policy::member_type const & >::type iwork
- , reference_type value )
- { functor( Tag() , iwork , value ); }
+#if ! defined( KOKKOS_EXPERIMENTAL_CUDA_SHFL_REDUCTION )
-#ifndef KOKKOS_EXPERIMENTAL_CUDA_SHFL_REDUCTION
__device__ inline
void operator()(void) const
{
const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
word_count( ValueTraits::value_size( m_functor ) / sizeof(size_type) );
{
reference_type value =
ValueInit::init( m_functor , kokkos_impl_cuda_shared_memory<size_type>() + threadIdx.y * word_count.value );
// Number of blocks is bounded so that the reduction can be limited to two passes.
// Each thread block is given an approximately equal amount of work to perform.
// Accumulate the values for this block.
// The accumulation ordering does not match the final pass, but is arithmetically equivalent.
- const work_range range( m_policy , blockIdx.x , gridDim.x );
+ const WorkRange range( m_policy , blockIdx.x , gridDim.x );
- for ( typename work_range::member_type iwork = range.begin() + threadIdx.y , iwork_end = range.end() ;
+ for ( Member iwork = range.begin() + threadIdx.y , iwork_end = range.end() ;
iwork < iwork_end ; iwork += blockDim.y ) {
- ParallelReduce::template driver< work_tag >( m_functor , iwork , value );
+ this-> template exec_range< WorkTag >( iwork , value );
}
}
// Reduce with final value at blockDim.y - 1 location.
- if ( cuda_single_inter_block_reduce_scan<false,FunctorType,work_tag>(
+ if ( cuda_single_inter_block_reduce_scan<false,FunctorType,WorkTag>(
m_functor , blockIdx.x , gridDim.x ,
kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags ) ) {
// This is the final block with the final result at the final threads' location
size_type * const shared = kokkos_impl_cuda_shared_memory<size_type>() + ( blockDim.y - 1 ) * word_count.value ;
size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ;
if ( threadIdx.y == 0 ) {
- Kokkos::Impl::FunctorFinal< FunctorType , work_tag >::final( m_functor , shared );
+ Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( m_functor , shared );
}
if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); }
for ( unsigned i = threadIdx.y ; i < word_count.value ; i += blockDim.y ) { global[i] = shared[i]; }
}
}
-#else
+
+#else /* defined( KOKKOS_EXPERIMENTAL_CUDA_SHFL_REDUCTION ) */
+
__device__ inline
void operator()(void) const
{
value_type value = 0;
// Number of blocks is bounded so that the reduction can be limited to two passes.
// Each thread block is given an approximately equal amount of work to perform.
// Accumulate the values for this block.
// The accumulation ordering does not match the final pass, but is arithmetically equivalent.
const Policy range( m_policy , blockIdx.x , gridDim.x );
- for ( typename Policy::member_type iwork = range.begin() + threadIdx.y , iwork_end = range.end() ;
+ for ( Member iwork = range.begin() + threadIdx.y , iwork_end = range.end() ;
iwork < iwork_end ; iwork += blockDim.y ) {
- ParallelReduce::template driver< work_tag >( m_functor , iwork , value );
+ this-> template exec_range< WorkTag >( iwork , value );
}
pointer_type const result = (pointer_type) (m_unified_space ? m_unified_space : m_scratch_space) ;
int max_active_thread = range.end() - range.begin() < blockDim.y ? range.end() - range.begin() : blockDim.y ;
max_active_thread = max_active_thread == 0 ? blockDim.y : max_active_thread ;
if(Impl::cuda_inter_block_reduction<FunctorType,Impl::JoinAdd<value_type> >
(value,Impl::JoinAdd<value_type>(),m_scratch_space,result,m_scratch_flags,max_active_thread)) {
const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
if(id==0) {
- Kokkos::Impl::FunctorFinal< FunctorType , work_tag >::final( m_functor , (void*) &value );
+ Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( m_functor , (void*) &value );
*result = value;
}
}
}
+
#endif
- template< class HostViewType >
- ParallelReduce( const FunctorType & functor
- , const Policy & policy
- , const HostViewType & result
- )
- : m_functor( functor )
- , m_policy( policy )
- , m_scratch_space( 0 )
- , m_scratch_flags( 0 )
- , m_unified_space( 0 )
- {
- const int block_size = local_block_size( functor );
- const int block_count = std::min( int(block_size)
- , ( int(policy.end() - policy.begin()) + block_size - 1 ) / block_size
- );
- m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( functor ) * block_count );
- m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) );
- m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( functor ) );
+ // Determine block size constrained by shared memory:
+ static inline
+ unsigned local_block_size( const FunctorType & f )
+ {
+ unsigned n = CudaTraits::WarpSize * 8 ;
+ while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( f , n ) ) { n >>= 1 ; }
+ return n ;
+ }
+
+ inline
+ void execute()
+ {
+ const int block_size = local_block_size( m_functor );
+
+ m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( m_functor ) * block_size /* block_size == max block_count */ );
+ m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) );
+ m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( m_functor ) );
+
+ const int nwork = m_policy.end() - m_policy.begin();
+ // REQUIRED ( 1 , N , 1 )
+ const dim3 block( 1 , block_size , 1 );
+ // Required grid.x <= block.y
+ const dim3 grid( std::min( int(block.y) , int( ( nwork + block.y - 1 ) / block.y ) ) , 1 , 1 );
- const dim3 grid( block_count , 1 , 1 );
- const dim3 block( 1 , block_size , 1 ); // REQUIRED DIMENSIONS ( 1 , N , 1 )
#ifdef KOKKOS_EXPERIMENTAL_CUDA_SHFL_REDUCTION
const int shmem = 0;
#else
- const int shmem = cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,work_tag>( m_functor , block.y );
+ const int shmem = cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( m_functor , block.y );
#endif
CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem ); // copy to device and execute
Cuda::fence();
- if ( result.ptr_on_device() ) {
+ if ( m_result_ptr ) {
if ( m_unified_space ) {
const int count = ValueTraits::value_count( m_functor );
- for ( int i = 0 ; i < count ; ++i ) { result.ptr_on_device()[i] = pointer_type(m_unified_space)[i] ; }
+ for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; }
}
else {
const int size = ValueTraits::value_size( m_functor );
- DeepCopy<HostSpace,CudaSpace>( result.ptr_on_device() , m_scratch_space , size );
+ DeepCopy<HostSpace,CudaSpace>( m_result_ptr , m_scratch_space , size );
}
}
}
+
+ template< class HostViewType >
+ ParallelReduce( const FunctorType & arg_functor
+ , const Policy & arg_policy
+ , const HostViewType & arg_result
+ )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ , m_result_ptr( arg_result.ptr_on_device() )
+ , m_scratch_space( 0 )
+ , m_scratch_flags( 0 )
+ , m_unified_space( 0 )
+ { }
};
+//----------------------------------------------------------------------------
+
template< class FunctorType , class Arg0 , class Arg1 >
-class ParallelReduce< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Cuda > >
+class ParallelReduce< FunctorType
+ , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Cuda >
+ >
{
private:
- typedef Kokkos::TeamPolicy<Arg0,Arg1,Kokkos::Cuda> Policy ;
- typedef typename Policy::work_tag work_tag ;
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , work_tag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , work_tag > ValueInit ;
- typedef typename ValueTraits::pointer_type pointer_type ;
- typedef typename ValueTraits::reference_type reference_type ;
+ typedef Kokkos::TeamPolicy<Arg0,Arg1,Kokkos::Cuda> Policy ;
+ typedef typename Policy::member_type Member ;
+ typedef typename Policy::work_tag WorkTag ;
+
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ;
+
+ typedef typename ValueTraits::pointer_type pointer_type ;
+ typedef typename ValueTraits::reference_type reference_type ;
public:
typedef FunctorType functor_type ;
typedef Cuda::size_type size_type ;
private:
// Algorithmic constraints: blockDim.y is a power of two AND blockDim.x == blockDim.z == 1
// shared memory utilization:
//
// [ global reduce space ]
// [ team reduce space ]
// [ team shared space ]
//
- const FunctorType m_functor ;
- size_type * m_scratch_space ;
- size_type * m_scratch_flags ;
- size_type * m_unified_space ;
- size_type m_team_begin ;
- size_type m_shmem_begin ;
- size_type m_shmem_size ;
- size_type m_league_size ;
+ const FunctorType m_functor ;
+ const pointer_type m_result_ptr ;
+ size_type * m_scratch_space ;
+ size_type * m_scratch_flags ;
+ size_type * m_unified_space ;
+ size_type m_team_begin ;
+ size_type m_shmem_begin ;
+ size_type m_shmem_size ;
+ const size_type m_league_size ;
+ const size_type m_team_size ;
template< class TagType >
__device__ inline
- void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
- const typename Policy::member_type & >::type member
- , reference_type update ) const
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_team( const Member & member , reference_type update ) const
{ m_functor( member , update ); }
template< class TagType >
__device__ inline
- void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
- const typename Policy::member_type & >::type member
- , reference_type update ) const
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_team( const Member & member , reference_type update ) const
{ m_functor( TagType() , member , update ); }
public:
__device__ inline
void operator()(void) const
{
const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
word_count( ValueTraits::value_size( m_functor ) / sizeof(size_type) );
reference_type value =
ValueInit::init( m_functor , kokkos_impl_cuda_shared_memory<size_type>() + threadIdx.y * word_count.value );
// Iterate this block through the league
for ( int league_rank = blockIdx.x ; league_rank < m_league_size ; league_rank += gridDim.x ) {
-
- ParallelReduce::template driver< work_tag >
- ( typename Policy::member_type( kokkos_impl_cuda_shared_memory<char>() + m_team_begin
+ this-> template exec_team< WorkTag >
+ ( Member( kokkos_impl_cuda_shared_memory<char>() + m_team_begin
, m_shmem_begin
, m_shmem_size
, league_rank
, m_league_size )
, value );
}
// Reduce with final value at blockDim.y - 1 location.
- if ( cuda_single_inter_block_reduce_scan<false,FunctorType,work_tag>(
+ if ( cuda_single_inter_block_reduce_scan<false,FunctorType,WorkTag>(
m_functor , blockIdx.x , gridDim.x ,
kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags ) ) {
// This is the final block with the final result at the final threads' location
size_type * const shared = kokkos_impl_cuda_shared_memory<size_type>() + ( blockDim.y - 1 ) * word_count.value ;
size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ;
if ( threadIdx.y == 0 ) {
- Kokkos::Impl::FunctorFinal< FunctorType , work_tag >::final( m_functor , shared );
+ Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( m_functor , shared );
}
if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); }
for ( unsigned i = threadIdx.y ; i < word_count.value ; i += blockDim.y ) { global[i] = shared[i]; }
}
}
+ inline
+ void execute()
+ {
+ const int block_count = std::min( m_league_size , m_team_size );
+
+ m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( m_functor ) * block_count );
+ m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) );
+ m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( m_functor ) );
+
+ // REQUIRED DIMENSIONS ( 1 , N , 1 )
+ const dim3 block( 1 , m_team_size , 1 );
+ const dim3 grid( std::min( int(m_league_size) , int(m_team_size) ) , 1 , 1 );
+ const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ;
+
+ CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem_size_total ); // copy to device and execute
+
+ Cuda::fence();
+
+ if ( m_result_ptr ) {
+ if ( m_unified_space ) {
+ const int count = ValueTraits::value_count( m_functor );
+ for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; }
+ }
+ else {
+ const int size = ValueTraits::value_size( m_functor );
+ DeepCopy<HostSpace,CudaSpace>( m_result_ptr, m_scratch_space, size );
+ }
+ }
+ }
template< class HostViewType >
- ParallelReduce( const FunctorType & functor
- , const Policy & policy
- , const HostViewType & result
+ ParallelReduce( const FunctorType & arg_functor
+ , const Policy & arg_policy
+ , const HostViewType & arg_result
)
- : m_functor( functor )
+ : m_functor( arg_functor )
+ , m_result_ptr( arg_result.ptr_on_device() )
, m_scratch_space( 0 )
, m_scratch_flags( 0 )
, m_unified_space( 0 )
- , m_team_begin( cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,work_tag>( functor , policy.team_size() ) )
- , m_shmem_begin( sizeof(double) * ( policy.team_size() + 2 ) )
- , m_shmem_size( FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() ) )
- , m_league_size( policy.league_size() )
+ , m_team_begin( 0 )
+ , m_shmem_begin( 0 )
+ , m_shmem_size( 0 )
+ , m_league_size( arg_policy.league_size() )
+ , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
+ Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(), arg_policy.scratch_size() ) / arg_policy.vector_length() )
{
+ // Return the init value if the number of worksets is zero
+ if( arg_policy.league_size() == 0) {
+ ValueInit::init( m_functor , arg_result.ptr_on_device() );
+ return ;
+ }
+
+ m_team_begin = cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( arg_functor , m_team_size );
+ m_shmem_begin = sizeof(double) * ( m_team_size + 2 );
+ m_shmem_size = arg_policy.scratch_size() + FunctorTeamShmemSize< FunctorType >::value( arg_functor , m_team_size );
// The global parallel_reduce does not support vector_length other than 1 at the moment
- if(policy.vector_length() > 1)
+ if( arg_policy.vector_length() > 1)
Impl::throw_runtime_exception( "Kokkos::parallel_reduce with a TeamPolicy using a vector length of greater than 1 is not currently supported for CUDA.");
+ if( m_team_size < 32)
+ Impl::throw_runtime_exception( "Kokkos::parallel_reduce with a TeamPolicy using a team_size smaller than 32 is not currently supported with CUDA.");
+
// Functor's reduce memory, team scan memory, and team shared memory depend upon team size.
const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ;
- const int not_power_of_two = 0 != ( policy.team_size() & ( policy.team_size() - 1 ) );
- if ( not_power_of_two || CudaTraits::SharedMemoryCapacity < shmem_size_total ) {
+ if ( ! Kokkos::Impl::is_integral_power_of_two( m_team_size ) ||
+ CudaTraits::SharedMemoryCapacity < shmem_size_total ) {
Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size"));
}
- const int block_count = std::min( policy.league_size() , policy.team_size() );
-
- m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( functor ) * block_count );
- m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) );
- m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( functor ) );
-
- const dim3 grid( block_count , 1 , 1 );
- const dim3 block( 1 , policy.team_size() , 1 ); // REQUIRED DIMENSIONS ( 1 , N , 1 )
-
- CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem_size_total ); // copy to device and execute
-
- Cuda::fence();
-
- if ( result.ptr_on_device() ) {
- if ( m_unified_space ) {
- const int count = ValueTraits::value_count( m_functor );
- for ( int i = 0 ; i < count ; ++i ) { result.ptr_on_device()[i] = pointer_type(m_unified_space)[i] ; }
- }
- else {
- const int size = ValueTraits::value_size( m_functor );
- DeepCopy<HostSpace,CudaSpace>( result.ptr_on_device() , m_scratch_space , size );
- }
+ if ( m_team_size >
+ Kokkos::Impl::cuda_get_max_block_size< ParallelReduce >
+ ( arg_functor , arg_policy.vector_length(), arg_policy.scratch_size() ) / arg_policy.vector_length()) {
+ Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too large team size."));
}
+
}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelScan< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Cuda > >
+class ParallelScan< FunctorType
+ , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Cuda >
+ >
{
private:
- typedef Kokkos::RangePolicy<Arg0,Arg1,Arg2, Kokkos::Cuda > Policy ;
- typedef typename Policy::WorkRange work_range ;
- typedef typename Policy::work_tag work_tag ;
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , work_tag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , work_tag > ValueInit ;
- typedef Kokkos::Impl::FunctorValueOps< FunctorType , work_tag > ValueOps ;
+ typedef Kokkos::RangePolicy<Arg0,Arg1,Arg2,Kokkos::Cuda> Policy ;
+ typedef typename Policy::member_type Member ;
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::WorkRange WorkRange ;
+
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ;
+ typedef Kokkos::Impl::FunctorValueOps< FunctorType, WorkTag > ValueOps ;
public:
+
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
typedef FunctorType functor_type ;
typedef Cuda::size_type size_type ;
+private:
+
// Algorithmic constraints:
// (a) blockDim.y is a power of two
// (b) blockDim.x == blockDim.z == 1
// (c) gridDim.x <= blockDim.y * blockDim.y
// (d) gridDim.y == gridDim.z == 1
- // Determine block size constrained by shared memory:
- static inline
- unsigned local_block_size( const FunctorType & f )
- {
- // blockDim.y must be power of two = 128 (4 warps) or 256 (8 warps) or 512 (16 warps)
- // gridDim.x <= blockDim.y * blockDim.y
- //
- // 4 warps was 10% faster than 8 warps and 20% faster than 16 warps in unit testing
-
- unsigned n = CudaTraits::WarpSize * 4 ;
- while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,work_tag>( f , n ) ) { n >>= 1 ; }
- return n ;
- }
-
const FunctorType m_functor ;
const Policy m_policy ;
size_type * m_scratch_space ;
size_type * m_scratch_flags ;
- size_type m_final ;
-
- template< class Tag >
- inline static
- __device__
- void driver( const FunctorType & functor
- , typename Impl::enable_if< Impl::is_same< Tag , void >::value
- , typename Policy::member_type const & >::type iwork
- , reference_type value
- , const bool final )
- { functor( iwork , value , final ); }
-
- template< class Tag >
- inline static
- __device__
- void driver( const FunctorType & functor
- , typename Impl::enable_if< ! Impl::is_same< Tag , void >::value
- , typename Policy::member_type const & >::type iwork
- , reference_type value
- , const bool final )
- { functor( Tag() , iwork , value , final ); }
+ size_type m_final ;
+
+ template< class TagType >
+ __device__ inline
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_range( const Member & i , reference_type update , const bool final ) const
+ { m_functor( i , update , final ); }
+
+ template< class TagType >
+ __device__ inline
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_range( const Member & i , reference_type update , const bool final ) const
+ { m_functor( TagType() , i , update , final ); }
//----------------------------------------
__device__ inline
void initial(void) const
{
const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
word_count( ValueTraits::value_size( m_functor ) / sizeof(size_type) );
size_type * const shared_value = kokkos_impl_cuda_shared_memory<size_type>() + word_count.value * threadIdx.y ;
ValueInit::init( m_functor , shared_value );
// Number of blocks is bounded so that the reduction can be limited to two passes.
// Each thread block is given an approximately equal amount of work to perform.
// Accumulate the values for this block.
// The accumulation ordering does not match the final pass, but is arithmetically equivalent.
- const work_range range( m_policy , blockIdx.x , gridDim.x );
+ const WorkRange range( m_policy , blockIdx.x , gridDim.x );
- for ( typename Policy::member_type iwork = range.begin() + threadIdx.y , iwork_end = range.end() ;
+ for ( Member iwork = range.begin() + threadIdx.y , iwork_end = range.end() ;
iwork < iwork_end ; iwork += blockDim.y ) {
- ParallelScan::template driver< work_tag >
- ( m_functor , iwork , ValueOps::reference( shared_value ) , false );
+ this-> template exec_range< WorkTag >( iwork , ValueOps::reference( shared_value ) , false );
}
// Reduce and scan, writing out scan of blocks' totals and block-groups' totals.
// Blocks' scan values are written to 'blockIdx.x' location.
// Block-groups' scan values are at: i = ( j * blockDim.y - 1 ) for i < gridDim.x
- cuda_single_inter_block_reduce_scan<true,FunctorType,work_tag>( m_functor , blockIdx.x , gridDim.x , kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags );
+ cuda_single_inter_block_reduce_scan<true,FunctorType,WorkTag>( m_functor , blockIdx.x , gridDim.x , kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags );
}
//----------------------------------------
__device__ inline
void final(void) const
{
const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
word_count( ValueTraits::value_size( m_functor ) / sizeof(size_type) );
// Use shared memory as an exclusive scan: { 0 , value[0] , value[1] , value[2] , ... }
size_type * const shared_data = kokkos_impl_cuda_shared_memory<size_type>();
size_type * const shared_prefix = shared_data + word_count.value * threadIdx.y ;
size_type * const shared_accum = shared_data + word_count.value * ( blockDim.y + 1 );
// Starting value for this thread block is the previous block's total.
if ( blockIdx.x ) {
size_type * const block_total = m_scratch_space + word_count.value * ( blockIdx.x - 1 );
for ( unsigned i = threadIdx.y ; i < word_count.value ; ++i ) { shared_accum[i] = block_total[i] ; }
}
else if ( 0 == threadIdx.y ) {
ValueInit::init( m_functor , shared_accum );
}
- const work_range range( m_policy , blockIdx.x , gridDim.x );
+ const WorkRange range( m_policy , blockIdx.x , gridDim.x );
for ( typename Policy::member_type iwork_base = range.begin(); iwork_base < range.end() ; iwork_base += blockDim.y ) {
const typename Policy::member_type iwork = iwork_base + threadIdx.y ;
__syncthreads(); // Don't overwrite previous iteration values until they are used
ValueInit::init( m_functor , shared_prefix + word_count.value );
// Copy previous block's accumulation total into thread[0] prefix and inclusive scan value of this block
for ( unsigned i = threadIdx.y ; i < word_count.value ; ++i ) {
shared_data[i + word_count.value] = shared_data[i] = shared_accum[i] ;
}
if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } // Protect against large scan values.
// Call functor to accumulate inclusive scan value for this work item
if ( iwork < range.end() ) {
- ParallelScan::template driver< work_tag >
- ( m_functor , iwork , ValueOps::reference( shared_prefix + word_count.value ) , false );
+ this-> template exec_range< WorkTag >( iwork , ValueOps::reference( shared_prefix + word_count.value ) , false );
}
// Scan block values into locations shared_data[1..blockDim.y]
- cuda_intra_block_reduce_scan<true,FunctorType,work_tag>( m_functor , ValueTraits::pointer_type(shared_data+word_count.value) );
+ cuda_intra_block_reduce_scan<true,FunctorType,WorkTag>( m_functor , ValueTraits::pointer_type(shared_data+word_count.value) );
{
size_type * const block_total = shared_data + word_count.value * blockDim.y ;
for ( unsigned i = threadIdx.y ; i < word_count.value ; ++i ) { shared_accum[i] = block_total[i]; }
}
// Call functor with exclusive scan value
if ( iwork < range.end() ) {
- ParallelScan::template driver< work_tag >
- ( m_functor , iwork , ValueOps::reference( shared_prefix ) , true );
+ this-> template exec_range< WorkTag >( iwork , ValueOps::reference( shared_prefix ) , true );
}
}
}
+public:
+
//----------------------------------------
__device__ inline
void operator()(void) const
{
if ( ! m_final ) {
initial();
}
else {
final();
}
}
- ParallelScan( const FunctorType & functor ,
- const Policy & policy )
- : m_functor( functor )
- , m_policy( policy )
- , m_scratch_space( 0 )
- , m_scratch_flags( 0 )
- , m_final( false )
- {
- enum { GridMaxComputeCapability_2x = 0x0ffff };
+ // Determine block size constrained by shared memory:
+ static inline
+ unsigned local_block_size( const FunctorType & f )
+ {
+ // blockDim.y must be power of two = 128 (4 warps) or 256 (8 warps) or 512 (16 warps)
+ // gridDim.x <= blockDim.y * blockDim.y
+ //
+ // 4 warps was 10% faster than 8 warps and 20% faster than 16 warps in unit testing
- const int block_size = local_block_size( functor );
+ unsigned n = CudaTraits::WarpSize * 4 ;
+ while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( f , n ) ) { n >>= 1 ; }
+ return n ;
+ }
- const int grid_max = ( block_size * block_size ) < GridMaxComputeCapability_2x ?
- ( block_size * block_size ) : GridMaxComputeCapability_2x ;
+ inline
+ void execute()
+ {
+ enum { GridMaxComputeCapability_2x = 0x0ffff };
- // At most 'max_grid' blocks:
- const int nwork = policy.end() - policy.begin();
- const int max_grid = std::min( int(grid_max) , int(( nwork + block_size - 1 ) / block_size ));
+ const int block_size = local_block_size( m_functor );
- // How much work per block:
- const int work_per_block = ( nwork + max_grid - 1 ) / max_grid ;
+ const int grid_max =
+ ( block_size * block_size ) < GridMaxComputeCapability_2x ?
+ ( block_size * block_size ) : GridMaxComputeCapability_2x ;
- // How many block are really needed for this much work:
- const dim3 grid( ( nwork + work_per_block - 1 ) / work_per_block , 1 , 1 );
- const dim3 block( 1 , block_size , 1 ); // REQUIRED DIMENSIONS ( 1 , N , 1 )
- const int shmem = ValueTraits::value_size( functor ) * ( block_size + 2 );
+ // At most 'max_grid' blocks:
+ const int nwork = m_policy.end() - m_policy.begin();
+ const int max_grid = std::min( int(grid_max) , int(( nwork + block_size - 1 ) / block_size ));
- m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( functor ) * grid.x );
- m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) * 1 );
+ // How much work per block:
+ const int work_per_block = ( nwork + max_grid - 1 ) / max_grid ;
- m_final = false ;
- CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute
+ // How many blocks are really needed for this much work:
+ const int grid_x = ( nwork + work_per_block - 1 ) / work_per_block ;
- m_final = true ;
- CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute
- }
+ m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( m_functor ) * grid_x );
+ m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) * 1 );
+
+ const dim3 grid( grid_x , 1 , 1 );
+ const dim3 block( 1 , block_size , 1 ); // REQUIRED DIMENSIONS ( 1 , N , 1 )
+ const int shmem = ValueTraits::value_size( m_functor ) * ( block_size + 2 );
- void wait() const { Cuda::fence(); }
+ m_final = false ;
+ CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute
+
+ m_final = true ;
+ CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute
+ }
+
+ ParallelScan( const FunctorType & arg_functor ,
+ const Policy & arg_policy )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ , m_scratch_space( 0 )
+ , m_scratch_flags( 0 )
+ , m_final( false )
+ { }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template<typename iType>
struct TeamThreadRangeBoundariesStruct<iType,CudaTeamMember> {
typedef iType index_type;
const iType start;
const iType end;
const iType increment;
const CudaTeamMember& thread;
#ifdef __CUDA_ARCH__
__device__ inline
TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count):
start( threadIdx.y ),
end( count ),
increment( blockDim.y ),
thread(thread_)
{}
__device__ inline
TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& begin_, const iType& end_):
start( begin_+threadIdx.y ),
end( end_ ),
increment( blockDim.y ),
thread(thread_)
{}
#else
KOKKOS_INLINE_FUNCTION
TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count):
start( 0 ),
end( count ),
increment( 1 ),
thread(thread_)
{}
KOKKOS_INLINE_FUNCTION
TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& begin_, const iType& end_):
start( begin_ ),
end( end_ ),
increment( 1 ),
thread(thread_)
{}
#endif
};
template<typename iType>
struct ThreadVectorRangeBoundariesStruct<iType,CudaTeamMember> {
typedef iType index_type;
const iType start;
const iType end;
const iType increment;
#ifdef __CUDA_ARCH__
__device__ inline
ThreadVectorRangeBoundariesStruct (const CudaTeamMember& thread, const iType& count):
start( threadIdx.x ),
end( count ),
increment( blockDim.x )
{}
#else
KOKKOS_INLINE_FUNCTION
ThreadVectorRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count):
start( 0 ),
end( count ),
increment( 1 )
{}
#endif
};
} // namespace Impl
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>
TeamThreadRange(const Impl::CudaTeamMember& thread, const iType& count) {
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>(thread,count);
}
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>
TeamThreadRange(const Impl::CudaTeamMember& thread, const iType& begin, const iType& end) {
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>(thread,begin,end);
}
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >
ThreadVectorRange(const Impl::CudaTeamMember& thread, const iType& count) {
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >(thread,count);
}
KOKKOS_INLINE_FUNCTION
Impl::ThreadSingleStruct<Impl::CudaTeamMember> PerTeam(const Impl::CudaTeamMember& thread) {
return Impl::ThreadSingleStruct<Impl::CudaTeamMember>(thread);
}
KOKKOS_INLINE_FUNCTION
Impl::VectorSingleStruct<Impl::CudaTeamMember> PerThread(const Impl::CudaTeamMember& thread) {
return Impl::VectorSingleStruct<Impl::CudaTeamMember>(thread);
}
} // namespace Kokkos
namespace Kokkos {
/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all threads of the calling thread team.
* This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>& loop_boundaries, const Lambda& lambda) {
#ifdef __CUDA_ARCH__
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
lambda(i);
#endif
}
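// Usage sketch (illustrative only), inside a team functor where 'member' is
// the CudaTeamMember and 'x', 'y', and 'a' are hypothetical views/scalars:
//
// Kokkos::parallel_for( Kokkos::TeamThreadRange( member , N ) ,
//   [&] ( const int i ) { y(i) = a * x(i) ; } );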
/** \brief Inter-thread parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all threads of the calling thread team and a summation of
* val is performed and put into result. This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>& loop_boundaries,
const Lambda & lambda, ValueType& result) {
#ifdef __CUDA_ARCH__
result = ValueType();
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
lambda(i,result);
}
Impl::cuda_intra_warp_reduction(result,[&] (ValueType& dst, const ValueType& src) { dst+=src; });
Impl::cuda_inter_warp_reduction(result,[&] (ValueType& dst, const ValueType& src) { dst+=src; });
#endif
}
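// Usage sketch (illustrative only): team-wide summation; 'a' is a
// hypothetical view and 'member' the calling CudaTeamMember.
//
// double team_sum = 0 ;
// Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member , N ) ,
//   [&] ( const int i , double & partial ) { partial += a(i) ; } , team_sum );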
/** \brief Inter-thread parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all threads of the calling thread team and a reduction of
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
* '1 for *'). This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>& loop_boundaries,
const Lambda & lambda, const JoinType& join, ValueType& init_result) {
#ifdef __CUDA_ARCH__
ValueType result = init_result;
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
lambda(i,result);
}
Impl::cuda_intra_warp_reduction(result, join );
Impl::cuda_inter_warp_reduction(result, join );
init_result = result;
#endif
}
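// Usage sketch (illustrative only): a max-reduction expressed through the
// join argument; the initial value must be the join's neutral element. 'a'
// is a hypothetical view.
//
// double team_max = -DBL_MAX ;
// Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member , N ) ,
//   [&] ( const int i , double & partial ) { if ( a(i) > partial ) partial = a(i) ; } ,
//   [] ( double & dst , const double & src ) { if ( src > dst ) dst = src ; } ,
//   team_max );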
} //namespace Kokkos
namespace Kokkos {
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the calling thread.
* This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef __CUDA_ARCH__
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
lambda(i);
#endif
}
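// Usage sketch (illustrative only), nested inside a TeamThreadRange body so
// the vector lanes split the inner loop; 'A', 'B', and 's' are hypothetical.
//
// Kokkos::parallel_for( Kokkos::ThreadVectorRange( member , M ) ,
//   [&] ( const int j ) { B(i,j) = s * A(i,j) ; } );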
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the calling thread and a summation of
* val is performed and put into result. This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
#ifdef __CUDA_ARCH__
ValueType val = ValueType();
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
lambda(i,val);
}
result = val;
if (loop_boundaries.increment > 1)
result += shfl_down(result, 1,loop_boundaries.increment);
if (loop_boundaries.increment > 2)
result += shfl_down(result, 2,loop_boundaries.increment);
if (loop_boundaries.increment > 4)
result += shfl_down(result, 4,loop_boundaries.increment);
if (loop_boundaries.increment > 8)
result += shfl_down(result, 8,loop_boundaries.increment);
if (loop_boundaries.increment > 16)
result += shfl_down(result, 16,loop_boundaries.increment);
result = shfl(result,0,loop_boundaries.increment);
#endif
}
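// Usage sketch (illustrative only): a row dot-product reduced across the
// vector lanes of one thread; the shuffle cascade above leaves the result
// valid in every lane. 'A' and 'x' are hypothetical views.
//
// double row_dot = 0 ;
// Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( member , M ) ,
//   [&] ( const int j , double & partial ) { partial += A(i,j) * x(j) ; } , row_dot );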
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the calling thread and a reduction of
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
* '1 for *'). This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >&
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
#ifdef __CUDA_ARCH__
ValueType result = init_result;
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
lambda(i,result);
}
if (loop_boundaries.increment > 1)
join( result, shfl_down(result, 1,loop_boundaries.increment));
if (loop_boundaries.increment > 2)
join( result, shfl_down(result, 2,loop_boundaries.increment));
if (loop_boundaries.increment > 4)
join( result, shfl_down(result, 4,loop_boundaries.increment));
if (loop_boundaries.increment > 8)
join( result, shfl_down(result, 8,loop_boundaries.increment));
if (loop_boundaries.increment > 16)
join( result, shfl_down(result, 16,loop_boundaries.increment));
init_result = shfl(result,0,loop_boundaries.increment);
#endif
}
/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final)
* for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed.
* Depending on the target execution space the operator might be called twice: once with final=false
* and once with final=true. When final==true val contains the prefix sum value. The contribution of this
* "i" needs to be added to val no matter whether final==true or not. In a serial execution
* (i.e. vector_length==1) the operator is only called once with final==true. scan_val will be set
* to the final sum value over all vector lanes.
* This functionality requires C++11 support.*/
template< typename iType, class FunctorType >
KOKKOS_INLINE_FUNCTION
void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >&
loop_boundaries, const FunctorType & lambda) {
#ifdef __CUDA_ARCH__
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
typedef typename ValueTraits::value_type value_type ;
value_type scan_val = value_type();
const int VectorLength = blockDim.x;
iType loop_bound = ((loop_boundaries.end+VectorLength-1)/VectorLength) * VectorLength;
for(int _i = threadIdx.x; _i < loop_bound; _i += VectorLength) {
value_type val = value_type();
if(_i<loop_boundaries.end)
lambda(_i , val , false);
value_type tmp = val;
value_type result_i;
if(threadIdx.x%VectorLength == 0)
result_i = tmp;
if (VectorLength > 1) {
const value_type tmp2 = shfl_up(tmp, 1,VectorLength);
if(threadIdx.x > 0)
tmp+=tmp2;
}
if(threadIdx.x%VectorLength == 1)
result_i = tmp;
if (VectorLength > 3) {
const value_type tmp2 = shfl_up(tmp, 2,VectorLength);
if(threadIdx.x > 1)
tmp+=tmp2;
}
if ((threadIdx.x%VectorLength >= 2) &&
(threadIdx.x%VectorLength < 4))
result_i = tmp;
if (VectorLength > 7) {
const value_type tmp2 = shfl_up(tmp, 4,VectorLength);
if(threadIdx.x > 3)
tmp+=tmp2;
}
if ((threadIdx.x%VectorLength >= 4) &&
(threadIdx.x%VectorLength < 8))
result_i = tmp;
if (VectorLength > 15) {
const value_type tmp2 = shfl_up(tmp, 8,VectorLength);
if(threadIdx.x > 7)
tmp+=tmp2;
}
if ((threadIdx.x%VectorLength >= 8) &&
(threadIdx.x%VectorLength < 16))
result_i = tmp;
if (VectorLength > 31) {
const value_type tmp2 = shfl_up(tmp, 16,VectorLength);
if(threadIdx.x > 15)
tmp+=tmp2;
}
if (threadIdx.x%VectorLength >= 16)
result_i = tmp;
val = scan_val + result_i - val;
scan_val += shfl(tmp,VectorLength-1,VectorLength);
if(_i<loop_boundaries.end)
lambda(_i , val , true);
}
#endif
}
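/* Example (sketch, same hypothetical names): in-place exclusive prefix sum of
 * row i, following the two-pass contract documented above: the contribution is
 * accumulated on both passes; the write happens only when final==true.
 *
 *   Kokkos::parallel_scan( Kokkos::ThreadVectorRange( thread , M ) ,
 *     [&]( const int j , double & update , const bool final ) {
 *       const double input = A(i,j);
 *       if ( final ) A(i,j) = update ;  // exclusive prefix of row i
 *       update += input ;
 *     });
 */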
}
namespace Kokkos {
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::VectorSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda) {
#ifdef __CUDA_ARCH__
if(threadIdx.x == 0) lambda();
#endif
}
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::ThreadSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda) {
#ifdef __CUDA_ARCH__
if(threadIdx.x == 0 && threadIdx.y == 0) lambda();
#endif
}
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::VectorSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda, ValueType& val) {
#ifdef __CUDA_ARCH__
if(threadIdx.x == 0) lambda(val);
val = shfl(val,0,blockDim.x);
#endif
}
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::ThreadSingleStruct<Impl::CudaTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) {
#ifdef __CUDA_ARCH__
if(threadIdx.x == 0 && threadIdx.y == 0) {
lambda(val);
}
single_struct.team_member.team_broadcast(val,0);
#endif
}
}
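/* Example (sketch, same hypothetical names): compute a value once per thread
 * (i.e. on a single vector lane) and broadcast it to the other lanes.
 * Kokkos::PerThread selects the VectorSingleStruct overloads above, while
 * Kokkos::PerTeam selects the once-per-team ThreadSingleStruct overloads.
 *
 *   double scale = 0 ;
 *   Kokkos::single( Kokkos::PerThread( thread ) ,
 *     [&]( double & v ) { v = 1.0 / A(i,0); } , scale );
 *   // every vector lane of this thread now sees the same 'scale'
 */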
namespace Kokkos {
namespace Impl {
template< class FunctorType, class ExecPolicy, class ValueType , class Tag = typename ExecPolicy::work_tag>
struct CudaFunctorAdapter {
const FunctorType f;
typedef ValueType value_type;
CudaFunctorAdapter(const FunctorType& f_):f(f_) {}
__device__ inline
void operator() (typename ExecPolicy::work_tag, const typename ExecPolicy::member_type& i, ValueType& val) const {
//TODO: static_assert (via decltype) that ValueType equals the third argument type of FunctorType::operator()
f(typename ExecPolicy::work_tag(), i,val);
}
};
template< class FunctorType, class ExecPolicy, class ValueType >
struct CudaFunctorAdapter<FunctorType,ExecPolicy,ValueType,void> {
const FunctorType f;
typedef ValueType value_type;
CudaFunctorAdapter(const FunctorType& f_):f(f_) {}
__device__ inline
void operator() (const typename ExecPolicy::member_type& i, ValueType& val) const {
//TODO: static_assert (via decltype) that ValueType equals the second argument type of FunctorType::operator()
f(i,val);
}
};
template< class FunctorType, class Enable = void>
struct ReduceFunctorHasInit {
enum {value = false};
};
template< class FunctorType>
struct ReduceFunctorHasInit<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::init ) >::type > {
enum {value = true};
};
template< class FunctorType, class Enable = void>
struct ReduceFunctorHasJoin {
enum {value = false};
};
template< class FunctorType>
struct ReduceFunctorHasJoin<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::join ) >::type > {
enum {value = true};
};
template< class FunctorType, class Enable = void>
struct ReduceFunctorHasFinal {
enum {value = false};
};
template< class FunctorType>
struct ReduceFunctorHasFinal<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::final ) >::type > {
enum {value = true};
};
template< class FunctorType, bool Enable =
( FunctorDeclaresValueType<FunctorType,void>::value) ||
( ReduceFunctorHasInit<FunctorType>::value ) ||
( ReduceFunctorHasJoin<FunctorType>::value ) ||
( ReduceFunctorHasFinal<FunctorType>::value )
>
struct IsNonTrivialReduceFunctor {
enum {value = false};
};
template< class FunctorType>
struct IsNonTrivialReduceFunctor<FunctorType, true> {
enum {value = true};
};
template<class FunctorType, class ResultType, class Tag, bool Enable = IsNonTrivialReduceFunctor<FunctorType>::value >
struct FunctorReferenceType {
typedef ResultType& reference_type;
};
template<class FunctorType, class ResultType, class Tag>
struct FunctorReferenceType<FunctorType, ResultType, Tag, true> {
typedef typename Kokkos::Impl::FunctorValueTraits< FunctorType ,Tag >::reference_type reference_type;
};
}
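/* Example (sketch): a reduction functor that the traits above classify as
 * non-trivial, because it declares value_type and provides init and join
 * (final is optional). DBL_MAX is from <cfloat>.
 *
 *   struct MinFunctor {
 *     typedef double value_type ;               // FunctorDeclaresValueType
 *     Kokkos::View<const double*,Kokkos::Cuda> x ;
 *     KOKKOS_INLINE_FUNCTION
 *     void operator()( const int i , double & v ) const
 *       { if ( x(i) < v ) v = x(i); }
 *     KOKKOS_INLINE_FUNCTION
 *     void init( double & v ) const { v = DBL_MAX ; } // ReduceFunctorHasInit
 *     KOKKOS_INLINE_FUNCTION
 *     void join( volatile double & dst , const volatile double & src ) const
 *       { if ( src < dst ) dst = src ; }              // ReduceFunctorHasJoin
 *   };
 */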
// general policy and view output
template< class ExecPolicy , class FunctorTypeIn , class ViewType >
inline
void parallel_reduce( const ExecPolicy & policy
, const FunctorTypeIn & functor_in
, const ViewType & result_view
, const std::string& str = ""
, typename Impl::enable_if<
- ( Impl::is_view<ViewType>::value && ! Impl::is_integral< ExecPolicy >::value &&
+ ( Kokkos::is_view<ViewType>::value && ! Impl::is_integral< ExecPolicy >::value &&
Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value
)>::type * = 0 )
{
enum {FunctorHasValueType = Impl::IsNonTrivialReduceFunctor<FunctorTypeIn>::value };
typedef typename Kokkos::Impl::if_c<FunctorHasValueType, FunctorTypeIn, Impl::CudaFunctorAdapter<FunctorTypeIn,ExecPolicy,typename ViewType::value_type> >::type FunctorType;
FunctorType functor = Impl::if_c<FunctorHasValueType,FunctorTypeIn,FunctorType>::select(functor_in,FunctorType(functor_in));
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
-
- (void) Impl::ParallelReduce< FunctorType, ExecPolicy >( functor , policy , result_view );
+
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelReduce< FunctorType, ExecPolicy > closure( functor , policy , result_view );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// general policy and pod or array of pod output
template< class ExecPolicy , class FunctorTypeIn , class ResultType>
inline
void parallel_reduce( const ExecPolicy & policy
, const FunctorTypeIn & functor_in
, ResultType& result_ref
, const std::string& str = ""
, typename Impl::enable_if<
- ( ! Impl::is_view<ResultType>::value &&
+ ( ! Kokkos::is_view<ResultType>::value &&
! Impl::IsNonTrivialReduceFunctor<FunctorTypeIn>::value &&
! Impl::is_integral< ExecPolicy >::value &&
Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value )>::type * = 0 )
{
typedef typename Impl::CudaFunctorAdapter<FunctorTypeIn,ExecPolicy,ResultType> FunctorType;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueOps< FunctorType , typename ExecPolicy::work_tag > ValueOps ;
// Wrap the result output request in a view to inform the implementation
// of the type and memory space.
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
, typename ValueTraits::value_type
, typename ValueTraits::pointer_type
>::type value_type ;
Kokkos::View< value_type
, HostSpace
, Kokkos::MemoryUnmanaged
>
result_view( ValueOps::pointer( result_ref )
, 1
);
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelReduce< FunctorType, ExecPolicy >( FunctorType(functor_in) , policy , result_view );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelReduce< FunctorType, ExecPolicy > closure( FunctorType(functor_in) , policy , result_view );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// general policy and pod or array of pod output
template< class ExecPolicy , class FunctorType>
inline
void parallel_reduce( const ExecPolicy & policy
, const FunctorType & functor
, typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type result_ref
, const std::string& str = ""
, typename Impl::enable_if<
( Impl::IsNonTrivialReduceFunctor<FunctorType>::value &&
! Impl::is_integral< ExecPolicy >::value &&
Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value )>::type * = 0 )
{
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueOps< FunctorType , typename ExecPolicy::work_tag > ValueOps ;
// Wrap the result output request in a view to inform the implementation
// of the type and memory space.
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
, typename ValueTraits::value_type
, typename ValueTraits::pointer_type
>::type value_type ;
Kokkos::View< value_type
, HostSpace
, Kokkos::MemoryUnmanaged
>
result_view( ValueOps::pointer( result_ref )
, ValueTraits::value_count( functor )
);
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelReduce< FunctorType, ExecPolicy >( functor , policy , result_view );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelReduce< FunctorType, ExecPolicy > closure( functor , policy , result_view );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// integral range policy and view output
template< class FunctorTypeIn , class ViewType >
inline
void parallel_reduce( const size_t work_count
, const FunctorTypeIn & functor_in
, const ViewType & result_view
, const std::string& str = ""
- , typename Impl::enable_if<( Impl::is_view<ViewType>::value &&
+ , typename Impl::enable_if<( Kokkos::is_view<ViewType>::value &&
Impl::is_same<
typename Impl::FunctorPolicyExecutionSpace< FunctorTypeIn , void >::execution_space,
Kokkos::Cuda>::value
)>::type * = 0 )
{
enum {FunctorHasValueType = Impl::IsNonTrivialReduceFunctor<FunctorTypeIn>::value };
typedef typename
Impl::FunctorPolicyExecutionSpace< FunctorTypeIn , void >::execution_space
execution_space ;
typedef RangePolicy< execution_space > ExecPolicy ;
typedef typename Kokkos::Impl::if_c<FunctorHasValueType, FunctorTypeIn, Impl::CudaFunctorAdapter<FunctorTypeIn,ExecPolicy,typename ViewType::value_type> >::type FunctorType;
FunctorType functor = Impl::if_c<FunctorHasValueType,FunctorTypeIn,FunctorType>::select(functor_in,FunctorType(functor_in));
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelReduce< FunctorType, ExecPolicy >( functor , ExecPolicy(0,work_count) , result_view );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelReduce< FunctorType, ExecPolicy > closure( functor , ExecPolicy(0,work_count) , result_view );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// integral range policy and pod or array of pod output
template< class FunctorTypeIn , class ResultType>
inline
void parallel_reduce( const size_t work_count
, const FunctorTypeIn & functor_in
, ResultType& result
, const std::string& str = ""
- , typename Impl::enable_if< ! Impl::is_view<ResultType>::value &&
+ , typename Impl::enable_if< ! Kokkos::is_view<ResultType>::value &&
! Impl::IsNonTrivialReduceFunctor<FunctorTypeIn>::value &&
Impl::is_same<
typename Impl::FunctorPolicyExecutionSpace< FunctorTypeIn , void >::execution_space,
Kokkos::Cuda>::value >::type * = 0 )
{
typedef typename
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorTypeIn , void >::execution_space
execution_space ;
typedef Kokkos::RangePolicy< execution_space > ExecPolicy ;
typedef Impl::CudaFunctorAdapter<FunctorTypeIn,ExecPolicy,ResultType> FunctorType;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
typedef Kokkos::Impl::FunctorValueOps< FunctorType , void > ValueOps ;
// Wrap the result output request in a view to inform the implementation
// of the type and memory space.
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
, typename ValueTraits::value_type
, typename ValueTraits::pointer_type
>::type value_type ;
Kokkos::View< value_type
, HostSpace
, Kokkos::MemoryUnmanaged
>
result_view( ValueOps::pointer( result )
, 1
);
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelReduce< FunctorType , ExecPolicy >( FunctorType(functor_in) , ExecPolicy(0,work_count) , result_view );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelReduce< FunctorType , ExecPolicy > closure( FunctorType(functor_in) , ExecPolicy(0,work_count) , result_view );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
template< class FunctorType>
inline
void parallel_reduce( const size_t work_count
, const FunctorType & functor
, typename Kokkos::Impl::FunctorValueTraits< FunctorType , void >::reference_type result
, const std::string& str = ""
, typename Impl::enable_if< Impl::IsNonTrivialReduceFunctor<FunctorType>::value &&
Impl::is_same<
typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space,
Kokkos::Cuda>::value >::type * = 0 )
{
typedef typename
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef Kokkos::RangePolicy< execution_space > ExecPolicy ;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
typedef Kokkos::Impl::FunctorValueOps< FunctorType , void > ValueOps ;
// Wrap the result output request in a view to inform the implementation
// of the type and memory space.
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
, typename ValueTraits::value_type
, typename ValueTraits::pointer_type
>::type value_type ;
Kokkos::View< value_type
, HostSpace
, Kokkos::MemoryUnmanaged
>
result_view( ValueOps::pointer( result )
, ValueTraits::value_count( functor )
);
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelReduce< FunctorType , ExecPolicy >( functor , ExecPolicy(0,work_count) , result_view );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelReduce< FunctorType , ExecPolicy > closure( functor , ExecPolicy(0,work_count) , result_view );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
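/* Example (sketch; the View x of length N is hypothetical): two host-side
 * calls that exercise the overloads above. The scalar result is wrapped in an
 * unmanaged HostSpace view, and the plain lambda (no value_type/init/join) is
 * routed through CudaFunctorAdapter before reaching Impl::ParallelReduce.
 *
 *   double sum = 0 ;
 *   Kokkos::parallel_reduce( Kokkos::RangePolicy<Kokkos::Cuda>( 0 , N ) ,
 *     KOKKOS_LAMBDA( const int i , double & partial ) { partial += x(i); } ,
 *     sum );
 *
 *   Kokkos::View<double,Kokkos::CudaSpace> dev_sum("dev_sum"); // rank-0 view result
 *   Kokkos::parallel_reduce( Kokkos::RangePolicy<Kokkos::Cuda>( 0 , N ) ,
 *     KOKKOS_LAMBDA( const int i , double & partial ) { partial += x(i); } ,
 *     dev_sum );
 */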
#ifdef KOKKOS_HAVE_CUDA
template< class ExecPolicy , class FunctorType , class ResultType >
inline
void parallel_reduce( const std::string & str
, const ExecPolicy & policy
, const FunctorType & functor
, ResultType * result)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
#endif
parallel_reduce(policy,functor,result,str);
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
#endif
(void) str;
}
template< class ExecPolicy , class FunctorType , class ResultType >
inline
void parallel_reduce( const std::string & str
, const ExecPolicy & policy
, const FunctorType & functor
, ResultType & result)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
#endif
parallel_reduce(policy,functor,result,str);
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
#endif
(void) str;
}
template< class ExecPolicy , class FunctorType >
inline
void parallel_reduce( const std::string & str
, const ExecPolicy & policy
, const FunctorType & functor)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
#endif
parallel_reduce(policy,functor,str);
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
#endif
(void) str;
}
#endif
} // namespace Kokkos
#endif /* defined( __CUDACC__ ) */
#endif /* #ifndef KOKKOS_CUDA_PARALLEL_HPP */
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
index 5ef16711e..11871a6ab 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
@@ -1,424 +1,427 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_REDUCESCAN_HPP
#define KOKKOS_CUDA_REDUCESCAN_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#include <utility>
#include <Kokkos_Parallel.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Cuda/Kokkos_Cuda_Vectorization.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//Shfl based reductions
/*
* Algorithmic constraints:
* (a) threads with same threadIdx.y have same value
* (b) blockDim.x == power of two
* (c) blockDim.z == 1
*/
template< class ValueType , class JoinOp>
__device__
inline void cuda_intra_warp_reduction( ValueType& result,
const JoinOp& join,
const int max_active_thread = blockDim.y) {
unsigned int shift = 1;
//Reduce over values from threads with different threadIdx.y
while(blockDim.x * shift < 32 ) {
const ValueType tmp = shfl_down(result, blockDim.x*shift,32u);
//Only join if upper thread is active (this allows a non-power-of-two blockDim.y)
if(threadIdx.y + shift < max_active_thread)
join(result , tmp);
shift*=2;
}
result = shfl(result,0,32);
}
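/* Standalone sketch of the shfl_down fold used above, written for plain warp
 * lanes and float for brevity (the routine above instead strides by blockDim.x
 * to combine values across threadIdx.y, and Kokkos::shfl_down also handles
 * wider value types). These are the pre-CUDA-9 intrinsics used in this file.
 *
 *   __device__ float warp_sum( float v ) {
 *     for ( int delta = 16 ; delta > 0 ; delta >>= 1 )
 *       v += __shfl_down( v , delta , 32 );  // fold upper half onto lower half
 *     return __shfl( v , 0 , 32 );           // broadcast lane 0's total
 *   }
 */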
template< class ValueType , class JoinOp>
__device__
inline void cuda_inter_warp_reduction( ValueType& value,
const JoinOp& join,
const int max_active_thread = blockDim.y) {
#define STEP_WIDTH 4
__shared__ char sh_result[sizeof(ValueType)*STEP_WIDTH];
ValueType* result = (ValueType*) & sh_result;
const unsigned step = 32 / blockDim.x;
unsigned shift = STEP_WIDTH;
const int id = threadIdx.y%step==0?threadIdx.y/step:65000;
if(id < STEP_WIDTH ) {
result[id] = value;
}
__syncthreads();
while (shift<=max_active_thread/step) {
if(shift<=id && shift+STEP_WIDTH>id && threadIdx.x==0) {
join(result[id%STEP_WIDTH],value);
}
__syncthreads();
shift+=STEP_WIDTH;
}
value = result[0];
- for(int i = 1; (i*step<=max_active_thread) && i<STEP_WIDTH; i++)
+ for(int i = 1; (i*step<max_active_thread) && i<STEP_WIDTH; i++)
join(value,result[i]);
}
template< class ValueType , class JoinOp>
__device__
inline void cuda_intra_block_reduction( ValueType& value,
const JoinOp& join,
const int max_active_thread = blockDim.y) {
cuda_intra_warp_reduction(value,join,max_active_thread);
cuda_inter_warp_reduction(value,join,max_active_thread);
}
template< class FunctorType , class JoinOp>
__device__
bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , void >::reference_type value,
const JoinOp& join,
Cuda::size_type * const m_scratch_space,
typename FunctorValueTraits< FunctorType , void >::pointer_type const result,
Cuda::size_type * const m_scratch_flags,
const int max_active_thread = blockDim.y) {
typedef typename FunctorValueTraits< FunctorType , void >::pointer_type pointer_type;
typedef typename FunctorValueTraits< FunctorType , void >::value_type value_type;
//Do the intra-block reduction with shfl operations and static shared memory
cuda_intra_block_reduction(value,join,max_active_thread);
const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
//One thread in the block writes block result to global scratch_memory
if(id == 0 ) {
pointer_type global = ((pointer_type) m_scratch_space) + blockIdx.x;
*global = value;
}
//One warp of the last block performs the inter-block reduction by loading the block values from global scratch_memory
bool last_block = false;
__syncthreads();
if ( id < 32 ) {
Cuda::size_type count;
//Figure out whether this is the last block
if(id == 0)
count = Kokkos::atomic_fetch_add(m_scratch_flags,1);
count = Kokkos::shfl(count,0,32);
//Last block does the inter block reduction
if( count == gridDim.x - 1) {
//set flag back to zero
if(id == 0)
*m_scratch_flags = 0;
last_block = true;
value = 0;
pointer_type const volatile global = (pointer_type) m_scratch_space ;
//Reduce all global values, splitting the work over the threads of one warp
const int step_size = blockDim.x*blockDim.y < 32 ? blockDim.x*blockDim.y : 32;
for(int i=id; i<gridDim.x; i+=step_size) {
value_type tmp = global[i];
join(value, tmp);
}
//Perform shfl reductions within the warp; only join if the contribution is valid (allows gridDim.x that is not a power of two and < 32)
if (blockDim.x*blockDim.y > 1) {
value_type tmp = Kokkos::shfl_down(value, 1,32);
if( id + 1 < gridDim.x )
join(value, tmp);
}
if (blockDim.x*blockDim.y > 2) {
value_type tmp = Kokkos::shfl_down(value, 2,32);
if( id + 2 < gridDim.x )
join(value, tmp);
}
if (blockDim.x*blockDim.y > 4) {
value_type tmp = Kokkos::shfl_down(value, 4,32);
if( id + 4 < gridDim.x )
join(value, tmp);
}
if (blockDim.x*blockDim.y > 8) {
value_type tmp = Kokkos::shfl_down(value, 8,32);
if( id + 8 < gridDim.x )
join(value, tmp);
}
if (blockDim.x*blockDim.y > 16) {
value_type tmp = Kokkos::shfl_down(value, 16,32);
if( id + 16 < gridDim.x )
join(value, tmp);
}
}
}
//Thread 0 of the last block now holds the global reduction value in "value"
return last_block;
}
//----------------------------------------------------------------------------
// See section B.17 of Cuda C Programming Guide Version 3.2
// for discussion of
// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor)
// function qualifier which could be used to improve performance.
//----------------------------------------------------------------------------
// Maximize shared memory and minimize L1 cache:
// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferShared );
// For 2.0 capability: 48 KB shared and 16 KB L1
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/*
* Algorithmic constraints:
* (a) blockDim.y is a power of two
* (b) blockDim.y <= 512
* (c) blockDim.x == blockDim.z == 1
*/
template< bool DoScan , class FunctorType , class ArgTag >
__device__
void cuda_intra_block_reduce_scan( const FunctorType & functor ,
const typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type base_data )
{
typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ;
typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
typedef typename ValueTraits::pointer_type pointer_type ;
const unsigned value_count = ValueTraits::value_count( functor );
const unsigned BlockSizeMask = blockDim.y - 1 ;
// Must have power of two thread count
if ( BlockSizeMask & blockDim.y ) { Kokkos::abort("Cuda::cuda_intra_block_reduce_scan requires power-of-two blockDim"); }
#define BLOCK_REDUCE_STEP( R , TD , S ) \
if ( ! ( R & ((1<<(S+1))-1) ) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S)) ); }
#define BLOCK_SCAN_STEP( TD , N , S ) \
if ( N == (1<<S) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S))); }
const unsigned rtid_intra = threadIdx.y ^ BlockSizeMask ;
const pointer_type tdata_intra = base_data + value_count * threadIdx.y ;
{ // Intra-warp reduction:
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,0)
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,1)
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,2)
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,3)
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,4)
}
__syncthreads(); // Wait for all warps to reduce
{ // Inter-warp reduce-scan by a single warp to avoid extra synchronizations
const unsigned rtid_inter = ( threadIdx.y ^ BlockSizeMask ) << CudaTraits::WarpIndexShift ;
if ( rtid_inter < blockDim.y ) {
const pointer_type tdata_inter = base_data + value_count * ( rtid_inter ^ BlockSizeMask );
if ( (1<<5) < BlockSizeMask ) { BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,5) }
if ( (1<<6) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) }
if ( (1<<7) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) }
if ( (1<<8) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) }
if ( DoScan ) {
int n = ( rtid_inter & 32 ) ? 32 : (
( rtid_inter & 64 ) ? 64 : (
( rtid_inter & 128 ) ? 128 : (
( rtid_inter & 256 ) ? 256 : 0 )));
if ( ! ( rtid_inter + n < blockDim.y ) ) n = 0 ;
BLOCK_SCAN_STEP(tdata_inter,n,8)
BLOCK_SCAN_STEP(tdata_inter,n,7)
BLOCK_SCAN_STEP(tdata_inter,n,6)
BLOCK_SCAN_STEP(tdata_inter,n,5)
}
}
}
__syncthreads(); // Wait for inter-warp reduce-scan to complete
if ( DoScan ) {
int n = ( rtid_intra & 1 ) ? 1 : (
( rtid_intra & 2 ) ? 2 : (
( rtid_intra & 4 ) ? 4 : (
( rtid_intra & 8 ) ? 8 : (
( rtid_intra & 16 ) ? 16 : 0 ))));
if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ;
BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,3) __threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,2) __threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,1) __threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,0)
}
#undef BLOCK_SCAN_STEP
#undef BLOCK_REDUCE_STEP
}
//----------------------------------------------------------------------------
/**\brief Input value-per-thread starting at 'shared_data'.
* Reduction value at last thread's location.
*
* If 'DoScan' then write blocks' scan values and block-groups' scan values.
*
* Global reduce result is in the last thread's 'shared_data' location.
*/
template< bool DoScan , class FunctorType , class ArgTag >
__device__
bool cuda_single_inter_block_reduce_scan( const FunctorType & functor ,
const Cuda::size_type block_id ,
const Cuda::size_type block_count ,
Cuda::size_type * const shared_data ,
Cuda::size_type * const global_data ,
Cuda::size_type * const global_flags )
{
typedef Cuda::size_type size_type ;
typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ;
typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
typedef FunctorValueInit< FunctorType , ArgTag > ValueInit ;
typedef FunctorValueOps< FunctorType , ArgTag > ValueOps ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
+ // '__ffs' = position of the least significant bit set to 1.
+ // 'blockDim.y' is guaranteed to be a power of two so this
+ // is the integral shift value that can replace an integral divide.
+ const unsigned BlockSizeShift = __ffs( blockDim.y ) - 1 ;
const unsigned BlockSizeMask = blockDim.y - 1 ;
- const unsigned BlockSizeShift = power_of_two_if_valid( blockDim.y );
// Must have power of two thread count
if ( BlockSizeMask & blockDim.y ) { Kokkos::abort("Cuda::cuda_single_inter_block_reduce_scan requires power-of-two blockDim"); }
const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
word_count( ValueTraits::value_size( functor ) / sizeof(size_type) );
// Reduce the accumulation for the entire block.
cuda_intra_block_reduce_scan<false,FunctorType,ArgTag>( functor , pointer_type(shared_data) );
{
// Write accumulation total to global scratch space.
// Accumulation total is the last thread's data.
size_type * const shared = shared_data + word_count.value * BlockSizeMask ;
size_type * const global = global_data + word_count.value * block_id ;
for ( size_type i = threadIdx.y ; i < word_count.value ; i += blockDim.y ) { global[i] = shared[i] ; }
}
// Contributing blocks note that their contribution has been completed via an atomic-increment flag
// If this block is not the last block to contribute to this group then the block is done.
const bool is_last_block =
! __syncthreads_or( threadIdx.y ? 0 : ( 1 + atomicInc( global_flags , block_count - 1 ) < block_count ) );
if ( is_last_block ) {
const size_type b = ( long(block_count) * long(threadIdx.y) ) >> BlockSizeShift ;
const size_type e = ( long(block_count) * long( threadIdx.y + 1 ) ) >> BlockSizeShift ;
{
void * const shared_ptr = shared_data + word_count.value * threadIdx.y ;
reference_type shared_value = ValueInit::init( functor , shared_ptr );
for ( size_type i = b ; i < e ; ++i ) {
ValueJoin::join( functor , shared_ptr , global_data + word_count.value * i );
}
}
cuda_intra_block_reduce_scan<DoScan,FunctorType,ArgTag>( functor , pointer_type(shared_data) );
if ( DoScan ) {
size_type * const shared_value = shared_data + word_count.value * ( threadIdx.y ? threadIdx.y - 1 : blockDim.y );
if ( ! threadIdx.y ) { ValueInit::init( functor , shared_value ); }
// Join previous inclusive scan value to each member
for ( size_type i = b ; i < e ; ++i ) {
size_type * const global_value = global_data + word_count.value * i ;
ValueJoin::join( functor , shared_value , global_value );
ValueOps ::copy( functor , global_value , shared_value );
}
}
}
return is_last_block ;
}
// Size in bytes required for inter block reduce or scan
template< bool DoScan , class FunctorType , class ArgTag >
inline
unsigned cuda_single_inter_block_reduce_scan_shmem( const FunctorType & functor , const unsigned BlockSize )
{
return ( BlockSize + 2 ) * Impl::FunctorValueTraits< FunctorType , ArgTag >::value_size( functor );
}
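/* Worked size example for the formula above: a 256-thread block reducing a
 * single double (value_size == 8) requires ( 256 + 2 ) * 8 = 2064 bytes of
 * shared memory; the two extra value slots give the reduce/scan variants
 * scratch space beyond the one-value-per-thread layout.
 */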
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( __CUDACC__ ) */
#endif /* KOKKOS_CUDA_REDUCESCAN_HPP */
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
index 9074c249f..c054f4561 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
@@ -1,427 +1,432 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_VIEW_HPP
#define KOKKOS_CUDA_VIEW_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <cstring>
#include <Kokkos_HostSpace.hpp>
#include <Kokkos_CudaSpace.hpp>
+#include <impl/Kokkos_Shape.hpp>
#include <Kokkos_View.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template<>
struct AssertShapeBoundsAbort< CudaSpace >
{
KOKKOS_INLINE_FUNCTION
static void apply( const size_t /* rank */ ,
const size_t /* n0 */ , const size_t /* n1 */ ,
const size_t /* n2 */ , const size_t /* n3 */ ,
const size_t /* n4 */ , const size_t /* n5 */ ,
const size_t /* n6 */ , const size_t /* n7 */ ,
const size_t /* arg_rank */ ,
const size_t /* i0 */ , const size_t /* i1 */ ,
const size_t /* i2 */ , const size_t /* i3 */ ,
const size_t /* i4 */ , const size_t /* i5 */ ,
const size_t /* i6 */ , const size_t /* i7 */ )
{
Kokkos::abort("Kokkos::View array bounds violation");
}
};
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// Cuda texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4),
// and for 32 byte objects via a pair of float4 fetches (see the float4 specialization below).
// Via reinterpret_cast this can be used to support all scalar types of those sizes.
// Any other scalar type falls back to either normal reads out of global memory,
// or using the __ldg intrinsic on Kepler GPUs or newer (Compute Capability >= 3.0)
template< typename ValueType
, class MemorySpace
, class AliasType =
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 4 ) , int ,
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 8 ) , ::int2 ,
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 16 ) , ::int4 ,
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 32 ) , ::float4 ,void
>::type
>::type
>::type
>::type
>
class CudaTextureFetch {
private:
cuda_texture_object_type m_obj ;
const ValueType * m_alloc_ptr ;
int m_offset ;
void attach( const ValueType * const arg_ptr, AllocationTracker const & tracker )
{
typedef char const * const byte;
m_alloc_ptr = reinterpret_cast<ValueType *>(tracker.alloc_ptr());
size_t byte_offset = reinterpret_cast<byte>(arg_ptr) - reinterpret_cast<byte>(m_alloc_ptr);
const bool ok_aligned = 0 == byte_offset % sizeof(ValueType);
const size_t count = tracker.alloc_size() / sizeof(ValueType);
const bool ok_contains = (m_alloc_ptr <= arg_ptr) && (arg_ptr < (m_alloc_ptr + count));
if (ok_aligned && ok_contains) {
if (tracker.attribute() == NULL ) {
MemorySpace::texture_object_attach(
tracker
, sizeof(ValueType)
, cudaCreateChannelDesc< AliasType >()
);
}
m_obj = dynamic_cast<TextureAttribute*>(tracker.attribute())->m_tex_obj;
m_offset = arg_ptr - m_alloc_ptr;
}
else if( !ok_contains ) {
throw_runtime_exception("Error: cannot attach a texture object to a tracker which does not bound the pointer.");
}
else {
throw_runtime_exception("Error: cannot attach a texture object to an incorrectly aligned pointer.");
}
}
public:
KOKKOS_INLINE_FUNCTION
CudaTextureFetch() : m_obj() , m_alloc_ptr() , m_offset() {}
KOKKOS_INLINE_FUNCTION
~CudaTextureFetch() {}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( const CudaTextureFetch & rhs )
: m_obj( rhs.m_obj )
, m_alloc_ptr( rhs.m_alloc_ptr )
, m_offset( rhs.m_offset )
{}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = ( const CudaTextureFetch & rhs )
{
m_obj = rhs.m_obj ;
m_alloc_ptr = rhs.m_alloc_ptr ;
m_offset = rhs.m_offset ;
return *this ;
}
KOKKOS_INLINE_FUNCTION explicit
CudaTextureFetch( const ValueType * const arg_ptr, AllocationTracker const & tracker )
: m_obj( 0 ) , m_alloc_ptr(0) , m_offset(0)
{
#if defined( KOKKOS_USE_LDG_INTRINSIC )
m_alloc_ptr = arg_ptr ;
#elif defined( __CUDACC__ ) && ! defined( __CUDA_ARCH__ )
if ( arg_ptr != NULL ) {
if ( tracker.is_valid() ) {
attach( arg_ptr, tracker );
}
else {
AllocationTracker found_tracker = AllocationTracker::find<typename MemorySpace::allocator>(arg_ptr);
if ( found_tracker.is_valid() ) {
attach( arg_ptr, found_tracker );
} else {
throw_runtime_exception("Error: cannot attach a texture object to an untracked pointer!");
}
}
}
#endif
}
KOKKOS_INLINE_FUNCTION
operator const ValueType * () const { return m_alloc_ptr + m_offset ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
ValueType operator[]( const iType & i ) const
{
#if defined( KOKKOS_USE_LDG_INTRINSIC ) && defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
AliasType v = __ldg(reinterpret_cast<AliasType*>(&m_alloc_ptr[i]));
return *(reinterpret_cast<ValueType*> (&v));
#elif defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
AliasType v = tex1Dfetch<AliasType>( m_obj , i + m_offset );
return *(reinterpret_cast<ValueType*> (&v));
#else
return m_alloc_ptr[ i + m_offset ];
#endif
}
};
template< typename ValueType, class MemorySpace >
class CudaTextureFetch< const ValueType, MemorySpace, float4 > {
private:
typedef float4 AliasType;
cuda_texture_object_type m_obj ;
const ValueType * m_alloc_ptr ;
int m_offset ;
void attach( const ValueType * const arg_ptr, AllocationTracker const & tracker )
{
typedef char const * const byte;
m_alloc_ptr = reinterpret_cast<ValueType *>(tracker.alloc_ptr());
size_t byte_offset = reinterpret_cast<byte>(arg_ptr) - reinterpret_cast<byte>(m_alloc_ptr);
const bool ok_aligned = 0 == byte_offset % sizeof(ValueType);
const size_t count = tracker.alloc_size() / sizeof(ValueType);
const bool ok_contains = (m_alloc_ptr <= arg_ptr) && (arg_ptr < (m_alloc_ptr + count));
if (ok_aligned && ok_contains) {
if (tracker.attribute() == NULL ) {
MemorySpace::texture_object_attach(
tracker
, sizeof(ValueType)
, cudaCreateChannelDesc< AliasType >()
);
}
m_obj = dynamic_cast<TextureAttribute*>(tracker.attribute())->m_tex_obj;
m_offset = arg_ptr - m_alloc_ptr;
}
else if( !ok_contains ) {
throw_runtime_exception("Error: cannot attach a texture object to a tracker which does not bound the pointer.");
}
else {
throw_runtime_exception("Error: cannot attach a texture object to an incorrectly aligned pointer.");
}
}
public:
KOKKOS_INLINE_FUNCTION
CudaTextureFetch() : m_obj() , m_alloc_ptr() , m_offset() {}
KOKKOS_INLINE_FUNCTION
~CudaTextureFetch() {}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( const CudaTextureFetch & rhs )
: m_obj( rhs.m_obj )
, m_alloc_ptr( rhs.m_alloc_ptr )
, m_offset( rhs.m_offset )
{}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = ( const CudaTextureFetch & rhs )
{
m_obj = rhs.m_obj ;
m_alloc_ptr = rhs.m_alloc_ptr ;
m_offset = rhs.m_offset ;
return *this ;
}
KOKKOS_INLINE_FUNCTION explicit
CudaTextureFetch( const ValueType * const arg_ptr, AllocationTracker const & tracker )
: m_obj( 0 ) , m_alloc_ptr(0) , m_offset(0)
{
#if defined( KOKKOS_USE_LDG_INTRINSIC )
m_alloc_ptr = arg_ptr ;
#elif defined( __CUDACC__ ) && ! defined( __CUDA_ARCH__ )
if ( arg_ptr != NULL ) {
if ( tracker.is_valid() ) {
attach( arg_ptr, tracker );
}
else {
AllocationTracker found_tracker = AllocationTracker::find<typename MemorySpace::allocator>(arg_ptr);
if ( found_tracker.is_valid() ) {
attach( arg_ptr, found_tracker );
} else {
throw_runtime_exception("Error: cannot attach a texture object to an untracked pointer!");
}
}
}
#endif
}
KOKKOS_INLINE_FUNCTION
operator const ValueType * () const { return m_alloc_ptr + m_offset ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
ValueType operator[]( const iType & i ) const
{
#if defined( KOKKOS_USE_LDG_INTRINSIC ) && defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
AliasType v = __ldg(reinterpret_cast<AliasType*>(&m_alloc_ptr[i]));
return *(reinterpret_cast<ValueType*> (&v));
#elif defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
union Float4ValueType {
float4 f4[2];
ValueType val;
};
Float4ValueType convert;
convert.f4[0] = tex1Dfetch<AliasType>( m_obj , 2*(i + m_offset) );
convert.f4[1] = tex1Dfetch<AliasType>( m_obj , 2*(i + m_offset)+1 );
return convert.val;
#else
return m_alloc_ptr[ i + m_offset ];
#endif
}
};
template< typename ValueType, class MemorySpace >
class CudaTextureFetch< const ValueType, MemorySpace, void >
{
private:
const ValueType * m_ptr ;
public:
KOKKOS_INLINE_FUNCTION
CudaTextureFetch() : m_ptr(0) {};
KOKKOS_INLINE_FUNCTION
~CudaTextureFetch() {
}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( const ValueType * ptr, const AllocationTracker & ) : m_ptr(ptr) {}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( const CudaTextureFetch & rhs ) : m_ptr(rhs.m_ptr) {}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = ( const CudaTextureFetch & rhs ) {
m_ptr = rhs.m_ptr;
return *this ;
}
explicit KOKKOS_INLINE_FUNCTION
CudaTextureFetch( ValueType * const base_view_ptr, AllocationTracker const & /*tracker*/ ) {
m_ptr = base_view_ptr;
}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = (const ValueType* base_view_ptr) {
m_ptr = base_view_ptr;
return *this;
}
KOKKOS_INLINE_FUNCTION
operator const ValueType * () const { return m_ptr ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
ValueType operator[]( const iType & i ) const
{
return m_ptr[ i ];
}
};
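/* Sketch of the alias-type load performed by operator[] on the __ldg path of
 * the specializations above, for an arbitrary 8-byte trivially copyable T
 * (the __ldg intrinsic itself requires compute capability 3.5 or newer):
 *
 *   template< typename T >  // sizeof(T) == 8 assumed
 *   __device__ T ldg_via_int2( const T * p ) {
 *     ::int2 v = __ldg( reinterpret_cast<const ::int2 *>( p ) );
 *     return *reinterpret_cast<T *>( &v );
 *   }
 */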
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Replace Default ViewDataHandle with Cuda texture fetch specialization
* if 'const' value type, CudaSpace and random access.
*/
template< class ViewTraits >
class ViewDataHandle< ViewTraits ,
typename enable_if< ( is_same< typename ViewTraits::memory_space,CudaSpace>::value ||
is_same< typename ViewTraits::memory_space,CudaUVMSpace>::value )
&&
is_same<typename ViewTraits::const_value_type,typename ViewTraits::value_type>::value
&&
ViewTraits::memory_traits::RandomAccess
>::type >
{
public:
enum { ReturnTypeIsReference = false };
typedef Impl::CudaTextureFetch< typename ViewTraits::value_type
, typename ViewTraits::memory_space> handle_type;
KOKKOS_INLINE_FUNCTION
static handle_type create_handle( typename ViewTraits::value_type * arg_data_ptr, AllocationTracker const & arg_tracker )
{
return handle_type(arg_data_ptr, arg_tracker);
}
typedef typename ViewTraits::value_type return_type;
};
}
}
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif // KOKKOS_HAVE_CUDA
#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */
diff --git a/lib/kokkos/core/src/KokkosExp_View.hpp b/lib/kokkos/core/src/KokkosExp_View.hpp
index fef76a457..1fb11abde 100644
--- a/lib/kokkos/core/src/KokkosExp_View.hpp
+++ b/lib/kokkos/core/src/KokkosExp_View.hpp
@@ -1,1989 +1,1833 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_VIEW_HPP
#define KOKKOS_EXPERIMENTAL_VIEW_HPP
#include <string>
+#include <algorithm>
#include <type_traits>
#include <initializer_list>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_HostSpace.hpp>
#include <Kokkos_MemoryTraits.hpp>
+#include <Kokkos_ExecPolicy.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
+template< class DstMemorySpace , class SrcMemorySpace >
+struct DeepCopy ;
+
template< class DataType >
struct ViewArrayAnalysis ;
-template< class DataType , class ValueType , class ArrayLayout >
+template< class DataType , class ArrayLayout
+ , typename ValueType =
+ typename ViewArrayAnalysis< DataType >::non_const_value_type
+ >
struct ViewDataAnalysis ;
-template< class , class = void , typename Enable = void >
-class ViewMapping { enum { is_assignable = false }; };
-
-template< class DstMemorySpace , class SrcMemorySpace >
-struct DeepCopy ;
+template< class , class ... >
+class ViewMapping { public: enum { is_assignable = false }; };
+
+template< class MemorySpace >
+struct ViewOperatorBoundsErrorAbort ;
+
+template<>
+struct ViewOperatorBoundsErrorAbort< Kokkos::HostSpace > {
+ static void apply( const size_t rank
+ , const size_t n0 , const size_t n1
+ , const size_t n2 , const size_t n3
+ , const size_t n4 , const size_t n5
+ , const size_t n6 , const size_t n7
+ , const size_t i0 , const size_t i1
+ , const size_t i2 , const size_t i3
+ , const size_t i4 , const size_t i5
+ , const size_t i6 , const size_t i7 );
+};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
/** \class ViewTraits
* \brief Traits class for accessing attributes of a View.
*
* This is an implementation detail of View. It is only of interest
* to developers implementing a new specialization of View.
*
- * Template argument permutations:
- * - View< DataType , void , void , void >
- * - View< DataType , Space , void , void >
- * - View< DataType , Space , MemoryTraits , void >
- * - View< DataType , Space , void , MemoryTraits >
- * - View< DataType , ArrayLayout , void , void >
- * - View< DataType , ArrayLayout , Space , void >
- * - View< DataType , ArrayLayout , MemoryTraits , void >
- * - View< DataType , ArrayLayout , Space , MemoryTraits >
- * - View< DataType , MemoryTraits , void , void >
+ * Template argument options:
+ * - View< DataType >
+ * - View< DataType , Space >
+ * - View< DataType , Space , MemoryTraits >
+ * - View< DataType , ArrayLayout >
+ * - View< DataType , ArrayLayout , Space >
+ * - View< DataType , ArrayLayout , MemoryTraits >
+ * - View< DataType , ArrayLayout , Space , MemoryTraits >
+ * - View< DataType , MemoryTraits >
*/
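/* Example (sketch; N and M are hypothetical extents) of the template argument
 * orderings listed above:
 *
 *   using Kokkos::Experimental::View ;
 *   View< double** > a( "a" , N , M );                    // all defaults
 *   View< double** , Kokkos::CudaSpace > b( "b" , N , M );
 *   typedef View< const double** , Kokkos::LayoutLeft , Kokkos::CudaSpace ,
 *                 Kokkos::MemoryTraits<Kokkos::RandomAccess> > const_random_view ;
 */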
-template< class DataType ,
- class Arg1 = void ,
- class Arg2 = void ,
- class Arg3 = void >
-class ViewTraits {
+template< class DataType , class ... Properties >
+struct ViewTraits ;
+
+template<>
+struct ViewTraits< void >
+{
+ typedef void execution_space ;
+ typedef void memory_space ;
+ typedef void array_layout ;
+ typedef void memory_traits ;
+};
+
+template< class ... Prop >
+struct ViewTraits< void , void , Prop ... >
+{
+ // Ignore an extraneous 'void'
+ typedef typename ViewTraits<void,Prop...>::execution_space execution_space ;
+ typedef typename ViewTraits<void,Prop...>::memory_space memory_space ;
+ typedef typename ViewTraits<void,Prop...>::array_layout array_layout ;
+ typedef typename ViewTraits<void,Prop...>::memory_traits memory_traits ;
+};
+
+template< class ArrayLayout , class ... Prop >
+struct ViewTraits< typename std::enable_if< Kokkos::Impl::is_array_layout<ArrayLayout>::value >::type , ArrayLayout , Prop ... >
+{
+ // Specify layout, keep subsequent space and memory traits arguments
+
+ typedef typename ViewTraits<void,Prop...>::execution_space execution_space ;
+ typedef typename ViewTraits<void,Prop...>::memory_space memory_space ;
+ typedef ArrayLayout array_layout ;
+ typedef typename ViewTraits<void,Prop...>::memory_traits memory_traits ;
+};
+
+template< class Space , class ... Prop >
+struct ViewTraits< typename std::enable_if< Kokkos::Impl::is_space<Space>::value >::type , Space , Prop ... >
+{
+ // Specify Space, memory traits should be the only subsequent argument
+
+ static_assert( std::is_same< typename ViewTraits<void,Prop...>::execution_space , void >::value ||
+ std::is_same< typename ViewTraits<void,Prop...>::memory_space , void >::value ||
+ std::is_same< typename ViewTraits<void,Prop...>::array_layout , void >::value
+ , "Only one View Execution or Memory Space template argument" );
+
+ typedef typename Space::execution_space execution_space ;
+ typedef typename Space::memory_space memory_space ;
+ typedef typename execution_space::array_layout array_layout ;
+ typedef typename ViewTraits<void,Prop...>::memory_traits memory_traits ;
+};
+
+template< class MemoryTraits , class ... Prop >
+struct ViewTraits< typename std::enable_if< Kokkos::Impl::is_memory_traits<MemoryTraits>::value >::type , MemoryTraits , Prop ... >
+{
+ // Specify memory trait, should not be any subsequent arguments
+
+ static_assert( std::is_same< typename ViewTraits<void,Prop...>::execution_space , void >::value ||
+ std::is_same< typename ViewTraits<void,Prop...>::memory_space , void >::value ||
+ std::is_same< typename ViewTraits<void,Prop...>::array_layout , void >::value ||
+ std::is_same< typename ViewTraits<void,Prop...>::memory_traits , void >::value
+ , "MemoryTrait is the final optional template argument for a View" );
+
+ typedef void execution_space ;
+ typedef void memory_space ;
+ typedef void array_layout ;
+ typedef MemoryTraits memory_traits ;
+};
+
+
+template< class DataType , class ... Properties >
+struct ViewTraits {
private:
- // Layout, Space, and MemoryTraits are optional
- // but need to appear in that order. That means Layout
- // can only be Arg1, Space can be Arg1 or Arg2, and
- // MemoryTraits can be Arg1, Arg2 or Arg3
-
- enum { Arg1IsLayout = Kokkos::Impl::is_array_layout<Arg1>::value };
-
- enum { Arg1IsSpace = Kokkos::Impl::is_space<Arg1>::value };
- enum { Arg2IsSpace = Kokkos::Impl::is_space<Arg2>::value };
-
- enum { Arg1IsMemoryTraits = Kokkos::Impl::is_memory_traits<Arg1>::value };
- enum { Arg2IsMemoryTraits = Kokkos::Impl::is_memory_traits<Arg2>::value };
- enum { Arg3IsMemoryTraits = Kokkos::Impl::is_memory_traits<Arg3>::value };
-
- enum { Arg1IsVoid = std::is_same< Arg1 , void >::value };
- enum { Arg2IsVoid = std::is_same< Arg2 , void >::value };
- enum { Arg3IsVoid = std::is_same< Arg3 , void >::value };
-
- static_assert( 1 == Arg1IsLayout + Arg1IsSpace + Arg1IsMemoryTraits + Arg1IsVoid
- , "Template argument #1 must be layout, space, traits, or void" );
-
- // If Arg1 is Layout then Arg2 is Space, MemoryTraits, or void
- // If Arg1 is Space then Arg2 is MemoryTraits or void
- // If Arg1 is MemoryTraits then Arg2 is void
- // If Arg1 is Void then Arg2 is void
-
- static_assert( ( Arg1IsLayout && ( 1 == Arg2IsSpace + Arg2IsMemoryTraits + Arg2IsVoid ) ) ||
- ( Arg1IsSpace && ( 0 == Arg2IsSpace ) && ( 1 == Arg2IsMemoryTraits + Arg2IsVoid ) ) ||
- ( Arg1IsMemoryTraits && Arg2IsVoid ) ||
- ( Arg1IsVoid && Arg2IsVoid )
- , "Template argument #2 must be space, traits, or void" );
-
- // Arg3 is MemoryTraits or void and at most one argument is MemoryTraits
- static_assert( ( 1 == Arg3IsMemoryTraits + Arg3IsVoid ) &&
- ( Arg1IsMemoryTraits + Arg2IsMemoryTraits + Arg3IsMemoryTraits <= 1 )
- , "Template argument #3 must be traits or void" );
-
- typedef
- typename std::conditional< Arg1IsSpace , Arg1 ,
- typename std::conditional< Arg2IsSpace , Arg2 , Kokkos::DefaultExecutionSpace
- >::type >::type::execution_space
- ExecutionSpace ;
-
- typedef
- typename std::conditional< Arg1IsSpace , Arg1 ,
- typename std::conditional< Arg2IsSpace , Arg2 , Kokkos::DefaultExecutionSpace
- >::type >::type::memory_space
- MemorySpace ;
+ // Unpack the properties arguments
+ typedef ViewTraits< void , Properties ... > prop ;
- typedef
- typename Kokkos::Impl::is_space<
- typename std::conditional< Arg1IsSpace , Arg1 ,
- typename std::conditional< Arg2IsSpace , Arg2 , Kokkos::DefaultExecutionSpace
- >::type >::type >::host_mirror_space
- HostMirrorSpace ;
+ typedef typename
+ std::conditional< ! std::is_same< typename prop::execution_space , void >::value
+ , typename prop::execution_space
+ , Kokkos::DefaultExecutionSpace
+ >::type
+ ExecutionSpace ;
- typedef
- typename std::conditional< Arg1IsLayout , Arg1 , typename ExecutionSpace::array_layout >::type
+ typedef typename
+ std::conditional< ! std::is_same< typename prop::memory_space , void >::value
+ , typename prop::memory_space
+ , typename ExecutionSpace::memory_space
+ >::type
+ MemorySpace ;
+
+ typedef typename
+ std::conditional< ! std::is_same< typename prop::array_layout , void >::value
+ , typename prop::array_layout
+ , typename ExecutionSpace::array_layout
+ >::type
ArrayLayout ;
- // Arg1, Arg2, or Arg3 may be memory traits
- typedef
- typename std::conditional< Arg1IsMemoryTraits , Arg1 ,
- typename std::conditional< Arg2IsMemoryTraits , Arg2 ,
- typename std::conditional< Arg3IsMemoryTraits , Arg3 , MemoryManaged
- >::type >::type >::type
- MemoryTraits ;
+ typedef typename Kokkos::Impl::is_space< ExecutionSpace >::host_mirror_space
+ HostMirrorSpace ;
- // Analyze data type's array properties
- typedef Kokkos::Experimental::Impl::ViewArrayAnalysis< DataType > array_analysis ;
+ typedef typename
+ std::conditional< ! std::is_same< typename prop::memory_traits , void >::value
+ , typename prop::memory_traits
+ , typename Kokkos::MemoryManaged
+ >::type
+ MemoryTraits ;
- // Analyze data type's properties with opportunity to specialize based upon the array value type
- typedef Kokkos::Experimental::Impl::
- ViewDataAnalysis< DataType
- , typename array_analysis::non_const_value_type
- , ArrayLayout
- > data_analysis ;
+ // Analyze the data type's properties;
+ // may be specialized based upon the layout and value type
+ typedef Kokkos::Experimental::Impl::ViewDataAnalysis< DataType , ArrayLayout > data_analysis ;
public:
//------------------------------------
// Data type traits:
typedef typename data_analysis::type data_type ;
typedef typename data_analysis::const_type const_data_type ;
typedef typename data_analysis::non_const_type non_const_data_type ;
//------------------------------------
// Compatible array of trivial type traits:
typedef typename data_analysis::array_scalar_type array_scalar_type ;
typedef typename data_analysis::const_array_scalar_type const_array_scalar_type ;
typedef typename data_analysis::non_const_array_scalar_type non_const_array_scalar_type ;
//------------------------------------
// Value type traits:
typedef typename data_analysis::value_type value_type ;
typedef typename data_analysis::const_value_type const_value_type ;
typedef typename data_analysis::non_const_value_type non_const_value_type ;
//------------------------------------
// Mapping traits:
typedef ArrayLayout array_layout ;
typedef typename data_analysis::dimension dimension ;
typedef typename data_analysis::specialize specialize /* mapping specialization tag */ ;
enum { rank = dimension::rank };
enum { rank_dynamic = dimension::rank_dynamic };
//------------------------------------
// Execution space, memory space, memory access traits, and host mirror space.
- typedef ExecutionSpace execution_space ;
- typedef MemorySpace memory_space ;
- typedef Device<ExecutionSpace,MemorySpace> device_type ;
- typedef MemoryTraits memory_traits ;
- typedef HostMirrorSpace host_mirror_space ;
+ typedef ExecutionSpace execution_space ;
+ typedef MemorySpace memory_space ;
+ typedef Kokkos::Device<ExecutionSpace,MemorySpace> device_type ;
+ typedef MemoryTraits memory_traits ;
+ typedef HostMirrorSpace host_mirror_space ;
- typedef typename memory_space::size_type size_type ;
+ typedef typename MemorySpace::size_type size_type ;
- enum { is_hostspace = std::is_same< memory_space , HostSpace >::value };
- enum { is_managed = memory_traits::Unmanaged == 0 };
- enum { is_random_access = memory_traits::RandomAccess == 1 };
+ enum { is_hostspace = std::is_same< MemorySpace , HostSpace >::value };
+ enum { is_managed = MemoryTraits::Unmanaged == 0 };
+ enum { is_random_access = MemoryTraits::RandomAccess == 1 };
//------------------------------------
};
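
// Illustrative sketch (assumed example, not from this change): the resolved
// traits can be inspected directly, e.g.
//
//   typedef ViewTraits< int*[3] , Kokkos::LayoutRight > T ;
//   static_assert( T::rank == 2 && T::rank_dynamic == 1 , "" );
//
// since int*[3] declares one runtime and one compile-time dimension.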
/** \class View
* \brief View to an array of data.
*
* A View represents an array of one or more dimensions.
* For details, please refer to Kokkos' tutorial materials.
*
* \section Kokkos_View_TemplateParameters Template parameters
*
 * This class has both required and optional template parameters.  The
 * \c DataType parameter must always be provided, and must always be
 * first.  The variadic \c Properties parameters are placeholders for
 * the optional \c Layout, \c Space, and \c MemoryTraits parameters.
 * When explaining the template parameters, we won't refer to their
 * positions; instead, we will refer to the valid categories of
 * template parameters, in whatever order they may occur.
*
* Valid ways in which template arguments may be specified:
- * - View< DataType , Space >
- * - View< DataType , Space , MemoryTraits >
- * - View< DataType , Space , void , MemoryTraits >
+ * - View< DataType >
+ * - View< DataType , Layout >
* - View< DataType , Layout , Space >
* - View< DataType , Layout , Space , MemoryTraits >
+ * - View< DataType , Space >
+ * - View< DataType , Space , MemoryTraits >
+ * - View< DataType , MemoryTraits >
*
* \tparam DataType (required) This indicates both the type of each
* entry of the array, and the combination of compile-time and
* run-time array dimension(s). For example, <tt>double*</tt>
* indicates a one-dimensional array of \c double with run-time
* dimension, and <tt>int*[3]</tt> a two-dimensional array of \c int
* with run-time first dimension and compile-time second dimension
* (of 3). In general, the run-time dimensions (if any) must go
* first, followed by zero or more compile-time dimensions. For
* more examples, please refer to the tutorial materials.
*
 * \tparam Space (optional) The execution or memory space.  If not
 *   specified, it defaults to the memory space of the default
 *   execution space.
*
* \tparam Layout (optional) The array's layout in memory. For
* example, LayoutLeft indicates a column-major (Fortran style)
* layout, and LayoutRight a row-major (C style) layout. If not
* specified, this defaults to the preferred layout for the
* <tt>Space</tt>.
*
* \tparam MemoryTraits (optional) Assertion of the user's intended
* access behavior. For example, RandomAccess indicates read-only
* access with limited spatial locality, and Unmanaged lets users
* wrap externally allocated memory in a View without automatic
* deallocation.
*
* \section Kokkos_View_MT MemoryTraits discussion
*
* \subsection Kokkos_View_MT_Interp MemoryTraits interpretation depends on Space
*
* Some \c MemoryTraits options may have different interpretations for
* different \c Space types. For example, with the Cuda device,
* \c RandomAccess tells Kokkos to fetch the data through the texture
* cache, whereas the non-GPU devices have no such hardware construct.
*
* \subsection Kokkos_View_MT_PrefUse Preferred use of MemoryTraits
*
* Users should defer applying the optional \c MemoryTraits parameter
* until the point at which they actually plan to rely on it in a
* computational kernel. This minimizes the number of template
* parameters exposed in their code, which reduces the cost of
* compilation. Users may always assign a View without specified
* \c MemoryTraits to a compatible View with that specification.
* For example:
* \code
* // Pass in the simplest types of View possible.
* void
* doSomething (View<double*, Cuda> out,
* View<const double*, Cuda> in)
* {
* // Assign the "generic" View in to a RandomAccess View in_rr.
* // Note that RandomAccess View objects must have const data.
* View<const double*, Cuda, RandomAccess> in_rr = in;
* // ... do something with in_rr and out ...
* }
* \endcode
*/
-template< class DataType
- , class Arg1 = void /* ArrayLayout, SpaceType, or MemoryTraits */
- , class Arg2 = void /* SpaceType or MemoryTraits */
- , class Arg3 = void /* MemoryTraits */ >
+template< class DataType , class ... Properties >
class View ;
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#include <impl/KokkosExp_ViewMapping.hpp>
#include <impl/KokkosExp_ViewAllocProp.hpp>
#include <impl/KokkosExp_ViewArray.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace {
constexpr Kokkos::Experimental::Impl::ALL_t
ALL = Kokkos::Experimental::Impl::ALL_t();
constexpr Kokkos::Experimental::Impl::WithoutInitializing_t
WithoutInitializing = Kokkos::Experimental::Impl::WithoutInitializing_t();
constexpr Kokkos::Experimental::Impl::AllowPadding_t
AllowPadding = Kokkos::Experimental::Impl::AllowPadding_t();
}
/** \brief Create View allocation parameter bundle from argument list.
*
* Valid argument list members are:
* 1) label as a "string" or std::string
* 2) memory space instance of the View::memory_space type
* 3) execution space instance compatible with the View::memory_space
* 4) Kokkos::WithoutInitializing to bypass initialization
 * 5) Kokkos::AllowPadding to allow allocation to pad dimensions for memory alignment
*/
template< class ... Args >
inline
Kokkos::Experimental::Impl::ViewAllocProp< Args ... >
view_alloc( Args ... args )
{
return Kokkos::Experimental::Impl::ViewAllocProp< Args ... >( args ... );
}
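
// Usage sketch (illustrative only; the label "A" and extent N are assumptions):
//
//   View< double* > x( view_alloc( "A" , WithoutInitializing ) , N );
//
// bundles the label and the initialization flag into a single allocation
// property object consumed by the allocating View constructor.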
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
-/**\brief Each R? template argument designates whether the subview argument is a range */
-template< class V
- , bool R0 = false , bool R1 = false , bool R2 = false , bool R3 = false
- , bool R4 = false , bool R5 = false , bool R6 = false , bool R7 = false >
-using Subview = typename Kokkos::Experimental::Impl::SubviewType< V, R0 , R1 , R2 , R3 , R4 , R5 , R6 , R7 >::type ;
+template< class DataType , class ... Properties >
+class View ;
+
+template< class > struct is_view : public std::false_type {};
+
+template< class D, class ... P >
+struct is_view< View<D,P...> > : public std::true_type {};
-template< class DataType , class Arg1 , class Arg2 , class Arg3 >
-class View : public ViewTraits< DataType , Arg1 , Arg2 , Arg3 > {
+template< class DataType , class ... Properties >
+class View : public ViewTraits< DataType , Properties ... > {
private:
- template< class , class , class , class > friend class View ;
+ template< class , class ... > friend class View ;
+ template< class , class ... > friend class Impl::ViewMapping ;
- typedef ViewTraits< DataType , Arg1 , Arg2 , Arg3 > traits ;
- typedef Kokkos::Experimental::Impl::ViewMapping< traits > map_type ;
- typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ;
+ typedef ViewTraits< DataType , Properties ... > traits ;
+ typedef Kokkos::Experimental::Impl::ViewMapping< traits , void > map_type ;
+ typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ;
track_type m_track ;
map_type m_map ;
public:
//----------------------------------------
/** \brief Compatible view of array of scalar types */
typedef View< typename traits::array_scalar_type ,
typename traits::array_layout ,
typename traits::device_type ,
typename traits::memory_traits >
array_type ;
/** \brief Compatible view of const data type */
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
typename traits::memory_traits >
const_type ;
/** \brief Compatible view of non-const data type */
typedef View< typename traits::non_const_data_type ,
- typename traits::array_layout ,
- typename traits::device_type ,
- typename traits::memory_traits >
+ typename traits::array_layout ,
+ typename traits::device_type ,
+ typename traits::memory_traits >
non_const_type ;
/** \brief Compatible HostMirror view */
typedef View< typename traits::non_const_data_type ,
typename traits::array_layout ,
- typename traits::host_mirror_space ,
- void >
+ typename traits::host_mirror_space >
HostMirror ;
//----------------------------------------
// Domain dimensions
enum { Rank = map_type::Rank };
+ template< typename iType >
+ KOKKOS_INLINE_FUNCTION constexpr
+ typename std::enable_if< std::is_integral<iType>::value , size_t >::type
+ extent( const iType & r ) const
+ {
+ return r == 0 ? m_map.dimension_0() : (
+ r == 1 ? m_map.dimension_1() : (
+ r == 2 ? m_map.dimension_2() : (
+ r == 3 ? m_map.dimension_3() : (
+ r == 4 ? m_map.dimension_4() : (
+ r == 5 ? m_map.dimension_5() : (
+ r == 6 ? m_map.dimension_6() : (
+ r == 7 ? m_map.dimension_7() : 1 )))))));
+ }
+
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_map.dimension_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_map.dimension_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_map.dimension_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_map.dimension_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_map.dimension_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_map.dimension_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_map.dimension_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_map.dimension_7(); }
+ KOKKOS_INLINE_FUNCTION constexpr size_t size() const { return m_map.dimension_0() *
+ m_map.dimension_1() *
+ m_map.dimension_2() *
+ m_map.dimension_3() *
+ m_map.dimension_4() *
+ m_map.dimension_5() *
+ m_map.dimension_6() *
+ m_map.dimension_7(); }
+
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_map.stride_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_map.stride_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_map.stride_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_map.stride_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_map.stride_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_map.stride_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_map.stride_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_map.stride_7(); }
+ template< typename iType >
+ KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_map.stride(s); }
+
//----------------------------------------
// Range span is the span which contains all members.
typedef typename map_type::reference_type reference_type ;
+ typedef typename map_type::pointer_type pointer_type ;
enum { reference_type_is_lvalue_reference = std::is_lvalue_reference< reference_type >::value };
KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_map.span(); }
+ // Deprecated, use 'span()' instead
+ KOKKOS_INLINE_FUNCTION constexpr size_t capacity() const { return m_map.span(); }
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_map.span_is_contiguous(); }
- KOKKOS_INLINE_FUNCTION constexpr typename traits::value_type * data() const { return m_map.data(); }
+ KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { return m_map.data(); }
 // Deprecated, use 'span_is_contiguous()' instead
KOKKOS_INLINE_FUNCTION constexpr bool is_contiguous() const { return m_map.span_is_contiguous(); }
// Deprecated, use 'data()' instead
- KOKKOS_INLINE_FUNCTION constexpr typename traits::value_type * ptr_on_device() const { return m_map.data(); }
+ KOKKOS_INLINE_FUNCTION constexpr pointer_type ptr_on_device() const { return m_map.data(); }
//----------------------------------------
// Allow specializations to query their specialized map
KOKKOS_INLINE_FUNCTION
const map_type & implementation_map() const { return m_map ; }
//----------------------------------------
private:
typedef typename
std::conditional< Rank == 0 , reference_type
, Kokkos::Experimental::Impl::Error_view_scalar_reference_to_non_scalar_view >::type
scalar_operator_reference_type ;
typedef typename
std::conditional< Rank == 0 , const int
, Kokkos::Experimental::Impl::Error_view_scalar_reference_to_non_scalar_view >::type
scalar_operator_index_type ;
+ enum { is_default_map =
+ std::is_same< typename traits::specialize , void >::value &&
+ ( std::is_same< typename traits::array_layout , Kokkos::LayoutLeft >::value ||
+ std::is_same< typename traits::array_layout , Kokkos::LayoutRight >::value ||
+ std::is_same< typename traits::array_layout , Kokkos::LayoutStride >::value
+ ) };
+
+ template < bool F , unsigned R
+ , typename I0 = int
+ , typename I1 = int
+ , typename I2 = int
+ , typename I3 = int
+ , typename I4 = int
+ , typename I5 = int
+ , typename I6 = int
+ , typename I7 = int >
+ struct enable {
+ enum { value = F && ( R == traits::rank ) &&
+ std::is_integral<I0>::value &&
+ std::is_integral<I1>::value &&
+ std::is_integral<I2>::value &&
+ std::is_integral<I3>::value &&
+ std::is_integral<I4>::value &&
+ std::is_integral<I5>::value &&
+ std::is_integral<I6>::value &&
+ std::is_integral<I7>::value };
+ };
+
+ KOKKOS_INLINE_FUNCTION
+ void verify_operator_bounds( size_t i0 = 0 , size_t i1 = 0 , size_t i2 = 0 , size_t i3 = 0
+ , size_t i4 = 0 , size_t i5 = 0 , size_t i6 = 0 , size_t i7 = 0 ) const
+ {
+ if ( ( m_map.dimension_0() <= i0 ) ||
+ ( m_map.dimension_1() <= i1 ) ||
+ ( m_map.dimension_2() <= i2 ) ||
+ ( m_map.dimension_3() <= i3 ) ||
+ ( m_map.dimension_4() <= i4 ) ||
+ ( m_map.dimension_5() <= i5 ) ||
+ ( m_map.dimension_6() <= i6 ) ||
+ ( m_map.dimension_7() <= i7 ) ) {
+ Kokkos::Experimental::Impl::
+ ViewOperatorBoundsErrorAbort< Kokkos::Impl::ActiveExecutionMemorySpace >::
+ apply( Rank
+ , m_map.dimension_0() , m_map.dimension_1()
+ , m_map.dimension_2() , m_map.dimension_3()
+ , m_map.dimension_4() , m_map.dimension_5()
+ , m_map.dimension_6() , m_map.dimension_7()
+ , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
+ }
+ }
+
+#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
+
+#define KOKKOS_VIEW_OPERATOR_VERIFY( I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) \
+ Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \
+ < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); \
+ verify_operator_bounds(I0,I1,I2,I3,I4,I5,I6,I7);
+
+#else
+
+#define KOKKOS_VIEW_OPERATOR_VERIFY( I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) \
+ Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \
+ < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify();
+
+#endif
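+
+// With KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK defined, an out-of-bounds access
+// such as a(i0) with i0 >= a.dimension_0() aborts with the view's extents
+// and the offending indices; otherwise only accessibility of the view's
+// memory space from the active execution space is verified.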
+
public:
+ //------------------------------
// Rank == 0
KOKKOS_FORCEINLINE_FUNCTION
scalar_operator_reference_type operator()() const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, 0, 0, 0, 0, 0, 0, 0, 0 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(0,0,0,0,0,0,0,0)
return scalar_operator_reference_type( m_map.reference() );
}
KOKKOS_FORCEINLINE_FUNCTION
reference_type
operator()( scalar_operator_index_type i0
, const int i1 = 0 , const int i2 = 0 , const int i3 = 0
, const int i4 = 0 , const int i5 = 0 , const int i6 = 0 , const int i7 = 0 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, i4, i5, i6, i7 );
- return m_map.reference();
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,i6,i7)
+ return scalar_operator_reference_type( m_map.reference() );
}
+ //------------------------------
// Rank == 1
template< typename I0 >
KOKKOS_FORCEINLINE_FUNCTION
- typename std::enable_if<( Rank == 1 && std::is_integral<I0>::value
- ), reference_type >::type
+ typename std::enable_if< View::enable< is_default_map , 1 , I0 >::value , reference_type >::type
+ operator[]( const I0 & i0 ) const
+ {
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,0,0,0,0,0,0,0)
+ return m_map.m_handle[ m_map.m_offset(i0) ];
+ }
+
+ template< typename I0 >
+ KOKKOS_FORCEINLINE_FUNCTION
+ typename std::enable_if< View::enable< ! is_default_map , 1 , I0 >::value , reference_type >::type
operator[]( const I0 & i0 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, 0, 0, 0, 0, 0, 0, 0 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,0,0,0,0,0,0,0)
return m_map.reference(i0);
}
template< typename I0 >
KOKKOS_FORCEINLINE_FUNCTION
- typename std::enable_if<( Rank == 1 && std::is_integral<I0>::value
- ), reference_type >::type
+ typename std::enable_if< View::enable< is_default_map , 1 , I0 >::value , reference_type >::type
+ operator()( const I0 & i0 ) const
+ {
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,0,0,0,0,0,0,0)
+ return m_map.m_handle[ m_map.m_offset(i0) ];
+ }
+
+ template< typename I0 >
+ KOKKOS_FORCEINLINE_FUNCTION
+ typename std::enable_if< View::enable< ! is_default_map , 1 , I0 >::value , reference_type >::type
operator()( const I0 & i0 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, 0, 0, 0, 0, 0, 0, 0 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,0,0,0,0,0,0,0)
return m_map.reference(i0);
}
template< typename I0 >
KOKKOS_FORCEINLINE_FUNCTION
- reference_type
+ typename std::enable_if< View::enable< true , 1 , I0 >::value , reference_type >::type
operator()( const I0 & i0
- , typename std::enable_if<( Rank == 1 && std::is_integral<I0>::value ), const int >::type i1
- , const int i2 = 0 , const int i3 = 0
+ , const int i1 , const int i2 = 0 , const int i3 = 0
, const int i4 = 0 , const int i5 = 0 , const int i6 = 0 , const int i7 = 0 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, i4, i5, i6, i7 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,i6,i7)
return m_map.reference(i0);
}
+ //------------------------------
// Rank == 2
template< typename I0 , typename I1 >
KOKKOS_FORCEINLINE_FUNCTION
- typename std::enable_if<( Rank == 2 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value
- ), reference_type >::type
+ typename std::enable_if< View::enable< is_default_map , 2 , I0 , I1 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, 0, 0, 0, 0, 0, 0 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,0,0,0,0,0,0)
+ return m_map.m_handle[ m_map.m_offset(i0,i1) ];
+ }
+
+ template< typename I0 , typename I1 >
+ KOKKOS_FORCEINLINE_FUNCTION
+ typename std::enable_if< View::enable< ! is_default_map , 2 , I0 , I1 >::value , reference_type >::type
+ operator()( const I0 & i0 , const I1 & i1 ) const
+ {
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,0,0,0,0,0,0)
return m_map.reference(i0,i1);
}
template< typename I0 , typename I1 >
KOKKOS_FORCEINLINE_FUNCTION
- reference_type
+ typename std::enable_if< View::enable< true , 2 , I0 , I1 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1
- , typename std::enable_if<( Rank == 2 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value
- ), const int >::type i2
- , const int i3 = 0
+ , const int i2 , const int i3 = 0
, const int i4 = 0 , const int i5 = 0 , const int i6 = 0 , const int i7 = 0 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, i4, i5, i6, i7 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,i6,i7)
return m_map.reference(i0,i1);
}
+ //------------------------------
// Rank == 3
template< typename I0 , typename I1 , typename I2 >
KOKKOS_FORCEINLINE_FUNCTION
- typename std::enable_if<( Rank == 3 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value &&
- std::is_integral<I2>::value
- ), reference_type >::type
+ typename std::enable_if< View::enable< is_default_map , 3 , I0 , I1 , I2 >::value , reference_type >::type
+ operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
+ {
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,0,0,0,0,0)
+ return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ];
+ }
+
+ template< typename I0 , typename I1 , typename I2 >
+ KOKKOS_FORCEINLINE_FUNCTION
+ typename std::enable_if< View::enable< ! is_default_map , 3 , I0 , I1 , I2 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, 0, 0, 0, 0, 0 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,0,0,0,0,0)
return m_map.reference(i0,i1,i2);
}
template< typename I0 , typename I1 , typename I2 >
KOKKOS_FORCEINLINE_FUNCTION
- reference_type
+ typename std::enable_if< View::enable< true , 3 , I0 , I1 , I2 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1 , const I2 & i2
- , typename std::enable_if<( Rank == 3 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value &&
- std::is_integral<I2>::value
- ), const int >::type i3
+ , const int i3
, const int i4 = 0 , const int i5 = 0 , const int i6 = 0 , const int i7 = 0 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, i4, i5, i6, i7 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,i6,i7)
return m_map.reference(i0,i1,i2);
}
+ //------------------------------
// Rank == 4
template< typename I0 , typename I1 , typename I2 , typename I3 >
KOKKOS_FORCEINLINE_FUNCTION
- typename std::enable_if<( Rank == 4 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value &&
- std::is_integral<I2>::value &&
- std::is_integral<I3>::value
- ), reference_type >::type
+ typename std::enable_if< View::enable< is_default_map , 4 , I0 , I1 , I2 , I3 >::value , reference_type >::type
+ operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
+ {
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,0,0,0,0)
+ return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ];
+ }
+
+ template< typename I0 , typename I1 , typename I2 , typename I3 >
+ KOKKOS_FORCEINLINE_FUNCTION
+ typename std::enable_if< View::enable< ! is_default_map , 4 , I0 , I1 , I2 , I3 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, 0, 0, 0, 0 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,0,0,0,0)
return m_map.reference(i0,i1,i2,i3);
}
template< typename I0 , typename I1 , typename I2 , typename I3 >
KOKKOS_FORCEINLINE_FUNCTION
- reference_type
+ typename std::enable_if< View::enable< true , 4 , I0 , I1 , I2 , I3 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
- , typename std::enable_if<( Rank == 4 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value &&
- std::is_integral<I2>::value &&
- std::is_integral<I3>::value
- ), const int >::type i4
+ , const int i4
, const int i5 = 0 , const int i6 = 0 , const int i7 = 0 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, i4, i5, i6, i7 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,i6,i7)
return m_map.reference(i0,i1,i2,i3);
}
+ //------------------------------
// Rank == 5
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 >
KOKKOS_FORCEINLINE_FUNCTION
- typename std::enable_if<( Rank == 5 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value &&
- std::is_integral<I2>::value &&
- std::is_integral<I3>::value &&
- std::is_integral<I4>::value
- ), reference_type >::type
+ typename std::enable_if< View::enable< is_default_map , 5 , I0 , I1 , I2 , I3 , I4 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, i4, 0, 0, 0 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,0,0,0)
+ return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ];
+ }
+
+ template< typename I0 , typename I1 , typename I2 , typename I3
+ , typename I4 >
+ KOKKOS_FORCEINLINE_FUNCTION
+ typename std::enable_if< View::enable< ! is_default_map , 5 , I0 , I1 , I2 , I3 , I4 >::value , reference_type >::type
+ operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+ , const I4 & i4 ) const
+ {
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,0,0,0)
return m_map.reference(i0,i1,i2,i3,i4);
}
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 >
KOKKOS_FORCEINLINE_FUNCTION
- reference_type
+ typename std::enable_if< View::enable< true , 5 , I0 , I1 , I2 , I3 , I4 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4
- , typename std::enable_if<( Rank == 5 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value &&
- std::is_integral<I2>::value &&
- std::is_integral<I3>::value &&
- std::is_integral<I4>::value
- ), const int >::type i5
+ , const int i5
, const int i6 = 0 , const int i7 = 0 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, i4, i5, i6, i7 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,i6,i7)
return m_map.reference(i0,i1,i2,i3,i4);
}
+ //------------------------------
// Rank == 6
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 >
KOKKOS_FORCEINLINE_FUNCTION
- typename std::enable_if<( Rank == 6 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value &&
- std::is_integral<I2>::value &&
- std::is_integral<I3>::value &&
- std::is_integral<I4>::value &&
- std::is_integral<I5>::value
- ), reference_type >::type
+ typename std::enable_if< View::enable< is_default_map , 6 , I0 , I1 , I2 , I3 , I4 , I5 >::value , reference_type >::type
+ operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+ , const I4 & i4 , const I5 & i5 ) const
+ {
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,0,0)
+ return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ];
+ }
+
+ template< typename I0 , typename I1 , typename I2 , typename I3
+ , typename I4 , typename I5 >
+ KOKKOS_FORCEINLINE_FUNCTION
+ typename std::enable_if< View::enable< ! is_default_map , 6 , I0 , I1 , I2 , I3 , I4 , I5 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, i4, i5, 0, 0 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,0,0)
return m_map.reference(i0,i1,i2,i3,i4,i5);
}
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 >
KOKKOS_FORCEINLINE_FUNCTION
- reference_type
+ typename std::enable_if< View::enable< true , 6 , I0 , I1 , I2 , I3 , I4 , I5 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5
- , typename std::enable_if<( Rank == 6 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value &&
- std::is_integral<I2>::value &&
- std::is_integral<I3>::value &&
- std::is_integral<I4>::value
- ), const int >::type i6
+ , const int i6
, const int i7 = 0 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, i4, i5, i6, i7 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,i6,i7)
return m_map.reference(i0,i1,i2,i3,i4,i5);
}
+ //------------------------------
// Rank == 7
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 >
KOKKOS_FORCEINLINE_FUNCTION
- typename std::enable_if<( Rank == 7 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value &&
- std::is_integral<I2>::value &&
- std::is_integral<I3>::value &&
- std::is_integral<I4>::value &&
- std::is_integral<I5>::value &&
- std::is_integral<I6>::value
- ), reference_type >::type
+ typename std::enable_if< View::enable< is_default_map , 7 , I0 , I1 , I2 , I3 , I4 , I5 , I6 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, i4, i5, i6, 0 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,i6,0)
+ return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ];
+ }
+
+ template< typename I0 , typename I1 , typename I2 , typename I3
+ , typename I4 , typename I5 , typename I6 >
+ KOKKOS_FORCEINLINE_FUNCTION
+ typename std::enable_if< View::enable< ! is_default_map , 7 , I0 , I1 , I2 , I3 , I4 , I5 , I6 >::value , reference_type >::type
+ operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+ , const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
+ {
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,i6,0)
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
}
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 >
KOKKOS_FORCEINLINE_FUNCTION
- reference_type
+ typename std::enable_if< View::enable< true , 7 , I0 , I1 , I2 , I3 , I4 , I5 , I6 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6
- , typename std::enable_if<( Rank == 7 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value &&
- std::is_integral<I2>::value &&
- std::is_integral<I3>::value &&
- std::is_integral<I4>::value
- ), const int >::type i7
+ , const int i7
) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, i4, i5, i6, i7 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,i6,i7)
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
}
+ //------------------------------
// Rank == 8
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 , typename I7 >
KOKKOS_FORCEINLINE_FUNCTION
- typename std::enable_if<( Rank == 8 &&
- std::is_integral<I0>::value &&
- std::is_integral<I1>::value &&
- std::is_integral<I2>::value &&
- std::is_integral<I3>::value &&
- std::is_integral<I4>::value &&
- std::is_integral<I5>::value &&
- std::is_integral<I6>::value &&
- std::is_integral<I7>::value
- ), reference_type >::type
+ typename std::enable_if< View::enable< is_default_map , 8 , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 >::value , reference_type >::type
operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
{
- KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( typename traits::memory_space, m_map, Rank, i0, i1, i2, i3, i4, i5, i6, i7 );
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,i6,i7)
+ return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
+ }
+
+ template< typename I0 , typename I1 , typename I2 , typename I3
+ , typename I4 , typename I5 , typename I6 , typename I7 >
+ KOKKOS_FORCEINLINE_FUNCTION
+ typename std::enable_if< View::enable< ! is_default_map , 8 , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 >::value , reference_type >::type
+ operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
+ , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
+ {
+ KOKKOS_VIEW_OPERATOR_VERIFY(i0,i1,i2,i3,i4,i5,i6,i7)
return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7);
}
+#undef KOKKOS_VIEW_OPERATOR_VERIFY
+
//----------------------------------------
+ // Standard destructor, constructors, and assignment operators
KOKKOS_INLINE_FUNCTION
~View() {}
KOKKOS_INLINE_FUNCTION
View() : m_track(), m_map() {}
KOKKOS_INLINE_FUNCTION
View( const View & rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ) {}
KOKKOS_INLINE_FUNCTION
View( View && rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ) {}
KOKKOS_INLINE_FUNCTION
View & operator = ( const View & rhs ) { m_track = rhs.m_track ; m_map = rhs.m_map ; return *this ; }
KOKKOS_INLINE_FUNCTION
View & operator = ( View && rhs ) { m_track = rhs.m_track ; m_map = rhs.m_map ; return *this ; }
//----------------------------------------
+ // Compatible view copy constructor and assignment
+ // may assign unmanaged from managed.
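+ // Illustrative sketch (assumed types, not part of this change):
+ //
+ //   View< double* >                          managed( "m" , N );
+ //   View< const double* , MemoryUnmanaged >  alias = managed ;
+ //
+ // 'alias' references the same data but, being unmanaged, does not
+ // participate in reference counting.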
- template< class RT , class R1 , class R2 , class R3 >
+ template< class RT , class ... RP >
KOKKOS_INLINE_FUNCTION
- View( const View<RT,R1,R2,R3> & rhs )
- : m_track( rhs.m_track )
+ View( const View<RT,RP...> & rhs )
+ : m_track( rhs.m_track , traits::is_managed )
, m_map()
{
- typedef typename View<RT,R1,R2,R3>::traits SrcTraits ;
- typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits > Mapping ;
+ typedef typename View<RT,RP...>::traits SrcTraits ;
+ typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ;
static_assert( Mapping::is_assignable , "Incompatible View copy construction" );
Mapping::assign( m_map , rhs.m_map , rhs.m_track );
}
- template< class RT , class R1 , class R2 , class R3 >
- KOKKOS_INLINE_FUNCTION
- View( View<RT,R1,R2,R3> && rhs )
- : m_track( rhs.m_track )
- , m_map()
- {
- typedef typename View<RT,R1,R2,R3>::traits SrcTraits ;
- typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits > Mapping ;
- static_assert( Mapping::is_assignable , "Incompatible View move construction" );
- Mapping::assign( m_map , rhs.m_map , rhs.m_track );
- }
-
- template< class RT , class R1 , class R2 , class R3 >
+ template< class RT , class ... RP >
KOKKOS_INLINE_FUNCTION
- View & operator = ( const View<RT,R1,R2,R3> & rhs )
+ View & operator = ( const View<RT,RP...> & rhs )
{
- typedef typename View<RT,R1,R2,R3>::traits SrcTraits ;
- typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits > Mapping ;
+ typedef typename View<RT,RP...>::traits SrcTraits ;
+ typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ;
static_assert( Mapping::is_assignable , "Incompatible View copy assignment" );
Mapping::assign( m_map , rhs.m_map , rhs.m_track );
- m_track.operator=( rhs.m_track );
+ m_track.assign( rhs.m_track , traits::is_managed );
return *this ;
}
- template< class RT , class R1 , class R2 , class R3 >
+ //----------------------------------------
+ // Compatible subview constructor
+ // may assign unmanaged from managed.
+
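+ // Illustrative sketch (assumed extents, not part of this change):
+ //
+ //   View< double** >                        m( "m" , N0 , N1 );
+ //   View< double* , Kokkos::LayoutStride >  row( m , i , ALL );
+ //
+ // The mapping deduces the subview type from the source traits and the
+ // argument list, and the static_assert rejects incompatible destinations.
+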
+ template< class RT , class ... RP , class Arg0 , class ... Args >
KOKKOS_INLINE_FUNCTION
- View & operator = ( View<RT,R1,R2,R3> && rhs )
+ View( const View< RT , RP... > & src_view
+ , const Arg0 & arg0 , Args ... args )
+ : m_track( src_view.m_track , traits::is_managed )
+ , m_map()
{
- typedef typename View<RT,R1,R2,R3>::traits SrcTraits ;
- typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits > Mapping ;
- static_assert( Mapping::is_assignable , "Incompatible View move assignment" );
- Mapping::assign( m_map , rhs.m_map , rhs.m_track );
- m_track.operator=( rhs.m_track );
- return *this ;
+ typedef View< RT , RP... > SrcType ;
+
+ typedef Kokkos::Experimental::Impl::ViewMapping
+ < void /* deduce destination view type from source view traits */
+ , typename SrcType::traits
+ , Arg0 , Args... > Mapping ;
+
+ typedef typename Mapping::type DstType ;
+
+ static_assert( Kokkos::Experimental::Impl::ViewMapping< View , DstType , void >::is_assignable
+ , "Subview construction requires compatible view and subview arguments" );
+
+ Mapping::assign( m_map, src_view.m_map, arg0 , args... );
}
//----------------------------------------
// Allocation according to allocation properties
private:
// Must call destructor for non-trivial types
template< class ExecSpace >
struct DestroyFunctor {
map_type m_map ;
ExecSpace m_space ;
- KOKKOS_INLINE_FUNCTION
void destroy_shared_allocation() { m_map.destroy( m_space ); }
};
public:
+ KOKKOS_INLINE_FUNCTION
+ int use_count() const { return m_track.use_count(); }
+
inline
const std::string label() const { return m_track.template get_label< typename traits::memory_space >(); }
+ // Disambiguate from subview constructor.
template< class Prop >
explicit inline
View( const Prop & arg_prop
- , const size_t arg_N0 = 0
+ , typename std::enable_if< ! is_view<Prop>::value ,
+ const size_t >::type arg_N0 = 0
, const size_t arg_N1 = 0
, const size_t arg_N2 = 0
, const size_t arg_N3 = 0
, const size_t arg_N4 = 0
, const size_t arg_N5 = 0
, const size_t arg_N6 = 0
, const size_t arg_N7 = 0
)
: m_track()
, m_map()
{
// Merge the < execution_space , memory_space > into the properties.
typedef Kokkos::Experimental::Impl::ViewAllocProp< typename traits::device_type , Prop > alloc_prop ;
typedef typename alloc_prop::execution_space execution_space ;
typedef typename traits::memory_space memory_space ;
typedef DestroyFunctor< execution_space > destroy_functor ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< memory_space , destroy_functor > record_type ;
static_assert( traits::is_managed , "View allocation constructor requires managed memory" );
const alloc_prop prop( arg_prop );
// If initializing view data then the execution space must be initialized.
if ( prop.initialize.value && ! prop.execution.is_initialized() ) {
Kokkos::Impl::throw_runtime_exception("Constructing View and initializing data with uninitialized execution space");
}
// Query the mapping for byte-size of allocation.
const size_t alloc_size = map_type::memory_span( prop.allow_padding
, arg_N0 , arg_N1 , arg_N2 , arg_N3
, arg_N4 , arg_N5 , arg_N6 , arg_N7 );
// Allocate memory from the memory space.
record_type * const record = record_type::allocate( prop.memory , prop.label , alloc_size );
// Construct the mapping object prior to start of tracking
// to assign destroy functor and possibly initialize.
- m_map = map_type( record->data()
+ m_map = map_type( reinterpret_cast< pointer_type >( record->data() )
, prop.allow_padding
, arg_N0 , arg_N1 , arg_N2 , arg_N3
, arg_N4 , arg_N5 , arg_N6 , arg_N7 );
- // Copy the destroy functor into the allocation record before initiating tracking.
- record->m_destroy.m_map = m_map ;
- record->m_destroy.m_space = prop.execution ;
-
+ // If the data was initialized then it must also be destroyed:
+ // copy the destroy functor into the allocation record
+ // before initiating tracking.
if ( prop.initialize.value ) {
m_map.construct( prop.execution );
+
+ record->m_destroy.m_map = m_map ;
+ record->m_destroy.m_space = prop.execution ;
}
- // Destroy functor assigned and initialization complete, start tracking
- m_track = track_type( record );
+ // Setup and initialization complete, start tracking
+ m_track.assign_allocated_record_to_uninitialized( record );
}
template< class Prop >
explicit inline
View( const Prop & arg_prop
, const typename traits::array_layout & arg_layout
)
: m_track()
, m_map()
{
// Merge the < execution_space , memory_space > into the properties.
typedef Kokkos::Experimental::Impl::ViewAllocProp< typename traits::device_type , Prop > alloc_prop ;
typedef typename alloc_prop::execution_space execution_space ;
typedef typename traits::memory_space memory_space ;
typedef DestroyFunctor< execution_space > destroy_functor ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< memory_space , destroy_functor > record_type ;
static_assert( traits::is_managed , "View allocation constructor requires managed memory" );
const alloc_prop prop( arg_prop );
// If initializing view data then the execution space must be initialized.
if ( prop.initialize.value && ! prop.execution.is_initialized() ) {
Kokkos::Impl::throw_runtime_exception("Constructing View and initializing data with uninitialized execution space");
}
// Query the mapping for byte-size of allocation.
const size_t alloc_size = map_type::memory_span( prop.allow_padding , arg_layout );
// Allocate memory from the memory space.
record_type * const record = record_type::allocate( prop.memory , prop.label , alloc_size );
// Construct the mapping object prior to start of tracking
// to assign destroy functor and possibly initialize.
- m_map = map_type( record->data() , prop.allow_padding , arg_layout );
+ m_map = map_type( reinterpret_cast< pointer_type >( record->data() ) , prop.allow_padding , arg_layout );
 // If the data was initialized, copy the destroy functor into the allocation record before initiating tracking.
- record->m_destroy.m_map = m_map ;
- record->m_destroy.m_space = prop.execution ;
if ( prop.initialize.value ) {
m_map.construct( prop.execution );
+
+ record->m_destroy.m_map = m_map ;
+ record->m_destroy.m_space = prop.execution ;
}
- // Destroy functor assigned and initialization complete, start tracking
- m_track = track_type( record );
+ // Setup and initialization complete, start tracking
+ m_track.assign_allocated_record_to_uninitialized( record );
}
//----------------------------------------
// Memory span required to wrap these dimensions.
static constexpr size_t memory_span( const size_t arg_N0 = 0
, const size_t arg_N1 = 0
, const size_t arg_N2 = 0
, const size_t arg_N3 = 0
, const size_t arg_N4 = 0
, const size_t arg_N5 = 0
, const size_t arg_N6 = 0
, const size_t arg_N7 = 0
)
{
return map_type::memory_span( std::integral_constant<bool,false>()
, arg_N0 , arg_N1 , arg_N2 , arg_N3
, arg_N4 , arg_N5 , arg_N6 , arg_N7 );
}
explicit inline
- View( typename traits::value_type * const arg_ptr
+ View( pointer_type arg_ptr
, const size_t arg_N0 = 0
, const size_t arg_N1 = 0
, const size_t arg_N2 = 0
, const size_t arg_N3 = 0
, const size_t arg_N4 = 0
, const size_t arg_N5 = 0
, const size_t arg_N6 = 0
, const size_t arg_N7 = 0
)
: m_track() // No memory tracking
, m_map( arg_ptr , std::integral_constant<bool,false>()
, arg_N0 , arg_N1 , arg_N2 , arg_N3
, arg_N4 , arg_N5 , arg_N6 , arg_N7 )
{}
explicit inline
- View( typename traits::value_type * const arg_ptr
+ View( pointer_type arg_ptr
 , const typename traits::array_layout & arg_layout
)
: m_track() // No memory tracking
, m_map( arg_ptr , std::integral_constant<bool,false>(), arg_layout )
{}
//----------------------------------------
// Shared scratch memory constructor
static inline
size_t shmem_size( const size_t arg_N0 = 0 ,
const size_t arg_N1 = 0 ,
const size_t arg_N2 = 0 ,
const size_t arg_N3 = 0 ,
const size_t arg_N4 = 0 ,
const size_t arg_N5 = 0 ,
const size_t arg_N6 = 0 ,
const size_t arg_N7 = 0 )
{
return map_type::memory_span( std::integral_constant<bool,false>()
, arg_N0 , arg_N1 , arg_N2 , arg_N3
, arg_N4 , arg_N5 , arg_N6 , arg_N7 );
}
explicit KOKKOS_INLINE_FUNCTION
View( const typename traits::execution_space::scratch_memory_space & arg_space
, const size_t arg_N0 = 0
, const size_t arg_N1 = 0
, const size_t arg_N2 = 0
, const size_t arg_N3 = 0
, const size_t arg_N4 = 0
, const size_t arg_N5 = 0
, const size_t arg_N6 = 0
, const size_t arg_N7 = 0 )
: m_track() // No memory tracking
- , m_map( arg_space.get_shmem( map_type::memory_span( std::integral_constant<bool,false>()
- , arg_N0 , arg_N1 , arg_N2 , arg_N3
- , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) )
- , std::integral_constant<bool,false>()
- , arg_N0 , arg_N1 , arg_N2 , arg_N3
- , arg_N4 , arg_N5 , arg_N6 , arg_N7 )
- {}
-
- //----------------------------------------
- // Subviews
-
-private:
-
- /**\brief Private method to support extensibility of subview construction */
- KOKKOS_INLINE_FUNCTION
- View( const track_type & arg_track , const map_type & arg_map )
- : m_track( arg_track )
- , m_map( arg_map )
+ , m_map( reinterpret_cast<pointer_type>(
+ arg_space.get_shmem(
+ map_type::memory_span( std::integral_constant<bool,false>()
+ , arg_N0 , arg_N1 , arg_N2 , arg_N3
+ , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ) )
+ , std::integral_constant<bool,false>()
+ , arg_N0 , arg_N1 , arg_N2 , arg_N3
+ , arg_N4 , arg_N5 , arg_N6 , arg_N7 )
{}
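
  // Illustrative sketch (assumed team handle 'team', not part of this change):
  // inside a team-parallel kernel an unmanaged scratch view may be built as
  //
  //   View< double* , execution_space::scratch_memory_space , MemoryUnmanaged >
  //     tmp( team.team_shmem() , n );
  //
  // after reserving shmem_size(n) bytes for the team's scratch pad.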
-
- explicit KOKKOS_INLINE_FUNCTION
- View( const track_type & rhs )
- : m_track( rhs )
- , m_map()
- {}
-
-public:
-
- template< class D , class A1 , class A2 , class A3
- , class T0 , class T1 , class T2 , class T3
- , class T4 , class T5 , class T6 , class T7 >
- friend
- KOKKOS_INLINE_FUNCTION
- Kokkos::Experimental::Subview< View< D , A1 , A2 , A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T4>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T5>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T6>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T7>::is_range
- >
- subview( const View< D , A1 , A2 , A3 > & src
- , T0 const & arg0 , T1 const & arg1 , T2 const & arg2 , T3 const & arg3
- , T4 const & arg4 , T5 const & arg5 , T6 const & arg6 , T7 const & arg7
- );
-
- template< class D , class A1 , class A2 , class A3
- , class T0 , class T1 , class T2 , class T3
- , class T4 , class T5 , class T6 >
- friend
- KOKKOS_INLINE_FUNCTION
- Kokkos::Experimental::Subview< View< D , A1 , A2 , A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T4>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T5>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T6>::is_range
- >
- subview( const View< D , A1 , A2 , A3 > & src
- , T0 const & arg0 , T1 const & arg1 , T2 const & arg2 , T3 const & arg3
- , T4 const & arg4 , T5 const & arg5 , T6 const & arg6
- );
-
- template< class D , class A1 , class A2 , class A3
- , class T0 , class T1 , class T2 , class T3
- , class T4 , class T5 >
- friend
- KOKKOS_INLINE_FUNCTION
- Kokkos::Experimental::Subview< View< D , A1 , A2 , A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T4>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T5>::is_range
- >
- subview( const View< D , A1 , A2 , A3 > & src
- , T0 const & arg0 , T1 const & arg1 , T2 const & arg2 , T3 const & arg3
- , T4 const & arg4 , T5 const & arg5
- );
-
- template< class D , class A1 , class A2 , class A3
- , class T0 , class T1 , class T2 , class T3
- , class T4 >
- friend
- KOKKOS_INLINE_FUNCTION
- Kokkos::Experimental::Subview< View< D , A1 , A2 , A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T4>::is_range
- >
- subview( const View< D , A1 , A2 , A3 > & src
- , T0 const & arg0 , T1 const & arg1 , T2 const & arg2 , T3 const & arg3
- , T4 const & arg4
- );
-
- template< class D , class A1 , class A2 , class A3
- , class T0 , class T1 , class T2 , class T3 >
- friend
- KOKKOS_INLINE_FUNCTION
- Kokkos::Experimental::Subview< View< D , A1 , A2 , A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- >
- subview( const View< D , A1 , A2 , A3 > & src
- , T0 const & arg0 , T1 const & arg1 , T2 const & arg2 , T3 const & arg3
- );
-
- template< class D , class A1 , class A2 , class A3
- , class T0 , class T1 , class T2 >
- friend
- KOKKOS_INLINE_FUNCTION
- Kokkos::Experimental::Subview< View< D , A1 , A2 , A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- >
- subview( const View< D , A1 , A2 , A3 > & src
- , T0 const & arg0 , T1 const & arg1 , T2 const & arg2
- );
-
- template< class D , class A1 , class A2 , class A3
- , class T0 , class T1 >
- friend
- KOKKOS_INLINE_FUNCTION
- Kokkos::Experimental::Subview< View< D , A1 , A2 , A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- >
- subview( const View< D , A1 , A2 , A3 > & src
- , T0 const & arg0 , T1 const & arg1
- );
-
- template< class D, class A1, class A2, class A3, class T0 >
- friend
- KOKKOS_INLINE_FUNCTION
- Kokkos::Experimental::Subview< View< D, A1, A2, A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- >
- subview( const View< D, A1, A2, A3 > & src , T0 const & arg0 );
-
};
-template< class > struct is_view : public std::false_type {};
-
-template< class D, class A1, class A2, class A3 >
-struct is_view< View<D,A1,A2,A3> > : public std::true_type {};
-
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
-template< class D, class A1, class A2, class A3
- , class T0 , class T1 , class T2 , class T3
- , class T4 , class T5 , class T6 , class T7 >
-KOKKOS_INLINE_FUNCTION
-Kokkos::Experimental::Subview< View< D, A1, A2, A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T4>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T5>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T6>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T7>::is_range
- >
-subview( const View< D, A1, A2, A3 > & src
- , T0 const & arg0 , T1 const & arg1 , T2 const & arg2 , T3 const & arg3
- , T4 const & arg4 , T5 const & arg5 , T6 const & arg6 , T7 const & arg7
- )
-{
- typedef View< D, A1, A2, A3 > SrcView ;
-
- typedef Kokkos::Experimental::Impl::SubviewMapping
- < typename SrcView::traits
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T4>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T5>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T6>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T7>::is_range
- > Mapping ;
-
- typedef typename Mapping::type DstView ;
-
- static_assert( SrcView::Rank == 8 , "Subview of rank 8 View requires 8 arguments" );
-
- DstView dst( src.m_track );
-
- Mapping::assign( dst.m_map, src.m_map, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 );
-
- return dst ;
-}
-
-template< class D, class A1, class A2, class A3
- , class T0 , class T1 , class T2 , class T3
- , class T4 , class T5 , class T6 >
-KOKKOS_INLINE_FUNCTION
-Kokkos::Experimental::Subview< View< D, A1, A2, A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T4>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T5>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T6>::is_range
- >
-subview( const View< D, A1, A2, A3 > & src
- , T0 const & arg0 , T1 const & arg1 , T2 const & arg2 , T3 const & arg3
- , T4 const & arg4 , T5 const & arg5 , T6 const & arg6
- )
-{
- typedef View< D, A1, A2, A3 > SrcView ;
-
- typedef Kokkos::Experimental::Impl::SubviewMapping
- < typename SrcView::traits
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T4>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T5>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T6>::is_range
- > Mapping ;
-
- typedef typename Mapping::type DstView ;
-
- static_assert( SrcView::Rank == 7 , "Subview of rank 7 View requires 7 arguments" );
-
- DstView dst( src.m_track );
-
- Mapping::assign( dst.m_map, src.m_map, arg0, arg1, arg2, arg3, arg4, arg5, arg6, 0 );
-
- return dst ;
-}
-
-template< class D, class A1, class A2, class A3
- , class T0 , class T1 , class T2 , class T3
- , class T4 , class T5 >
-KOKKOS_INLINE_FUNCTION
-Kokkos::Experimental::Subview< View< D, A1, A2, A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T4>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T5>::is_range
- >
-subview( const View< D, A1, A2, A3 > & src
- , T0 const & arg0 , T1 const & arg1 , T2 const & arg2 , T3 const & arg3
- , T4 const & arg4 , T5 const & arg5
- )
-{
- typedef View< D, A1, A2, A3 > SrcView ;
-
- typedef Kokkos::Experimental::Impl::SubviewMapping
- < typename SrcView::traits
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T4>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T5>::is_range
- > Mapping ;
-
- typedef typename Mapping::type DstView ;
-
- static_assert( SrcView::Rank == 6 , "Subview of rank 6 View requires 6 arguments" );
+template< class V , class ... Args >
+using Subview =
+ typename Kokkos::Experimental::Impl::ViewMapping
+ < void /* deduce subview type from source view traits */
+ , V
+ , Args ...
+ >::type ;
- DstView dst( src.m_track );
-
- Mapping::assign( dst.m_map, src.m_map, arg0, arg1, arg2, arg3, arg4, arg5, 0, 0 );
-
- return dst ;
-}
-
-template< class D, class A1, class A2, class A3
- , class T0 , class T1 , class T2 , class T3
- , class T4 >
-KOKKOS_INLINE_FUNCTION
-Kokkos::Experimental::Subview< View< D, A1, A2, A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T4>::is_range
- >
-subview( const View< D, A1, A2, A3 > & src
- , T0 const & arg0 , T1 const & arg1 , T2 const & arg2 , T3 const & arg3
- , T4 const & arg4
- )
-{
- typedef View< D, A1, A2, A3 > SrcView ;
-
- typedef Kokkos::Experimental::Impl::SubviewMapping
- < typename SrcView::traits
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T4>::is_range
- > Mapping ;
-
- typedef typename Mapping::type DstView ;
-
- static_assert( SrcView::Rank == 5 , "Subview of rank 5 View requires 5 arguments" );
-
- DstView dst( src.m_track );
-
- Mapping::assign( dst.m_map, src.m_map, arg0, arg1, arg2, arg3, arg4, 0, 0, 0 );
-
- return dst ;
-}
-
-template< class D, class A1, class A2, class A3
- , class T0 , class T1 , class T2 , class T3 >
+template< class D, class ... P , class ... Args >
KOKKOS_INLINE_FUNCTION
-Kokkos::Experimental::Subview< View< D, A1, A2, A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- >
-subview( const View< D, A1, A2, A3 > & src
- , T0 const & arg0 , T1 const & arg1 , T2 const & arg2 , T3 const & arg3
- )
+typename Kokkos::Experimental::Impl::ViewMapping
+ < void /* deduce subview type from source view traits */
+ , ViewTraits< D , P... >
+ , Args ...
+ >::type
+subview( const View< D, P... > & src , Args ... args )
{
- typedef View< D, A1, A2, A3 > SrcView ;
-
- typedef Kokkos::Experimental::Impl::SubviewMapping
- < typename SrcView::traits
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T3>::is_range
- > Mapping ;
-
- typedef typename Mapping::type DstView ;
-
- static_assert( SrcView::Rank == 4 , "Subview of rank 4 View requires 4 arguments" );
-
- DstView dst( src.m_track );
-
- Mapping::assign( dst.m_map, src.m_map, arg0, arg1, arg2, arg3, 0, 0, 0, 0 );
-
- return dst ;
+ static_assert( View< D , P... >::Rank == sizeof...(Args) ,
+ "subview requires one argument for each source View rank" );
+
+ return typename
+ Kokkos::Experimental::Impl::ViewMapping
+ < void /* deduce subview type from source view traits */
+ , ViewTraits< D , P ... >
+ , Args ... >::type( src , args ... );
}
-template< class D, class A1, class A2, class A3
- , class T0 , class T1 , class T2 >
+template< class MemoryTraits , class D, class ... P , class ... Args >
KOKKOS_INLINE_FUNCTION
-Kokkos::Experimental::Subview< View< D, A1, A2, A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- >
-subview( const View< D, A1, A2, A3 > & src
- , T0 const & arg0 , T1 const & arg1 , T2 const & arg2
- )
+typename Kokkos::Experimental::Impl::ViewMapping
+ < void /* deduce subview type from source view traits */
+ , ViewTraits< D , P... >
+ , Args ...
+ >::template apply< MemoryTraits >::type
+subview( const View< D, P... > & src , Args ... args )
{
- typedef View< D, A1, A2, A3 > SrcView ;
-
- typedef Kokkos::Experimental::Impl::SubviewMapping
- < typename SrcView::traits
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T2>::is_range
- > Mapping ;
-
- typedef typename Mapping::type DstView ;
-
- static_assert( SrcView::Rank == 3 , "Subview of rank 3 View requires 3 arguments" );
-
- DstView dst( src.m_track );
-
- Mapping::assign( dst.m_map, src.m_map, arg0, arg1, arg2, 0, 0, 0, 0, 0 );
-
- return dst ;
+ static_assert( View< D , P... >::Rank == sizeof...(Args) ,
+ "subview requires one argument for each source View rank" );
+
+ return typename
+ Kokkos::Experimental::Impl::ViewMapping
+ < void /* deduce subview type from source view traits */
+ , ViewTraits< D , P ... >
+ , Args ... >
+ ::template apply< MemoryTraits >
+ ::type( src , args ... );
}
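
The variadic overload pair above replaces the rank-specific overloads removed below; the static_assert enforces one subview argument per source rank. A minimal usage sketch (illustrative, not part of the patch; assumes <Kokkos_Core.hpp>, an initialized runtime, and KOKKOS_USING_EXPERIMENTAL_VIEW so that Kokkos::subview names these overloads):

#include <Kokkos_Core.hpp>
#include <utility>

void subview_sketch()
{
  Kokkos::View<double**> a( "a" , 10 , 10 );

  // One argument per source rank: an integer collapses that rank,
  // a range ( Kokkos::ALL() or a std::pair ) keeps it.
  auto row = Kokkos::subview( a , 5 , Kokkos::ALL() );                   // rank-1 row
  auto blk = Kokkos::subview( a , std::make_pair(2,6) , Kokkos::ALL() ); // rank-2 block
  (void) row ; (void) blk ;
}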
-template< class D, class A1, class A2, class A3
- , class T0 , class T1 >
-KOKKOS_INLINE_FUNCTION
-Kokkos::Experimental::Subview< View< D, A1, A2, A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- >
-subview( const View< D, A1, A2, A3 > & src
- , T0 const & arg0 , T1 const & arg1
- )
-{
- typedef View< D, A1, A2, A3 > SrcView ;
-
- typedef Kokkos::Experimental::Impl::SubviewMapping
- < typename SrcView::traits
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- , Kokkos::Experimental::Impl::ViewOffsetRange<T1>::is_range
- > Mapping ;
-
- typedef typename Mapping::type DstView ;
-
- static_assert( SrcView::Rank == 2 , "Subview of rank 2 View requires 2 arguments" );
-
- DstView dst( src.m_track );
- Mapping::assign( dst.m_map, src.m_map, arg0, arg1, 0, 0, 0, 0, 0, 0 );
-
- return dst ;
-}
-
-template< class D, class A1, class A2, class A3, class T0 >
-KOKKOS_INLINE_FUNCTION
-Kokkos::Experimental::Subview< View< D, A1, A2, A3 >
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- >
-subview( const View< D, A1, A2, A3 > & src , T0 const & arg0 )
-{
- typedef View< D, A1, A2, A3 > SrcView ;
-
- typedef Kokkos::Experimental::Impl::SubviewMapping
- < typename SrcView::traits
- , Kokkos::Experimental::Impl::ViewOffsetRange<T0>::is_range
- > Mapping ;
-
- typedef typename Mapping::type DstView ;
-
- static_assert( SrcView::Rank == 1 , "Subview of rank 1 View requires 1 arguments" );
-
- DstView dst( src.m_track );
-
- Mapping::assign( dst.m_map , src.m_map , arg0, 0, 0, 0, 0, 0, 0, 0 );
-
- return dst ;
-}
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
-template< class LT , class L1 , class L2 , class L3
- , class RT , class R1 , class R2 , class R3 >
+template< class LT , class ... LP , class RT , class ... RP >
KOKKOS_INLINE_FUNCTION
-bool operator == ( const View<LT,L1,L2,L3> & lhs ,
- const View<RT,R1,R2,R3> & rhs )
+bool operator == ( const View<LT,LP...> & lhs ,
+ const View<RT,RP...> & rhs )
{
// Same data, layout, dimensions
- typedef ViewTraits<LT,L1,L2,L3> lhs_traits ;
- typedef ViewTraits<RT,R1,R2,R3> rhs_traits ;
+ typedef ViewTraits<LT,LP...> lhs_traits ;
+ typedef ViewTraits<RT,RP...> rhs_traits ;
return
std::is_same< typename lhs_traits::const_value_type ,
typename rhs_traits::const_value_type >::value &&
std::is_same< typename lhs_traits::array_layout ,
typename rhs_traits::array_layout >::value &&
std::is_same< typename lhs_traits::memory_space ,
typename rhs_traits::memory_space >::value &&
- lhs_traits::Rank == rhs_traits::Rank &&
+ lhs_traits::rank == rhs_traits::rank &&
lhs.data() == rhs.data() &&
lhs.span() == rhs.span() &&
lhs.dimension_0() == rhs.dimension_0() &&
lhs.dimension_1() == rhs.dimension_1() &&
lhs.dimension_2() == rhs.dimension_2() &&
lhs.dimension_3() == rhs.dimension_3() &&
lhs.dimension_4() == rhs.dimension_4() &&
lhs.dimension_5() == rhs.dimension_5() &&
lhs.dimension_6() == rhs.dimension_6() &&
lhs.dimension_7() == rhs.dimension_7();
}
-template< class LT , class L1 , class L2 , class L3
- , class RT , class R1 , class R2 , class R3 >
+template< class LT , class ... LP , class RT , class ... RP >
KOKKOS_INLINE_FUNCTION
-bool operator != ( const View<LT,L1,L2,L3> & lhs ,
- const View<RT,R1,R2,R3> & rhs )
+bool operator != ( const View<LT,LP...> & lhs ,
+ const View<RT,RP...> & rhs )
{
return ! ( operator==(lhs,rhs) );
}
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
+namespace Kokkos {
+namespace Impl {
+
+#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
+inline
+void shared_allocation_tracking_claim_and_disable()
+{ Kokkos::Experimental::Impl::SharedAllocationRecord<void,void>::tracking_claim_and_disable(); }
+
+inline
+void shared_allocation_tracking_release_and_enable()
+{ Kokkos::Experimental::Impl::SharedAllocationRecord<void,void>::tracking_release_and_enable(); }
+
+#else
+
+inline
+void shared_allocation_tracking_claim_and_disable()
+{ Kokkos::Impl::AllocationTracker::disable_tracking(); }
+
+inline
+void shared_allocation_tracking_release_and_enable()
+{ Kokkos::Impl::AllocationTracker::enable_tracking(); }
+
+#endif
+
+} /* namespace Impl */
+} /* namespace Kokkos */
+
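Both branches expose the same two entry points, so callers need not know which view implementation is active. A sketch of the intended bracket-style use (illustrative only):

void tracking_bracket_sketch()
{
  Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
  /* ... copy or destroy View handles without reference-count updates ... */
  Kokkos::Impl::shared_allocation_tracking_release_and_enable();
}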
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
namespace Kokkos {
namespace Experimental {
namespace Impl {
template< class OutputView , typename Enable = void >
struct ViewFill {
typedef typename OutputView::const_value_type const_value_type ;
const OutputView output ;
const_value_type input ;
KOKKOS_INLINE_FUNCTION
void operator()( const size_t i0 ) const
{
const size_t n1 = output.dimension_1();
const size_t n2 = output.dimension_2();
const size_t n3 = output.dimension_3();
const size_t n4 = output.dimension_4();
const size_t n5 = output.dimension_5();
const size_t n6 = output.dimension_6();
const size_t n7 = output.dimension_7();
for ( size_t i1 = 0 ; i1 < n1 ; ++i1 ) {
for ( size_t i2 = 0 ; i2 < n2 ; ++i2 ) {
for ( size_t i3 = 0 ; i3 < n3 ; ++i3 ) {
for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) {
for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) {
for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) {
for ( size_t i7 = 0 ; i7 < n7 ; ++i7 ) {
output(i0,i1,i2,i3,i4,i5,i6,i7) = input ;
}}}}}}}
}
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
: output( arg_out ), input( arg_in )
{
typedef typename OutputView::execution_space execution_space ;
typedef Kokkos::RangePolicy< execution_space > Policy ;
- (void) Kokkos::Impl::ParallelFor< ViewFill , Policy >( *this , Policy( 0 , output.dimension_0() ) );
+ const Kokkos::Impl::ParallelFor< ViewFill , Policy > closure( *this , Policy( 0 , output.dimension_0() ) );
+
+ closure.execute();
execution_space::fence();
}
};
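
ViewFill parallelizes only the leftmost index and loops the remaining ranks serially within each thread. For a rank-2 view the dispatch above is conceptually equivalent to this sketch (illustrative, not part of the patch; assumes a lambda-capable build):

template< class ViewType >
void fill_rank2( const ViewType & out , typename ViewType::const_value_type value )
{
  Kokkos::parallel_for( out.dimension_0() , KOKKOS_LAMBDA( const size_t i0 ) {
    for ( size_t i1 = 0 ; i1 < out.dimension_1() ; ++i1 ) {
      out(i0,i1) = value ;
    }
  });
}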
template< class OutputView >
struct ViewFill< OutputView , typename std::enable_if< OutputView::Rank == 0 >::type > {
ViewFill( const OutputView & dst , const typename OutputView::const_value_type & src )
{
Kokkos::Impl::DeepCopy< typename OutputView::memory_space , Kokkos::HostSpace >
( dst.data() , & src , sizeof(typename OutputView::const_value_type) );
}
};
template< class OutputView , class InputView >
struct ViewRemap {
const OutputView output ;
const InputView input ;
const size_t n0 ;
const size_t n1 ;
const size_t n2 ;
const size_t n3 ;
const size_t n4 ;
const size_t n5 ;
const size_t n6 ;
const size_t n7 ;
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
: output( arg_out ), input( arg_in )
, n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) )
, n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) )
, n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) )
, n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) )
, n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) )
, n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) )
, n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) )
, n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) )
{
typedef typename OutputView::execution_space execution_space ;
typedef Kokkos::RangePolicy< execution_space > Policy ;
- (void) Kokkos::Impl::ParallelFor< ViewRemap , Policy >( *this , Policy( 0 , n0 ) );
+ const Kokkos::Impl::ParallelFor< ViewRemap , Policy > closure( *this , Policy( 0 , n0 ) );
+ closure.execute();
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_t i0 ) const
{
for ( size_t i1 = 0 ; i1 < n1 ; ++i1 ) {
for ( size_t i2 = 0 ; i2 < n2 ; ++i2 ) {
for ( size_t i3 = 0 ; i3 < n3 ; ++i3 ) {
for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) {
for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) {
for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) {
for ( size_t i7 = 0 ; i7 < n7 ; ++i7 ) {
output(i0,i1,i2,i3,i4,i5,i6,i7) = input(i0,i1,i2,i3,i4,i5,i6,i7);
}}}}}}}
}
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
/** \brief Deep copy a value from Host memory into a view. */
-template< class DT , class D1 , class D2 , class D3 >
+template< class DT , class ... DP >
inline
-void deep_copy( const View<DT,D1,D2,D3> & dst
- , typename ViewTraits<DT,D1,D2,D3>::const_value_type & value )
+void deep_copy
+ ( const View<DT,DP...> & dst
+ , typename ViewTraits<DT,DP...>::const_value_type & value
+ , typename std::enable_if<
+ std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value
+ >::type * = 0 )
{
- static_assert( std::is_same< typename ViewTraits<DT,D1,D2,D3>::non_const_value_type ,
- typename ViewTraits<DT,D1,D2,D3>::value_type >::value
- , "ERROR: Incompatible deep_copy( View , value )" );
+ static_assert(
+ std::is_same< typename ViewTraits<DT,DP...>::non_const_value_type ,
+ typename ViewTraits<DT,DP...>::value_type >::value
+ , "deep_copy requires non-const type" );
- Kokkos::Experimental::Impl::ViewFill< View<DT,D1,D2,D3> >( dst , value );
+ Kokkos::Experimental::Impl::ViewFill< View<DT,DP...> >( dst , value );
}
/** \brief Deep copy into a value in Host memory from a view. */
-template< class ST , class S1 , class S2 , class S3 >
+template< class ST , class ... SP >
inline
-void deep_copy( ST & dst , const View<ST,S1,S2,S3> & src )
+void deep_copy
+ ( ST & dst
+ , const View<ST,SP...> & src
+ , typename std::enable_if<
+ std::is_same< typename ViewTraits<ST,SP...>::specialize , void >::value
+ >::type * = 0 )
{
- static_assert( ViewTraits<ST,S1,S2,S3>::rank == 0
+ static_assert( ViewTraits<ST,SP...>::rank == 0
, "ERROR: Non-rank-zero view in deep_copy( value , View )" );
- typedef ViewTraits<ST,S1,S2,S3> src_traits ;
+ typedef ViewTraits<ST,SP...> src_traits ;
typedef typename src_traits::memory_space src_memory_space ;
Kokkos::Impl::DeepCopy< HostSpace , src_memory_space >( & dst , src.data() , sizeof(ST) );
}
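
Usage sketch for the two directions (illustrative; assumes <Kokkos_Core.hpp> and an initialized runtime):

void deep_copy_sketch()
{
  Kokkos::View<double*> x( "x" , 100 );
  Kokkos::Experimental::deep_copy( x , 3.14 );   // fills every x(i) via ViewFill

  Kokkos::View<double> r( "r" );                 // rank-0 view
  double h = 0 ;
  Kokkos::Experimental::deep_copy( h , r );      // copies r's single value to host
}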
//----------------------------------------------------------------------------
/** \brief A deep copy between views of compatible type, and rank zero. */
-template< class DT , class D1 , class D2 , class D3
- , class ST , class S1 , class S2 , class S3 >
+template< class DT , class ... DP , class ST , class ... SP >
inline
-void deep_copy( const View<DT,D1,D2,D3> & dst ,
- const View<ST,S1,S2,S3> & src ,
- typename std::enable_if<(
- // Rank zero:
- ( unsigned(ViewTraits<DT,D1,D2,D3>::rank) == unsigned(0) ) &&
- ( unsigned(ViewTraits<ST,S1,S2,S3>::rank) == unsigned(0) ) &&
- // Same type and destination is not constant:
- std::is_same< typename ViewTraits<DT,D1,D2,D3>::value_type ,
- typename ViewTraits<ST,S1,S2,S3>::non_const_value_type >::value
- )>::type * = 0 )
+void deep_copy
+ ( const View<DT,DP...> & dst
+ , const View<ST,SP...> & src
+ , typename std::enable_if<(
+ std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value &&
+ std::is_same< typename ViewTraits<ST,SP...>::specialize , void >::value &&
+ ( unsigned(ViewTraits<DT,DP...>::rank) == unsigned(0) &&
+ unsigned(ViewTraits<ST,SP...>::rank) == unsigned(0) )
+ )>::type * = 0 )
{
- typedef View<DT,D1,D2,D3> dst_type ;
- typedef View<ST,S1,S2,S3> src_type ;
+ static_assert(
+ std::is_same< typename ViewTraits<DT,DP...>::value_type ,
+ typename ViewTraits<ST,SP...>::non_const_value_type >::value
+ , "deep_copy requires matching non-const destination type" );
+
+ typedef View<DT,DP...> dst_type ;
+ typedef View<ST,SP...> src_type ;
typedef typename dst_type::value_type value_type ;
typedef typename dst_type::memory_space dst_memory_space ;
typedef typename src_type::memory_space src_memory_space ;
if ( dst.data() != src.data() ) {
Kokkos::Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.data() , src.data() , sizeof(value_type) );
}
}
//----------------------------------------------------------------------------
/** \brief A deep copy between views of the default specialization, compatible type,
* same non-zero rank, same contiguous layout.
*/
-template< class DT , class D1 , class D2 , class D3 ,
- class ST , class S1 , class S2 , class S3 >
+template< class DT , class ... DP , class ST , class ... SP >
inline
-void deep_copy( const View<DT,D1,D2,D3> & dst ,
- const View<ST,S1,S2,S3> & src ,
- typename std::enable_if<(
- // destination is non-const.
- std::is_same< typename ViewTraits<DT,D1,D2,D3>::value_type ,
- typename ViewTraits<DT,D1,D2,D3>::non_const_value_type >::value
- &&
- // Same non-zero rank:
- ( unsigned(ViewTraits<DT,D1,D2,D3>::rank) != 0 )
- &&
- ( unsigned(ViewTraits<DT,D1,D2,D3>::rank) ==
- unsigned(ViewTraits<ST,S1,S2,S3>::rank) )
- &&
- // Not specialized, default ViewMapping
- std::is_same< typename ViewTraits<DT,D1,D2,D3>::specialize , void >::value
- &&
- std::is_same< typename ViewTraits<ST,S1,S2,S3>::specialize , void >::value
- )>::type * = 0 )
+void deep_copy
+ ( const View<DT,DP...> & dst
+ , const View<ST,SP...> & src
+ , typename std::enable_if<(
+ std::is_same< typename ViewTraits<DT,DP...>::specialize , void >::value &&
+ std::is_same< typename ViewTraits<ST,SP...>::specialize , void >::value &&
+ ( unsigned(ViewTraits<DT,DP...>::rank) != 0 ||
+ unsigned(ViewTraits<ST,SP...>::rank) != 0 )
+ )>::type * = 0 )
{
- typedef View<DT,D1,D2,D3> dst_type ;
- typedef View<ST,S1,S2,S3> src_type ;
+ static_assert(
+ std::is_same< typename ViewTraits<DT,DP...>::value_type ,
+ typename ViewTraits<DT,DP...>::non_const_value_type >::value
+ , "deep_copy requires non-const destination type" );
+
+ static_assert(
+ ( unsigned(ViewTraits<DT,DP...>::rank) ==
+ unsigned(ViewTraits<ST,SP...>::rank) )
+ , "deep_copy requires Views of equal rank" );
+
+ typedef View<DT,DP...> dst_type ;
+ typedef View<ST,SP...> src_type ;
typedef typename dst_type::execution_space dst_execution_space ;
typedef typename dst_type::memory_space dst_memory_space ;
typedef typename src_type::memory_space src_memory_space ;
enum { DstExecCanAccessSrc =
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value };
if ( (void *) dst.data() != (void*) src.data() ) {
// Concern: if the views overlap, a parallel copy will be erroneous.
// ...
// If same type, equal layout, equal dimensions, equal span, and contiguous memory then can byte-wise copy
- if ( std::is_same< typename ViewTraits<DT,D1,D2,D3>::value_type ,
- typename ViewTraits<ST,S1,S2,S3>::non_const_value_type >::value &&
- std::is_same< typename ViewTraits<DT,D1,D2,D3>::array_layout ,
- typename ViewTraits<ST,S1,S2,S3>::array_layout >::value &&
+ if ( std::is_same< typename ViewTraits<DT,DP...>::value_type ,
+ typename ViewTraits<ST,SP...>::non_const_value_type >::value &&
+ std::is_same< typename ViewTraits<DT,DP...>::array_layout ,
+ typename ViewTraits<ST,SP...>::array_layout >::value &&
dst.span_is_contiguous() &&
src.span_is_contiguous() &&
dst.span() == src.span() &&
dst.dimension_0() == src.dimension_0() &&
dst.dimension_1() == src.dimension_1() &&
dst.dimension_2() == src.dimension_2() &&
dst.dimension_3() == src.dimension_3() &&
dst.dimension_4() == src.dimension_4() &&
dst.dimension_5() == src.dimension_5() &&
dst.dimension_6() == src.dimension_6() &&
dst.dimension_7() == src.dimension_7() ) {
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span();
Kokkos::Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.data() , src.data() , nbytes );
}
else if ( DstExecCanAccessSrc ) {
// Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src );
}
else {
Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation");
}
}
}
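The three run-time outcomes, sketched (illustrative; assumes an initialized runtime):

void copy_paths_sketch()
{
  Kokkos::View<double**> a( "a" , 8 , 8 );
  Kokkos::View<double**> b( "b" , 8 , 8 );
  Kokkos::Experimental::deep_copy( b , a );  // same type/layout/dims: one byte-wise copy
  // A shape mismatch between accessible memory spaces would instead run the
  // element-wise ViewRemap; an inaccessible mismatch throws at run time.
}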
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
-template< class T , class A1, class A2, class A3 >
+template< class T , class ... P >
inline
-typename Kokkos::Experimental::View<T,A1,A2,A3>::HostMirror
-create_mirror( const Kokkos::Experimental::View<T,A1,A2,A3> & src
+typename Kokkos::Experimental::View<T,P...>::HostMirror
+create_mirror( const Kokkos::Experimental::View<T,P...> & src
, typename std::enable_if<
- ! std::is_same< typename Kokkos::Experimental::ViewTraits<T,A1,A2,A3>::array_layout
+ ! std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout
, Kokkos::LayoutStride >::value
>::type * = 0
)
{
- typedef View<T,A1,A2,A3> src_type ;
+ typedef View<T,P...> src_type ;
typedef typename src_type::HostMirror dst_type ;
return dst_type( std::string( src.label() ).append("_mirror")
, src.dimension_0()
, src.dimension_1()
, src.dimension_2()
, src.dimension_3()
, src.dimension_4()
, src.dimension_5()
, src.dimension_6()
, src.dimension_7() );
}
-template< class T , class A1, class A2, class A3 >
+template< class T , class ... P >
inline
-typename Kokkos::Experimental::View<T,A1,A2,A3>::HostMirror
-create_mirror( const Kokkos::Experimental::View<T,A1,A2,A3> & src
+typename Kokkos::Experimental::View<T,P...>::HostMirror
+create_mirror( const Kokkos::Experimental::View<T,P...> & src
, typename std::enable_if<
- std::is_same< typename Kokkos::Experimental::ViewTraits<T,A1,A2,A3>::array_layout
+ std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout
, Kokkos::LayoutStride >::value
>::type * = 0
)
{
- typedef View<T,A1,A2,A3> src_type ;
+ typedef View<T,P...> src_type ;
typedef typename src_type::HostMirror dst_type ;
Kokkos::LayoutStride layout ;
layout.dimension[0] = src.dimension_0();
layout.dimension[1] = src.dimension_1();
layout.dimension[2] = src.dimension_2();
layout.dimension[3] = src.dimension_3();
layout.dimension[4] = src.dimension_4();
layout.dimension[5] = src.dimension_5();
layout.dimension[6] = src.dimension_6();
layout.dimension[7] = src.dimension_7();
layout.stride[0] = src.stride_0();
layout.stride[1] = src.stride_1();
layout.stride[2] = src.stride_2();
layout.stride[3] = src.stride_3();
layout.stride[4] = src.stride_4();
layout.stride[5] = src.stride_5();
layout.stride[6] = src.stride_6();
layout.stride[7] = src.stride_7();
return dst_type( std::string( src.label() ).append("_mirror") , layout );
}
-template< class T , class A1 , class A2 , class A3 >
+template< class T , class ... P >
inline
-typename Kokkos::Experimental::View<T,A1,A2,A3>::HostMirror
-create_mirror_view( const Kokkos::Experimental::View<T,A1,A2,A3> & src
+typename Kokkos::Experimental::View<T,P...>::HostMirror
+create_mirror_view( const Kokkos::Experimental::View<T,P...> & src
, typename std::enable_if<(
- std::is_same< typename Kokkos::Experimental::View<T,A1,A2,A3>::memory_space
- , typename Kokkos::Experimental::View<T,A1,A2,A3>::HostMirror::memory_space
+ std::is_same< typename Kokkos::Experimental::View<T,P...>::memory_space
+ , typename Kokkos::Experimental::View<T,P...>::HostMirror::memory_space
>::value
&&
- std::is_same< typename Kokkos::Experimental::View<T,A1,A2,A3>::data_type
- , typename Kokkos::Experimental::View<T,A1,A2,A3>::HostMirror::data_type
+ std::is_same< typename Kokkos::Experimental::View<T,P...>::data_type
+ , typename Kokkos::Experimental::View<T,P...>::HostMirror::data_type
>::value
)>::type * = 0
)
{
return src ;
}
-template< class T , class A1 , class A2 , class A3 >
+template< class T , class ... P >
inline
-typename Kokkos::Experimental::View<T,A1,A2,A3>::HostMirror
-create_mirror_view( const Kokkos::Experimental::View<T,A1,A2,A3> & src
+typename Kokkos::Experimental::View<T,P...>::HostMirror
+create_mirror_view( const Kokkos::Experimental::View<T,P...> & src
, typename std::enable_if< ! (
- std::is_same< typename Kokkos::Experimental::View<T,A1,A2,A3>::memory_space
- , typename Kokkos::Experimental::View<T,A1,A2,A3>::HostMirror::memory_space
+ std::is_same< typename Kokkos::Experimental::View<T,P...>::memory_space
+ , typename Kokkos::Experimental::View<T,P...>::HostMirror::memory_space
>::value
&&
- std::is_same< typename Kokkos::Experimental::View<T,A1,A2,A3>::data_type
- , typename Kokkos::Experimental::View<T,A1,A2,A3>::HostMirror::data_type
+ std::is_same< typename Kokkos::Experimental::View<T,P...>::data_type
+ , typename Kokkos::Experimental::View<T,P...>::HostMirror::data_type
>::value
)>::type * = 0
)
{
return Kokkos::Experimental::create_mirror( src );
}
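Together these give the canonical mirror pattern: create_mirror_view returns the source itself when it is already host-accessible with a matching data type, so the host copy is elided where possible. Sketch (illustrative; assumes an initialized runtime):

void mirror_sketch()
{
  Kokkos::View<double*> d( "d" , 100 );                    // device view
  auto h = Kokkos::Experimental::create_mirror_view( d );  // host view, or d itself
  for ( size_t i = 0 ; i < h.dimension_0() ; ++i ) h(i) = double(i);
  Kokkos::Experimental::deep_copy( d , h );                // skipped if h aliases d
}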
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
/** \brief Resize a view with copying old data to new data at the corresponding indices. */
-template< class T , class A1 , class A2 , class A3 >
+template< class T , class ... P >
inline
-void resize( Kokkos::Experimental::View<T,A1,A2,A3> & v ,
+void resize( Kokkos::Experimental::View<T,P...> & v ,
const size_t n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 )
{
- typedef Kokkos::Experimental::View<T,A1,A2,A3> view_type ;
+ typedef Kokkos::Experimental::View<T,P...> view_type ;
- static_assert( Kokkos::Experimental::ViewTraits<T,A1,A2,A3>::is_managed , "Can only resize managed views" );
+ static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only resize managed views" );
view_type v_resized( v.label(), n0, n1, n2, n3, n4, n5, n6, n7 );
Kokkos::Experimental::Impl::ViewRemap< view_type , view_type >( v_resized , v );
v = v_resized ;
}
/** \brief Reallocate a view, discarding the old data (unlike resize, no copy into the new allocation). */
-template< class T , class A1 , class A2 , class A3 >
+template< class T , class ... P >
inline
-void realloc( Kokkos::Experimental::View<T,A1,A2,A3> & v ,
+void realloc( Kokkos::Experimental::View<T,P...> & v ,
const size_t n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 )
{
- typedef Kokkos::Experimental::View<T,A1,A2,A3> view_type ;
+ typedef Kokkos::Experimental::View<T,P...> view_type ;
- static_assert( Kokkos::Experimental::ViewTraits<T,A1,A2,A3>::is_managed , "Can only realloc managed views" );
+ static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" );
const std::string label = v.label();
v = view_type(); // Deallocate first, if the only view to allocation
v = view_type( label, n0, n1, n2, n3, n4, n5, n6, n7 );
}
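The contrast, sketched (illustrative): resize preserves the overlapping entries through ViewRemap, while realloc starts from a fresh allocation.

void resize_realloc_sketch()
{
  Kokkos::View<int*> v( "v" , 10 );
  Kokkos::Experimental::resize ( v , 20 );   // v(0..9) keep their old values
  Kokkos::Experimental::realloc( v , 20 );   // old contents are dropped
}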
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
namespace Kokkos {
-template< class D , class A1 = void , class A2 = void , class A3 = void >
-using ViewTraits = Kokkos::Experimental::ViewTraits<D,A1,A2,A3> ;
+template< class D , class ... P >
+using ViewTraits = Kokkos::Experimental::ViewTraits<D,P...> ;
-template< class D , class A1 = void , class A2 = void , class A3 = void , class S = void >
-using View = Kokkos::Experimental::View<D,A1,A2,A3> ;
+template< class D , class ... P >
+using View = Kokkos::Experimental::View<D,P...> ;
+using Kokkos::Experimental::ALL ;
using Kokkos::Experimental::deep_copy ;
using Kokkos::Experimental::create_mirror ;
using Kokkos::Experimental::create_mirror_view ;
using Kokkos::Experimental::subview ;
using Kokkos::Experimental::resize ;
using Kokkos::Experimental::realloc ;
+using Kokkos::Experimental::is_view ;
namespace Impl {
using Kokkos::Experimental::is_view ;
class ViewDefault {};
template< class SrcViewType
, class Arg0Type
, class Arg1Type
, class Arg2Type
, class Arg3Type
, class Arg4Type
, class Arg5Type
, class Arg6Type
, class Arg7Type
>
struct ViewSubview /* { typedef ... type ; } */ ;
}
} /* namespace Kokkos */
#include <impl/Kokkos_Atomic_View.hpp>
#endif /* #if defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif
diff --git a/lib/kokkos/core/src/Kokkos_Array.hpp b/lib/kokkos/core/src/Kokkos_Array.hpp
index 7fe8b1c39..80a388901 100644
--- a/lib/kokkos/core/src/Kokkos_Array.hpp
+++ b/lib/kokkos/core/src/Kokkos_Array.hpp
@@ -1,300 +1,301 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ARRAY
#define KOKKOS_ARRAY
#include <type_traits>
+#include <algorithm>
#include <limits>
namespace Kokkos {
/**\brief Derived from the C++17 'std::array'.
* Dropping the iterator interface.
*/
template< class T = void
, size_t N = ~size_t(0)
, class Proxy = void
>
struct Array {
private:
T m_elem[N];
public:
typedef T & reference ;
typedef typename std::add_const<T>::type & const_reference ;
typedef size_t size_type ;
typedef ptrdiff_t difference_type ;
typedef T value_type ;
typedef T * pointer ;
typedef typename std::add_const<T>::type * const_pointer ;
KOKKOS_INLINE_FUNCTION static constexpr size_type size() { return N ; }
KOKKOS_INLINE_FUNCTION static constexpr bool empty(){ return false ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
reference operator[]( const iType & i )
{
static_assert( std::is_integral<iType>::value , "Must be integral argument" );
return m_elem[i];
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
const_reference operator[]( const iType & i ) const
{
static_assert( std::is_integral<iType>::value , "Must be integral argument" );
return m_elem[i];
}
KOKKOS_INLINE_FUNCTION pointer data() { return & m_elem[0] ; }
KOKKOS_INLINE_FUNCTION const_pointer data() const { return & m_elem[0] ; }
~Array() = default ;
Array() = default ;
Array( const Array & ) = default ;
Array & operator = ( const Array & ) = default ;
// Some supported compilers are not sufficiently C++11 compliant
// for default move constructor and move assignment operator.
// Array( Array && ) = default ;
// Array & operator = ( Array && ) = default ;
};
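Usage sketch (illustrative): a fixed-size array with value semantics that is usable inside device kernels, minus the iterator interface.

KOKKOS_INLINE_FUNCTION void array_sketch()
{
  Kokkos::Array<int,3> idx ;                // size fixed at compile time
  idx[0] = 1 ; idx[1] = 2 ; idx[2] = 3 ;    // operator[] static_asserts an integral index type
}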
template< class T , class Proxy >
struct Array<T,0,Proxy> {
public:
typedef typename std::add_const<T>::type & reference ;
typedef typename std::add_const<T>::type & const_reference ;
typedef size_t size_type ;
typedef ptrdiff_t difference_type ;
typedef typename std::add_const<T>::type value_type ;
typedef typename std::add_const<T>::type * pointer ;
typedef typename std::add_const<T>::type * const_pointer ;
KOKKOS_INLINE_FUNCTION static constexpr size_type size() { return 0 ; }
KOKKOS_INLINE_FUNCTION static constexpr bool empty() { return true ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
value_type operator[]( const iType & )
{
static_assert( std::is_integral<iType>::value , "Must be integer argument" );
return value_type();
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
value_type operator[]( const iType & ) const
{
static_assert( std::is_integral<iType>::value , "Must be integer argument" );
return value_type();
}
KOKKOS_INLINE_FUNCTION pointer data() { return pointer(0) ; }
KOKKOS_INLINE_FUNCTION const_pointer data() const { return const_pointer(0); }
~Array() = default ;
Array() = default ;
Array( const Array & ) = default ;
Array & operator = ( const Array & ) = default ;
// Some supported compilers are not sufficiently C++11 compliant
// for default move constructor and move assignment operator.
// Array( Array && ) = default ;
// Array & operator = ( Array && ) = default ;
};
template<>
struct Array<void,~size_t(0),void>
{
struct contiguous {};
struct strided {};
};
template< class T >
struct Array< T , ~size_t(0) , Array<>::contiguous >
{
private:
T * m_elem ;
size_t m_size ;
public:
typedef T & reference ;
typedef typename std::add_const<T>::type & const_reference ;
typedef size_t size_type ;
typedef ptrdiff_t difference_type ;
typedef T value_type ;
typedef T * pointer ;
typedef typename std::add_const<T>::type * const_pointer ;
KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_size ; }
KOKKOS_INLINE_FUNCTION constexpr bool empty() const { return 0 == m_size ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
reference operator[]( const iType & i )
{
static_assert( std::is_integral<iType>::value , "Must be integral argument" );
return m_elem[i];
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
const_reference operator[]( const iType & i ) const
{
static_assert( std::is_integral<iType>::value , "Must be integral argument" );
return m_elem[i];
}
KOKKOS_INLINE_FUNCTION pointer data() { return m_elem ; }
KOKKOS_INLINE_FUNCTION const_pointer data() const { return m_elem ; }
~Array() = default ;
Array() = delete ;
Array( const Array & rhs ) = delete ;
// Some supported compilers are not sufficiently C++11 compliant
// for default move constructor and move assignment operator.
// Array( Array && rhs ) = default ;
// Array & operator = ( Array && rhs ) = delete ;
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array & rhs )
{
const size_t n = std::min( m_size , rhs.size() );
for ( size_t i = 0 ; i < n ; ++i ) m_elem[i] = rhs[i] ;
return *this ;
}
template< size_t N , class P >
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array<T,N,P> & rhs )
{
const size_t n = std::min( m_size , rhs.size() );
for ( size_t i = 0 ; i < n ; ++i ) m_elem[i] = rhs[i] ;
return *this ;
}
KOKKOS_INLINE_FUNCTION constexpr Array( pointer arg_ptr , size_type arg_size , size_type = 0 )
: m_elem(arg_ptr), m_size(arg_size) {}
};
template< class T >
struct Array< T , ~size_t(0) , Array<>::strided >
{
private:
T * m_elem ;
size_t m_size ;
size_t m_stride ;
public:
typedef T & reference ;
typedef typename std::add_const<T>::type & const_reference ;
typedef size_t size_type ;
typedef ptrdiff_t difference_type ;
typedef T value_type ;
typedef T * pointer ;
typedef typename std::add_const<T>::type * const_pointer ;
KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_size ; }
KOKKOS_INLINE_FUNCTION constexpr bool empty() const { return 0 == m_size ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
reference operator[]( const iType & i )
{
static_assert( std::is_integral<iType>::value , "Must be integral argument" );
return m_elem[i*m_stride];
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
const_reference operator[]( const iType & i ) const
{
static_assert( std::is_integral<iType>::value , "Must be integral argument" );
return m_elem[i*m_stride];
}
KOKKOS_INLINE_FUNCTION pointer data() { return m_elem ; }
KOKKOS_INLINE_FUNCTION const_pointer data() const { return m_elem ; }
~Array() = default ;
Array() = delete ;
Array( const Array & ) = delete ;
// Some supported compilers are not sufficiently C++11 compliant
// for default move constructor and move assignment operator.
// Array( Array && rhs ) = default ;
// Array & operator = ( Array && rhs ) = delete ;
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array & rhs )
{
const size_t n = std::min( m_size , rhs.size() );
for ( size_t i = 0 ; i < n ; ++i ) m_elem[i] = rhs[i] ;
return *this ;
}
template< size_t N , class P >
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array<T,N,P> & rhs )
{
const size_t n = std::min( m_size , rhs.size() );
for ( size_t i = 0 ; i < n ; ++i ) m_elem[i] = rhs[i] ;
return *this ;
}
KOKKOS_INLINE_FUNCTION constexpr Array( pointer arg_ptr , size_type arg_size , size_type arg_stride )
: m_elem(arg_ptr), m_size(arg_size), m_stride(arg_stride) {}
};
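The contiguous and strided proxies wrap an existing buffer instead of owning storage; the strided form addresses element i at m_elem[i*m_stride]. Sketch (illustrative, not part of the patch):

void strided_array_sketch()
{
  double buf[12] = {0};
  Kokkos::Array< double , ~size_t(0) , Kokkos::Array<>::strided >
    every_third( buf , 4 , 3 );   // views buf[0], buf[3], buf[6], buf[9]
  every_third[1] = 2.5 ;          // writes buf[3]
}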
} // namespace Kokkos
#endif /* #ifndef KOKKOS_ARRAY */
diff --git a/lib/kokkos/core/src/Kokkos_Complex.hpp b/lib/kokkos/core/src/Kokkos_Complex.hpp
new file mode 100644
index 000000000..11aaf9617
--- /dev/null
+++ b/lib/kokkos/core/src/Kokkos_Complex.hpp
@@ -0,0 +1,529 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 2.0
+// Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+#ifndef KOKKOS_COMPLEX_HPP
+#define KOKKOS_COMPLEX_HPP
+
+#include <Kokkos_Atomic.hpp>
+#include <complex>
+#include <iostream>
+
+namespace Kokkos {
+
+/// \class complex
+/// \brief Partial reimplementation of std::complex that works as the
+/// result of a Kokkos::parallel_reduce.
+/// \tparam RealType The type of the real and imaginary parts of the
+/// complex number. As with std::complex, this is only defined for
+/// \c float, \c double, and <tt>long double</tt>. The latter is
+/// currently forbidden in CUDA device kernels.
+template<class RealType>
+class complex {
+private:
+ RealType re_, im_;
+
+public:
+ //! The type of the real or imaginary parts of this complex number.
+ typedef RealType value_type;
+
+ //! Default constructor (initializes both real and imaginary parts to zero).
+ KOKKOS_INLINE_FUNCTION complex () :
+ re_ (0.0), im_ (0.0)
+ {}
+
+ //! Copy constructor.
+ KOKKOS_INLINE_FUNCTION complex (const complex<RealType>& src) :
+ re_ (src.re_), im_ (src.im_)
+ {}
+
+ //! Copy constructor from volatile.
+ KOKKOS_INLINE_FUNCTION complex (const volatile complex<RealType>& src) :
+ re_ (src.re_), im_ (src.im_)
+ {}
+
+ /// \brief Conversion constructor from std::complex.
+ ///
+ /// This constructor cannot be called in a CUDA device function,
+ /// because std::complex's methods and nonmember functions are not
+ /// marked as CUDA device functions.
+ template<class InputRealType>
+ complex (const std::complex<InputRealType>& src) :
+ re_ (std::real (src)), im_ (std::imag (src))
+ {}
+
+ /// \brief Conversion operator to std::complex.
+ ///
+ /// This operator cannot be called in a CUDA device function,
+ /// because std::complex's methods and nonmember functions are not
+ /// marked as CUDA device functions.
+ operator std::complex<RealType> () const {
+ return std::complex<RealType> (re_, im_);
+ }
+
+ /// \brief Constructor that takes just the real part, and sets the
+ /// imaginary part to zero.
+ template<class InputRealType>
+ KOKKOS_INLINE_FUNCTION complex (const InputRealType& val) :
+ re_ (val), im_ (0.0)
+ {}
+
+ //! Constructor that takes the real and imaginary parts.
+ template<class RealType1, class RealType2>
+ KOKKOS_INLINE_FUNCTION complex (const RealType1& re, const RealType2& im) :
+ re_ (re), im_ (im)
+ {}
+
+ //! Assignment operator.
+ template<class InputRealType>
+ KOKKOS_INLINE_FUNCTION
+ complex<RealType>& operator= (const complex<InputRealType>& src) {
+ re_ = src.re_;
+ im_ = src.im_;
+ return *this;
+ }
+
+ //! Assignment operator.
+ template<class InputRealType>
+ KOKKOS_INLINE_FUNCTION
+ volatile complex<RealType>& operator= (const complex<InputRealType>& src) volatile {
+ re_ = src.re_;
+ im_ = src.im_;
+ return *this;
+ }
+
+ //! Assignment operator.
+ template<class InputRealType>
+ KOKKOS_INLINE_FUNCTION
+ volatile complex<RealType>& operator= (const volatile complex<InputRealType>& src) volatile {
+ re_ = src.re_;
+ im_ = src.im_;
+ return *this;
+ }
+
+ //! Assignment operator.
+ template<class InputRealType>
+ KOKKOS_INLINE_FUNCTION
+ complex<RealType>& operator= (const volatile complex<InputRealType>& src) {
+ re_ = src.re_;
+ im_ = src.im_;
+ return *this;
+ }
+
+ //! Assignment operator (from a real number).
+ template<class InputRealType>
+ KOKKOS_INLINE_FUNCTION
+ complex<RealType>& operator= (const InputRealType& val) {
+ re_ = val;
+ im_ = static_cast<RealType> (0.0);
+ return *this;
+ }
+
+ //! Assignment operator (from a real number).
+ template<class InputRealType>
+ KOKKOS_INLINE_FUNCTION
+ void operator= (const InputRealType& val) volatile {
+ re_ = val;
+ im_ = static_cast<RealType> (0.0);
+ }
+
+ /// \brief Assignment operator from std::complex.
+ ///
+ /// This constructor cannot be called in a CUDA device function,
+ /// because std::complex's methods and nonmember functions are not
+ /// marked as CUDA device functions.
+ template<class InputRealType>
+ complex<RealType>& operator= (const std::complex<InputRealType>& src) {
+ re_ = std::real (src);
+ im_ = std::imag (src);
+ return *this;
+ }
+
+ //! The imaginary part of this complex number.
+ KOKKOS_INLINE_FUNCTION RealType& imag () {
+ return im_;
+ }
+
+ //! The real part of this complex number.
+ KOKKOS_INLINE_FUNCTION RealType& real () {
+ return re_;
+ }
+
+ //! The imaginary part of this complex number.
+ KOKKOS_INLINE_FUNCTION const RealType imag () const {
+ return im_;
+ }
+
+ //! The real part of this complex number.
+ KOKKOS_INLINE_FUNCTION const RealType real () const {
+ return re_;
+ }
+
+ //! The imaginary part of this complex number (volatile overload).
+ KOKKOS_INLINE_FUNCTION volatile RealType& imag () volatile {
+ return im_;
+ }
+
+ //! The real part of this complex number (volatile overload).
+ KOKKOS_INLINE_FUNCTION volatile RealType& real () volatile {
+ return re_;
+ }
+
+ //! The imaginary part of this complex number (volatile overload).
+ KOKKOS_INLINE_FUNCTION const RealType imag () const volatile {
+ return im_;
+ }
+
+ //! The real part of this complex number (volatile overload).
+ KOKKOS_INLINE_FUNCTION const RealType real () const volatile {
+ return re_;
+ }
+
+ KOKKOS_INLINE_FUNCTION
+ complex<RealType>& operator += (const complex<RealType>& src) {
+ re_ += src.re_;
+ im_ += src.im_;
+ return *this;
+ }
+
+ KOKKOS_INLINE_FUNCTION
+ void operator += (const volatile complex<RealType>& src) volatile {
+ re_ += src.re_;
+ im_ += src.im_;
+ }
+
+ KOKKOS_INLINE_FUNCTION
+ complex<RealType>& operator += (const RealType& src) {
+ re_ += src;
+ return *this;
+ }
+
+ KOKKOS_INLINE_FUNCTION
+ void operator += (const volatile RealType& src) volatile {
+ re_ += src;
+ }
+
+ KOKKOS_INLINE_FUNCTION
+ complex<RealType>& operator -= (const complex<RealType>& src) {
+ re_ -= src.re_;
+ im_ -= src.im_;
+ return *this;
+ }
+
+ KOKKOS_INLINE_FUNCTION
+ complex<RealType>& operator -= (const RealType& src) {
+ re_ -= src;
+ return *this;
+ }
+
+ KOKKOS_INLINE_FUNCTION
+ complex<RealType>& operator *= (const complex<RealType>& src) {
+ const RealType realPart = re_ * src.re_ - im_ * src.im_;
+ const RealType imagPart = re_ * src.im_ + im_ * src.re_;
+ re_ = realPart;
+ im_ = imagPart;
+ return *this;
+ }
+
+ KOKKOS_INLINE_FUNCTION
+ void operator *= (const volatile complex<RealType>& src) volatile {
+ const RealType realPart = re_ * src.re_ - im_ * src.im_;
+ const RealType imagPart = re_ * src.im_ + im_ * src.re_;
+ re_ = realPart;
+ im_ = imagPart;
+ }
+
+ KOKKOS_INLINE_FUNCTION
+ complex<RealType>& operator *= (const RealType& src) {
+ re_ *= src;
+ im_ *= src;
+ return *this;
+ }
+
+ KOKKOS_INLINE_FUNCTION
+ void operator *= (const volatile RealType& src) volatile {
+ re_ *= src;
+ im_ *= src;
+ }
+
+ KOKKOS_INLINE_FUNCTION
+ complex<RealType>& operator /= (const complex<RealType>& y) {
+ // Scale (by the "1-norm" of y) to avoid unwarranted overflow.
+ // If the real part is +/-Inf and the imaginary part is -/+Inf,
+ // this won't change the result.
+ const RealType s = ::fabs (y.real ()) + ::fabs (y.imag ());
+
+ // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0.
+ // In that case, the relation x/y == (x/s) / (y/s) doesn't hold,
+ // because y/s is NaN.
+ if (s == 0.0) {
+ this->re_ /= s;
+ this->im_ /= s;
+ }
+ else {
+ const complex<RealType> x_scaled (this->re_ / s, this->im_ / s);
+ const complex<RealType> y_conj_scaled (y.re_ / s, -(y.im_) / s);
+ const RealType y_scaled_abs = y_conj_scaled.re_ * y_conj_scaled.re_ +
+ y_conj_scaled.im_ * y_conj_scaled.im_; // abs(y) == abs(conj(y))
+ *this = x_scaled * y_conj_scaled;
+ *this /= y_scaled_abs;
+ }
+ return *this;
+ }
+
+ KOKKOS_INLINE_FUNCTION
+ complex<RealType>& operator /= (const RealType& src) {
+ re_ /= src;
+ im_ /= src;
+ return *this;
+ }
+};
+
+//! Binary + operator for complex.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+complex<RealType>
+operator + (const complex<RealType>& x, const complex<RealType>& y) {
+ return complex<RealType> (x.real () + y.real (), x.imag () + y.imag ());
+}
+
+//! Unary + operator for complex.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+complex<RealType>
+operator + (const complex<RealType>& x) {
+ return x;
+}
+
+//! Binary - operator for complex.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+complex<RealType>
+operator - (const complex<RealType>& x, const complex<RealType>& y) {
+ return complex<RealType> (x.real () - y.real (), x.imag () - y.imag ());
+}
+
+//! Unary - operator for complex.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+complex<RealType>
+operator - (const complex<RealType>& x) {
+ return complex<RealType> (-x.real (), -x.imag ());
+}
+
+//! Binary * operator for complex.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+complex<RealType>
+operator * (const complex<RealType>& x, const complex<RealType>& y) {
+ return complex<RealType> (x.real () * y.real () - x.imag () * y.imag (),
+ x.real () * y.imag () + x.imag () * y.real ());
+}
+
+/// \brief Binary * operator for std::complex and complex.
+///
+/// This function exists because GCC 4.7.2 (and perhaps other
+/// compilers) are not able to deduce that they can multiply
+/// std::complex by Kokkos::complex, by first converting std::complex
+/// to Kokkos::complex.
+///
+/// This function cannot be called in a CUDA device function, because
+/// std::complex's methods and nonmember functions are not marked as
+/// CUDA device functions.
+template<class RealType>
+complex<RealType>
+operator * (const std::complex<RealType>& x, const complex<RealType>& y) {
+ return complex<RealType> (x.real () * y.real () - x.imag () * y.imag (),
+ x.real () * y.imag () + x.imag () * y.real ());
+}
+
+/// \brief Binary * operator for RealType times complex.
+///
+/// This function exists because the compiler doesn't know that
+/// RealType and complex<RealType> commute with respect to operator*.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+complex<RealType>
+operator * (const RealType& x, const complex<RealType>& y) {
+ return complex<RealType> (x * y.real (), x * y.imag ());
+}
+
+
+//! Imaginary part of a complex number.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+RealType imag (const complex<RealType>& x) {
+ return x.imag ();
+}
+
+//! Real part of a complex number.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+RealType real (const complex<RealType>& x) {
+ return x.real ();
+}
+
+//! Absolute value (magnitude) of a complex number.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+RealType abs (const complex<RealType>& x) {
+ // FIXME (mfh 31 Oct 2014) Scale to avoid unwarranted overflow.
+ return ::sqrt (real (x) * real (x) + imag (x) * imag (x));
+}
+
+//! Conjugate of a complex number.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+complex<RealType> conj (const complex<RealType>& x) {
+ return complex<RealType> (real (x), -imag (x));
+}
+
+
+//! Binary operator / for complex and real numbers
+template<class RealType1, class RealType2>
+KOKKOS_INLINE_FUNCTION
+complex<RealType1>
+operator / (const complex<RealType1>& x, const RealType2& y) {
+ return complex<RealType1> (real (x) / y, imag (x) / y);
+}
+
+//! Binary operator / for complex.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+complex<RealType>
+operator / (const complex<RealType>& x, const complex<RealType>& y) {
+ // Scale (by the "1-norm" of y) to avoid unwarranted overflow.
+ // If the real part is +/-Inf and the imaginary part is -/+Inf,
+ // this won't change the result.
+ const RealType s = ::fabs (real (y)) + ::fabs (imag (y));
+
+ // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0.
+ // In that case, the relation x/y == (x/s) / (y/s) doesn't hold,
+ // because y/s is NaN.
+ if (s == 0.0) {
+ return complex<RealType> (real (x) / s, imag (x) / s);
+ }
+ else {
+ const complex<RealType> x_scaled (real (x) / s, imag (x) / s);
+ const complex<RealType> y_conj_scaled (real (y) / s, -imag (y) / s);
+ const RealType y_scaled_abs = real (y_conj_scaled) * real (y_conj_scaled) +
+ imag (y_conj_scaled) * imag (y_conj_scaled); // abs(y) == abs(conj(y))
+ complex<RealType> result = x_scaled * y_conj_scaled;
+ result /= y_scaled_abs;
+ return result;
+ }
+}
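+
A quick numeric check of why the 1-norm scaling matters (illustrative, not part of the header):

void complex_divide_sketch()
{
  Kokkos::complex<double> x( 1.0 , 0.0 ) , y( 1e200 , 1e200 );
  // Naive |y|^2 = 2e400 overflows to Inf, which would collapse x/y to 0.
  // Scaled: s = 2e200 , y/s = (0.5, 0.5) , denominator 0.5 -> finite.
  Kokkos::complex<double> q = x / y ;   // ~ ( 5e-201 , -5e-201 )
  (void) q ;
}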
+
+//! Equality operator for two complex numbers.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+bool operator == (const complex<RealType>& x, const complex<RealType>& y) {
+ return real (x) == real (y) && imag (x) == imag (y);
+}
+
+//! Equality operator for std::complex and Kokkos::complex.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+bool operator == (const std::complex<RealType>& x, const complex<RealType>& y) {
+ return std::real (x) == real (y) && std::imag (x) == imag (y);
+}
+
+//! Equality operator for complex and real number.
+template<class RealType1, class RealType2>
+KOKKOS_INLINE_FUNCTION
+bool operator == (const complex<RealType1>& x, const RealType2& y) {
+ return real (x) == y && imag (x) == static_cast<RealType1> (0.0);
+}
+
+//! Equality operator for real and complex number.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+bool operator == (const RealType& x, const complex<RealType>& y) {
+ return y == x;
+}
+
+//! Inequality operator for two complex numbers.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+bool operator != (const complex<RealType>& x, const complex<RealType>& y) {
+ return real (x) != real (y) || imag (x) != imag (y);
+}
+
+//! Inequality operator for std::complex and Kokkos::complex.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+bool operator != (const std::complex<RealType>& x, const complex<RealType>& y) {
+ return std::real (x) != real (y) || std::imag (x) != imag (y);
+}
+
+//! Inequality operator for complex and real number.
+template<class RealType1, class RealType2>
+KOKKOS_INLINE_FUNCTION
+bool operator != (const complex<RealType1>& x, const RealType2& y) {
+ return real (x) != y || imag (x) != static_cast<RealType1> (0.0);
+}
+
+//! Inequality operator for real and complex number.
+template<class RealType>
+KOKKOS_INLINE_FUNCTION
+bool operator != (const RealType& x, const complex<RealType>& y) {
+ return y != x;
+}
+
+template<class RealType>
+std::ostream& operator << (std::ostream& os, const complex<RealType>& x) {
+ const std::complex<RealType> x_std (Kokkos::real (x), Kokkos::imag (x));
+ os << x_std;
+ return os;
+}
+
+template<class RealType>
+std::istream& operator >> (std::istream& is, complex<RealType>& x) {
+  std::complex<RealType> x_std;
+  is >> x_std;
+  x = x_std; // only assigns on successful extraction above
+  return is;
+}
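+// Streaming example (illustrative): writing complex<double> (1.0, -2.0)
+// produces "(1,-2)", matching std::complex<double> formatting; the same
+// text can be read back via operator>> above.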
+
+
+} // namespace Kokkos
+
+#endif // KOKKOS_COMPLEX_HPP
diff --git a/lib/kokkos/core/src/Kokkos_Core.hpp b/lib/kokkos/core/src/Kokkos_Core.hpp
index 2578313d7..ba4d2de15 100644
--- a/lib/kokkos/core/src/Kokkos_Core.hpp
+++ b/lib/kokkos/core/src/Kokkos_Core.hpp
@@ -1,235 +1,244 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CORE_HPP
#define KOKKOS_CORE_HPP
//----------------------------------------------------------------------------
// Include the execution space header files for the enabled execution spaces.
#include <Kokkos_Core_fwd.hpp>
-#if defined( KOKKOS_HAVE_CUDA )
-#include <Kokkos_Cuda.hpp>
+#if defined( KOKKOS_HAVE_SERIAL )
+#include <Kokkos_Serial.hpp>
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#include <Kokkos_OpenMP.hpp>
#endif
-#if defined( KOKKOS_HAVE_SERIAL )
-#include <Kokkos_Serial.hpp>
-#endif
-
#if defined( KOKKOS_HAVE_PTHREAD )
#include <Kokkos_Threads.hpp>
#endif
+#if defined( KOKKOS_HAVE_CUDA )
+#include <Kokkos_Cuda.hpp>
+#endif
+
#include <Kokkos_Pair.hpp>
#include <Kokkos_Array.hpp>
#include <Kokkos_View.hpp>
#include <Kokkos_Vectorization.hpp>
#include <Kokkos_Atomic.hpp>
#include <Kokkos_hwloc.hpp>
-#include <iostream>
-
#ifdef KOKKOS_HAVE_CXX11
-////#include <Kokkos_Complex.hpp>
+#include <Kokkos_Complex.hpp>
#endif
//----------------------------------------------------------------------------
namespace Kokkos {
struct InitArguments {
int num_threads;
int num_numa;
int device_id;
InitArguments() {
num_threads = -1;
num_numa = -1;
device_id = -1;
}
};
void initialize(int& narg, char* arg[]);
void initialize(const InitArguments& args = InitArguments());
/** \brief Finalize the spaces that were initialized via Kokkos::initialize */
void finalize();
/** \brief Finalize all known execution spaces */
void finalize_all();
void fence();
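// Illustrative initialization sketch (host code; not part of this header):
//   Kokkos::InitArguments args ;
//   args.num_threads = 4 ;        // fields left at -1 defer to the runtime
//   Kokkos::initialize( args );
//   /* ... parallel work ... */
//   Kokkos::finalize();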
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Experimental {
+
+/* Allocate memory from a memory space.
+ * The allocation is tracked in the Kokkos memory tracking system, so
+ * leaked memory can be identified.
+ */
+template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
+inline
+void * kokkos_malloc( const std::string & arg_alloc_label
+ , const size_t arg_alloc_size )
+{
+ typedef typename Space::memory_space MemorySpace ;
+ return Impl::SharedAllocationRecord< MemorySpace >::
+ allocate_tracked( MemorySpace() , arg_alloc_label , arg_alloc_size );
+}
+
+template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
+inline
+void * kokkos_malloc( const size_t arg_alloc_size )
+{
+ typedef typename Space::memory_space MemorySpace ;
+ return Impl::SharedAllocationRecord< MemorySpace >::
+ allocate_tracked( MemorySpace() , "no-label" , arg_alloc_size );
}
-#ifdef KOKKOS_HAVE_CXX11
+template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
+inline
+void kokkos_free( void * arg_alloc )
+{
+ typedef typename Space::memory_space MemorySpace ;
+ return Impl::SharedAllocationRecord< MemorySpace >::
+ deallocate_tracked( arg_alloc );
+}
+
+template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
+inline
+void * kokkos_realloc( void * arg_alloc , const size_t arg_alloc_size )
+{
+ typedef typename Space::memory_space MemorySpace ;
+ return Impl::SharedAllocationRecord< MemorySpace >::
+ reallocate_tracked( arg_alloc , arg_alloc_size );
+}
+
+} // namespace Experimental
+} // namespace Kokkos
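+// Illustrative usage sketch of the tracked interface above (assumes the
+// default memory space is reachable from host code):
+//   void * p = Kokkos::Experimental::kokkos_malloc( "my_buffer" , 128 );
+//   p = Kokkos::Experimental::kokkos_realloc( p , 256 );
+//   Kokkos::Experimental::kokkos_free( p );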
+
+#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
+namespace Kokkos {
+
+using Kokkos::Experimental::kokkos_malloc ;
+using Kokkos::Experimental::kokkos_realloc ;
+using Kokkos::Experimental::kokkos_free ;
+
+}
+
+#else
+
namespace Kokkos {
namespace Impl {
// should only be used by kokkos_malloc and kokkos_free
struct MallocHelper
{
static void increment_ref_count( AllocationTracker const & tracker )
{
tracker.increment_ref_count();
}
static void decrement_ref_count( AllocationTracker const & tracker )
{
tracker.decrement_ref_count();
}
};
} // namespace Impl
/* Allocate memory from a memory space.
 * The allocation is tracked in the Kokkos memory tracking system, so
* leaked memory can be identified.
*/
template< class Arg = DefaultExecutionSpace>
void* kokkos_malloc(const std::string label, size_t count) {
if(count == 0) return NULL;
typedef typename Arg::memory_space MemorySpace;
Impl::AllocationTracker tracker = MemorySpace::allocate_and_track(label,count);
Impl::MallocHelper::increment_ref_count( tracker );
return tracker.alloc_ptr();
}
template< class Arg = DefaultExecutionSpace>
void* kokkos_malloc(const size_t& count) {
return kokkos_malloc<Arg>("DefaultLabel",count);
}
/* Free memory from a memory space.
*/
template< class Arg = DefaultExecutionSpace>
void kokkos_free(const void* ptr) {
typedef typename Arg::memory_space MemorySpace;
typedef typename MemorySpace::allocator allocator;
Impl::AllocationTracker tracker = Impl::AllocationTracker::find<allocator>(ptr);
if (tracker.is_valid()) {
Impl::MallocHelper::decrement_ref_count( tracker );
}
}
template< class Arg = DefaultExecutionSpace>
-const void* kokkos_realloc(const void* old_ptr, size_t size) {
+void* kokkos_realloc(const void* old_ptr, size_t size) {
+ if(old_ptr == NULL)
+ return kokkos_malloc<Arg>(size);
+
typedef typename Arg::memory_space MemorySpace;
typedef typename MemorySpace::allocator allocator;
Impl::AllocationTracker tracker = Impl::AllocationTracker::find<allocator>(old_ptr);
tracker.reallocate(size);
return tracker.alloc_ptr();
}
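/* Illustrative usage sketch of the reference-counted interface above:
 *   void * p = Kokkos::kokkos_malloc( "label" , n ); // reference count becomes one
 *   p = Kokkos::kokkos_realloc( p , 2 * n );
 *   Kokkos::kokkos_free( p );                        // releases the last reference
 */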
} // namespace Kokkos
+
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
-namespace Kokkos {
-namespace Experimental {
-
-template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
-inline
-void * kokkos_malloc( const size_t arg_alloc_size )
-{
- typedef typename Space::memory_space MemorySpace ;
- typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
- typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordHost ;
-
- RecordHost * const r = RecordHost::allocate( MemorySpace() , "kokkos_malloc" , arg_alloc_size );
-
- RecordBase::increment( r );
-
- return r->data();
-}
-
-template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
-inline
-void kokkos_free( void * arg_alloc )
-{
- typedef typename Space::memory_space MemorySpace ;
- typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
- typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordHost ;
-
- RecordHost * const r = RecordHost::get_record( arg_alloc );
-
- RecordBase::decrement( r );
-}
-
-template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
-inline
-void * kokkos_realloc( void * arg_alloc , const size_t arg_alloc_size )
-{
- typedef typename Space::memory_space MemorySpace ;
- typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
- typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordHost ;
-
- RecordHost * const r_old = RecordHost::get_record( arg_alloc );
- RecordHost * const r_new = RecordHost::allocate( MemorySpace() , "kokkos_malloc" , arg_alloc_size );
-
- Kokkos::Impl::DeepCopy<MemorySpace,MemorySpace>( r_new->data() , r_old->data()
- , std::min( r_old->size() , r_new->size() ) );
-
- RecordBase::increment( r_new );
- RecordBase::decrement( r_old );
-
- return r_new->data();
-}
-
-} // namespace Experimental
-} // namespace Kokkos
-
#endif
diff --git a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp
index 2cde9299a..7e1888421 100644
--- a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp
+++ b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp
@@ -1,170 +1,198 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CORE_FWD_HPP
#define KOKKOS_CORE_FWD_HPP
//----------------------------------------------------------------------------
// Kokkos_Macros.hpp does introspection on configuration options
// and compiler environment then sets a collection of #define macros.
#include <Kokkos_Macros.hpp>
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+struct AUTO_t {
+ KOKKOS_INLINE_FUNCTION
+ constexpr const AUTO_t & operator()() const { return *this ; }
+};
+
+namespace {
+/**\brief Token to indicate that a parameter's value is to be automatically selected */
+constexpr AUTO_t AUTO = Kokkos::AUTO_t();
+}
+}
+
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Forward declarations for class inter-relationships
namespace Kokkos {
class HostSpace ; ///< Memory space for main process and CPU execution spaces
+#ifdef KOKKOS_HAVE_HBWSPACE
+namespace Experimental {
+class HBWSpace ; ///< Memory space for hbw_malloc from memkind (e.g. for the KNL processor)
+}
+#endif
+
#if defined( KOKKOS_HAVE_SERIAL )
class Serial ; ///< Execution space main process on CPU
#endif // defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_HAVE_PTHREAD )
class Threads ; ///< Execution space with pthreads back-end
#endif
#if defined( KOKKOS_HAVE_OPENMP )
class OpenMP ; ///< OpenMP execution space
#endif
#if defined( KOKKOS_HAVE_CUDA )
class CudaSpace ; ///< Memory space on Cuda GPU
class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM
class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU
class Cuda ; ///< Execution space for Cuda GPU
#endif
template<class ExecutionSpace, class MemorySpace>
struct Device;
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Set the default execution space.
/// Define Kokkos::DefaultExecutionSpace as per configuration option
/// or chosen from the enabled execution spaces in the following order:
/// Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Serial
namespace Kokkos {
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
typedef Cuda DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Threads DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Serial DefaultExecutionSpace ;
#else
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
#endif
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Threads DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Serial DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_OPENMP )
typedef OpenMP DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_PTHREAD )
typedef Threads DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_SERIAL )
typedef Serial DefaultHostExecutionSpace ;
#else
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
#endif
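// Example of the selection above: configured with
// KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA and KOKKOS_HAVE_OPENMP,
// DefaultExecutionSpace is Kokkos::Cuda while DefaultHostExecutionSpace
// falls through to Kokkos::OpenMP.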
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Detect the active execution space and define its memory space.
// This is used to verify whether a running kernel can access
// a given memory space.
namespace Kokkos {
namespace Impl {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_HAVE_CUDA)
typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ;
#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
typedef Kokkos::HostSpace ActiveExecutionMemorySpace ;
#else
typedef void ActiveExecutionMemorySpace ;
#endif
template< class ActiveSpace , class MemorySpace >
struct VerifyExecutionCanAccessMemorySpace {
enum {value = 0};
};
template< class Space >
struct VerifyExecutionCanAccessMemorySpace< Space , Space >
{
enum {value = 1};
KOKKOS_INLINE_FUNCTION static void verify(void) {}
KOKKOS_INLINE_FUNCTION static void verify(const void *) {}
};
} // namespace Impl
} // namespace Kokkos
#define KOKKOS_RESTRICT_EXECUTION_TO_DATA( DATA_SPACE , DATA_PTR ) \
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \
Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify( DATA_PTR )
#define KOKKOS_RESTRICT_EXECUTION_TO_( DATA_SPACE ) \
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \
Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify()
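// Illustrative sketch: in device code,
//   KOKKOS_RESTRICT_EXECUTION_TO_DATA( Kokkos::HostSpace , ptr );
// expands to VerifyExecutionCanAccessMemorySpace< ActiveExecutionMemorySpace ,
// Kokkos::HostSpace >::verify( ptr ) and aborts when, for example, Cuda
// device code attempts to access HostSpace data.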
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
namespace Kokkos {
void fence();
}
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
#endif /* #ifndef KOKKOS_CORE_FWD_HPP */
diff --git a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp
index 3caf25053..e6b337eca 100644
--- a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp
+++ b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp
@@ -1,790 +1,853 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDASPACE_HPP
#define KOKKOS_CUDASPACE_HPP
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#include <iosfwd>
#include <typeinfo>
#include <string>
#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <Cuda/Kokkos_Cuda_abort.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Cuda on-device memory management */
class CudaSpace {
public:
//! Tag this class as a kokkos memory space
typedef CudaSpace memory_space ;
typedef Kokkos::Cuda execution_space ;
typedef Kokkos::Device<execution_space,memory_space> device_type;
typedef unsigned int size_type ;
+ /*--------------------------------*/
+
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
typedef Impl::CudaMallocAllocator allocator;
/** \brief Allocate a contiguous block of memory.
*
* The input label is associated with the block of memory.
* The block of memory is tracked via reference counting where
* allocation gives it a reference count of one.
*/
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
/*--------------------------------*/
/** \brief Cuda-specific function to attach a texture object to an allocation.
* Output the texture object, base pointer, and offset from the input pointer.
*/
#if defined( __CUDACC__ )
static void texture_object_attach( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
);
#endif
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
/*--------------------------------*/
CudaSpace();
CudaSpace( const CudaSpace & rhs ) = default ;
CudaSpace & operator = ( const CudaSpace & rhs ) = default ;
~CudaSpace() = default ;
- /**\brief Allocate memory in the cuda space */
+ /**\brief Allocate untracked memory in the cuda space */
void * allocate( const size_t arg_alloc_size ) const ;
- /**\brief Deallocate memory in the cuda space */
+ /**\brief Deallocate untracked memory in the cuda space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
/*--------------------------------*/
/** \brief Error reporting for HostSpace attempt to access CudaSpace */
static void access_error();
static void access_error( const void * const );
private:
int m_device ; ///< Which Cuda device
// friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ;
};
namespace Impl {
/// \brief Initialize lock array for arbitrary size atomics.
///
/// Arbitrary atomics are implemented using a hash table of locks
/// where the hash value is derived from the address of the
/// object for which an atomic operation is performed.
/// This function initializes the locks to zero (unset).
void init_lock_array_cuda_space();
/// \brief Retrieve the pointer to the lock array for arbitrary size atomics.
///
/// Arbitrary atomics are implemented using a hash table of locks
/// where the hash value is derived from the address of the
/// object for which an atomic operation is performed.
/// This function retrieves the lock array pointer.
/// If the array is not yet allocated it will do so.
int* lock_array_cuda_space_ptr(bool deallocate = false);
}
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Cuda memory that is accessible to Host execution space
* through Cuda's unified virtual memory (UVM) runtime.
*/
class CudaUVMSpace {
public:
//! Tag this class as a kokkos memory space
typedef CudaUVMSpace memory_space ;
typedef Cuda execution_space ;
typedef Kokkos::Device<execution_space,memory_space> device_type;
typedef unsigned int size_type ;
/** \brief If UVM capability is available */
static bool available();
+ /*--------------------------------*/
+
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
typedef Impl::CudaUVMAllocator allocator;
/** \brief Allocate a contiguous block of memory.
*
* The input label is associated with the block of memory.
* The block of memory is tracked via reference counting where
* allocation gives it a reference count of one.
*/
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
/** \brief Cuda-specific function to attach a texture object to an allocation.
* Output the texture object, base pointer, and offset from the input pointer.
*/
#if defined( __CUDACC__ )
static void texture_object_attach( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
);
#endif
+
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
/*--------------------------------*/
CudaUVMSpace();
CudaUVMSpace( const CudaUVMSpace & rhs ) = default ;
CudaUVMSpace & operator = ( const CudaUVMSpace & rhs ) = default ;
~CudaUVMSpace() = default ;
- /**\brief Allocate memory in the cuda space */
+ /**\brief Allocate untracked memory in the cuda space */
void * allocate( const size_t arg_alloc_size ) const ;
- /**\brief Deallocate memory in the cuda space */
+ /**\brief Deallocate untracked memory in the cuda space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
/*--------------------------------*/
private:
int m_device ; ///< Which Cuda device
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Host memory that is accessible to Cuda execution space
* through Cuda's host-pinned memory allocation.
*/
class CudaHostPinnedSpace {
public:
//! Tag this class as a kokkos memory space
/** \brief Memory is in HostSpace so use the HostSpace::execution_space */
typedef HostSpace::execution_space execution_space ;
typedef CudaHostPinnedSpace memory_space ;
typedef Kokkos::Device<execution_space,memory_space> device_type;
typedef unsigned int size_type ;
+ /*--------------------------------*/
+
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
typedef Impl::CudaHostAllocator allocator ;
/** \brief Allocate a contiguous block of memory.
*
* The input label is associated with the block of memory.
* The block of memory is tracked via reference counting where
* allocation gives it a reference count of one.
*/
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
/*--------------------------------*/
CudaHostPinnedSpace();
CudaHostPinnedSpace( const CudaHostPinnedSpace & rhs ) = default ;
CudaHostPinnedSpace & operator = ( const CudaHostPinnedSpace & rhs ) = default ;
~CudaHostPinnedSpace() = default ;
- /**\brief Allocate memory in the cuda space */
+ /**\brief Allocate untracked memory in the space */
void * allocate( const size_t arg_alloc_size ) const ;
- /**\brief Deallocate memory in the cuda space */
+ /**\brief Deallocate untracked memory in the space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
/*--------------------------------*/
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
void DeepCopyAsyncCuda( void * dst , const void * src , size_t n);
template<> struct DeepCopy< CudaSpace , CudaSpace , Cuda>
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
template<> struct DeepCopy< CudaSpace , HostSpace , Cuda >
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
template<> struct DeepCopy< HostSpace , CudaSpace , Cuda >
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
template<class ExecutionSpace> struct DeepCopy< CudaSpace , CudaSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaSpace , HostSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , HostSpace , Cuda>( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< HostSpace , CudaSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaSpace , CudaUVMSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaSpace , CudaHostPinnedSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaUVMSpace , CudaSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaUVMSpace , CudaUVMSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaUVMSpace , HostSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , HostSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< HostSpace , CudaUVMSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< HostSpace , CudaHostPinnedSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** Running in CudaSpace attempting to access HostSpace: error */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::HostSpace >
{
enum { value = false };
KOKKOS_INLINE_FUNCTION static void verify( void )
{ Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
KOKKOS_INLINE_FUNCTION static void verify( const void * )
{ Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
};
/** Running in CudaSpace accessing CudaUVMSpace: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >
{
enum { value = true };
KOKKOS_INLINE_FUNCTION static void verify( void ) { }
KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};
/** Running in CudaSpace accessing CudaHostPinnedSpace: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >
{
enum { value = true };
KOKKOS_INLINE_FUNCTION static void verify( void ) { }
KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};
/** Running in CudaSpace attempting to access an unknown space: error */
template< class OtherSpace >
struct VerifyExecutionCanAccessMemorySpace<
typename enable_if< ! is_same<Kokkos::CudaSpace,OtherSpace>::value , Kokkos::CudaSpace >::type ,
OtherSpace >
{
enum { value = false };
KOKKOS_INLINE_FUNCTION static void verify( void )
{ Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
KOKKOS_INLINE_FUNCTION static void verify( const void * )
{ Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
};
//----------------------------------------------------------------------------
/** Running in HostSpace attempting to access CudaSpace */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaSpace >
{
enum { value = false };
inline static void verify( void ) { CudaSpace::access_error(); }
inline static void verify( const void * p ) { CudaSpace::access_error(p); }
};
/** Running in HostSpace accessing CudaUVMSpace is OK */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaUVMSpace >
{
enum { value = true };
inline static void verify( void ) { }
inline static void verify( const void * ) { }
};
/** Running in HostSpace accessing CudaHostPinnedSpace is OK */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >
{
enum { value = true };
KOKKOS_INLINE_FUNCTION static void verify( void ) {}
KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
template<>
class SharedAllocationRecord< Kokkos::CudaSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
friend class SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;
typedef SharedAllocationRecord< void , void > RecordBase ;
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
static void deallocate( RecordBase * );
static ::cudaTextureObject_t
attach_texture_object( const unsigned sizeof_alias
, void * const alloc_ptr
, const size_t alloc_size );
static RecordBase s_root_record ;
::cudaTextureObject_t m_tex_obj ;
const Kokkos::CudaSpace m_space ;
protected:
~SharedAllocationRecord();
SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
std::string get_label() const ;
static SharedAllocationRecord * allocate( const Kokkos::CudaSpace & arg_space
, const std::string & arg_label
- , const size_t arg_alloc_size
- );
+ , const size_t arg_alloc_size );
+
+ /**\brief Allocate tracked memory in the space */
+ static
+ void * allocate_tracked( const Kokkos::CudaSpace & arg_space
+ , const std::string & arg_label
+ , const size_t arg_alloc_size );
+
+ /**\brief Reallocate tracked memory in the space */
+ static
+ void * reallocate_tracked( void * const arg_alloc_ptr
+ , const size_t arg_alloc_size );
+
+ /**\brief Deallocate tracked memory in the space */
+ static
+ void deallocate_tracked( void * const arg_alloc_ptr );
+
+ static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
template< typename AliasType >
inline
::cudaTextureObject_t attach_texture_object()
{
static_assert( ( std::is_same< AliasType , int >::value ||
std::is_same< AliasType , ::int2 >::value ||
std::is_same< AliasType , ::int4 >::value )
, "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
if ( m_tex_obj == 0 ) {
m_tex_obj = attach_texture_object( sizeof(AliasType)
, (void*) RecordBase::m_alloc_ptr
, RecordBase::m_alloc_size );
}
return m_tex_obj ;
}
template< typename AliasType >
inline
int attach_texture_object_offset( const AliasType * const ptr )
{
// Texture object is attached to the entire allocation range
return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
}
- static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
-
static void print_records( std::ostream & , const Kokkos::CudaSpace & , bool detail = false );
};
template<>
class SharedAllocationRecord< Kokkos::CudaUVMSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
typedef SharedAllocationRecord< void , void > RecordBase ;
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
static void deallocate( RecordBase * );
static RecordBase s_root_record ;
::cudaTextureObject_t m_tex_obj ;
const Kokkos::CudaUVMSpace m_space ;
protected:
~SharedAllocationRecord();
SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
std::string get_label() const ;
static SharedAllocationRecord * allocate( const Kokkos::CudaUVMSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
);
+ /**\brief Allocate tracked memory in the space */
+ static
+ void * allocate_tracked( const Kokkos::CudaUVMSpace & arg_space
+ , const std::string & arg_label
+ , const size_t arg_alloc_size );
+
+ /**\brief Reallocate tracked memory in the space */
+ static
+ void * reallocate_tracked( void * const arg_alloc_ptr
+ , const size_t arg_alloc_size );
+
+ /**\brief Deallocate tracked memory in the space */
+ static
+ void deallocate_tracked( void * const arg_alloc_ptr );
+
+ static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
+
+
template< typename AliasType >
inline
::cudaTextureObject_t attach_texture_object()
{
static_assert( ( std::is_same< AliasType , int >::value ||
std::is_same< AliasType , ::int2 >::value ||
std::is_same< AliasType , ::int4 >::value )
, "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
if ( m_tex_obj == 0 ) {
m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >::
attach_texture_object( sizeof(AliasType)
, (void*) RecordBase::m_alloc_ptr
, RecordBase::m_alloc_size );
}
return m_tex_obj ;
}
template< typename AliasType >
inline
int attach_texture_object_offset( const AliasType * const ptr )
{
// Texture object is attached to the entire allocation range
return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
}
- static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
-
static void print_records( std::ostream & , const Kokkos::CudaUVMSpace & , bool detail = false );
};
template<>
class SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
typedef SharedAllocationRecord< void , void > RecordBase ;
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
static void deallocate( RecordBase * );
static RecordBase s_root_record ;
const Kokkos::CudaHostPinnedSpace m_space ;
protected:
~SharedAllocationRecord();
SharedAllocationRecord() : RecordBase(), m_space() {}
SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
std::string get_label() const ;
static SharedAllocationRecord * allocate( const Kokkos::CudaHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
);
+ /**\brief Allocate tracked memory in the space */
+ static
+ void * allocate_tracked( const Kokkos::CudaHostPinnedSpace & arg_space
+ , const std::string & arg_label
+ , const size_t arg_alloc_size );
+
+ /**\brief Reallocate tracked memory in the space */
+ static
+ void * reallocate_tracked( void * const arg_alloc_ptr
+ , const size_t arg_alloc_size );
+
+ /**\brief Deallocate tracked memory in the space */
+ static
+ void deallocate_tracked( void * const arg_alloc_ptr );
+
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
static void print_records( std::ostream & , const Kokkos::CudaHostPinnedSpace & , bool detail = false );
};
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #define KOKKOS_CUDASPACE_HPP */
diff --git a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp
index 807cb5cb4..4f6f0f09c 100644
--- a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp
+++ b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp
@@ -1,497 +1,542 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXECPOLICY_HPP
#define KOKKOS_EXECPOLICY_HPP
#include <Kokkos_Core_fwd.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_StaticAssert.hpp>
#include <impl/Kokkos_Tags.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief Execution policy for work over a range of an integral type.
*
* Valid template argument options:
*
* With a specified execution space:
* < ExecSpace , WorkTag , { IntConst | IntType } >
* < ExecSpace , WorkTag , void >
* < ExecSpace , { IntConst | IntType } , void >
* < ExecSpace , void , void >
*
* With the default execution space:
* < WorkTag , { IntConst | IntType } , void >
* < WorkTag , void , void >
* < { IntConst | IntType } , void , void >
* < void , void , void >
*
* IntType is a fundamental integral type
* IntConst is an Impl::integral_constant< IntType , Blocking >
*
* Blocking is the granularity of partitioning the range among threads.
*/
template< class Arg0 = void , class Arg1 = void , class Arg2 = void
, class ExecSpace =
// If the first argument is an execution space use it,
// otherwise use the default execution space.
- typename Impl::if_c< Impl::is_execution_space< Arg0 >::value , Arg0
- , Kokkos::DefaultExecutionSpace >::type
+ typename std::conditional
+ < Impl::is_execution_space< Arg0 >::value , Arg0
+ , Kokkos::DefaultExecutionSpace >::type
>
class RangePolicy {
private:
// Default integral type and blocking factor:
typedef int DefaultIntType ;
enum { DefaultIntValue = 8 };
enum { Arg0_Void = Impl::is_same< Arg0 , void >::value };
enum { Arg1_Void = Impl::is_same< Arg1 , void >::value };
enum { Arg2_Void = Impl::is_same< Arg2 , void >::value };
enum { Arg0_ExecSpace = Impl::is_execution_space< Arg0 >::value };
enum { Arg0_IntConst = Impl::is_integral_constant< Arg0 >::value };
enum { Arg1_IntConst = Impl::is_integral_constant< Arg1 >::value };
enum { Arg2_IntConst = Impl::is_integral_constant< Arg2 >::value };
enum { Arg0_IntType = Impl::is_integral< Arg0 >::value };
enum { Arg1_IntType = Impl::is_integral< Arg1 >::value };
enum { Arg2_IntType = Impl::is_integral< Arg2 >::value };
enum { Arg0_WorkTag = ! Arg0_ExecSpace && ! Arg0_IntConst && ! Arg0_IntType && ! Arg0_Void };
enum { Arg1_WorkTag = Arg0_ExecSpace && ! Arg1_IntConst && ! Arg1_IntType && ! Arg1_Void };
enum { ArgOption_OK = Impl::StaticAssert< (
( Arg0_ExecSpace && Arg1_WorkTag && ( Arg2_IntConst || Arg2_IntType ) ) ||
( Arg0_ExecSpace && Arg1_WorkTag && Arg2_Void ) ||
( Arg0_ExecSpace && ( Arg1_IntConst || Arg1_IntType ) && Arg2_Void ) ||
( Arg0_ExecSpace && Arg1_Void && Arg2_Void ) ||
( Arg0_WorkTag && ( Arg1_IntConst || Arg1_IntType ) && Arg2_Void ) ||
( Arg0_WorkTag && Arg1_Void && Arg2_Void ) ||
( ( Arg0_IntConst || Arg0_IntType ) && Arg1_Void && Arg2_Void ) ||
( Arg0_Void && Arg1_Void && Arg2_Void )
) >::value };
// The work argument tag is the first or second argument
- typedef typename Impl::if_c< Arg0_WorkTag , Arg0 ,
- typename Impl::if_c< Arg1_WorkTag , Arg1 , void
+ typedef typename std::conditional< Arg0_WorkTag , Arg0 ,
+ typename std::conditional< Arg1_WorkTag , Arg1 , void
>::type >::type
WorkTag ;
enum { Granularity = Arg0_IntConst ? unsigned(Impl::is_integral_constant<Arg0>::integral_value) : (
Arg1_IntConst ? unsigned(Impl::is_integral_constant<Arg1>::integral_value) : (
Arg2_IntConst ? unsigned(Impl::is_integral_constant<Arg2>::integral_value) : (
unsigned(DefaultIntValue) ))) };
// Only accept the integral type if the blocking is a power of two
- typedef typename Impl::enable_if< Impl::is_power_of_two< Granularity >::value ,
- typename Impl::if_c< Arg0_IntType , Arg0 ,
- typename Impl::if_c< Arg1_IntType , Arg1 ,
- typename Impl::if_c< Arg2_IntType , Arg2 ,
- typename Impl::if_c< Arg0_IntConst , typename Impl::is_integral_constant<Arg0>::integral_type ,
- typename Impl::if_c< Arg1_IntConst , typename Impl::is_integral_constant<Arg1>::integral_type ,
- typename Impl::if_c< Arg2_IntConst , typename Impl::is_integral_constant<Arg2>::integral_type ,
- DefaultIntType
- >::type >::type >::type
- >::type >::type >::type
- >::type
+ static_assert( Impl::is_integral_power_of_two( Granularity )
+ , "RangePolicy blocking granularity must be power of two" );
+
+ typedef typename std::conditional< Arg0_IntType , Arg0 ,
+ typename std::conditional< Arg1_IntType , Arg1 ,
+ typename std::conditional< Arg2_IntType , Arg2 ,
+ typename std::conditional< Arg0_IntConst , typename Impl::is_integral_constant<Arg0>::integral_type ,
+ typename std::conditional< Arg1_IntConst , typename Impl::is_integral_constant<Arg1>::integral_type ,
+ typename std::conditional< Arg2_IntConst , typename Impl::is_integral_constant<Arg2>::integral_type ,
+ DefaultIntType
+ >::type >::type >::type
+ >::type >::type >::type
IntType ;
enum { GranularityMask = IntType(Granularity) - 1 };
ExecSpace m_space ;
IntType m_begin ;
IntType m_end ;
public:
//! Tag this class as an execution policy
typedef ExecSpace execution_space ;
typedef RangePolicy execution_policy ;
typedef WorkTag work_tag ;
typedef IntType member_type ;
KOKKOS_INLINE_FUNCTION const execution_space & space() const { return m_space ; }
KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin ; }
KOKKOS_INLINE_FUNCTION member_type end() const { return m_end ; }
inline RangePolicy() : m_space(), m_begin(0), m_end(0) {}
/** \brief Total range */
inline
RangePolicy( const member_type work_begin
, const member_type work_end
)
: m_space()
, m_begin( work_begin < work_end ? work_begin : 0 )
, m_end( work_begin < work_end ? work_end : 0 )
{}
/** \brief Total range */
inline
RangePolicy( const execution_space & work_space
, const member_type work_begin
, const member_type work_end
)
: m_space( work_space )
, m_begin( work_begin < work_end ? work_begin : 0 )
, m_end( work_begin < work_end ? work_end : 0 )
{}
/** \brief Subrange for a partition's rank and size.
*
* Typically used to partition a range over a group of threads.
*/
struct WorkRange {
- typedef RangePolicy::work_tag work_tag ;
- typedef RangePolicy::member_type member_type ;
+ typedef typename RangePolicy::work_tag work_tag ;
+ typedef typename RangePolicy::member_type member_type ;
KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin ; }
KOKKOS_INLINE_FUNCTION member_type end() const { return m_end ; }
/** \brief Subrange for a partition's rank and size.
*
* Typically used to partition a range over a group of threads.
*/
KOKKOS_INLINE_FUNCTION
WorkRange( const RangePolicy & range
, const int part_rank
, const int part_size
)
: m_begin(0), m_end(0)
{
if ( part_size ) {
// Split evenly among partitions, then round up to the granularity.
const member_type work_part =
( ( ( ( range.end() - range.begin() ) + ( part_size - 1 ) ) / part_size )
+ GranularityMask ) & ~member_type(GranularityMask);
m_begin = range.begin() + work_part * part_rank ;
m_end = m_begin + work_part ;
if ( range.end() < m_begin ) m_begin = range.end() ;
if ( range.end() < m_end ) m_end = range.end() ;
}
}
private:
member_type m_begin ;
member_type m_end ;
WorkRange();
WorkRange & operator = ( const WorkRange & );
};
};
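// Worked sketch of WorkRange partitioning (illustrative values):
// for a RangePolicy over [0,100) split among part_size = 4 partitions with
// the default Granularity = 8:
//   work_part = ceil(100/4) = 25 , rounded up to the granularity -> 32
//   ranks 0..3 receive [0,32), [32,64), [64,96), [96,100) (clipped at end()).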
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
+namespace Experimental {
+
+/** \brief Scratch memory request accepting per-team and per-thread values
+ *
+ * An instance of this class can be given as the last argument to a
+ * TeamPolicy constructor. It sets the amount of user-requested shared
+ * memory for the team.
+ */
+
+template< class MemorySpace >
+class TeamScratchRequest {
+ size_t m_per_team;
+ size_t m_per_thread;
+
+public:
+ TeamScratchRequest(size_t per_team_, size_t per_thread_ = 0):
+ m_per_team(per_team_), m_per_thread(per_thread_) {
+ }
+
+ size_t per_team() const {
+ return m_per_team;
+ }
+ size_t per_thread() const {
+ return m_per_thread;
+ }
+ size_t total(const size_t team_size) const {
+ return m_per_team + m_per_thread * team_size;
+ }
+};
+
+}
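+// Illustrative usage sketch (hypothetical sizes; pairs with the TeamPolicy
+// constructors declared below; assumes the execution space provides a
+// scratch_memory_space typedef):
+//   Kokkos::Experimental::TeamScratchRequest<
+//       Kokkos::DefaultExecutionSpace::scratch_memory_space >
+//     scratch( 4096 /* bytes per team */ , 256 /* bytes per thread */ );
+//   Kokkos::TeamPolicy<>( league_size , team_size , scratch );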
+
/** \brief Execution policy for parallel work over a league of teams of threads.
*
* The work functor is called for each thread of each team such that
* the team's member threads are guaranteed to be concurrent.
*
* The team's threads have access to team shared scratch memory and
* team collective operations.
*
* If the WorkTag is non-void then the first calling argument of the
* work functor's parentheses operator is 'const WorkTag &'.
* This allows a functor to have multiple work member functions.
*
* template argument option with specified execution space:
* < ExecSpace , WorkTag >
* < ExecSpace , void >
*
* template argument option with default execution space:
* < WorkTag , void >
* < void , void >
*/
template< class Arg0 = void
, class Arg1 = void
, class ExecSpace =
                     // If the first argument is not an execution space
// then use the default execution space.
- typename Impl::if_c< Impl::is_execution_space< Arg0 >::value , Arg0
- , Kokkos::DefaultExecutionSpace >::type
+ typename std::conditional
+ < Impl::is_execution_space< Arg0 >::value , Arg0
+ , Kokkos::DefaultExecutionSpace >::type
>
class TeamPolicy {
private:
enum { Arg0_ExecSpace = Impl::is_execution_space< Arg0 >::value };
enum { Arg1_Void = Impl::is_same< Arg1 , void >::value };
enum { ArgOption_OK = Impl::StaticAssert< ( Arg0_ExecSpace || Arg1_Void ) >::value };
- typedef typename Impl::if_c< Arg0_ExecSpace , Arg1 , Arg0 >::type WorkTag ;
+ typedef typename std::conditional< Arg0_ExecSpace , Arg1 , Arg0 >::type WorkTag ;
public:
//! Tag this class as an execution policy
typedef TeamPolicy execution_policy ;
typedef ExecSpace execution_space ;
typedef WorkTag work_tag ;
//----------------------------------------
/** \brief Query maximum team size for a given functor.
*
* This size takes into account execution space concurrency limitations and
* scratch memory space limitations for reductions, team reduce/scan, and
* team shared memory.
*/
template< class FunctorType >
static int team_size_max( const FunctorType & );
/** \brief Query recommended team size for a given functor.
*
* This size takes into account execution space concurrency limitations and
* scratch memory space limitations for reductions, team reduce/scan, and
* team shared memory.
*/
template< class FunctorType >
static int team_size_recommended( const FunctorType & );
template< class FunctorType >
static int team_size_recommended( const FunctorType & , const int&);
//----------------------------------------
/** \brief Construct policy with the given instance of the execution space */
- TeamPolicy( const execution_space & , int league_size_request , int team_size_request );
+ TeamPolicy( const execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1 );
+
+ TeamPolicy( const execution_space & , int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 );
/** \brief Construct policy with the default instance of the execution space */
- TeamPolicy( int league_size_request , int team_size_request );
+ TeamPolicy( int league_size_request , int team_size_request , int vector_length_request = 1 );
+
+ TeamPolicy( int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 );
+
+ template<class MemorySpace>
+ TeamPolicy( int league_size_request , int team_size_request , const Experimental::TeamScratchRequest<MemorySpace>& team_scratch_memory_request );
+
+ template<class MemorySpace>
+ TeamPolicy( int league_size_request , const Kokkos::AUTO_t & , const Experimental::TeamScratchRequest<MemorySpace>& team_scratch_memory_request );
/** \brief The actual league size (number of teams) of the policy.
*
* This may be smaller than the requested league size due to limitations
* of the execution space.
*/
KOKKOS_INLINE_FUNCTION int league_size() const ;
/** \brief The actual team size (number of threads per team) of the policy.
*
* This may be smaller than the requested team size due to limitations
* of the execution space.
*/
KOKKOS_INLINE_FUNCTION int team_size() const ;
/** \brief Parallel execution of a functor calls the functor once with
* each member of the execution policy.
*/
struct member_type {
/** \brief Handle to the currently executing team shared scratch memory */
KOKKOS_INLINE_FUNCTION
typename execution_space::scratch_memory_space team_shmem() const ;
/** \brief Rank of this team within the league of teams */
KOKKOS_INLINE_FUNCTION int league_rank() const ;
/** \brief Number of teams in the league */
KOKKOS_INLINE_FUNCTION int league_size() const ;
/** \brief Rank of this thread within this team */
KOKKOS_INLINE_FUNCTION int team_rank() const ;
/** \brief Number of threads in this team */
KOKKOS_INLINE_FUNCTION int team_size() const ;
/** \brief Barrier among the threads of this team */
KOKKOS_INLINE_FUNCTION void team_barrier() const ;
/** \brief Intra-team reduction. Returns join of all values of the team members. */
template< class JoinOp >
KOKKOS_INLINE_FUNCTION
typename JoinOp::value_type team_reduce( const typename JoinOp::value_type
, const JoinOp & ) const ;
/** \brief Intra-team exclusive prefix sum with team_rank() ordering.
*
* The highest rank thread can compute the reduction total as
* reduction_total = dev.team_scan( value ) + value ;
*/
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const ;
    /** \brief Intra-team exclusive prefix sum with team_rank() ordering,
     * combined with non-deterministically ordered inter-team accumulation.
*
* The global inter-team accumulation value will, at the end of the
* league's parallel execution, be the scan's total.
* Parallel execution ordering of the league's teams is non-deterministic.
* As such the base value for each team's scan operation is similarly
* non-deterministic.
*/
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const ;
};
};
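// A sketch of constructing the policy declared above (league of 100 teams;
// 'MyFunctor' is an assumed user-defined functor, not part of this header):
//
//   Kokkos::TeamPolicy<> p1( 100 , 8 );                 // 8 threads per team
//   Kokkos::TeamPolicy<> p2( 100 , Kokkos::AUTO , 4 );  // chosen team size, vector length 4
//   Kokkos::Experimental::TeamScratchRequest< Kokkos::HostSpace > req( 512 );
//   Kokkos::TeamPolicy<> p3( 100 , 8 , req );           // with team scratch request
//   Kokkos::parallel_for( p1 , MyFunctor() );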
} // namespace Kokkos
namespace Kokkos {
namespace Impl {
template<typename iType, class TeamMemberType>
struct TeamThreadRangeBoundariesStruct {
private:
KOKKOS_INLINE_FUNCTION static
iType ibegin( const iType & arg_begin
, const iType & arg_end
, const iType & arg_rank
, const iType & arg_size
)
{
return arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * arg_rank ;
}
KOKKOS_INLINE_FUNCTION static
iType iend( const iType & arg_begin
, const iType & arg_end
, const iType & arg_rank
, const iType & arg_size
)
{
const iType end_ = arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * ( arg_rank + 1 );
return end_ < arg_end ? end_ : arg_end ;
}
public:
typedef iType index_type;
const iType start;
const iType end;
enum {increment = 1};
const TeamMemberType& thread;
KOKKOS_INLINE_FUNCTION
TeamThreadRangeBoundariesStruct( const TeamMemberType& arg_thread
, const iType& arg_end
)
: start( ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
, end( iend( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
, thread( arg_thread )
{}
KOKKOS_INLINE_FUNCTION
TeamThreadRangeBoundariesStruct( const TeamMemberType& arg_thread
, const iType& arg_begin
, const iType& arg_end
)
: start( ibegin( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
, end( iend( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
, thread( arg_thread )
{}
};
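// Worked example of the partitioning above: for a range of length 10 and
// team_size = 4 the chunk is (10+4-1)/4 = 3, so
//   rank 0 -> [0,3) , rank 1 -> [3,6) , rank 2 -> [6,9) , rank 3 -> [9,10).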
template<typename iType, class TeamMemberType>
struct ThreadVectorRangeBoundariesStruct {
typedef iType index_type;
enum {start = 0};
const iType end;
enum {increment = 1};
KOKKOS_INLINE_FUNCTION
ThreadVectorRangeBoundariesStruct (const TeamMemberType& thread, const iType& count):
end( count )
{}
};
template<class TeamMemberType>
struct ThreadSingleStruct {
const TeamMemberType& team_member;
KOKKOS_INLINE_FUNCTION
ThreadSingleStruct(const TeamMemberType& team_member_):team_member(team_member_){}
};
template<class TeamMemberType>
struct VectorSingleStruct {
const TeamMemberType& team_member;
KOKKOS_INLINE_FUNCTION
VectorSingleStruct(const TeamMemberType& team_member_):team_member(team_member_){}
};
} // namespace Impl
/** \brief Execution policy for parallel work over the threads within a team.
 *
 * The range is split over all threads in a team. The mapping scheme depends on the architecture.
 * This policy is used together with a parallel pattern as a nested layer within a kernel launched
 * with the TeamPolicy. This variant expects a single count, so the range is [0,count).
*/
template<typename iType, class TeamMemberType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType& count);
/** \brief Execution policy for parallel work over the threads within a team.
 *
 * The range is split over all threads in a team. The mapping scheme depends on the architecture.
 * This policy is used together with a parallel pattern as a nested layer within a kernel launched
 * with the TeamPolicy. This variant expects a begin and end, so the range is [begin,end).
*/
template<typename iType, class TeamMemberType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType& begin, const iType& end);
/** \brief Execution policy for a vector parallel loop.
*
 * The range is split over all vector lanes in a thread. The mapping scheme depends on the architecture.
 * This policy is used together with a parallel pattern as a nested layer within a kernel launched
 * with the TeamPolicy. This variant expects a single count, so the range is [0,count).
*/
template<typename iType, class TeamMemberType>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType> ThreadVectorRange(const TeamMemberType&, const iType& count);
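// A sketch of the nested usage inside a team functor's operator(), where
// 'member' is the policy's member_type and n, m are illustrative extents:
//
//   Kokkos::parallel_for( Kokkos::TeamThreadRange( member , n ) ,
//     [&] ( const int i ) {
//       Kokkos::parallel_for( Kokkos::ThreadVectorRange( member , m ) ,
//         [&] ( const int j ) { /* body for element (i,j) */ } );
//     } );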
} // namespace Kokkos
#endif /* #define KOKKOS_EXECPOLICY_HPP */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp
similarity index 67%
copy from lib/kokkos/core/src/Kokkos_HostSpace.hpp
copy to lib/kokkos/core/src/Kokkos_HBWSpace.hpp
index 2aa809e7c..94988e60b 100644
--- a/lib/kokkos/core/src/Kokkos_HostSpace.hpp
+++ b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp
@@ -1,276 +1,327 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
-#ifndef KOKKOS_HOSTSPACE_HPP
-#define KOKKOS_HOSTSPACE_HPP
+#ifndef KOKKOS_HBWSPACE_HPP
+#define KOKKOS_HBWSPACE_HPP
-#include <cstring>
-#include <string>
-#include <iosfwd>
-#include <typeinfo>
-#include <Kokkos_Core_fwd.hpp>
-#include <Kokkos_MemoryTraits.hpp>
-
-#include <impl/Kokkos_Traits.hpp>
-#include <impl/Kokkos_Error.hpp>
-
-#include <impl/Kokkos_AllocationTracker.hpp>
-#include <impl/Kokkos_BasicAllocators.hpp>
-
-#include <impl/KokkosExp_SharedAlloc.hpp>
+#include <Kokkos_HostSpace.hpp>
+#include <impl/Kokkos_HBWAllocators.hpp>
/*--------------------------------------------------------------------------*/
+#ifdef KOKKOS_HAVE_HBWSPACE
namespace Kokkos {
+namespace Experimental {
namespace Impl {
/// \brief Initialize lock array for arbitrary size atomics.
///
/// Arbitrary atomics are implemented using a hash table of locks
/// where the hash value is derived from the address of the
/// object for which an atomic operation is performed.
/// This function initializes the locks to zero (unset).
-void init_lock_array_host_space();
+void init_lock_array_hbw_space();
/// \brief Acquire a lock for the address
///
/// This function tries to acquire the lock for the hash value derived
/// from the provided ptr. If the lock is successfully acquired the
/// function returns true. Otherwise it returns false.
-bool lock_address_host_space(void* ptr);
+bool lock_address_hbw_space(void* ptr);
/// \brief Release lock for the address
///
/// This function releases the lock for the hash value derived
/// from the provided ptr. This function should only be called
/// after previously successfully acquiring a lock with
/// lock_address.
-void unlock_address_host_space(void* ptr);
+void unlock_address_hbw_space(void* ptr);
} // namespace Impl
+} // namespace Experimental
} // namespace Kokkos
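// A sketch of the intended locking protocol (the update on *ptr is an
// assumed caller-supplied operation):
//
//   while ( ! Kokkos::Experimental::Impl::lock_address_hbw_space( ptr ) );
//   /* ... perform the update on *ptr while holding the lock ... */
//   Kokkos::Experimental::Impl::unlock_address_hbw_space( ptr );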
namespace Kokkos {
+namespace Experimental {
-/// \class HostSpace
+/// \class HBWSpace
/// \brief Memory management for host memory.
///
-/// HostSpace is a memory space that governs host memory. "Host"
+/// HBWSpace is a memory space that governs high-bandwidth host memory. "Host"
/// memory means the usual CPU-accessible memory.
-class HostSpace {
+class HBWSpace {
public:
//! Tag this class as a kokkos memory space
- typedef HostSpace memory_space ;
+ typedef HBWSpace memory_space ;
typedef size_t size_type ;
/// \typedef execution_space
/// \brief Default execution space for this memory space.
///
/// Every memory space has a default execution space. This is
/// useful for things like initializing a View (which happens in
/// parallel using the View's default execution space).
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_PTHREAD )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_SERIAL )
typedef Kokkos::Serial execution_space ;
#else
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
#endif
//! This memory space preferred device_type
typedef Kokkos::Device<execution_space,memory_space> device_type;
+ /*--------------------------------*/
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
-#if defined( KOKKOS_USE_PAGE_ALIGNED_HOST_MEMORY )
- typedef Impl::PageAlignedAllocator allocator ;
-#else
- typedef Impl::AlignedAllocator allocator ;
-#endif
+ typedef Impl::HBWMallocAllocator allocator ;
/** \brief Allocate a contiguous block of memory.
*
* The input label is associated with the block of memory.
* The block of memory is tracked via reference counting where
* allocation gives it a reference count of one.
*/
- static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
+ static Kokkos::Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
+
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
/*--------------------------------*/
- /* Functions unique to the HostSpace */
+ /* Functions unique to the HBWSpace */
static int in_parallel();
static void register_in_parallel( int (*)() );
/*--------------------------------*/
/**\brief Default memory space instance */
- HostSpace();
- HostSpace( const HostSpace & rhs ) = default ;
- HostSpace & operator = ( const HostSpace & ) = default ;
- ~HostSpace() = default ;
+ HBWSpace();
+ HBWSpace( const HBWSpace & rhs ) = default ;
+ HBWSpace & operator = ( const HBWSpace & ) = default ;
+ ~HBWSpace() = default ;
 /**\brief Non-default memory space instance to choose allocation mechanism, if available */
enum AllocationMechanism { STD_MALLOC , POSIX_MEMALIGN , POSIX_MMAP , INTEL_MM_ALLOC };
explicit
- HostSpace( const AllocationMechanism & );
+ HBWSpace( const AllocationMechanism & );
- /**\brief Allocate memory in the host space */
+ /**\brief Allocate untracked memory in the space */
void * allocate( const size_t arg_alloc_size ) const ;
- /**\brief Deallocate memory in the host space */
+ /**\brief Deallocate untracked memory in the space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
private:
AllocationMechanism m_alloc_mech ;
- friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > ;
+ friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > ;
};
+} // namespace Experimental
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
template<>
-class SharedAllocationRecord< Kokkos::HostSpace , void >
+class SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
- friend Kokkos::HostSpace ;
+ friend Kokkos::Experimental::HBWSpace ;
typedef SharedAllocationRecord< void , void > RecordBase ;
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
static void deallocate( RecordBase * );
- /**\brief Root record for tracked allocations from this HostSpace instance */
+ /**\brief Root record for tracked allocations from this HBWSpace instance */
static RecordBase s_root_record ;
- const Kokkos::HostSpace m_space ;
+ const Kokkos::Experimental::HBWSpace m_space ;
protected:
~SharedAllocationRecord();
SharedAllocationRecord() = default ;
- SharedAllocationRecord( const Kokkos::HostSpace & arg_space
+ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
inline
std::string get_label() const
{
return std::string( RecordBase::head()->m_label );
}
KOKKOS_INLINE_FUNCTION static
- SharedAllocationRecord * allocate( const Kokkos::HostSpace & arg_space
+ SharedAllocationRecord * allocate( const Kokkos::Experimental::HBWSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
)
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
#else
return (SharedAllocationRecord *) 0 ;
#endif
}
+ /**\brief Allocate tracked memory in the space */
+ static
+ void * allocate_tracked( const Kokkos::Experimental::HBWSpace & arg_space
+ , const std::string & arg_label
+ , const size_t arg_alloc_size );
+
+ /**\brief Reallocate tracked memory in the space */
+ static
+ void * reallocate_tracked( void * const arg_alloc_ptr
+ , const size_t arg_alloc_size );
+
+ /**\brief Deallocate tracked memory in the space */
+ static
+ void deallocate_tracked( void * const arg_alloc_ptr );
+
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
- static void print_records( std::ostream & , const Kokkos::HostSpace & , bool detail = false );
+ static void print_records( std::ostream & , const Kokkos::Experimental::HBWSpace & , bool detail = false );
};
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
-template< class DstSpace, class SrcSpace, class ExecutionSpace = typename DstSpace::execution_space> struct DeepCopy ;
template<class ExecutionSpace>
-struct DeepCopy<HostSpace,HostSpace,ExecutionSpace> {
+struct DeepCopy<Experimental::HBWSpace,Experimental::HBWSpace,ExecutionSpace> {
+ DeepCopy( void * dst , const void * src , size_t n ) {
+ memcpy( dst , src , n );
+ }
+ DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) {
+ exec.fence();
+ memcpy( dst , src , n );
+ }
+};
+
+template<class ExecutionSpace>
+struct DeepCopy<HostSpace,Experimental::HBWSpace,ExecutionSpace> {
+ DeepCopy( void * dst , const void * src , size_t n ) {
+ memcpy( dst , src , n );
+ }
+ DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) {
+ exec.fence();
+ memcpy( dst , src , n );
+ }
+};
+
+template<class ExecutionSpace>
+struct DeepCopy<Experimental::HBWSpace,HostSpace,ExecutionSpace> {
DeepCopy( void * dst , const void * src , size_t n ) {
memcpy( dst , src , n );
}
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) {
exec.fence();
memcpy( dst , src , n );
}
};
} // namespace Impl
} // namespace Kokkos
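// These specializations back Kokkos::deep_copy between host-accessible
// views; a sketch with illustrative view types:
//
//   Kokkos::View< double* , Kokkos::Experimental::HBWSpace > a( "a" , n );
//   Kokkos::View< double* , Kokkos::HostSpace >              b( "b" , n );
//   Kokkos::deep_copy( a , b );  // HostSpace -> HBWSpace via memcpy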
+namespace Kokkos {
+namespace Impl {
-#endif /* #define KOKKOS_HOSTSPACE_HPP */
+template<>
+struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::Experimental::HBWSpace >
+{
+ enum { value = true };
+ inline static void verify( void ) { }
+ inline static void verify( const void * ) { }
+};
+
+template<>
+struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::HBWSpace , Kokkos::HostSpace >
+{
+ enum { value = true };
+ inline static void verify( void ) { }
+ inline static void verify( const void * ) { }
+};
+
+} // namespace Impl
+} // namespace Kokkos
+
+#endif
+#endif /* #define KOKKOS_HBWSPACE_HPP */
diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HostSpace.hpp
index 2aa809e7c..6e707f060 100644
--- a/lib/kokkos/core/src/Kokkos_HostSpace.hpp
+++ b/lib/kokkos/core/src/Kokkos_HostSpace.hpp
@@ -1,276 +1,295 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_HOSTSPACE_HPP
#define KOKKOS_HOSTSPACE_HPP
#include <cstring>
#include <string>
#include <iosfwd>
#include <typeinfo>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_MemoryTraits.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <impl/Kokkos_BasicAllocators.hpp>
#include <impl/KokkosExp_SharedAlloc.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
/// \brief Initialize lock array for arbitrary size atomics.
///
/// Arbitrary atomics are implemented using a hash table of locks
/// where the hash value is derived from the address of the
/// object for which an atomic operation is performed.
/// This function initializes the locks to zero (unset).
void init_lock_array_host_space();
/// \brief Acquire a lock for the address
///
/// This function tries to acquire the lock for the hash value derived
/// from the provided ptr. If the lock is successfully acquired the
/// function returns true. Otherwise it returns false.
bool lock_address_host_space(void* ptr);
/// \brief Release lock for the address
///
/// This function releases the lock for the hash value derived
/// from the provided ptr. This function should only be called
/// after previously successfully acquiring a lock with
/// lock_address.
void unlock_address_host_space(void* ptr);
} // namespace Impl
} // namespace Kokkos
namespace Kokkos {
/// \class HostSpace
/// \brief Memory management for host memory.
///
/// HostSpace is a memory space that governs host memory. "Host"
/// memory means the usual CPU-accessible memory.
class HostSpace {
public:
//! Tag this class as a kokkos memory space
typedef HostSpace memory_space ;
typedef size_t size_type ;
/// \typedef execution_space
/// \brief Default execution space for this memory space.
///
/// Every memory space has a default execution space. This is
/// useful for things like initializing a View (which happens in
/// parallel using the View's default execution space).
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_PTHREAD )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_SERIAL )
typedef Kokkos::Serial execution_space ;
#else
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
#endif
//! This memory space preferred device_type
typedef Kokkos::Device<execution_space,memory_space> device_type;
+ /*--------------------------------*/
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
#if defined( KOKKOS_USE_PAGE_ALIGNED_HOST_MEMORY )
typedef Impl::PageAlignedAllocator allocator ;
#else
typedef Impl::AlignedAllocator allocator ;
#endif
/** \brief Allocate a contiguous block of memory.
*
* The input label is associated with the block of memory.
* The block of memory is tracked via reference counting where
* allocation gives it a reference count of one.
*/
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
/*--------------------------------*/
/* Functions unique to the HostSpace */
static int in_parallel();
static void register_in_parallel( int (*)() );
/*--------------------------------*/
/**\brief Default memory space instance */
HostSpace();
HostSpace( const HostSpace & rhs ) = default ;
HostSpace & operator = ( const HostSpace & ) = default ;
~HostSpace() = default ;
 /**\brief Non-default memory space instance to choose allocation mechanism, if available */
enum AllocationMechanism { STD_MALLOC , POSIX_MEMALIGN , POSIX_MMAP , INTEL_MM_ALLOC };
explicit
HostSpace( const AllocationMechanism & );
- /**\brief Allocate memory in the host space */
+ /**\brief Allocate untracked memory in the space */
void * allocate( const size_t arg_alloc_size ) const ;
- /**\brief Deallocate memory in the host space */
+ /**\brief Deallocate untracked memory in the space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
private:
AllocationMechanism m_alloc_mech ;
friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > ;
};
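// A sketch of the untracked interface above; the caller owns the block
// and must pass the same size back on deallocation:
//
//   Kokkos::HostSpace space ;
//   void * ptr = space.allocate( 1024 );
//   /* ... use the block ... */
//   space.deallocate( ptr , 1024 );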
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
template<>
class SharedAllocationRecord< Kokkos::HostSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
friend Kokkos::HostSpace ;
typedef SharedAllocationRecord< void , void > RecordBase ;
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
static void deallocate( RecordBase * );
/**\brief Root record for tracked allocations from this HostSpace instance */
static RecordBase s_root_record ;
const Kokkos::HostSpace m_space ;
protected:
~SharedAllocationRecord();
SharedAllocationRecord() = default ;
SharedAllocationRecord( const Kokkos::HostSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
inline
std::string get_label() const
{
return std::string( RecordBase::head()->m_label );
}
KOKKOS_INLINE_FUNCTION static
SharedAllocationRecord * allocate( const Kokkos::HostSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
)
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
#else
return (SharedAllocationRecord *) 0 ;
#endif
}
+ /**\brief Allocate tracked memory in the space */
+ static
+ void * allocate_tracked( const Kokkos::HostSpace & arg_space
+ , const std::string & arg_label
+ , const size_t arg_alloc_size );
+
+ /**\brief Reallocate tracked memory in the space */
+ static
+ void * reallocate_tracked( void * const arg_alloc_ptr
+ , const size_t arg_alloc_size );
+
+ /**\brief Deallocate tracked memory in the space */
+ static
+ void deallocate_tracked( void * const arg_alloc_ptr );
+
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
static void print_records( std::ostream & , const Kokkos::HostSpace & , bool detail = false );
};
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class DstSpace, class SrcSpace, class ExecutionSpace = typename DstSpace::execution_space> struct DeepCopy ;
template<class ExecutionSpace>
struct DeepCopy<HostSpace,HostSpace,ExecutionSpace> {
DeepCopy( void * dst , const void * src , size_t n ) {
memcpy( dst , src , n );
}
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) {
exec.fence();
memcpy( dst , src , n );
}
};
} // namespace Impl
} // namespace Kokkos
#endif /* #define KOKKOS_HOSTSPACE_HPP */
diff --git a/lib/kokkos/core/src/Kokkos_Layout.hpp b/lib/kokkos/core/src/Kokkos_Layout.hpp
index 32822889d..e7d38a902 100644
--- a/lib/kokkos/core/src/Kokkos_Layout.hpp
+++ b/lib/kokkos/core/src/Kokkos_Layout.hpp
@@ -1,174 +1,179 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_Layout.hpp
/// \brief Declaration of various \c MemoryLayout options.
#ifndef KOKKOS_LAYOUT_HPP
#define KOKKOS_LAYOUT_HPP
#include <stddef.h>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Tags.hpp>
namespace Kokkos {
//----------------------------------------------------------------------------
/// \struct LayoutLeft
/// \brief Memory layout tag indicating left-to-right (Fortran scheme)
/// striding of multi-indices.
///
/// This is an example of a \c MemoryLayout template parameter of
/// View. The memory layout describes how View maps from a
/// multi-index (i0, i1, ..., ik) to a memory location.
///
/// "Layout left" indicates a mapping where the leftmost index i0
/// refers to contiguous access, and strides increase for dimensions
/// going right from there (i1, i2, ...). This layout imitates how
/// Fortran stores multi-dimensional arrays. For the special case of
/// a two-dimensional array, "layout left" is also called "column
/// major."
struct LayoutLeft {
//! Tag this class as a kokkos array layout
typedef LayoutLeft array_layout ;
};
//----------------------------------------------------------------------------
/// \struct LayoutRight
/// \brief Memory layout tag indicating right-to-left (C or
/// lexicographical scheme) striding of multi-indices.
///
/// This is an example of a \c MemoryLayout template parameter of
/// View. The memory layout describes how View maps from a
/// multi-index (i0, i1, ..., ik) to a memory location.
///
/// "Right layout" indicates a mapping where the rightmost index ik
/// refers to contiguous access, and strides increase for dimensions
/// going left from there. This layout imitates how C stores
/// multi-dimensional arrays. For the special case of a
/// two-dimensional array, "layout right" is also called "row major."
struct LayoutRight {
//! Tag this class as a kokkos array layout
typedef LayoutRight array_layout ;
};
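// For a rank-2 View with dimensions (N0,N1) the two tags map index (i0,i1)
// to offset i0 + N0 * i1 (LayoutLeft, unit stride in i0) and
// to offset i1 + N1 * i0 (LayoutRight, unit stride in i1).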
//----------------------------------------------------------------------------
/// \struct LayoutStride
/// \brief Memory layout tag indicating arbitrarily strided
/// multi-index mapping into contiguous memory.
struct LayoutStride {
//! Tag this class as a kokkos array layout
typedef LayoutStride array_layout ;
enum { MAX_RANK = 8 };
size_t dimension[ MAX_RANK ] ;
size_t stride[ MAX_RANK ] ;
/** \brief Compute strides from ordered dimensions.
*
* Values of order uniquely form the set [0..rank)
* and specify ordering of the dimensions.
* Order = {0,1,2,...} is LayoutLeft
* Order = {...,2,1,0} is LayoutRight
*/
template< typename iTypeOrder , typename iTypeDimen >
KOKKOS_INLINE_FUNCTION static
LayoutStride order_dimensions( int const rank
, iTypeOrder const * const order
, iTypeDimen const * const dimen )
{
LayoutStride tmp ;
// Verify valid rank order:
int check_input = MAX_RANK < rank ? 0 : int( 1 << rank ) - 1 ;
for ( int r = 0 ; r < MAX_RANK ; ++r ) {
tmp.dimension[r] = 0 ;
tmp.stride[r] = 0 ;
check_input &= ~int( 1 << order[r] );
}
if ( 0 == check_input ) {
size_t n = 1 ;
for ( int r = 0 ; r < rank ; ++r ) {
tmp.stride[ order[r] ] = n ;
n *= ( dimen[order[r]] );
tmp.dimension[r] = dimen[r];
}
}
return tmp ;
}
};
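// Worked example for order_dimensions: rank = 3, order = {0,1,2} and
// dimen = {4,5,6} give stride = {1,4,20}, i.e. the LayoutLeft mapping;
// order = {2,1,0} gives stride = {30,6,1}, i.e. the LayoutRight mapping.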
//----------------------------------------------------------------------------
/// \struct LayoutTileLeft
/// \brief Memory layout tag indicating left-to-right (Fortran scheme)
/// striding of multi-indices by tiles.
///
/// This is an example of a \c MemoryLayout template parameter of
/// View. The memory layout describes how View maps from a
/// multi-index (i0, i1, ..., ik) to a memory location.
///
/// "Tiled layout" indicates a mapping to contiguously stored
/// <tt>ArgN0</tt> by <tt>ArgN1</tt> tiles for the rightmost two
/// dimensions. Indices are LayoutLeft within each tile, and the
/// tiles themselves are arranged using LayoutLeft. Note that the
/// dimensions <tt>ArgN0</tt> and <tt>ArgN1</tt> of the tiles must be
/// compile-time constants. This speeds up index calculations. If
/// both tile dimensions are powers of two, Kokkos can optimize
/// further.
template < unsigned ArgN0 , unsigned ArgN1 ,
- bool IsPowerOfTwo = ( Impl::is_power_of_two<ArgN0>::value &&
- Impl::is_power_of_two<ArgN1>::value )
+ bool IsPowerOfTwo = ( Impl::is_integral_power_of_two(ArgN0) &&
+ Impl::is_integral_power_of_two(ArgN1) )
>
struct LayoutTileLeft {
+
+ static_assert( Impl::is_integral_power_of_two(ArgN0) &&
+ Impl::is_integral_power_of_two(ArgN1)
+ , "LayoutTileLeft must be given power-of-two tile dimensions" );
+
//! Tag this class as a kokkos array layout
typedef LayoutTileLeft<ArgN0,ArgN1,IsPowerOfTwo> array_layout ;
enum { N0 = ArgN0 };
enum { N1 = ArgN1 };
};
} // namespace Kokkos
#endif // #ifndef KOKKOS_LAYOUT_HPP
diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp
index c221c2f9f..2386c9d2c 100644
--- a/lib/kokkos/core/src/Kokkos_Macros.hpp
+++ b/lib/kokkos/core/src/Kokkos_Macros.hpp
@@ -1,420 +1,426 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_MACROS_HPP
#define KOKKOS_MACROS_HPP
//----------------------------------------------------------------------------
/** Pick up configure/build options via #define macros:
*
* KOKKOS_HAVE_CUDA Kokkos::Cuda execution and memory spaces
* KOKKOS_HAVE_PTHREAD Kokkos::Threads execution space
* KOKKOS_HAVE_QTHREAD Kokkos::Qthread execution space
* KOKKOS_HAVE_OPENMP Kokkos::OpenMP execution space
* KOKKOS_HAVE_HWLOC HWLOC library is available
* KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK insert array bounds checks, is expensive!
* KOKKOS_HAVE_CXX11 enable C++11 features
*
* KOKKOS_HAVE_MPI negotiate MPI/execution space interactions
*
* KOKKOS_USE_CUDA_UVM Use CUDA UVM for Cuda memory space
*/
#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
#include <KokkosCore_config.h>
#endif
//----------------------------------------------------------------------------
/** Pick up compiler specific #define macros:
*
* Macros for known compilers evaluate to an integral version value
*
* KOKKOS_COMPILER_NVCC
* KOKKOS_COMPILER_GNU
* KOKKOS_COMPILER_INTEL
* KOKKOS_COMPILER_IBM
* KOKKOS_COMPILER_CRAYC
* KOKKOS_COMPILER_APPLECC
* KOKKOS_COMPILER_CLANG
* KOKKOS_COMPILER_PGI
*
 * Macros for which compiler extension to use for atomics on intrinsic types
*
* KOKKOS_ATOMICS_USE_CUDA
* KOKKOS_ATOMICS_USE_GNU
* KOKKOS_ATOMICS_USE_INTEL
* KOKKOS_ATOMICS_USE_OPENMP31
*
* A suite of 'KOKKOS_HAVE_PRAGMA_...' are defined for internal use.
*
* Macros for marking functions to run in an execution space:
*
* KOKKOS_FUNCTION
* KOKKOS_INLINE_FUNCTION request compiler to inline
* KOKKOS_FORCEINLINE_FUNCTION force compiler to inline, use with care!
*/
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ )
/* Compiling with a CUDA compiler.
*
* Include <cuda.h> to pick up the CUDA_VERSION macro defined as:
* CUDA_VERSION = ( MAJOR_VERSION * 1000 ) + ( MINOR_VERSION * 10 )
*
* When generating device code the __CUDA_ARCH__ macro is defined as:
* __CUDA_ARCH__ = ( MAJOR_CAPABILITY * 100 ) + ( MINOR_CAPABILITY * 10 )
*/
#include <cuda_runtime.h>
#include <cuda.h>
#if ! defined( CUDA_VERSION )
#error "#include <cuda.h> did not define CUDA_VERSION"
#endif
#if ( CUDA_VERSION < 6050 )
// CUDA supports (unofficially) C++11 in device code starting with
// version 6.5. This includes auto type and device code internal
// lambdas.
#error "Cuda version 6.5 or greater required"
#endif
#if defined( __CUDA_ARCH__ ) && ( __CUDA_ARCH__ < 300 )
/* Compiling with CUDA compiler for device code. */
#error "Cuda device capability >= 3.0 is required"
#endif
#ifdef KOKKOS_CUDA_USE_LAMBDA
#if ( CUDA_VERSION < 7000 )
// CUDA supports C++11 lambdas generated in host code to be given
// to the device starting with version 7.5. But the release candidate (7.5.6)
// still identifies as 7.0
#error "Cuda version 7.5 or greater required for host-to-device Lambda support"
#endif
#define KOKKOS_LAMBDA [=]__device__
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
#endif
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ ) */
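// A sketch of the host-to-device lambda dispatch this macro enables
// ('a' is an assumed View of length n in a device-accessible space):
//
//   Kokkos::parallel_for( n , KOKKOS_LAMBDA ( const int i ) {
//     a( i ) = 2.0 * a( i );
//   } );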
/*--------------------------------------------------------------------------*/
/* Language info: C++, CUDA, OPENMP */
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
// Compiling Cuda code to 'ptx'
#define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
#define KOKKOS_INLINE_FUNCTION __device__ __host__ inline
#define KOKKOS_FUNCTION __device__ __host__
#endif /* #if defined( __CUDA_ARCH__ ) */
#if defined( _OPENMP )
/* Compiling with OpenMP.
* The value of _OPENMP is an integer value YYYYMM
* where YYYY and MM are the year and month designation
* of the supported OpenMP API version.
*/
#endif /* #if defined( _OPENMP ) */
/*--------------------------------------------------------------------------*/
/* Mapping compiler built-ins to KOKKOS_COMPILER_*** macros */
#if defined( __NVCC__ )
// NVIDIA compiler is being used.
// Code is parsed and separated into host and device code.
// Host code is compiled again with another compiler.
// Device code is compile to 'ptx'.
#define KOKKOS_COMPILER_NVCC __NVCC__
#else
#if defined( KOKKOS_HAVE_CXX11 ) && ! defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA )
// CUDA (including version 6.5) does not support giving lambdas as
// arguments to global functions. Thus it's not currently possible
// to dispatch lambdas from the host.
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
#endif
#endif /* #if defined( __NVCC__ ) */
#if defined( KOKKOS_HAVE_CXX11 ) && !defined (KOKKOS_LAMBDA)
#define KOKKOS_LAMBDA [=]
#endif
#if ! defined( __CUDA_ARCH__ ) /* Not compiling Cuda code to 'ptx'. */
/* Intel compiler for host code */
#if defined( __INTEL_COMPILER )
#define KOKKOS_COMPILER_INTEL __INTEL_COMPILER
#elif defined( __ICC )
// Old define
#define KOKKOS_COMPILER_INTEL __ICC
#elif defined( __ECC )
// Very old define
#define KOKKOS_COMPILER_INTEL __ECC
#endif
/* CRAY compiler for host code */
#if defined( _CRAYC )
#define KOKKOS_COMPILER_CRAYC _CRAYC
#endif
#if defined( __IBMCPP__ )
// IBM C++
#define KOKKOS_COMPILER_IBM __IBMCPP__
#elif defined( __IBMC__ )
#define KOKKOS_COMPILER_IBM __IBMC__
#endif
#if defined( __APPLE_CC__ )
#define KOKKOS_COMPILER_APPLECC __APPLE_CC__
#endif
#if defined (__clang__) && !defined (KOKKOS_COMPILER_INTEL)
#define KOKKOS_COMPILER_CLANG __clang_major__*100+__clang_minor__*10+__clang_patchlevel__
#endif
#if ! defined( __clang__ ) && ! defined( KOKKOS_COMPILER_INTEL ) && defined( __GNUC__ )
#define KOKKOS_COMPILER_GNU __GNUC__*100+__GNUC_MINOR__*10+__GNUC_PATCHLEVEL__
#if ( 472 > KOKKOS_COMPILER_GNU )
#error "Compiling with GCC version earlier than 4.7.2 is not supported."
#endif
#endif
#if defined( __PGIC__ ) && ! defined( __GNUC__ )
#define KOKKOS_COMPILER_PGI __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__
#if ( 1540 > KOKKOS_COMPILER_PGI )
#error "Compiling with PGI version earlier than 15.4 is not supported."
#endif
#endif
#endif /* #if ! defined( __CUDA_ARCH__ ) */
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/* Intel compiler macros */
#if defined( KOKKOS_COMPILER_INTEL )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
#define KOKKOS_HAVE_PRAGMA_SIMD 1
#if ( 1400 > KOKKOS_COMPILER_INTEL )
#if ( 1300 > KOKKOS_COMPILER_INTEL )
#error "Compiling with Intel version earlier than 13.0 is not supported. Official minimal version is 14.0."
#else
#warning "Compiling with Intel version 13.x probably works but is not officially supported. Official minimal version is 14.0."
#endif
#endif
#if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_ENABLE_ASM ) && ! defined( _WIN32 )
#define KOKKOS_ENABLE_ASM 1
#endif
#if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#if !defined (_WIN32)
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
#else
#define KOKKOS_FORCEINLINE_FUNCTION inline
#endif
#endif
#if defined( __MIC__ )
// Compiling for Xeon Phi
#endif
#endif
/*--------------------------------------------------------------------------*/
/* Cray compiler macros */
#if defined( KOKKOS_COMPILER_CRAYC )
#endif
/*--------------------------------------------------------------------------*/
/* IBM Compiler macros */
#if defined( KOKKOS_COMPILER_IBM )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#endif
/*--------------------------------------------------------------------------*/
/* CLANG compiler macros */
#if defined( KOKKOS_COMPILER_CLANG )
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
#endif
#endif
/*--------------------------------------------------------------------------*/
/* GNU Compiler macros */
#if defined( KOKKOS_COMPILER_GNU )
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
#endif
#if ! defined( KOKKOS_ENABLE_ASM ) && \
! ( defined( __powerpc) || \
defined(__powerpc__) || \
defined(__powerpc64__) || \
defined(__POWERPC__) || \
defined(__ppc__) || \
defined(__ppc64__) || \
defined(__PGIC__) )
#define KOKKOS_ENABLE_ASM 1
#endif
#endif
/*--------------------------------------------------------------------------*/
#if defined( KOKKOS_COMPILER_PGI )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#endif
/*--------------------------------------------------------------------------*/
#if defined( KOKKOS_COMPILER_NVCC )
#if defined(__CUDA_ARCH__ )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#endif
#endif
//----------------------------------------------------------------------------
/** Define function marking macros if compiler specific macros are undefined: */
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION inline
#endif
#if ! defined( KOKKOS_INLINE_FUNCTION )
#define KOKKOS_INLINE_FUNCTION inline
#endif
#if ! defined( KOKKOS_FUNCTION )
#define KOKKOS_FUNCTION /**/
#endif
//----------------------------------------------------------------------------
/** Determine the default execution space for parallel dispatch.
* There is zero or one default execution space specified.
*/
#if 1 < ( ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
#error "More than one KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* specified" ;
#endif
/** If default is not specified then choose from enabled execution spaces.
* Priority: CUDA, OPENMP, THREADS, SERIAL
*/
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_HAVE_CUDA )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
#elif defined ( KOKKOS_HAVE_OPENMP )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
#elif defined ( KOKKOS_HAVE_PTHREAD )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
#else
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
#endif
//----------------------------------------------------------------------------
/** Determine for what space the code is being compiled: */
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_HAVE_CUDA)
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
#else
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
+#if ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
+ ( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 )
+#if defined(KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN)
+#define KOKKOS_POSIX_MEMALIGN_AVAILABLE 1
+#endif
+#endif
#endif /* #ifndef KOKKOS_MACROS_HPP */
diff --git a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp
index b581c7da2..5ee1f16fe 100644
--- a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp
+++ b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp
@@ -1,116 +1,116 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_MEMORYTRAITS_HPP
#define KOKKOS_MEMORYTRAITS_HPP
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Tags.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief Memory access traits for views, an extension point.
*
* These traits should be orthogonal. If there are dependencies then
* the MemoryTraits template must detect and enforce dependencies.
*
* A zero value is the default for a View, indicating that none of
* these traits are present.
*/
enum MemoryTraitsFlags
{ Unmanaged = 0x01
, RandomAccess = 0x02
, Atomic = 0x04
};
template < unsigned T >
struct MemoryTraits {
//! Tag this class as a kokkos memory traits:
typedef MemoryTraits memory_traits ;
enum { Unmanaged = T & unsigned(Kokkos::Unmanaged) };
enum { RandomAccess = T & unsigned(Kokkos::RandomAccess) };
enum { Atomic = T & unsigned(Kokkos::Atomic) };
};
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
typedef Kokkos::MemoryTraits<0> MemoryManaged ;
typedef Kokkos::MemoryTraits< Kokkos::Unmanaged > MemoryUnmanaged ;
typedef Kokkos::MemoryTraits< Kokkos::Unmanaged | Kokkos::RandomAccess > MemoryRandomAccess ;
} // namespace Kokkos
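// A sketch of attaching a trait to a View type (the element type is
// illustrative):
//
//   typedef Kokkos::View< const double * ,
//                         Kokkos::MemoryTraits< Kokkos::RandomAccess > >
//     RandomAccessView ;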
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Memory alignment settings
*
 * Sets the global value for memory alignment. Must be a power of two!
 * Enables compatibility of views from different devices with static stride.
 * Use a compiler flag to override it.
*/
enum { MEMORY_ALIGNMENT =
#if defined( KOKKOS_MEMORY_ALIGNMENT )
- ( 1 << Kokkos::Impl::power_of_two< KOKKOS_MEMORY_ALIGNMENT >::value )
+ ( 1 << Kokkos::Impl::integral_power_of_two( KOKKOS_MEMORY_ALIGNMENT ) )
#else
- ( 1 << Kokkos::Impl::power_of_two< 128 >::value )
+ ( 1 << Kokkos::Impl::integral_power_of_two( 128 ) )
#endif
, MEMORY_ALIGNMENT_THRESHOLD = 4
};
} //namespace Impl
} // namespace Kokkos
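// Worked example: with the default of 128, integral_power_of_two(128) = 7
// and MEMORY_ALIGNMENT = 1 << 7 = 128 bytes.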
#endif /* #ifndef KOKKOS_MEMORYTRAITS_HPP */
diff --git a/lib/kokkos/core/src/Kokkos_OpenMP.hpp b/lib/kokkos/core/src/Kokkos_OpenMP.hpp
index 508da04c8..e7dbf9a0e 100644
--- a/lib/kokkos/core/src/Kokkos_OpenMP.hpp
+++ b/lib/kokkos/core/src/Kokkos_OpenMP.hpp
@@ -1,175 +1,182 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_OPENMP_HPP
#define KOKKOS_OPENMP_HPP
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP )
#include <omp.h>
#include <cstddef>
#include <iosfwd>
#include <Kokkos_HostSpace.hpp>
+#ifdef KOKKOS_HAVE_HBWSPACE
+#include <Kokkos_HBWSpace.hpp>
+#endif
#include <Kokkos_ScratchSpace.hpp>
#include <Kokkos_Parallel.hpp>
#include <Kokkos_Layout.hpp>
#include <impl/Kokkos_Tags.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/// \class OpenMP
/// \brief Kokkos device for multicore processors in the host memory space.
class OpenMP {
public:
//------------------------------------
//! \name Type declarations that all Kokkos devices must provide.
//@{
//! Tag this class as a kokkos execution space
typedef OpenMP execution_space ;
+ #ifdef KOKKOS_HAVE_HBWSPACE
+ typedef Experimental::HBWSpace memory_space ;
+ #else
typedef HostSpace memory_space ;
+ #endif
//! This execution space preferred device_type
typedef Kokkos::Device<execution_space,memory_space> device_type;
typedef LayoutRight array_layout ;
- typedef HostSpace::size_type size_type ;
+ typedef memory_space::size_type size_type ;
typedef ScratchMemorySpace< OpenMP > scratch_memory_space ;
//@}
//------------------------------------
//! \name Functions that all Kokkos devices must implement.
//@{
inline static bool in_parallel() { return omp_in_parallel(); }
/** \brief Set the device in a "sleep" state. A noop for OpenMP. */
static bool sleep();
/** \brief Wake the device from the 'sleep' state. A noop for OpenMP. */
static bool wake();
/** \brief Wait until all dispatched functors complete. A noop for OpenMP. */
static void fence() {}
/// \brief Print configuration information to the given output stream.
static void print_configuration( std::ostream & , const bool detail = false );
/// \brief Free any resources being consumed by the device.
static void finalize();
/** \brief Initialize the device.
*
* 1) If the hardware locality library is enabled and OpenMP has not
* already bound threads then bind OpenMP threads to maximize
* core utilization and group for memory hierarchy locality.
*
* 2) Allocate a HostThread for each OpenMP thread to hold its
* topology and fan in/out data.
*/
static void initialize( unsigned thread_count = 0 ,
unsigned use_numa_count = 0 ,
unsigned use_cores_per_numa = 0 );
static int is_initialized();
//@}
//------------------------------------
/** \brief This execution space has a topological thread pool which can be queried.
*
* All threads within a pool have a common memory space for which they are cache coherent.
* depth = 0 gives the number of threads in the whole pool.
* depth = 1 gives the number of threads in a NUMA region, typically sharing L3 cache.
* depth = 2 gives the number of threads at the finest granularity, typically sharing L1 cache.
*/
inline static int thread_pool_size( int depth = 0 );
/** \brief The rank of the executing thread in this thread pool */
KOKKOS_INLINE_FUNCTION static int thread_pool_rank();
//------------------------------------
inline static unsigned max_hardware_threads() { return thread_pool_size(0); }
KOKKOS_INLINE_FUNCTION static
unsigned hardware_thread_id() { return thread_pool_rank(); }
};
} // namespace Kokkos
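// Illustrative sketch (assumes Kokkos::OpenMP::initialize() has already been
// called): querying the pool topology at each depth documented above.
//
//   const int pool_threads = Kokkos::OpenMP::thread_pool_size(0); // whole pool
//   const int numa_threads = Kokkos::OpenMP::thread_pool_size(1); // per NUMA region
//   const int core_threads = Kokkos::OpenMP::thread_pool_size(2); // finest grouping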
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
template<>
struct VerifyExecutionCanAccessMemorySpace
< Kokkos::OpenMP::memory_space
, Kokkos::OpenMP::scratch_memory_space
>
{
enum { value = true };
inline static void verify( void ) { }
inline static void verify( const void * ) { }
};
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
#include <OpenMP/Kokkos_OpenMPexec.hpp>
#include <OpenMP/Kokkos_OpenMP_Parallel.hpp>
/*--------------------------------------------------------------------------*/
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP ) */
#endif /* #ifndef KOKKOS_OPENMP_HPP */
diff --git a/lib/kokkos/core/src/Kokkos_Parallel.hpp b/lib/kokkos/core/src/Kokkos_Parallel.hpp
index 93bffcc78..696ff4042 100644
--- a/lib/kokkos/core/src/Kokkos_Parallel.hpp
+++ b/lib/kokkos/core/src/Kokkos_Parallel.hpp
@@ -1,908 +1,948 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_Parallel.hpp
/// \brief Declaration of parallel operators
#ifndef KOKKOS_PARALLEL_HPP
#define KOKKOS_PARALLEL_HPP
#include <cstddef>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_View.hpp>
#include <Kokkos_ExecPolicy.hpp>
#ifdef KOKKOSP_ENABLE_PROFILING
#include <impl/Kokkos_Profiling_Interface.hpp>
#include <typeinfo>
#endif
#include <impl/Kokkos_AllocationTracker.hpp>
#include <impl/Kokkos_Tags.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#ifdef KOKKOS_HAVE_DEBUG
#include<iostream>
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief Given a Functor and an Execution Policy, query the execution space.
 *
 * If the Policy defines an execution_space, use that;
 * else if the Functor defines an execution_space, use that;
 * else if the Functor defines a device_type, use that (backward compatibility);
 * else use the default.
*/
template< class Functor
, class Policy
, class EnableFunctor = void
, class EnablePolicy = void
>
struct FunctorPolicyExecutionSpace {
typedef Kokkos::DefaultExecutionSpace execution_space ;
};
template< class Functor , class Policy >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, typename enable_if_type< typename Functor::device_type >::type
, typename enable_if_type< typename Policy ::execution_space >::type
>
{
typedef typename Policy ::execution_space execution_space ;
};
template< class Functor , class Policy >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, typename enable_if_type< typename Functor::execution_space >::type
, typename enable_if_type< typename Policy ::execution_space >::type
>
{
typedef typename Policy ::execution_space execution_space ;
};
template< class Functor , class Policy , class EnableFunctor >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, EnableFunctor
, typename enable_if_type< typename Policy::execution_space >::type
>
{
typedef typename Policy ::execution_space execution_space ;
};
template< class Functor , class Policy , class EnablePolicy >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, typename enable_if_type< typename Functor::device_type >::type
, EnablePolicy
>
{
typedef typename Functor::device_type execution_space ;
};
template< class Functor , class Policy , class EnablePolicy >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, typename enable_if_type< typename Functor::execution_space >::type
, EnablePolicy
>
{
typedef typename Functor::execution_space execution_space ;
};
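// Illustrative sketch (functor name hypothetical): a functor that defines an
// execution_space, queried with a void policy, resolves to the functor's space.
//
//   struct MyFunctor {
//     typedef Kokkos::DefaultHostExecutionSpace execution_space ;
//     void operator()( int ) const {}
//   };
//   typedef FunctorPolicyExecutionSpace< MyFunctor , void >::execution_space
//     resolved ; // == Kokkos::DefaultHostExecutionSpace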
//----------------------------------------------------------------------------
/// \class ParallelFor
/// \brief Implementation of the ParallelFor operator that has a
/// partial specialization for the device.
///
/// This is an implementation detail of parallel_for. Users should
/// skip this and go directly to the nonmember function parallel_for.
template< class FunctorType , class ExecPolicy > class ParallelFor ;
/// \class ParallelReduce
/// \brief Implementation detail of parallel_reduce.
///
/// This is an implementation detail of parallel_reduce. Users should
/// skip this and go directly to the nonmember function parallel_reduce.
template< class FunctorType , class ExecPolicy > class ParallelReduce ;
/// \class ParallelScan
/// \brief Implementation detail of parallel_scan.
///
/// This is an implementation detail of parallel_scan. Users should
/// skip this and go directly to the documentation of the nonmember
/// template function Kokkos::parallel_scan.
template< class FunctorType , class ExecPolicy > class ParallelScan ;
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief Execute \c functor in parallel according to the execution \c policy.
*
* A "functor" is a class containing the function to execute in parallel,
* data needed for that execution, and an optional \c execution_space
* typedef. Here is an example functor for parallel_for:
*
* \code
* class FunctorType {
* public:
* typedef ... execution_space ;
* void operator() ( WorkType iwork ) const ;
* };
* \endcode
*
 * In the above example, \c WorkType is any integer type for which a
 * valid conversion from \c size_t to \c WorkType exists. The functor's
 * <tt>operator()</tt> method defines the operation to parallelize
 * over the range of integer indices <tt>iwork=[0,work_count-1]</tt>;
 * each invocation corresponds to a single iteration \c iwork of a
 * \c for loop. If \c execution_space is not defined,
 * \c DefaultExecutionSpace will be used.
*/
template< class ExecPolicy , class FunctorType >
inline
void parallel_for( const ExecPolicy & policy
, const FunctorType & functor
, const std::string& str = ""
, typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
)
{
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelFor< FunctorType , ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelFor< FunctorType , ExecPolicy > closure( functor , policy );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+ closure.execute();
+
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelFor(kpID);
}
#endif
}
template< class FunctorType >
inline
void parallel_for( const size_t work_count
, const FunctorType & functor
, const std::string& str = ""
)
{
typedef typename
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef RangePolicy< execution_space > policy ;
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelFor< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelFor< FunctorType , policy > closure( functor , policy(0,work_count) );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelFor(kpID);
}
#endif
}
template< class ExecPolicy , class FunctorType >
inline
void parallel_for( const std::string & str
, const ExecPolicy & policy
, const FunctorType & functor )
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
#endif
parallel_for(policy,functor,str);
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl;
#endif
(void) str;
}
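// Illustrative usage sketch for the overloads above (functor and variable
// names are hypothetical):
//
//   struct AXPY {
//     double alpha ;
//     Kokkos::View< const double * > x ;
//     Kokkos::View< double * > y ;
//     KOKKOS_INLINE_FUNCTION
//     void operator()( const int i ) const { y(i) = alpha * x(i) + y(i); }
//   };
//
//   Kokkos::parallel_for( n , AXPY{ alpha , x , y } );  // integral range
//   Kokkos::parallel_for( Kokkos::RangePolicy<>( 0 , n )
//                       , AXPY{ alpha , x , y } , "axpy" );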
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** \brief Parallel reduction
*
* Example of a parallel_reduce functor for a POD (plain old data) value type:
* \code
* class FunctorType { // For POD value type
* public:
* typedef ... execution_space ;
* typedef <podType> value_type ;
* void operator()( <intType> iwork , <podType> & update ) const ;
* void init( <podType> & update ) const ;
* void join( volatile <podType> & update ,
* volatile const <podType> & input ) const ;
*
* typedef true_type has_final ;
* void final( <podType> & update ) const ;
* };
* \endcode
*
* Example of a parallel_reduce functor for an array of POD (plain old data) values:
* \code
* class FunctorType { // For array of POD value
* public:
* typedef ... execution_space ;
* typedef <podType> value_type[] ;
* void operator()( <intType> , <podType> update[] ) const ;
* void init( <podType> update[] ) const ;
* void join( volatile <podType> update[] ,
* volatile const <podType> input[] ) const ;
*
* typedef true_type has_final ;
* void final( <podType> update[] ) const ;
* };
* \endcode
*/
template< class ExecPolicy , class FunctorType >
inline
void parallel_reduce( const ExecPolicy & policy
, const FunctorType & functor
, const std::string& str = ""
, typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
)
{
// typedef typename
// Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space
// execution_space ;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ;
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
, typename ValueTraits::value_type
, typename ValueTraits::pointer_type
>::type value_type ;
Kokkos::View< value_type
, HostSpace
, Kokkos::MemoryUnmanaged
>
result_view ;
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelReduce< FunctorType , ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy , result_view );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelReduce< FunctorType , ExecPolicy > closure( functor , policy , result_view );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// integral range policy
template< class FunctorType >
inline
void parallel_reduce( const size_t work_count
, const FunctorType & functor
, const std::string& str = ""
)
{
typedef typename
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef RangePolicy< execution_space > policy ;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
, typename ValueTraits::value_type
, typename ValueTraits::pointer_type
>::type value_type ;
Kokkos::View< value_type
, HostSpace
, Kokkos::MemoryUnmanaged
>
result_view ;
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelReduce< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) , result_view );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelReduce< FunctorType , policy > closure( functor , policy(0,work_count) , result_view );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// general policy and view output
template< class ExecPolicy , class FunctorType , class ViewType >
inline
void parallel_reduce( const ExecPolicy & policy
, const FunctorType & functor
, const ViewType & result_view
, const std::string& str = ""
, typename Impl::enable_if<
- ( Impl::is_view<ViewType>::value && ! Impl::is_integral< ExecPolicy >::value
+ ( Kokkos::is_view<ViewType>::value && ! Impl::is_integral< ExecPolicy >::value
#ifdef KOKKOS_HAVE_CUDA
&& ! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value
#endif
)>::type * = 0 )
{
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelReduce< FunctorType, ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy , Impl::CopyWithoutTracking::apply(result_view) );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelReduce< FunctorType, ExecPolicy > closure( functor , policy , result_view );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// general policy and pod or array of pod output
template< class ExecPolicy , class FunctorType >
void parallel_reduce( const ExecPolicy & policy
, const FunctorType & functor
#ifdef KOKKOS_HAVE_CUDA
, typename Impl::enable_if<
( ! Impl::is_integral< ExecPolicy >::value &&
! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value )
, typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type>::type result_ref
, const std::string& str = ""
, typename Impl::enable_if<! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value >::type* = 0
)
#else
, typename Impl::enable_if<
( ! Impl::is_integral< ExecPolicy >::value)
, typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type
>::type result_ref
, const std::string& str = ""
)
#endif
{
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueOps< FunctorType , typename ExecPolicy::work_tag > ValueOps ;
// Wrap the result output request in a view to inform the implementation
// of the type and memory space.
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
, typename ValueTraits::value_type
, typename ValueTraits::pointer_type
>::type value_type ;
Kokkos::View< value_type
, HostSpace
, Kokkos::MemoryUnmanaged
>
result_view( ValueOps::pointer( result_ref )
, ValueTraits::value_count( functor )
);
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelReduce< FunctorType, ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy , Impl::CopyWithoutTracking::apply(result_view) );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelReduce< FunctorType, ExecPolicy > closure( functor , policy , result_view );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// integral range policy and view output
template< class FunctorType , class ViewType >
inline
void parallel_reduce( const size_t work_count
, const FunctorType & functor
, const ViewType & result_view
, const std::string& str = ""
- , typename Impl::enable_if<( Impl::is_view<ViewType>::value
+ , typename Impl::enable_if<( Kokkos::is_view<ViewType>::value
#ifdef KOKKOS_HAVE_CUDA
&& ! Impl::is_same<
typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space,
Kokkos::Cuda>::value
#endif
)>::type * = 0 )
{
typedef typename
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef RangePolicy< execution_space > ExecPolicy ;
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelReduce< FunctorType, ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , ExecPolicy(0,work_count) , Impl::CopyWithoutTracking::apply(result_view) );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelReduce< FunctorType, ExecPolicy > closure( functor , ExecPolicy(0,work_count) , result_view );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// integral range policy and pod or array of pod output
template< class FunctorType >
inline
void parallel_reduce( const size_t work_count
, const FunctorType & functor
, typename Kokkos::Impl::FunctorValueTraits<
typename Impl::if_c<Impl::is_execution_policy<FunctorType>::value ||
Impl::is_integral<FunctorType>::value,
void,FunctorType>::type
, void >::reference_type result
, const std::string& str = ""
, typename Impl::enable_if< true
#ifdef KOKKOS_HAVE_CUDA
&& ! Impl::is_same<
typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space,
Kokkos::Cuda>::value
#endif
>::type * = 0 )
{
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
typedef Kokkos::Impl::FunctorValueOps< FunctorType , void > ValueOps ;
typedef typename
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef Kokkos::RangePolicy< execution_space > policy ;
// Wrap the result output request in a view to inform the implementation
// of the type and memory space.
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
, typename ValueTraits::value_type
, typename ValueTraits::pointer_type
>::type value_type ;
Kokkos::View< value_type
, HostSpace
, Kokkos::MemoryUnmanaged
>
result_view( ValueOps::pointer( result )
, ValueTraits::value_count( functor )
);
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelReduce< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) , Impl::CopyWithoutTracking::apply(result_view) );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelReduce< FunctorType , policy > closure( functor , policy(0,work_count) , result_view );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
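// Illustrative usage sketch for the overload above (names hypothetical):
// a plus-reduction over n entries into a POD result passed by reference.
//
//   struct Sum {
//     Kokkos::View< const double * > x ;
//     typedef double value_type ;
//     KOKKOS_INLINE_FUNCTION
//     void operator()( const int i , double & update ) const { update += x(i); }
//   };
//
//   double total = 0 ;
//   Kokkos::parallel_reduce( n , Sum{ x } , total );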
#ifndef KOKKOS_HAVE_CUDA
template< class ExecPolicy , class FunctorType , class ResultType >
inline
void parallel_reduce( const std::string & str
, const ExecPolicy & policy
, const FunctorType & functor
, ResultType * result)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
#endif
parallel_reduce(policy,functor,result,str);
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
#endif
(void) str;
}
template< class ExecPolicy , class FunctorType , class ResultType >
inline
void parallel_reduce( const std::string & str
, const ExecPolicy & policy
, const FunctorType & functor
, ResultType & result)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
#endif
parallel_reduce(policy,functor,result,str);
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
#endif
(void) str;
}
template< class ExecPolicy , class FunctorType >
inline
void parallel_reduce( const std::string & str
, const ExecPolicy & policy
, const FunctorType & functor)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
#endif
parallel_reduce(policy,functor,str);
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
#endif
(void) str;
}
#endif
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/// \fn parallel_scan
/// \tparam ExecutionPolicy The execution policy type.
/// \tparam FunctorType The scan functor type.
///
/// \param policy [in] The execution policy.
/// \param functor [in] The scan functor.
///
/// This function implements a parallel scan pattern. The scan can
/// be either inclusive or exclusive, depending on how you implement
/// the scan functor.
///
/// A scan functor looks almost exactly like a reduce functor, except
/// that its operator() takes a third \c bool argument, \c final_pass,
/// which indicates whether this is the last pass of the scan
/// operation. We will show below how to use the \c final_pass
/// argument to control whether the scan is inclusive or exclusive.
///
/// Here is the minimum required interface of a scan functor for a POD
/// (plain old data) value type \c PodType. That is, the result is a
/// View of zero or more PodType. It is also possible for the result
/// to be an array of (same-sized) arrays of PodType, but we do not
/// show the required interface for that here.
/// \code
/// template< class ExecPolicy , class FunctorType >
/// class ScanFunctor {
/// public:
/// // The Kokkos device type
/// typedef ... execution_space;
/// // Type of an entry of the array containing the result;
/// // also the type of each of the entries combined using
/// // operator() or join().
/// typedef PodType value_type;
///
/// void operator () (const ExecPolicy::member_type & i, value_type& update, const bool final_pass) const;
/// void init (value_type& update) const;
/// void join (volatile value_type& update, volatile const value_type& input) const;
/// };
/// \endcode
///
/// Here is an example of a functor which computes an inclusive plus-scan
/// of an array of \c int, in place. If given an array [1, 2, 3, 4], this
/// scan will overwrite that array with [1, 3, 6, 10].
///
/// \code
/// template<class SpaceType>
/// class InclScanFunctor {
/// public:
/// typedef SpaceType execution_space;
/// typedef int value_type;
/// typedef typename SpaceType::size_type size_type;
///
/// InclScanFunctor( Kokkos::View<value_type*, execution_space> x
/// , Kokkos::View<value_type*, execution_space> y ) : m_x(x), m_y(y) {}
///
/// void operator () (const size_type i, value_type& update, const bool final_pass) const {
/// update += m_x(i);
/// if (final_pass) {
/// m_y(i) = update;
/// }
/// }
/// void init (value_type& update) const {
/// update = 0;
/// }
/// void join (volatile value_type& update, volatile const value_type& input) const {
/// update += input;
/// }
///
/// private:
/// Kokkos::View<value_type*, execution_space> m_x;
/// Kokkos::View<value_type*, execution_space> m_y;
/// };
/// \endcode
///
/// Here is an example of a functor which computes an <i>exclusive</i>
/// scan of an array of \c int, in place. In operator(), note that
/// the final_pass test and the update have switched places, and note
/// the use of a temporary. If given an array [1, 2, 3, 4], this scan
/// will overwrite that array with [0, 1, 3, 6].
///
/// \code
/// template<class SpaceType>
/// class ExclScanFunctor {
/// public:
/// typedef SpaceType execution_space;
/// typedef int value_type;
/// typedef typename SpaceType::size_type size_type;
///
/// ExclScanFunctor (Kokkos::View<value_type*, execution_space> x) : x_ (x) {}
///
/// void operator () (const size_type i, value_type& update, const bool final_pass) const {
/// const value_type x_i = x_(i);
/// if (final_pass) {
/// x_(i) = update;
/// }
/// update += x_i;
/// }
/// void init (value_type& update) const {
/// update = 0;
/// }
/// void join (volatile value_type& update, volatile const value_type& input) const {
/// update += input;
/// }
///
/// private:
/// Kokkos::View<value_type*, execution_space> x_;
/// };
/// \endcode
///
/// Here is an example of a functor which builds on the above
/// exclusive scan example, to compute an offsets array from a
/// population count array, in place. We assume that the pop count
/// array has an extra entry at the end to store the final count. If
/// given an array [1, 2, 3, 4, 0], this scan will overwrite that
/// array with [0, 1, 3, 6, 10].
///
/// \code
/// template<class SpaceType>
/// class OffsetScanFunctor {
/// public:
/// typedef SpaceType execution_space;
/// typedef int value_type;
/// typedef typename SpaceType::size_type size_type;
///
/// // last_index_ is the last valid index (zero-based) of x.
/// // If x has length zero, then last_index_ won't be used anyway.
/// OffsetScanFunctor( Kokkos::View<value_type*, execution_space> x
/// , Kokkos::View<value_type*, execution_space> y )
/// : m_x(x), m_y(y), last_index_ (x.dimension_0 () == 0 ? 0 : x.dimension_0 () - 1)
/// {}
///
/// void operator () (const size_type i, int& update, const bool final_pass) const {
/// if (final_pass) {
/// m_y(i) = update;
/// }
/// update += m_x(i);
/// // The last entry of m_y gets the final sum.
/// if (final_pass && i == last_index_) {
/// m_y(i+1) = update;
/// }
/// }
/// void init (value_type& update) const {
/// update = 0;
/// }
/// void join (volatile value_type& update, volatile const value_type& input) const {
/// update += input;
/// }
///
/// private:
/// Kokkos::View<value_type*, execution_space> m_x;
/// Kokkos::View<value_type*, execution_space> m_y;
/// const size_type last_index_;
/// };
/// \endcode
///
template< class ExecutionPolicy , class FunctorType >
inline
void parallel_scan( const ExecutionPolicy & policy
, const FunctorType & functor
, const std::string& str = ""
, typename Impl::enable_if< ! Impl::is_integral< ExecutionPolicy >::value >::type * = 0
)
{
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelScan("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- Impl::ParallelScan< FunctorType , ExecutionPolicy > scan( Impl::CopyWithoutTracking::apply(functor) , policy );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelScan< FunctorType , ExecutionPolicy > closure( functor , policy );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelScan(kpID);
}
#endif
}
template< class FunctorType >
inline
void parallel_scan( const size_t work_count
, const FunctorType & functor
, const std::string& str = "" )
{
typedef typename
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef Kokkos::RangePolicy< execution_space > policy ;
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelScan("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
- (void) Impl::ParallelScan< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) );
+ Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
+ Impl::ParallelScan< FunctorType , policy > closure( functor , policy(0,work_count) );
+ Kokkos::Impl::shared_allocation_tracking_release_and_enable();
+
+ closure.execute();
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelScan(kpID);
}
#endif
}
template< class ExecutionPolicy , class FunctorType >
inline
void parallel_scan( const std::string& str
, const ExecutionPolicy & policy
, const FunctorType & functor)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
#endif
parallel_scan(policy,functor,str);
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
#endif
(void) str;
}
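// Illustrative dispatch sketch for the scan functors documented above
// (view names hypothetical):
//
//   InclScanFunctor< Kokkos::DefaultExecutionSpace > f( x , y );
//   Kokkos::parallel_scan( n , f );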
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Enable = void >
struct FunctorTeamShmemSize
{
static inline size_t value( const FunctorType & , int ) { return 0 ; }
};
template< class FunctorType >
struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::team_shmem_size ) >::type >
{
static inline size_t value( const FunctorType & f , int team_size ) { return f.team_shmem_size( team_size ) ; }
};
template< class FunctorType >
struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::shmem_size ) >::type >
{
static inline size_t value( const FunctorType & f , int team_size ) { return f.shmem_size( team_size ) ; }
};
} // namespace Impl
} // namespace Kokkos
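// Illustrative sketch (functor name hypothetical): a functor opts into
// per-team shared memory by defining team_shmem_size() or shmem_size();
// otherwise the primary template above reports zero bytes.
//
//   struct MyTeamFunctor {
//     size_t team_shmem_size( int team_size ) const
//       { return 1024 * team_size ; }
//   };
//   // Kokkos::Impl::FunctorTeamShmemSize< MyTeamFunctor >::value( f , t )
//   // returns 1024 * t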
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* KOKKOS_PARALLEL_HPP */
diff --git a/lib/kokkos/core/src/Kokkos_Serial.hpp b/lib/kokkos/core/src/Kokkos_Serial.hpp
index 5773a18b3..8be973d44 100644
--- a/lib/kokkos/core/src/Kokkos_Serial.hpp
+++ b/lib/kokkos/core/src/Kokkos_Serial.hpp
@@ -1,892 +1,1003 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_Serial.hpp
/// \brief Declaration and definition of Kokkos::Serial device.
#ifndef KOKKOS_SERIAL_HPP
#define KOKKOS_SERIAL_HPP
#include <cstddef>
#include <iosfwd>
#include <Kokkos_Parallel.hpp>
#include <Kokkos_Layout.hpp>
#include <Kokkos_HostSpace.hpp>
#include <Kokkos_ScratchSpace.hpp>
#include <Kokkos_MemoryTraits.hpp>
#include <impl/Kokkos_Tags.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
namespace Kokkos {
/// \class Serial
/// \brief Kokkos device for non-parallel execution
///
/// A "device" represents a parallel execution model. It tells Kokkos
/// how to parallelize the execution of kernels in a parallel_for or
/// parallel_reduce. For example, the Threads device uses Pthreads or
/// C++11 threads on a CPU, the OpenMP device uses the OpenMP language
/// extensions, and the Cuda device uses NVIDIA's CUDA programming
/// model. The Serial device executes "parallel" kernels
/// sequentially. This is useful if you really do not want to use
/// threads, or if you want to explore different combinations of MPI
/// and shared-memory parallel programming models.
class Serial {
public:
//! \name Type declarations that all Kokkos devices must provide.
//@{
//! Tag this class as an execution space:
typedef Serial execution_space ;
//! The size_type typedef best suited for this device.
typedef HostSpace::size_type size_type ;
//! This device's preferred memory space.
typedef HostSpace memory_space ;
//! This execution space preferred device_type
typedef Kokkos::Device<execution_space,memory_space> device_type;
//! This device's preferred array layout.
typedef LayoutRight array_layout ;
/// \brief Scratch memory space
typedef ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ;
//@}
/// \brief True if and only if this method is being called in a
/// thread-parallel function.
///
/// For the Serial device, this method <i>always</i> returns false,
/// because parallel_for or parallel_reduce with the Serial device
/// always execute sequentially.
inline static int in_parallel() { return false ; }
/** \brief Set the device in a "sleep" state.
*
* This function sets the device in a "sleep" state in which it is
* not ready for work. This may consume less resources than if the
* device were in an "awake" state, but it may also take time to
* bring the device from a sleep state to be ready for work.
*
* \return True if the device is in the "sleep" state, else false if
* the device is actively working and could not enter the "sleep"
* state.
*/
static bool sleep();
/// \brief Wake the device from the 'sleep' state so it is ready for work.
///
/// \return True if the device is in the "ready" state, else "false"
/// if the device is actively working (which also means that it's
/// awake).
static bool wake();
/// \brief Wait until all dispatched functors complete.
///
/// The parallel_for or parallel_reduce dispatch of a functor may
/// return asynchronously, before the functor completes. This
/// method does not return until all dispatched functors on this
/// device have completed.
static void fence() {}
static void initialize( unsigned threads_count = 1 ,
unsigned use_numa_count = 0 ,
unsigned use_cores_per_numa = 0 ,
bool allow_asynchronous_threadpool = false) {
(void) threads_count;
(void) use_numa_count;
(void) use_cores_per_numa;
(void) allow_asynchronous_threadpool;
// Init the array of locks used for arbitrarily sized atomics
Impl::init_lock_array_host_space();
}
static int is_initialized() { return 1 ; }
//! Free any resources being consumed by the device.
static void finalize() {}
//! Print configuration information to the given output stream.
- static void print_configuration( std::ostream & , const bool detail = false ) {}
+ static void print_configuration( std::ostream & , const bool /* detail */ = false ) {}
//--------------------------------------------------------------------------
inline static int thread_pool_size( int = 0 ) { return 1 ; }
KOKKOS_INLINE_FUNCTION static int thread_pool_rank() { return 0 ; }
//--------------------------------------------------------------------------
KOKKOS_INLINE_FUNCTION static unsigned hardware_thread_id() { return thread_pool_rank(); }
inline static unsigned max_hardware_threads() { return thread_pool_size(0); }
//--------------------------------------------------------------------------
static void * scratch_memory_resize( unsigned reduce_size , unsigned shared_size );
//--------------------------------------------------------------------------
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
template<>
struct VerifyExecutionCanAccessMemorySpace
< Kokkos::Serial::memory_space
, Kokkos::Serial::scratch_memory_space
>
{
enum { value = true };
inline static void verify( void ) { }
inline static void verify( const void * ) { }
};
namespace SerialImpl {
struct Sentinel {
void * m_scratch ;
unsigned m_reduce_end ;
unsigned m_shared_end ;
Sentinel();
~Sentinel();
static Sentinel & singleton();
};
inline
unsigned align( unsigned n );
}
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
class SerialTeamMember {
private:
typedef Kokkos::ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ;
const scratch_memory_space m_space ;
const int m_league_rank ;
const int m_league_size ;
SerialTeamMember & operator = ( const SerialTeamMember & );
public:
KOKKOS_INLINE_FUNCTION
const scratch_memory_space & team_shmem() const { return m_space ; }
KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
KOKKOS_INLINE_FUNCTION int team_rank() const { return 0 ; }
KOKKOS_INLINE_FUNCTION int team_size() const { return 1 ; }
KOKKOS_INLINE_FUNCTION void team_barrier() const {}
template<class ValueType>
KOKKOS_INLINE_FUNCTION
void team_broadcast(const ValueType& , const int& ) const {}
template< class ValueType, class JoinOp >
KOKKOS_INLINE_FUNCTION
ValueType team_reduce( const ValueType & value , const JoinOp & ) const
{
return value ;
}
/** \brief Intra-team exclusive prefix sum with team_rank() ordering
* with intra-team non-deterministic ordering accumulation.
*
* The global inter-team accumulation value will, at the end of the
* league's parallel execution, be the scan's total.
* Parallel execution ordering of the league's teams is non-deterministic.
* As such the base value for each team's scan operation is similarly
* non-deterministic.
*/
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const
{
const Type tmp = global_accum ? *global_accum : Type(0) ;
if ( global_accum ) { *global_accum += value ; }
return tmp ;
}
/** \brief Intra-team exclusive prefix sum with team_rank() ordering.
*
* The highest rank thread can compute the reduction total as
* reduction_total = dev.team_scan( value ) + value ;
*/
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & ) const
{ return Type(0); }
//----------------------------------------
// Execution space specific:
SerialTeamMember( int arg_league_rank
, int arg_league_size
, int arg_shared_size
);
};
} // namespace Impl
/*
* < Kokkos::Serial , WorkArgTag >
* < WorkArgTag , Impl::enable_if< Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value >::type >
*
*/
template< class Arg0 , class Arg1 >
class TeamPolicy< Arg0 , Arg1 , Kokkos::Serial >
{
private:
const int m_league_size ;
+ const int m_scratch_size ;
public:
//! Tag this class as a kokkos execution policy
typedef TeamPolicy execution_policy ;
//! Execution space of this execution policy:
typedef Kokkos::Serial execution_space ;
typedef typename
Impl::if_c< ! Impl::is_same< Kokkos::Serial , Arg0 >::value , Arg0 , Arg1 >::type
work_tag ;
//----------------------------------------
template< class FunctorType >
static
int team_size_max( const FunctorType & ) { return 1 ; }
template< class FunctorType >
static
int team_size_recommended( const FunctorType & ) { return 1 ; }
template< class FunctorType >
static
int team_size_recommended( const FunctorType & , const int& ) { return 1 ; }
//----------------------------------------
inline int team_size() const { return 1 ; }
inline int league_size() const { return m_league_size ; }
+ inline size_t scratch_size() const { return m_scratch_size ; }
/** \brief Specify league size, request team size */
- TeamPolicy( execution_space & , int league_size_request , int /* team_size_request */ , int vector_length_request = 1 )
+ TeamPolicy( execution_space &
+ , int league_size_request
+ , int /* team_size_request */
+ , int /* vector_length_request */ = 1 )
: m_league_size( league_size_request )
- { (void) vector_length_request; }
+ , m_scratch_size ( 0 )
+ {}
+
+ TeamPolicy( execution_space &
+ , int league_size_request
+ , const Kokkos::AUTO_t & /* team_size_request */
+ , int /* vector_length_request */ = 1 )
+ : m_league_size( league_size_request )
+ , m_scratch_size ( 0 )
+ {}
- TeamPolicy( int league_size_request , int /* team_size_request */ , int vector_length_request = 1 )
+ TeamPolicy( int league_size_request
+ , int /* team_size_request */
+ , int /* vector_length_request */ = 1 )
: m_league_size( league_size_request )
- { (void) vector_length_request; }
+ , m_scratch_size ( 0 )
+ {}
+
+ TeamPolicy( int league_size_request
+ , const Kokkos::AUTO_t & /* team_size_request */
+ , int /* vector_length_request */ = 1 )
+ : m_league_size( league_size_request )
+ , m_scratch_size ( 0 )
+ {}
+
+ template<class MemorySpace>
+ TeamPolicy( int league_size_request
+ , int /* team_size_request */
+ , const Experimental::TeamScratchRequest<MemorySpace> & scratch_request )
+ : m_league_size(league_size_request)
+ , m_scratch_size(scratch_request.total(1))
+ {}
+
+
+ template<class MemorySpace>
+ TeamPolicy( int league_size_request
+ , const Kokkos::AUTO_t & /* team_size_request */
+ , const Experimental::TeamScratchRequest<MemorySpace> & scratch_request )
+ : m_league_size(league_size_request)
+ , m_scratch_size(scratch_request.total(1))
+ {}
typedef Impl::SerialTeamMember member_type ;
};
} /* namespace Kokkos */
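// Illustrative sketch (functor name hypothetical): on Serial every team has
// exactly one thread, so a team dispatch reduces to a sequential loop over
// the league.
//
//   Kokkos::TeamPolicy< Kokkos::Serial > policy( league_size , 1 );
//   Kokkos::parallel_for( policy , my_team_functor );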
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
+/* Parallel patterns for Kokkos::Serial with RangePolicy */
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelFor< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > >
+class ParallelFor< FunctorType
+ , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial >
+ >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > Policy ;
-public:
- // work tag is void
- template< class PType >
- inline
- ParallelFor( typename Impl::enable_if<
- ( Impl::is_same< PType , Policy >::value &&
- Impl::is_same< typename PType::work_tag , void >::value
- ), const FunctorType & >::type functor
- , const PType & policy )
+ const FunctorType m_functor ;
+ const Policy m_policy ;
+
+ template< class TagType >
+ KOKKOS_INLINE_FUNCTION
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec() const
{
- const typename PType::member_type e = policy.end();
- for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
- functor( i );
+ const typename Policy::member_type e = m_policy.end();
+ for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
+ m_functor( i );
}
}
- // work tag is non-void
- template< class PType >
- inline
- ParallelFor( typename Impl::enable_if<
- ( Impl::is_same< PType , Policy >::value &&
- ! Impl::is_same< typename PType::work_tag , void >::value
- ), const FunctorType & >::type functor
- , const PType & policy )
+ template< class TagType >
+ KOKKOS_INLINE_FUNCTION
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec() const
{
- const typename PType::member_type e = policy.end();
- for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
- functor( typename PType::work_tag() , i );
+ const TagType t{} ;
+ const typename Policy::member_type e = m_policy.end();
+ for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
+ m_functor( t , i );
}
}
+
+public:
+
+ inline
+ void execute() const
+ { this-> template exec< typename Policy::work_tag >(); }
+
+ inline
+ ParallelFor( const FunctorType & arg_functor
+ , const Policy & arg_policy )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ {}
};
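// Illustrative sketch of the tagged dispatch selected by exec< work_tag >()
// above (tag and functor names hypothetical):
//
//   struct Stage1 {};
//   struct F {
//     KOKKOS_INLINE_FUNCTION
//     void operator()( Stage1 , const int i ) const { /* ... */ }
//   };
//   Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Serial , Stage1 >( 0 , n )
//                       , F() );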
+/*--------------------------------------------------------------------------*/
+
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelReduce< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > >
+class ParallelReduce< FunctorType
+ , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial >
+ >
{
-public:
+private:
+
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > Policy ;
typedef typename Policy::work_tag WorkTag ;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
- // Work tag is void
- template< class ViewType , class PType >
- ParallelReduce( typename Impl::enable_if<
- ( Impl::is_view< ViewType >::value &&
- Impl::is_same< typename ViewType::memory_space , HostSpace >::value &&
- Impl::is_same< PType , Policy >::value &&
- Impl::is_same< typename PType::work_tag , void >::value
- ), const FunctorType & >::type functor
- , const PType & policy
- , const ViewType & result
- )
- {
- pointer_type result_ptr = result.ptr_on_device();
+ const FunctorType m_functor ;
+ const Policy m_policy ;
+ const pointer_type m_result_ptr ;
- if ( ! result_ptr ) {
- result_ptr = (pointer_type)
- Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
- }
- reference_type update = ValueInit::init( functor , result_ptr );
+ template< class TagType >
+ inline
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec( pointer_type ptr ) const
+ {
+ reference_type update = ValueInit::init( m_functor , ptr );
- const typename PType::member_type e = policy.end();
- for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
- functor( i , update );
+ const typename Policy::member_type e = m_policy.end();
+ for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
+ m_functor( i , update );
}
- Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , result_ptr );
+ Kokkos::Impl::FunctorFinal< FunctorType , TagType >::
+ final( m_functor , ptr );
}
- // Work tag is non-void
- template< class ViewType , class PType >
- ParallelReduce( typename Impl::enable_if<
- ( Impl::is_view< ViewType >::value &&
- Impl::is_same< typename ViewType::memory_space , HostSpace >::value &&
- Impl::is_same< PType , Policy >::value &&
- ! Impl::is_same< typename PType::work_tag , void >::value
- ), const FunctorType & >::type functor
- , const PType & policy
- , const ViewType & result
- )
+ template< class TagType >
+ inline
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec( pointer_type ptr ) const
{
- pointer_type result_ptr = result.ptr_on_device();
+ const TagType t{} ;
+ reference_type update = ValueInit::init( m_functor , ptr );
- if ( ! result_ptr ) {
- result_ptr = (pointer_type)
- Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
+ const typename Policy::member_type e = m_policy.end();
+ for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
+ m_functor( t , i , update );
}
- typename ValueTraits::reference_type update = ValueInit::init( functor , result_ptr );
+ Kokkos::Impl::FunctorFinal< FunctorType , TagType >::
+ final( m_functor , ptr );
+ }
- const typename PType::member_type e = policy.end();
- for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
- functor( typename PType::work_tag() , i , update );
- }
+public:
- Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , result_ptr );
+ inline
+ void execute() const
+ {
+ pointer_type ptr = (pointer_type) Kokkos::Serial::scratch_memory_resize
+ ( ValueTraits::value_size( m_functor ) , 0 );
+
+ this-> template exec< WorkTag >( m_result_ptr ? m_result_ptr : ptr );
+ }
+
+ template< class ViewType >
+ ParallelReduce( const FunctorType & arg_functor
+ , const Policy & arg_policy
+ , const ViewType & arg_result )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ , m_result_ptr( arg_result.ptr_on_device() )
+ {
+ static_assert( Kokkos::is_view< ViewType >::value
+ , "Reduction result on Kokkos::Serial must be a Kokkos::View" );
+
+ static_assert( std::is_same< typename ViewType::memory_space
+ , Kokkos::HostSpace >::value
+ , "Reduction result on Kokkos::Serial must be a Kokkos::View in HostSpace" );
}
};
+/*--------------------------------------------------------------------------*/
+
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelScan< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > >
+class ParallelScan< FunctorType
+ , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial >
+ >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > Policy ;
-
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
-
-public:
+ typedef typename Policy::work_tag WorkTag ;
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
- // work tag is void
- template< class PType >
+ const FunctorType m_functor ;
+ const Policy m_policy ;
+
+ template< class TagType >
inline
- ParallelScan( typename Impl::enable_if<
- ( Impl::is_same< PType , Policy >::value &&
- Impl::is_same< typename PType::work_tag , void >::value
- ), const FunctorType & >::type functor
- , const PType & policy )
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec( pointer_type ptr ) const
{
- pointer_type result_ptr = (pointer_type)
- Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
-
- reference_type update = ValueInit::init( functor , result_ptr );
+ reference_type update = ValueInit::init( m_functor , ptr );
- const typename PType::member_type e = policy.end();
- for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
- functor( i , update , true );
+ const typename Policy::member_type e = m_policy.end();
+ for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
+ m_functor( i , update , true );
}
-
- Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( functor , result_ptr );
}
- // work tag is non-void
- template< class PType >
+ template< class TagType >
inline
- ParallelScan( typename Impl::enable_if<
- ( Impl::is_same< PType , Policy >::value &&
- ! Impl::is_same< typename PType::work_tag , void >::value
- ), const FunctorType & >::type functor
- , const PType & policy )
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec( pointer_type ptr ) const
{
- pointer_type result_ptr = (pointer_type)
- Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
+ const TagType t{} ;
+ reference_type update = ValueInit::init( m_functor , ptr );
- reference_type update = ValueInit::init( functor , result_ptr );
-
- const typename PType::member_type e = policy.end();
- for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
- functor( typename PType::work_tag() , i , update , true );
+ const typename Policy::member_type e = m_policy.end();
+ for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
+ m_functor( t , i , update , true );
}
+ }
+
+public:
- Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( functor , result_ptr );
+ inline
+ void execute() const
+ {
+ pointer_type ptr = (pointer_type)
+ Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( m_functor ) , 0 );
+ this-> template exec< WorkTag >( ptr );
}
+
+ inline
+ ParallelScan( const FunctorType & arg_functor
+ , const Policy & arg_policy
+ )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ {}
};
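/* Usage sketch (illustrative only): a scan functor for this specialization
* follows the (i, update, final) protocol used by the exec() loops above.
* A minimal inclusive prefix-sum sketch, assuming caller-provided host
* arrays x and y of length n:
*
* \code
* struct PrefixSum {
*   typedef int value_type ;
*   const int * x ; int * y ;
*   void operator()( const int i , int & update , const bool final ) const {
*     update += x[i] ;
*     if ( final ) y[i] = update ; // inclusive prefix sum
*   }
* };
* // Kokkos::parallel_scan( Kokkos::RangePolicy< Kokkos::Serial >( 0 , n ) , PrefixSum{ x , y } );
* \endcode
*/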
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
+/* Parallel patterns for Kokkos::Serial with TeamPolicy */
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 >
-class ParallelFor< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > >
+class ParallelFor< FunctorType
+ , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial >
+ >
{
private:
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > Policy ;
+ typedef typename Policy::member_type Member ;
+
+ const FunctorType m_functor ;
+ const int m_league ;
+ const int m_shared ;
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
- const FunctorType & >::type functor
- , const typename Policy::member_type & member )
- { functor( member ); }
+ inline
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec() const
+ {
+ for ( int ileague = 0 ; ileague < m_league ; ++ileague ) {
+ m_functor( Member(ileague,m_league,m_shared) );
+ }
+ }
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
- const FunctorType & >::type functor
- , const typename Policy::member_type & member )
- { functor( TagType() , member ); }
+ inline
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec() const
+ {
+ const TagType t{} ;
+ for ( int ileague = 0 ; ileague < m_league ; ++ileague ) {
+ m_functor( t , Member(ileague,m_league,m_shared) );
+ }
+ }
public:
- ParallelFor( const FunctorType & functor
- , const Policy & policy )
+ inline
+ void execute() const
{
- const int shared_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
-
- Kokkos::Serial::scratch_memory_resize( 0 , shared_size );
-
- for ( int ileague = 0 ; ileague < policy.league_size() ; ++ileague ) {
- ParallelFor::template driver< typename Policy::work_tag >
- ( functor , typename Policy::member_type(ileague,policy.league_size(),shared_size) );
- // functor( typename Policy::member_type(ileague,policy.league_size(),shared_size) );
- }
+ Kokkos::Serial::scratch_memory_resize( 0 , m_shared );
+ this-> template exec< typename Policy::work_tag >();
}
+
+ ParallelFor( const FunctorType & arg_functor
+ , const Policy & arg_policy )
+ : m_functor( arg_functor )
+ , m_league( arg_policy.league_size() )
+ , m_shared( arg_policy.scratch_size() + FunctorTeamShmemSize< FunctorType >::value( arg_functor , 1 ) )
+ { }
};
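/* Usage sketch (illustrative only): this specialization is reached through
* the public interface with a TeamPolicy; on the Serial device the team size
* is 1, so league members run sequentially. Assuming a league size 'nteams'
* chosen by the caller:
*
* \code
* typedef Kokkos::TeamPolicy< Kokkos::Serial > team_policy ;
* Kokkos::parallel_for( team_policy( nteams , 1 ) ,
*   [=]( const team_policy::member_type & member ) {
*     const int i = member.league_rank(); // one invocation per league member
*   } );
* \endcode
*/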
+/*--------------------------------------------------------------------------*/
+
template< class FunctorType , class Arg0 , class Arg1 >
-class ParallelReduce< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > >
+class ParallelReduce< FunctorType
+ , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial >
+ >
{
private:
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > Policy ;
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
-
-public:
+ typedef typename Policy::member_type Member ;
+ typedef typename Policy::work_tag WorkTag ;
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
-private:
+ const FunctorType m_functor ;
+ const int m_league ;
+ const int m_shared ;
+ pointer_type m_result_ptr ;
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
- const FunctorType & >::type functor
- , const typename Policy::member_type & member
- , reference_type update )
- { functor( member , update ); }
+ inline
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec( pointer_type ptr ) const
+ {
+ reference_type update = ValueInit::init( m_functor , ptr );
+
+ for ( int ileague = 0 ; ileague < m_league ; ++ileague ) {
+ m_functor( Member(ileague,m_league,m_shared) , update );
+ }
+
+ Kokkos::Impl::FunctorFinal< FunctorType , TagType >::
+ final( m_functor , ptr );
+ }
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
- const FunctorType & >::type functor
- , const typename Policy::member_type & member
- , reference_type update )
- { functor( TagType() , member , update ); }
+ inline
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec( pointer_type ptr ) const
+ {
+ const TagType t{} ;
+
+ reference_type update = ValueInit::init( m_functor , ptr );
+
+ for ( int ileague = 0 ; ileague < m_league ; ++ileague ) {
+ m_functor( t , Member(ileague,m_league,m_shared) , update );
+ }
+
+ Kokkos::Impl::FunctorFinal< FunctorType , TagType >::
+ final( m_functor , ptr );
+ }
public:
- template< class ViewType >
- ParallelReduce( const FunctorType & functor
- , const Policy & policy
- , const ViewType & result
- )
+ inline
+ void execute() const
{
- const int reduce_size = ValueTraits::value_size( functor );
- const int shared_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
- void * const scratch_reduce = Kokkos::Serial::scratch_memory_resize( reduce_size , shared_size );
+ pointer_type ptr = (pointer_type) Kokkos::Serial::scratch_memory_resize
+ ( ValueTraits::value_size( m_functor ) , m_shared );
- const pointer_type result_ptr =
- result.ptr_on_device() ? result.ptr_on_device()
- : (pointer_type) scratch_reduce ;
-
- reference_type update = ValueInit::init( functor , result_ptr );
+ this-> template exec< WorkTag >( m_result_ptr ? m_result_ptr : ptr );
+ }
- for ( int ileague = 0 ; ileague < policy.league_size() ; ++ileague ) {
- ParallelReduce::template driver< typename Policy::work_tag >
- ( functor , typename Policy::member_type(ileague,policy.league_size(),shared_size) , update );
- }
+ template< class ViewType >
+ ParallelReduce( const FunctorType & arg_functor
+ , const Policy & arg_policy
+ , const ViewType & arg_result
+ )
+ : m_functor( arg_functor )
+ , m_league( arg_policy.league_size() )
+ , m_shared( arg_policy.scratch_size() + FunctorTeamShmemSize< FunctorType >::value( m_functor , 1 ) )
+ , m_result_ptr( arg_result.ptr_on_device() )
+ {
+ static_assert( Kokkos::is_view< ViewType >::value
+ , "Reduction result on Kokkos::Serial must be a Kokkos::View" );
- Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( functor , result_ptr );
+ static_assert( std::is_same< typename ViewType::memory_space
+ , Kokkos::HostSpace >::value
+ , "Reduction result on Kokkos::Serial must be a Kokkos::View in HostSpace" );
}
+
};
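/* Sizing note (illustrative): m_shared above combines the policy's scratch
* request with the per-functor amount queried through FunctorTeamShmemSize.
* A sketch of a functor advertising its per-team shared-memory need,
* assuming the optional team_shmem_size hook that trait inspects:
*
* \code
* struct TeamReduceFunctor {
*   typedef double value_type ;
*   unsigned team_shmem_size( int team_size ) const { return 128 * sizeof(double) ; }
*   void operator()( const Kokkos::TeamPolicy< Kokkos::Serial >::member_type & member
*                  , double & update ) const { update += member.league_rank() ; }
* };
* \endcode
*/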
} // namespace Impl
} // namespace Kokkos
-namespace Kokkos {
+/*--------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------*/
+/* Nested parallel patterns for Kokkos::Serial with TeamPolicy */
+namespace Kokkos {
namespace Impl {
template<typename iType>
struct TeamThreadRangeBoundariesStruct<iType,SerialTeamMember> {
typedef iType index_type;
const iType begin ;
const iType end ;
enum {increment = 1};
const SerialTeamMember& thread;
KOKKOS_INLINE_FUNCTION
TeamThreadRangeBoundariesStruct (const SerialTeamMember& arg_thread, const iType& arg_count)
: begin(0)
, end(arg_count)
, thread(arg_thread)
{}
KOKKOS_INLINE_FUNCTION
TeamThreadRangeBoundariesStruct (const SerialTeamMember& arg_thread, const iType& arg_begin, const iType & arg_end )
: begin( arg_begin )
, end( arg_end)
, thread( arg_thread )
{}
};
template<typename iType>
struct ThreadVectorRangeBoundariesStruct<iType,SerialTeamMember> {
typedef iType index_type;
enum {start = 0};
const iType end;
enum {increment = 1};
KOKKOS_INLINE_FUNCTION
ThreadVectorRangeBoundariesStruct (const SerialTeamMember& thread, const iType& count):
end( count )
{}
};
} // namespace Impl
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & count )
{
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,count);
}
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & begin , const iType & end )
{
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,begin,end);
}
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >
ThreadVectorRange(const Impl::SerialTeamMember& thread, const iType& count) {
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >(thread,count);
}
KOKKOS_INLINE_FUNCTION
Impl::ThreadSingleStruct<Impl::SerialTeamMember> PerTeam(const Impl::SerialTeamMember& thread) {
return Impl::ThreadSingleStruct<Impl::SerialTeamMember>(thread);
}
KOKKOS_INLINE_FUNCTION
Impl::VectorSingleStruct<Impl::SerialTeamMember> PerThread(const Impl::SerialTeamMember& thread) {
return Impl::VectorSingleStruct<Impl::SerialTeamMember>(thread);
}
} // namespace Kokkos
namespace Kokkos {
/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all threads of the calling thread team.
* This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries, const Lambda& lambda) {
for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment)
lambda(i);
}
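/* Usage sketch (illustrative only): inside a team functor, assuming 'member'
* is the SerialTeamMember argument and x, y, N name caller-provided data:
*
* \code
* Kokkos::parallel_for( Kokkos::TeamThreadRange( member , N ) ,
*   [&]( const int i ) { y[i] = 2.0 * x[i] ; } );
* \endcode
*/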
/** \brief Inter-thread parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all threads of the calling thread team and a summation of
* val is performed and put into result. This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
const Lambda & lambda, ValueType& result) {
result = ValueType();
for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
result+=tmp;
}
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
}
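/* Usage sketch (illustrative only), same assumptions as above:
*
* \code
* double team_sum = 0 ;
* Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member , N ) ,
*   [&]( const int i , double & val ) { val += x[i] ; } , team_sum );
* \endcode
*/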
-#ifdef KOKKOS_HAVE_CXX11
-
/** \brief Inter-thread parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all threads of the calling thread team and a reduction of
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
* '1 for *'). This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
join(result,tmp);
}
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
}
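/* Usage sketch (illustrative only): with a join lambda the initial value
* must be the join's neutral element, e.g. the most negative double for a
* max-join (DBL_MAX assumes <cfloat> is included by the caller):
*
* \code
* double team_max = -DBL_MAX ;
* Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member , N ) ,
*   [&]( const int i , double & val ) { if ( x[i] > val ) val = x[i] ; } ,
*   []( double & dst , const double & src ) { if ( src > dst ) dst = src ; } ,
*   team_max );
* \endcode
*/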
-#endif // KOKKOS_HAVE_CXX11
-
} //namespace Kokkos
namespace Kokkos {
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the calling thread.
* This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
lambda(i);
}
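/* Usage sketch (illustrative only), same assumptions as the team-level
* examples above:
*
* \code
* Kokkos::parallel_for( Kokkos::ThreadVectorRange( member , N ) ,
*   [&]( const int i ) { z[i] = x[i] + y[i] ; } );
* \endcode
*/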
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the calling thread and a summation of
* val is performed and put into result. This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
result+=tmp;
}
}
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the calling thread and a reduction of
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
* '1 for *'). This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
join(result,tmp);
}
init_result = result;
}
/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final)
* for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed.
* Depending on the target execution space the operator might be called twice: once with final=false
* and once with final=true. When final==true val contains the prefix sum value. The contribution of this
* "i" needs to be added to val no matter whether final==true or not. In a serial execution
* (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set
* to the final sum value over all vector lanes.
* This functionality requires C++11 support.*/
template< typename iType, class FunctorType >
KOKKOS_INLINE_FUNCTION
void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
loop_boundaries, const FunctorType & lambda) {
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
typedef typename ValueTraits::value_type value_type ;
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
lambda(i,scan_val,true);
}
}
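/* Usage sketch (illustrative only): the lambda observes the (i, val, final)
* protocol described above; assuming caller-provided arrays x and y:
*
* \code
* Kokkos::parallel_scan( Kokkos::ThreadVectorRange( member , N ) ,
*   [&]( const int i , double & val , const bool final ) {
*     val += x[i] ;
*     if ( final ) y[i] = val ; // inclusive prefix sum
*   } );
* \endcode
*/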
} // namespace Kokkos
namespace Kokkos {
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::VectorSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda) {
lambda();
}
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda) {
lambda();
}
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::VectorSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda, ValueType& val) {
lambda(val);
}
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda, ValueType& val) {
lambda(val);
}
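/* Usage sketch (illustrative only): on Serial these overloads invoke the
* lambda exactly once; the value forms also carry the broadcast result
* (compute_once is a hypothetical caller-side helper):
*
* \code
* double broadcast = 0 ;
* Kokkos::single( Kokkos::PerTeam( member ) ,
*   [&]( double & val ) { val = compute_once() ; } , broadcast );
* \endcode
*/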
}
#endif // defined( KOKKOS_HAVE_SERIAL )
#endif /* #define KOKKOS_SERIAL_HPP */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/src/Kokkos_View.hpp b/lib/kokkos/core/src/Kokkos_View.hpp
index 531218b5d..2f93f3541 100644
--- a/lib/kokkos/core/src/Kokkos_View.hpp
+++ b/lib/kokkos/core/src/Kokkos_View.hpp
@@ -1,2120 +1,2110 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VIEW_HPP
#define KOKKOS_VIEW_HPP
#include <type_traits>
#include <string>
#include <Kokkos_Core_fwd.hpp>
-#include <Kokkos_HostSpace.hpp>
-#include <Kokkos_MemoryTraits.hpp>
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+#include <Kokkos_HostSpace.hpp>
+#include <Kokkos_MemoryTraits.hpp>
+
#include <impl/Kokkos_StaticAssert.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Shape.hpp>
#include <impl/Kokkos_AnalyzeShape.hpp>
#include <impl/Kokkos_Tags.hpp>
// Must define before including <impl/Kokkos_ViewOffset.hpp>
namespace Kokkos { struct ALL ; }
#include <impl/Kokkos_ViewOffset.hpp>
#include <impl/Kokkos_ViewSupport.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief View specialization mapping of view traits to a specialization tag */
template< class ValueType ,
class ArraySpecialize ,
class ArrayLayout ,
class MemorySpace ,
class MemoryTraits >
struct ViewSpecialize ;
/** \brief Defines the type of a subview given a source view type
* and subview argument types.
*/
template< class SrcViewType
, class Arg0Type
, class Arg1Type
, class Arg2Type
, class Arg3Type
, class Arg4Type
, class Arg5Type
, class Arg6Type
, class Arg7Type
>
struct ViewSubview /* { typedef ... type ; } */ ;
template< class DstViewSpecialize ,
class SrcViewSpecialize = void ,
class Enable = void >
struct ViewAssignment ;
template< class DstMemorySpace , class SrcMemorySpace , class ExecutionSpace>
struct DeepCopy ;
} /* namespace Impl */
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \class ViewTraits
* \brief Traits class for accessing attributes of a View.
*
* This is an implementation detail of View. It is only of interest
* to developers implementing a new specialization of View.
*
* Template argument permutations:
* - View< DataType , void , void , void >
* - View< DataType , Space , void , void >
* - View< DataType , Space , MemoryTraits , void >
* - View< DataType , Space , void , MemoryTraits >
* - View< DataType , ArrayLayout , void , void >
* - View< DataType , ArrayLayout , Space , void >
* - View< DataType , ArrayLayout , MemoryTraits , void >
* - View< DataType , ArrayLayout , Space , MemoryTraits >
* - View< DataType , MemoryTraits , void , void >
*/
template< class DataType ,
class Arg1 = void ,
class Arg2 = void ,
class Arg3 = void >
class ViewTraits {
private:
// Layout, Space, and MemoryTraits are optional
// but need to appear in that order. That means Layout
// can only be Arg1, Space can be Arg1 or Arg2, and
// MemoryTraits can be Arg1, Arg2 or Arg3
enum { Arg1IsLayout = Impl::is_array_layout<Arg1>::value };
enum { Arg1IsSpace = Impl::is_space<Arg1>::value };
enum { Arg2IsSpace = Impl::is_space<Arg2>::value };
enum { Arg1IsMemoryTraits = Impl::is_memory_traits<Arg1>::value };
enum { Arg2IsMemoryTraits = Impl::is_memory_traits<Arg2>::value };
enum { Arg3IsMemoryTraits = Impl::is_memory_traits<Arg3>::value };
enum { Arg1IsVoid = Impl::is_same< Arg1 , void >::value };
enum { Arg2IsVoid = Impl::is_same< Arg2 , void >::value };
enum { Arg3IsVoid = Impl::is_same< Arg3 , void >::value };
// Arg1 is Layout, Space, MemoryTraits, or void
typedef typename
Impl::StaticAssert<
( 1 == Arg1IsLayout + Arg1IsSpace + Arg1IsMemoryTraits + Arg1IsVoid )
, Arg1 >::type Arg1Verified ;
// If Arg1 is Layout then Arg2 is Space, MemoryTraits, or void
// If Arg1 is Space then Arg2 is MemoryTraits or void
// If Arg1 is MemoryTraits then Arg2 is void
// If Arg1 is Void then Arg2 is void
typedef typename
Impl::StaticAssert<
( Arg1IsLayout && ( 1 == Arg2IsSpace + Arg2IsMemoryTraits + Arg2IsVoid ) ) ||
( Arg1IsSpace && ( 0 == Arg2IsSpace ) && ( 1 == Arg2IsMemoryTraits + Arg2IsVoid ) ) ||
( Arg1IsMemoryTraits && Arg2IsVoid ) ||
( Arg1IsVoid && Arg2IsVoid )
, Arg2 >::type Arg2Verified ;
// Arg3 is MemoryTraits or void and at most one argument is MemoryTraits
typedef typename
Impl::StaticAssert<
( 1 == Arg3IsMemoryTraits + Arg3IsVoid ) &&
( Arg1IsMemoryTraits + Arg2IsMemoryTraits + Arg3IsMemoryTraits <= 1 )
, Arg3 >::type Arg3Verified ;
// Arg1 or Arg2 may have execution and memory spaces
typedef typename Impl::if_c<( Arg1IsSpace ), Arg1Verified ,
typename Impl::if_c<( Arg2IsSpace ), Arg2Verified ,
Kokkos::DefaultExecutionSpace
>::type >::type::execution_space ExecutionSpace ;
typedef typename Impl::if_c<( Arg1IsSpace ), Arg1Verified ,
typename Impl::if_c<( Arg2IsSpace ), Arg2Verified ,
Kokkos::DefaultExecutionSpace
>::type >::type::memory_space MemorySpace ;
typedef typename Impl::is_space<
typename Impl::if_c<( Arg1IsSpace ), Arg1Verified ,
typename Impl::if_c<( Arg2IsSpace ), Arg2Verified ,
Kokkos::DefaultExecutionSpace
>::type >::type >::host_mirror_space HostMirrorSpace ;
// Arg1 may be array layout
typedef typename Impl::if_c< Arg1IsLayout , Arg1Verified ,
typename ExecutionSpace::array_layout
>::type ArrayLayout ;
// Arg1, Arg2, or Arg3 may be memory traits
typedef typename Impl::if_c< Arg1IsMemoryTraits , Arg1Verified ,
typename Impl::if_c< Arg2IsMemoryTraits , Arg2Verified ,
typename Impl::if_c< Arg3IsMemoryTraits , Arg3Verified ,
MemoryManaged
>::type >::type >::type MemoryTraits ;
typedef Impl::AnalyzeShape<DataType> analysis ;
public:
//------------------------------------
// Data type traits:
typedef DataType data_type ;
typedef typename analysis::const_type const_data_type ;
typedef typename analysis::non_const_type non_const_data_type ;
//------------------------------------
// Array of intrinsic scalar type traits:
typedef typename analysis::array_intrinsic_type array_intrinsic_type ;
typedef typename analysis::const_array_intrinsic_type const_array_intrinsic_type ;
typedef typename analysis::non_const_array_intrinsic_type non_const_array_intrinsic_type ;
//------------------------------------
// Value type traits:
typedef typename analysis::value_type value_type ;
typedef typename analysis::const_value_type const_value_type ;
typedef typename analysis::non_const_value_type non_const_value_type ;
//------------------------------------
// Layout and shape traits:
typedef ArrayLayout array_layout ;
typedef typename analysis::shape shape_type ;
enum { rank = shape_type::rank };
enum { rank_dynamic = shape_type::rank_dynamic };
//------------------------------------
// Execution space, memory space, memory access traits, and host mirror space.
typedef ExecutionSpace execution_space ;
typedef MemorySpace memory_space ;
typedef Device<ExecutionSpace,MemorySpace> device_type ;
typedef MemoryTraits memory_traits ;
typedef HostMirrorSpace host_mirror_space ;
typedef typename memory_space::size_type size_type ;
enum { is_hostspace = Impl::is_same< memory_space , HostSpace >::value };
enum { is_managed = memory_traits::Unmanaged == 0 };
enum { is_random_access = memory_traits::RandomAccess == 1 };
//------------------------------------
//------------------------------------
// Specialization tag:
typedef typename
Impl::ViewSpecialize< value_type
, typename analysis::specialize
, array_layout
, memory_space
, memory_traits
>::type specialize ;
};
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
class ViewDefault {};
/** \brief Default view specialization has LayoutLeft, LayoutRight, or LayoutStride.
*/
template< class ValueType , class MemorySpace , class MemoryTraits >
struct ViewSpecialize< ValueType , void , LayoutLeft , MemorySpace , MemoryTraits >
{ typedef ViewDefault type ; };
template< class ValueType , class MemorySpace , class MemoryTraits >
struct ViewSpecialize< ValueType , void , LayoutRight , MemorySpace , MemoryTraits >
{ typedef ViewDefault type ; };
template< class ValueType , class MemorySpace , class MemoryTraits >
struct ViewSpecialize< ValueType , void , LayoutStride , MemorySpace , MemoryTraits >
{ typedef ViewDefault type ; };
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Types for compile-time detection of View usage errors */
namespace ViewError {
struct allocation_constructor_requires_managed {};
struct allocation_constructor_requires_nonconst {};
struct user_pointer_constructor_requires_unmanaged {};
struct device_shmem_constructor_requires_unmanaged {};
struct scalar_operator_called_from_non_scalar_view {};
} /* namespace ViewError */
//----------------------------------------------------------------------------
/** \brief Enable view parentheses operator for
* match of layout and integral arguments.
* If correct rank define type from traits,
* otherwise define type as an error message.
*/
template< class ReturnType , class Traits , class Layout , unsigned Rank ,
typename iType0 = int , typename iType1 = int ,
typename iType2 = int , typename iType3 = int ,
typename iType4 = int , typename iType5 = int ,
typename iType6 = int , typename iType7 = int ,
class Enable = void >
struct ViewEnableArrayOper ;
template< class ReturnType , class Traits , class Layout , unsigned Rank ,
typename iType0 , typename iType1 ,
typename iType2 , typename iType3 ,
typename iType4 , typename iType5 ,
typename iType6 , typename iType7 >
struct ViewEnableArrayOper<
ReturnType , Traits , Layout , Rank ,
iType0 , iType1 , iType2 , iType3 ,
iType4 , iType5 , iType6 , iType7 ,
typename enable_if<
iType0(0) == 0 && iType1(0) == 0 && iType2(0) == 0 && iType3(0) == 0 &&
iType4(0) == 0 && iType5(0) == 0 && iType6(0) == 0 && iType7(0) == 0 &&
is_same< typename Traits::array_layout , Layout >::value &&
( unsigned(Traits::rank) == Rank )
>::type >
{
typedef ReturnType type ;
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \class View
* \brief View to an array of data.
*
* A View represents an array of one or more dimensions.
* For details, please refer to Kokkos' tutorial materials.
*
* \section Kokkos_View_TemplateParameters Template parameters
*
* This class has both required and optional template parameters. The
* \c DataType parameter must always be provided, and must always be
* first. The parameters \c Arg1Type, \c Arg2Type, and \c Arg3Type are
* placeholders for different template parameters. The default value
* of the fifth template parameter \c Specialize suffices for most use
* cases. When explaining the template parameters, we won't refer to
* \c Arg1Type, \c Arg2Type, and \c Arg3Type; instead, we will refer
* to the valid categories of template parameters, in whatever order
* they may occur.
*
* Valid ways in which template arguments may be specified:
* - View< DataType , Space >
* - View< DataType , Space , MemoryTraits >
* - View< DataType , Space , void , MemoryTraits >
* - View< DataType , Layout , Space >
* - View< DataType , Layout , Space , MemoryTraits >
*
* \tparam DataType (required) This indicates both the type of each
* entry of the array, and the combination of compile-time and
* run-time array dimension(s). For example, <tt>double*</tt>
* indicates a one-dimensional array of \c double with run-time
* dimension, and <tt>int*[3]</tt> a two-dimensional array of \c int
* with run-time first dimension and compile-time second dimension
* (of 3). In general, the run-time dimensions (if any) must go
* first, followed by zero or more compile-time dimensions. For
* more examples, please refer to the tutorial materials.
*
* \tparam Space (required) The memory space.
*
* \tparam Layout (optional) The array's layout in memory. For
* example, LayoutLeft indicates a column-major (Fortran style)
* layout, and LayoutRight a row-major (C style) layout. If not
* specified, this defaults to the preferred layout for the
* <tt>Space</tt>.
*
* \tparam MemoryTraits (optional) Assertion of the user's intended
* access behavior. For example, RandomAccess indicates read-only
* access with limited spatial locality, and Unmanaged lets users
* wrap externally allocated memory in a View without automatic
* deallocation.
*
* \section Kokkos_View_MT MemoryTraits discussion
*
* \subsection Kokkos_View_MT_Interp MemoryTraits interpretation depends on Space
*
* Some \c MemoryTraits options may have different interpretations for
* different \c Space types. For example, with the Cuda device,
* \c RandomAccess tells Kokkos to fetch the data through the texture
* cache, whereas the non-GPU devices have no such hardware construct.
*
* \subsection Kokkos_View_MT_PrefUse Preferred use of MemoryTraits
*
* Users should defer applying the optional \c MemoryTraits parameter
* until the point at which they actually plan to rely on it in a
* computational kernel. This minimizes the number of template
* parameters exposed in their code, which reduces the cost of
* compilation. Users may always assign a View without specified
* \c MemoryTraits to a compatible View with that specification.
* For example:
* \code
* // Pass in the simplest types of View possible.
* void
* doSomething (View<double*, Cuda> out,
* View<const double*, Cuda> in)
* {
* // Assign the "generic" View in to a RandomAccess View in_rr.
* // Note that RandomAccess View objects must have const data.
* View<const double*, Cuda, RandomAccess> in_rr = in;
* // ... do something with in_rr and out ...
* }
* \endcode
*/
template< class DataType ,
class Arg1Type = void , /* ArrayLayout, SpaceType, or MemoryTraits */
class Arg2Type = void , /* SpaceType or MemoryTraits */
class Arg3Type = void , /* MemoryTraits */
class Specialize =
typename ViewTraits<DataType,Arg1Type,Arg2Type,Arg3Type>::specialize >
class View ;
-namespace Impl {
-
template< class C >
-struct is_view : public bool_< false > {};
+struct is_view : public Impl::bool_< false > {};
template< class D , class A1 , class A2 , class A3 , class S >
-struct is_view< View< D , A1 , A2 , A3 , S > > : public bool_< true > {};
+struct is_view< View< D , A1 , A2 , A3 , S > > : public Impl::bool_< true > {};
+namespace Impl {
+using Kokkos::is_view ;
}
//----------------------------------------------------------------------------
template< class DataType ,
class Arg1Type ,
class Arg2Type ,
class Arg3Type >
class View< DataType , Arg1Type , Arg2Type , Arg3Type , Impl::ViewDefault >
: public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
{
public:
typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
private:
// Assignment of compatible views requirement:
template< class , class , class , class , class > friend class View ;
// Assignment of compatible subview requirement:
template< class , class , class > friend struct Impl::ViewAssignment ;
// Dimensions, cardinality, capacity, and offset computation for
// multidimensional array view of contiguous memory.
// Inherits from Impl::Shape
typedef Impl::ViewOffset< typename traits::shape_type
, typename traits::array_layout
> offset_map_type ;
// Intermediary class for data management and access
typedef Impl::ViewDataManagement< traits > view_data_management ;
//----------------------------------------
// Data members:
typename view_data_management::handle_type m_ptr_on_device ;
offset_map_type m_offset_map ;
view_data_management m_management ;
Impl::AllocationTracker m_tracker ;
//----------------------------------------
public:
/** return type for all indexing operators */
typedef typename view_data_management::return_type reference_type ;
enum { reference_type_is_lvalue = view_data_management::ReturnTypeIsReference };
typedef View< typename traits::array_intrinsic_type ,
typename traits::array_layout ,
typename traits::device_type ,
typename traits::memory_traits > array_type ;
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
typename traits::memory_traits > const_type ;
typedef View< typename traits::non_const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
typename traits::memory_traits > non_const_type ;
typedef View< typename traits::non_const_data_type ,
typename traits::array_layout ,
typename traits::host_mirror_space ,
void > HostMirror ;
//------------------------------------
// Shape
enum { Rank = traits::rank };
KOKKOS_INLINE_FUNCTION offset_map_type shape() const { return m_offset_map ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const { return m_offset_map.N0 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_offset_map.N1 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_offset_map.N2 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_offset_map.N3 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_offset_map.N4 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_offset_map.N5 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_offset_map.N6 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_offset_map.N7 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type size() const { return m_offset_map.cardinality(); }
template< typename iType >
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension( const iType & i ) const
{ return Impl::dimension( m_offset_map , i ); }
//------------------------------------
// Destructor, constructors, assignment operators:
KOKKOS_INLINE_FUNCTION
~View() {}
KOKKOS_INLINE_FUNCTION
View()
: m_ptr_on_device()
, m_offset_map()
, m_management()
, m_tracker()
{ m_offset_map.assign(0, 0,0,0,0,0,0,0,0); }
KOKKOS_INLINE_FUNCTION
View( const View & rhs )
: m_ptr_on_device()
, m_offset_map()
, m_management()
, m_tracker()
{
(void) Impl::ViewAssignment<
typename traits::specialize ,
typename traits::specialize >( *this , rhs );
}
KOKKOS_INLINE_FUNCTION
View & operator = ( const View & rhs )
{
(void) Impl::ViewAssignment<
typename traits::specialize ,
typename traits::specialize >( *this , rhs );
return *this ;
}
//------------------------------------
// Construct or assign compatible view:
template< class RT , class RL , class RD , class RM , class RS >
KOKKOS_INLINE_FUNCTION
View( const View<RT,RL,RD,RM,RS> & rhs )
: m_ptr_on_device()
, m_offset_map()
, m_management()
, m_tracker()
{
(void) Impl::ViewAssignment<
typename traits::specialize , RS >( *this , rhs );
}
template< class RT , class RL , class RD , class RM , class RS >
KOKKOS_INLINE_FUNCTION
View & operator = ( const View<RT,RL,RD,RM,RS> & rhs )
{
(void) Impl::ViewAssignment<
typename traits::specialize , RS >( *this , rhs );
return *this ;
}
//------------------------------------
/**\brief Allocation of a managed view with possible alignment padding.
*
* Allocation properties for allocating and initializing to the default value_type:
* Kokkos::ViewAllocate()
* Kokkos::ViewAllocate("label") OR "label"
* Kokkos::ViewAllocate(std::string("label")) OR std::string("label")
*
* Allocation properties for allocating and bypassing initialization:
* Kokkos::ViewAllocateWithoutInitializing()
* Kokkos::ViewAllocateWithoutInitializing("label")
*/
template< class AllocationProperties >
explicit inline
View( const AllocationProperties & prop ,
// Impl::ViewAllocProp::size_type exists when the traits and allocation properties
// are valid for allocating viewed memory.
const typename Impl::ViewAllocProp< traits , AllocationProperties >::size_type n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 ,
const size_t n8 = 0 )
: m_ptr_on_device()
, m_offset_map()
, m_management()
, m_tracker()
{
typedef Impl::ViewAllocProp< traits , AllocationProperties > Alloc ;
static_assert(!std::is_same<typename traits::array_layout, LayoutStride>::value,
"LayoutStride does not support View constructor which takes dimensions directly!");
m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n8 );
if(Alloc::AllowPadding)
m_offset_map.set_padding();
m_ptr_on_device = view_data_management::template allocate< Alloc::Initialize >( Alloc::label(prop) , m_offset_map, m_tracker );
}
template< class AllocationProperties >
explicit inline
View( const AllocationProperties & prop ,
const typename traits::array_layout & layout ,
// Impl::ViewAllocProp::size_type exists when the traits and allocation properties
// are valid for allocating viewed memory.
const typename Impl::ViewAllocProp< traits , AllocationProperties >::size_type = 0 )
: m_ptr_on_device()
, m_offset_map()
, m_management()
, m_tracker()
{
typedef Impl::ViewAllocProp< traits , AllocationProperties > Alloc ;
m_offset_map.assign( layout );
if(Alloc::AllowPadding)
m_offset_map.set_padding();
m_ptr_on_device = view_data_management::template allocate< Alloc::Initialize >( Alloc::label(prop) , m_offset_map, m_tracker );
m_management.set_noncontiguous();
}
//------------------------------------
// Assign an unmanaged View from a pointer; can be called in functors.
// No alignment padding is performed.
template< class Type >
explicit KOKKOS_INLINE_FUNCTION
View( Type * ptr ,
typename Impl::ViewRawPointerProp< traits , Type >::size_type n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 ,
const size_t n8 = 0 )
: m_ptr_on_device(ptr)
, m_offset_map()
, m_management()
, m_tracker()
{
m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n8 );
m_management.set_unmanaged();
}
template< class Type >
explicit KOKKOS_INLINE_FUNCTION
View( Type * ptr ,
typename traits::array_layout const & layout ,
typename Impl::ViewRawPointerProp< traits , Type >::size_type = 0 )
: m_ptr_on_device(ptr)
, m_offset_map()
, m_management()
, m_tracker()
{
m_offset_map.assign( layout );
m_management.set_unmanaged();
m_management.set_noncontiguous();
}
//------------------------------------
// Assign a View from an AllocationTracker,
// The allocator used must be compatible with the memory space of the view
// No alignment padding is performed.
// TODO: Should these allow padding??? DJS 01/15/15
explicit
View( Impl::AllocationTracker const &arg_tracker ,
const size_t n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 ,
const size_t n8 = 0 )
: m_ptr_on_device(reinterpret_cast<typename traits::value_type*>(arg_tracker.alloc_ptr()))
, m_offset_map()
, m_management()
, m_tracker(arg_tracker)
{
m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n8 );
const size_t req_size = m_offset_map.capacity() * sizeof(typename traits::value_type);
if ( m_tracker.alloc_size() < req_size ) {
Impl::throw_runtime_exception("Error: tracker.alloc_size() < req_size");
}
}
explicit
View( Impl::AllocationTracker const & arg_tracker
, typename traits::array_layout const & layout )
: m_ptr_on_device(reinterpret_cast<typename traits::value_type*>(arg_tracker.alloc_ptr()))
, m_offset_map()
, m_management()
, m_tracker(arg_tracker)
{
m_offset_map.assign( layout );
const size_t req_size = m_offset_map.capacity() * sizeof(typename traits::value_type);
if ( m_tracker.alloc_size() < req_size ) {
Impl::throw_runtime_exception("Error: tracker.alloc_size() < req_size");
}
m_management.set_noncontiguous();
}
//------------------------------------
/** \brief Constructors for subviews require the following
* type-compatibility condition, enforced via StaticAssert.
*
* Impl::is_same< View ,
* typename Impl::ViewSubview< View<D,A1,A2,A3,Impl::ViewDefault>
* , ArgType0 , ArgType1 , ArgType2 , ArgType3
* , ArgType4 , ArgType5 , ArgType6 , ArgType7
* >::type >::value
*/
template< class D , class A1 , class A2 , class A3
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
>
KOKKOS_INLINE_FUNCTION
View( const View<D,A1,A2,A3,Impl::ViewDefault> & src
, const SubArg0_type & arg0 , const SubArg1_type & arg1
, const SubArg2_type & arg2 , const SubArg3_type & arg3
, const SubArg4_type & arg4 , const SubArg5_type & arg5
, const SubArg6_type & arg6 , const SubArg7_type & arg7
);
template< class D , class A1 , class A2 , class A3
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type , class SubArg5_type , class SubArg6_type
>
KOKKOS_INLINE_FUNCTION
View( const View<D,A1,A2,A3,Impl::ViewDefault> & src
, const SubArg0_type & arg0 , const SubArg1_type & arg1
, const SubArg2_type & arg2 , const SubArg3_type & arg3
, const SubArg4_type & arg4 , const SubArg5_type & arg5
, const SubArg6_type & arg6
);
template< class D , class A1 , class A2 , class A3
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type , class SubArg5_type
>
KOKKOS_INLINE_FUNCTION
View( const View<D,A1,A2,A3,Impl::ViewDefault> & src
, const SubArg0_type & arg0 , const SubArg1_type & arg1
, const SubArg2_type & arg2 , const SubArg3_type & arg3
, const SubArg4_type & arg4 , const SubArg5_type & arg5
);
template< class D , class A1 , class A2 , class A3
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type
>
KOKKOS_INLINE_FUNCTION
View( const View<D,A1,A2,A3,Impl::ViewDefault> & src
, const SubArg0_type & arg0 , const SubArg1_type & arg1
, const SubArg2_type & arg2 , const SubArg3_type & arg3
, const SubArg4_type & arg4
);
template< class D , class A1 , class A2 , class A3
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
>
KOKKOS_INLINE_FUNCTION
View( const View<D,A1,A2,A3,Impl::ViewDefault> & src
, const SubArg0_type & arg0 , const SubArg1_type & arg1
, const SubArg2_type & arg2 , const SubArg3_type & arg3
);
template< class D , class A1 , class A2 , class A3
, class SubArg0_type , class SubArg1_type , class SubArg2_type
>
KOKKOS_INLINE_FUNCTION
View( const View<D,A1,A2,A3,Impl::ViewDefault> & src
, const SubArg0_type & arg0 , const SubArg1_type & arg1
, const SubArg2_type & arg2
);
template< class D , class A1 , class A2 , class A3
, class SubArg0_type , class SubArg1_type
>
KOKKOS_INLINE_FUNCTION
View( const View<D,A1,A2,A3,Impl::ViewDefault> & src
, const SubArg0_type & arg0 , const SubArg1_type & arg1
);
template< class D , class A1 , class A2 , class A3
, class SubArg0_type
>
KOKKOS_INLINE_FUNCTION
View( const View<D,A1,A2,A3,Impl::ViewDefault> & src
, const SubArg0_type & arg0
);
//------------------------------------
// Assign unmanaged View to portion of execution space's shared memory
typedef Impl::if_c< ! traits::is_managed ,
const typename traits::execution_space::scratch_memory_space & ,
Impl::ViewError::device_shmem_constructor_requires_unmanaged >
if_scratch_memory_constructor ;
explicit KOKKOS_INLINE_FUNCTION
View( typename if_scratch_memory_constructor::type space ,
const unsigned n0 = 0 ,
const unsigned n1 = 0 ,
const unsigned n2 = 0 ,
const unsigned n3 = 0 ,
const unsigned n4 = 0 ,
const unsigned n5 = 0 ,
const unsigned n6 = 0 ,
const unsigned n7 = 0 )
: m_ptr_on_device()
, m_offset_map()
, m_management()
, m_tracker()
{
typedef typename traits::value_type value_type_ ;
enum { align = 8 };
enum { mask = align - 1 };
m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7 );
typedef Impl::if_c< ! traits::is_managed ,
value_type_ * ,
Impl::ViewError::device_shmem_constructor_requires_unmanaged >
if_device_shmem_pointer ;
// Select the first argument:
m_ptr_on_device = if_device_shmem_pointer::select(
(value_type_*) space.get_shmem( unsigned( sizeof(value_type_) * m_offset_map.capacity() + unsigned(mask) ) & ~unsigned(mask) ) );
}
explicit KOKKOS_INLINE_FUNCTION
View( typename if_scratch_memory_constructor::type space ,
typename traits::array_layout const & layout)
: m_ptr_on_device()
, m_offset_map()
, m_management()
, m_tracker()
{
typedef typename traits::value_type value_type_ ;
typedef Impl::if_c< ! traits::is_managed ,
value_type_ * ,
Impl::ViewError::device_shmem_constructor_requires_unmanaged >
if_device_shmem_pointer ;
m_offset_map.assign( layout );
m_management.set_unmanaged();
m_management.set_noncontiguous();
enum { align = 8 };
enum { mask = align - 1 };
// Select the first argument:
m_ptr_on_device = if_device_shmem_pointer::select(
(value_type_*) space.get_shmem( unsigned( sizeof(value_type_) * m_offset_map.capacity() + unsigned(mask) ) & ~unsigned(mask) ) );
}
static inline
unsigned shmem_size( const unsigned n0 = 0 ,
const unsigned n1 = 0 ,
const unsigned n2 = 0 ,
const unsigned n3 = 0 ,
const unsigned n4 = 0 ,
const unsigned n5 = 0 ,
const unsigned n6 = 0 ,
const unsigned n7 = 0 )
{
enum { align = 8 };
enum { mask = align - 1 };
typedef typename traits::value_type value_type_ ;
offset_map_type offset_map ;
offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7 );
return unsigned( sizeof(value_type_) * offset_map.capacity() + unsigned(mask) ) & ~unsigned(mask) ;
}
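/* Usage sketch (illustrative only): shmem_size() reports the aligned bytes
* an unmanaged shared-memory View of the given dimensions will request, so a
* host-side caller can budget the team scratch. A sketch, assuming the team
* member's team_shmem() accessor for the scratch space:
*
* \code
* typedef View< double* , Kokkos::Serial::scratch_memory_space
*             , Kokkos::MemoryUnmanaged > shared_vector ;
* const unsigned bytes = shared_vector::shmem_size( 256 );
* // inside the kernel: shared_vector tmp( member.team_shmem() , 256 );
* \endcode
*/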
//------------------------------------
// Is not allocated
KOKKOS_FORCEINLINE_FUNCTION
bool is_null() const { return 0 == ptr_on_device() ; }
//------------------------------------
// Operators for scalar (rank zero) views.
typedef Impl::if_c< traits::rank == 0 ,
typename traits::value_type ,
Impl::ViewError::scalar_operator_called_from_non_scalar_view >
if_scalar_operator ;
+ typedef Impl::if_c< traits::rank == 0 ,
+ reference_type ,
+ Impl::ViewError::scalar_operator_called_from_non_scalar_view >
+ if_scalar_operator_return ;
KOKKOS_INLINE_FUNCTION
const View & operator = ( const typename if_scalar_operator::type & rhs ) const
{
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
- *m_ptr_on_device = if_scalar_operator::select( rhs );
+ m_ptr_on_device[ 0 ] = if_scalar_operator::select( rhs );
return *this ;
}
KOKKOS_FORCEINLINE_FUNCTION
- operator typename if_scalar_operator::type & () const
+ operator typename if_scalar_operator_return::type () const
{
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
- return if_scalar_operator::select( *m_ptr_on_device );
+ return if_scalar_operator_return::select( m_ptr_on_device[ 0 ] );
}
KOKKOS_FORCEINLINE_FUNCTION
- typename if_scalar_operator::type & operator()() const
+ typename if_scalar_operator_return::type operator()() const
{
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
- return if_scalar_operator::select( *m_ptr_on_device );
+ return if_scalar_operator_return::select( m_ptr_on_device[ 0 ] );
}
KOKKOS_FORCEINLINE_FUNCTION
- typename if_scalar_operator::type & operator*() const
+ typename if_scalar_operator_return::type operator*() const
{
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
- return if_scalar_operator::select( *m_ptr_on_device );
+ return if_scalar_operator_return::select( m_ptr_on_device[ 0 ] );
}
//------------------------------------
// Array member access operators enabled if
// (1) a zero value of each argument type is compile-time comparable to zero
// (2) the rank matches the number of arguments
// (3) the memory space is valid for the access
//------------------------------------
// rank 1:
// Specialisation for LayoutLeft and LayoutRight since we know the stride is 1
template< typename iType0 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type , traits, LayoutLeft, 1, iType0 >::type
operator[] ( const iType0 & i0 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_offset_map, i0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ i0 ];
}
template< typename iType0 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type , traits, LayoutLeft, 1, iType0 >::type
operator() ( const iType0 & i0 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_offset_map, i0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ i0 ];
}
template< typename iType0 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type , traits, LayoutLeft, 1, iType0 >::type
at( const iType0 & i0 , const int , const int , const int ,
const int , const int , const int , const int ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_offset_map, i0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ i0 ];
}
template< typename iType0 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type , traits, LayoutRight, 1, iType0 >::type
operator[] ( const iType0 & i0 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_offset_map, i0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ i0 ];
}
template< typename iType0 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type , traits, LayoutRight, 1, iType0 >::type
operator() ( const iType0 & i0 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_offset_map, i0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ i0 ];
}
template< typename iType0 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type , traits, LayoutRight, 1, iType0 >::type
at( const iType0 & i0 , const int , const int , const int ,
const int , const int , const int , const int ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_offset_map, i0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ i0 ];
}
template< typename iType0 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type , traits,
typename Impl::if_c<
Impl::is_same<typename traits::array_layout, LayoutRight>::value ||
Impl::is_same<typename traits::array_layout, LayoutLeft>::value ,
void, typename traits::array_layout>::type,
1, iType0 >::type
operator[] ( const iType0 & i0 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_offset_map, i0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0) ];
}
template< typename iType0 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type , traits,
typename Impl::if_c<
Impl::is_same<typename traits::array_layout, LayoutRight>::value ||
Impl::is_same<typename traits::array_layout, LayoutLeft>::value ,
void, typename traits::array_layout>::type,
1, iType0 >::type
operator() ( const iType0 & i0 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_offset_map, i0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0) ];
}
template< typename iType0 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type , traits,
typename Impl::if_c<
Impl::is_same<typename traits::array_layout, LayoutRight>::value ||
Impl::is_same<typename traits::array_layout, LayoutLeft>::value ,
void, typename traits::array_layout>::type,
1, iType0 >::type
at( const iType0 & i0 , const int , const int , const int ,
const int , const int , const int , const int ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_1( m_offset_map, i0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0) ];
}
// rank 2:
template< typename iType0 , typename iType1 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 2, iType0, iType1 >::type
operator() ( const iType0 & i0 , const iType1 & i1 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_offset_map, i0,i1 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1) ];
}
template< typename iType0 , typename iType1 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 2, iType0, iType1 >::type
at( const iType0 & i0 , const iType1 & i1 , const int , const int ,
const int , const int , const int , const int ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_offset_map, i0,i1 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1) ];
}
// rank 3:
template< typename iType0 , typename iType1 , typename iType2 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 3, iType0, iType1, iType2 >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_3( m_offset_map, i0,i1,i2 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1,i2) ];
}
template< typename iType0 , typename iType1 , typename iType2 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 3, iType0, iType1, iType2 >::type
at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const int ,
const int , const int , const int , const int ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_3( m_offset_map, i0,i1,i2 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1,i2) ];
}
// rank 4:
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 4, iType0, iType1, iType2, iType3 >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_4( m_offset_map, i0,i1,i2,i3 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1,i2,i3) ];
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 4, iType0, iType1, iType2, iType3 >::type
at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const int , const int , const int , const int ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_4( m_offset_map, i0,i1,i2,i3 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1,i2,i3) ];
}
// rank 5:
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
typename iType4 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 5, iType0, iType1, iType2, iType3 , iType4 >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const iType4 & i4 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_5( m_offset_map, i0,i1,i2,i3,i4 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1,i2,i3,i4) ];
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
typename iType4 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 5, iType0, iType1, iType2, iType3 , iType4 >::type
at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const iType4 & i4 , const int , const int , const int ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_5( m_offset_map, i0,i1,i2,i3,i4 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1,i2,i3,i4) ];
}
// rank 6:
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
typename iType4 , typename iType5 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 6,
iType0, iType1, iType2, iType3 , iType4, iType5 >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const iType4 & i4 , const iType5 & i5 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_6( m_offset_map, i0,i1,i2,i3,i4,i5 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1,i2,i3,i4,i5) ];
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
typename iType4 , typename iType5 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 6,
iType0, iType1, iType2, iType3 , iType4, iType5 >::type
at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const iType4 & i4 , const iType5 & i5 , const int , const int ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_6( m_offset_map, i0,i1,i2,i3,i4,i5 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1,i2,i3,i4,i5) ];
}
// rank 7:
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
typename iType4 , typename iType5 , typename iType6 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 7,
iType0, iType1, iType2, iType3 , iType4, iType5, iType6 >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_7( m_offset_map, i0,i1,i2,i3,i4,i5,i6 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1,i2,i3,i4,i5,i6) ];
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
typename iType4 , typename iType5 , typename iType6 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 7,
iType0, iType1, iType2, iType3 , iType4, iType5, iType6 >::type
at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const int ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_7( m_offset_map, i0,i1,i2,i3,i4,i5,i6 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1,i2,i3,i4,i5,i6) ];
}
// rank 8:
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
typename iType4 , typename iType5 , typename iType6 , typename iType7 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 8,
iType0, iType1, iType2, iType3 , iType4, iType5, iType6, iType7 >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_8( m_offset_map, i0,i1,i2,i3,i4,i5,i6,i7 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1,i2,i3,i4,i5,i6,i7) ];
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
typename iType4 , typename iType5 , typename iType6 , typename iType7 >
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::ViewEnableArrayOper< reference_type ,
traits, typename traits::array_layout, 8,
iType0, iType1, iType2, iType3 , iType4, iType5, iType6, iType7 >::type
at( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_8( m_offset_map, i0,i1,i2,i3,i4,i5,i6,i7 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , ptr_on_device() );
return m_ptr_on_device[ m_offset_map(i0,i1,i2,i3,i4,i5,i6,i7) ];
}
//------------------------------------
// Access to the underlying contiguous storage of this view specialization.
// These methods are specific to this specialization of a view.
KOKKOS_FORCEINLINE_FUNCTION
typename traits::value_type * ptr_on_device() const
{ return (typename traits::value_type *) m_ptr_on_device ; }
// Stride of physical storage; the output array 's' must be dimensioned to at least Rank.
template< typename iType >
KOKKOS_INLINE_FUNCTION
void stride( iType * const s ) const
{ m_offset_map.stride(s); }
// Count of contiguously allocated data members including padding.
KOKKOS_INLINE_FUNCTION
typename traits::size_type capacity() const
{ return m_offset_map.capacity(); }
// Whether the view data can be treated (deep copied)
// as a contiguous block of memory.
KOKKOS_INLINE_FUNCTION
bool is_contiguous() const
{ return m_management.is_contiguous(); }
const Impl::AllocationTracker & tracker() const { return m_tracker; }
};
} /* namespace Kokkos */
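// --- Illustrative sketch (editorial addition, not part of the Kokkos diff):
// the rank-specific operator() overloads above are what make the accesses
// below legal. The views 'a' and 'b' are hypothetical, and the shape asserts
// fire only in bounds-checked debug builds.
inline void example_view_access()
{
  Kokkos::View<double*>  a( "a" , 100 );       // rank 1
  Kokkos::View<double**> b( "b" , 100 , 50 );  // rank 2
  a( 5 )     = 1.0 ;  // rank-1 access; a[ 5 ] is equivalent
  b( 5 , 7 ) = 2.0 ;  // rank-2 access maps (i0,i1) through the offset map
}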
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
template< class LT , class LL , class LD , class LM , class LS ,
class RT , class RL , class RD , class RM , class RS >
KOKKOS_INLINE_FUNCTION
typename Impl::enable_if<( Impl::is_same< LS , RS >::value ), bool >::type
operator == ( const View<LT,LL,LD,LM,LS> & lhs ,
const View<RT,RL,RD,RM,RS> & rhs )
{
// Same data, layout, dimensions
typedef ViewTraits<LT,LL,LD,LM> lhs_traits ;
typedef ViewTraits<RT,RL,RD,RM> rhs_traits ;
return
Impl::is_same< typename lhs_traits::const_data_type ,
typename rhs_traits::const_data_type >::value &&
Impl::is_same< typename lhs_traits::array_layout ,
typename rhs_traits::array_layout >::value &&
Impl::is_same< typename lhs_traits::memory_space ,
typename rhs_traits::memory_space >::value &&
Impl::is_same< typename lhs_traits::specialize ,
typename rhs_traits::specialize >::value &&
lhs.ptr_on_device() == rhs.ptr_on_device() &&
lhs.shape() == rhs.shape() ;
}
template< class LT , class LL , class LD , class LM , class LS ,
class RT , class RL , class RD , class RM , class RS >
KOKKOS_INLINE_FUNCTION
bool operator != ( const View<LT,LL,LD,LM,LS> & lhs ,
const View<RT,RL,RD,RM,RS> & rhs )
{
return ! operator==( lhs , rhs );
}
//----------------------------------------------------------------------------
} // namespace Kokkos
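// --- Illustrative sketch (editorial addition): operator== above compares
// view metadata and the raw pointer, never element values. Views 'x', 'y',
// and 'z' are hypothetical.
inline void example_view_equality()
{
  Kokkos::View<int*> x( "x" , 10 );
  Kokkos::View<int*> y( "y" , 10 );
  Kokkos::View<int*> z = x ;       // shallow copy: shares the allocation
  const bool same = ( x == z );    // true: same pointer, shape, and types
  const bool diff = ( x == y );    // false: distinct allocations
  (void) same ; (void) diff ;
}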
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
//----------------------------------------------------------------------------
/** \brief Deep copy a value into a view.
*/
template< class DT , class DL , class DD , class DM , class DS >
inline
void deep_copy( const View<DT,DL,DD,DM,DS> & dst ,
typename Impl::enable_if<(
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::non_const_value_type ,
typename ViewTraits<DT,DL,DD,DM>::value_type >::value
), typename ViewTraits<DT,DL,DD,DM>::const_value_type >::type & value )
{
Impl::ViewFill< View<DT,DL,DD,DM,DS> >( dst , value );
}
template< class ST , class SL , class SD , class SM , class SS >
inline
typename Impl::enable_if<( ViewTraits<ST,SL,SD,SM>::rank == 0 )>::type
deep_copy( ST & dst , const View<ST,SL,SD,SM,SS> & src )
{
typedef ViewTraits<ST,SL,SD,SM> src_traits ;
typedef typename src_traits::memory_space src_memory_space ;
Impl::DeepCopy< HostSpace , src_memory_space >( & dst , src.ptr_on_device() , sizeof(ST) );
}
//----------------------------------------------------------------------------
/** \brief A deep copy between views of compatible type and rank zero.
*/
template< class DT , class DL , class DD , class DM , class DS ,
class ST , class SL , class SD , class SM , class SS >
inline
void deep_copy( const View<DT,DL,DD,DM,DS> & dst ,
const View<ST,SL,SD,SM,SS> & src ,
typename Impl::enable_if<(
// Same type and destination is not constant:
Impl::is_same< typename View<DT,DL,DD,DM,DS>::value_type ,
typename View<ST,SL,SD,SM,SS>::non_const_value_type >::value
&&
// Rank zero:
( unsigned(View<DT,DL,DD,DM,DS>::rank) == unsigned(0) ) &&
( unsigned(View<ST,SL,SD,SM,SS>::rank) == unsigned(0) )
)>::type * = 0 )
{
typedef View<DT,DL,DD,DM,DS> dst_type ;
typedef View<ST,SL,SD,SM,SS> src_type ;
typedef typename dst_type::memory_space dst_memory_space ;
typedef typename src_type::memory_space src_memory_space ;
typedef typename src_type::value_type value_type ;
if ( dst.ptr_on_device() != src.ptr_on_device() ) {
Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.ptr_on_device() , src.ptr_on_device() , sizeof(value_type) );
}
}
//----------------------------------------------------------------------------
/** \brief A deep copy between views of the default specialization, compatible type,
* same non-zero rank, same contiguous layout.
*/
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
inline
void deep_copy( const View<DT,DL,DD,DM,Impl::ViewDefault> & dst ,
const View<ST,SL,SD,SM,Impl::ViewDefault> & src ,
typename Impl::enable_if<(
// Same type and destination is not constant:
Impl::is_same< typename View<DT,DL,DD,DM,Impl::ViewDefault>::value_type ,
typename View<ST,SL,SD,SM,Impl::ViewDefault>::non_const_value_type >::value
&&
// Same non-zero rank:
( unsigned(View<DT,DL,DD,DM,Impl::ViewDefault>::rank) ==
unsigned(View<ST,SL,SD,SM,Impl::ViewDefault>::rank) )
&&
( 0 < unsigned(View<DT,DL,DD,DM,Impl::ViewDefault>::rank) )
&&
// Same layout:
Impl::is_same< typename View<DT,DL,DD,DM,Impl::ViewDefault>::array_layout ,
typename View<ST,SL,SD,SM,Impl::ViewDefault>::array_layout >::value
)>::type * = 0 )
{
typedef View<DT,DL,DD,DM,Impl::ViewDefault> dst_type ;
typedef View<ST,SL,SD,SM,Impl::ViewDefault> src_type ;
typedef typename dst_type::memory_space dst_memory_space ;
typedef typename src_type::memory_space src_memory_space ;
enum { is_contiguous = // Contiguous (e.g., non-strided, non-tiled) layout
Impl::is_same< typename View<DT,DL,DD,DM,Impl::ViewDefault>::array_layout , LayoutLeft >::value ||
Impl::is_same< typename View<DT,DL,DD,DM,Impl::ViewDefault>::array_layout , LayoutRight >::value };
if ( dst.ptr_on_device() != src.ptr_on_device() ) {
// Same shape (dimensions)
const bool shapes_are_equal = dst.shape() == src.shape();
if ( shapes_are_equal && is_contiguous && dst.capacity() == src.capacity() ) {
// Views span an equal-length contiguous range.
// Assume a straight memory copy can be performed over this range.
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.capacity();
Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.ptr_on_device() , src.ptr_on_device() , nbytes );
}
else {
// Destination view's execution space must be able to directly access the
// source memory space in order for the ViewRemap functor to run in the
// destination memory space's execution space.
size_t stride[8];
src.stride(stride);
size_t size_stride = stride[0]*src.dimension_0();
size_t size_dim = src.dimension_0();
for(int i = 1; i<src.rank; i++) {
if(stride[i]*src.dimension(i)>size_stride)
size_stride = stride[i]*src.dimension(i);
size_dim*=src.dimension(i);
}
if( shapes_are_equal && size_stride == size_dim) {
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.capacity();
Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.ptr_on_device() , src.ptr_on_device() , nbytes );
} else {
Impl::ViewRemap< dst_type , src_type >( dst , src );
}
}
}
}
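// --- Illustrative sketch (editorial addition): when both views share a
// contiguous layout and equal capacity, the overload above reduces to one
// DeepCopy (memcpy-like) call. A hypothetical example with matching views:
inline void example_deep_copy_contiguous()
{
  Kokkos::View<double**> a( "a" , 10 , 10 );
  Kokkos::View<double**> b( "b" , 10 , 10 );
  Kokkos::deep_copy( a , b );  // same layout and capacity: one memory copy
}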
/** \brief Deep copy arrays of equal dimensions, in the same space, which
 * have different layouts or specializations.
*/
template< class DT , class DL , class DD , class DM , class DS ,
class ST , class SL , class SD , class SM , class SS >
inline
void deep_copy( const View< DT, DL, DD, DM, DS > & dst ,
const View< ST, SL, SD, SM, SS > & src ,
const typename Impl::enable_if<(
// Same type and destination is not constant:
Impl::is_same< typename View<DT,DL,DD,DM,DS>::value_type ,
typename View<DT,DL,DD,DM,DS>::non_const_value_type >::value
&&
// Source memory space is accessible to destination memory space
Impl::VerifyExecutionCanAccessMemorySpace< typename View<DT,DL,DD,DM,DS>::memory_space
, typename View<ST,SL,SD,SM,SS>::memory_space >::value
&&
// Same non-zero rank
( unsigned( View<DT,DL,DD,DM,DS>::rank ) ==
unsigned( View<ST,SL,SD,SM,SS>::rank ) )
&&
( 0 < unsigned( View<DT,DL,DD,DM,DS>::rank ) )
&&
// Different layout or different specialization:
( ( ! Impl::is_same< typename View<DT,DL,DD,DM,DS>::array_layout ,
typename View<ST,SL,SD,SM,SS>::array_layout >::value )
||
( ! Impl::is_same< DS , SS >::value )
)
)>::type * = 0 )
{
typedef View< DT, DL, DD, DM, DS > dst_type ;
typedef View< ST, SL, SD, SM, SS > src_type ;
assert_shapes_equal_dimension( dst.shape() , src.shape() );
Impl::ViewRemap< dst_type , src_type >( dst , src );
}
}
} // namespace Kokkos
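// --- Illustrative sketch (editorial addition): differing layouts select the
// ViewRemap overload above, which copies element-by-element at matching
// indices instead of performing a flat memory copy. Names are hypothetical.
inline void example_deep_copy_remap()
{
  Kokkos::View<double** , Kokkos::LayoutLeft>  l( "l" , 8 , 8 );
  Kokkos::View<double** , Kokkos::LayoutRight> r( "r" , 8 , 8 );
  Kokkos::deep_copy( r , l );  // layouts differ: ViewRemap, not memcpy
}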
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
//----------------------------------------------------------------------------
/** \brief Deep copy a value into a view.
*/
template< class ExecSpace, class DT , class DL , class DD , class DM , class DS >
inline
void deep_copy( const ExecSpace&, const View<DT,DL,DD,DM,DS> & dst ,
typename Impl::enable_if<(
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::non_const_value_type ,
typename ViewTraits<DT,DL,DD,DM>::value_type >::value
), typename ViewTraits<DT,DL,DD,DM>::const_value_type >::type & value )
{
Impl::ViewFill< View<DT,DL,DD,DM,DS> >( dst , value );
}
template< class ExecSpace, class ST , class SL , class SD , class SM , class SS >
inline
typename Impl::enable_if<( ViewTraits<ST,SL,SD,SM>::rank == 0 )>::type
deep_copy( const ExecSpace& exec, ST & dst , const View<ST,SL,SD,SM,SS> & src )
{
typedef ViewTraits<ST,SL,SD,SM> src_traits ;
typedef typename src_traits::memory_space src_memory_space ;
Impl::DeepCopy< HostSpace , src_memory_space , ExecSpace >( exec , & dst , src.ptr_on_device() , sizeof(ST) );
}
//----------------------------------------------------------------------------
/** \brief A deep copy between views of compatible type and rank zero.
*/
template< class ExecSpace ,
class DT , class DL , class DD , class DM , class DS ,
class ST , class SL , class SD , class SM , class SS >
inline
void deep_copy( const ExecSpace& exec,
const View<DT,DL,DD,DM,DS> & dst ,
const View<ST,SL,SD,SM,SS> & src ,
typename Impl::enable_if<(
// Same type and destination is not constant:
Impl::is_same< typename View<DT,DL,DD,DM,DS>::value_type ,
typename View<ST,SL,SD,SM,SS>::non_const_value_type >::value
&&
// Rank zero:
( unsigned(View<DT,DL,DD,DM,DS>::rank) == unsigned(0) ) &&
( unsigned(View<ST,SL,SD,SM,SS>::rank) == unsigned(0) )
)>::type * = 0 )
{
typedef View<DT,DL,DD,DM,DS> dst_type ;
typedef View<ST,SL,SD,SM,SS> src_type ;
typedef typename dst_type::memory_space dst_memory_space ;
typedef typename src_type::memory_space src_memory_space ;
typedef typename src_type::value_type value_type ;
if ( dst.ptr_on_device() != src.ptr_on_device() ) {
Impl::DeepCopy< dst_memory_space , src_memory_space , ExecSpace >( exec , dst.ptr_on_device() , src.ptr_on_device() , sizeof(value_type) );
}
}
//----------------------------------------------------------------------------
/** \brief A deep copy between views of the default specialization, compatible type,
* same non-zero rank, same contiguous layout.
*/
template< class ExecSpace ,
class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
inline
void deep_copy( const ExecSpace & exec,
const View<DT,DL,DD,DM,Impl::ViewDefault> & dst ,
const View<ST,SL,SD,SM,Impl::ViewDefault> & src ,
typename Impl::enable_if<(
// Same type and destination is not constant:
Impl::is_same< typename View<DT,DL,DD,DM,Impl::ViewDefault>::value_type ,
typename View<ST,SL,SD,SM,Impl::ViewDefault>::non_const_value_type >::value
&&
// Same non-zero rank:
( unsigned(View<DT,DL,DD,DM,Impl::ViewDefault>::rank) ==
unsigned(View<ST,SL,SD,SM,Impl::ViewDefault>::rank) )
&&
( 0 < unsigned(View<DT,DL,DD,DM,Impl::ViewDefault>::rank) )
&&
// Same layout:
Impl::is_same< typename View<DT,DL,DD,DM,Impl::ViewDefault>::array_layout ,
typename View<ST,SL,SD,SM,Impl::ViewDefault>::array_layout >::value
)>::type * = 0 )
{
typedef View<DT,DL,DD,DM,Impl::ViewDefault> dst_type ;
typedef View<ST,SL,SD,SM,Impl::ViewDefault> src_type ;
typedef typename dst_type::memory_space dst_memory_space ;
typedef typename src_type::memory_space src_memory_space ;
enum { is_contiguous = // Contiguous (e.g., non-strided, non-tiled) layout
Impl::is_same< typename View<DT,DL,DD,DM,Impl::ViewDefault>::array_layout , LayoutLeft >::value ||
Impl::is_same< typename View<DT,DL,DD,DM,Impl::ViewDefault>::array_layout , LayoutRight >::value };
if ( dst.ptr_on_device() != src.ptr_on_device() ) {
// Same shape (dimensions)
const bool shapes_are_equal = dst.shape() == src.shape();
if ( shapes_are_equal && is_contiguous && dst.capacity() == src.capacity() ) {
// Views span an equal-length contiguous range.
// Assume a straight memory copy can be performed over this range.
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.capacity();
Impl::DeepCopy< dst_memory_space , src_memory_space , ExecSpace >( exec , dst.ptr_on_device() , src.ptr_on_device() , nbytes );
}
else {
// Destination view's execution space must be able to directly access the
// source memory space in order for the ViewRemap functor to run in the
// destination memory space's execution space.
size_t stride[8];
src.stride(stride);
size_t size_stride = stride[0]*src.dimension_0();
size_t size_dim = src.dimension_0();
for(int i = 1; i<src.rank; i++) {
if(stride[i]*src.dimension(i)>size_stride)
size_stride = stride[i]*src.dimension(i);
size_dim*=src.dimension(i);
}
if( shapes_are_equal && size_stride == size_dim) {
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.capacity();
Impl::DeepCopy< dst_memory_space , src_memory_space , ExecSpace >( exec , dst.ptr_on_device() , src.ptr_on_device() , nbytes );
} else {
Impl::ViewRemap< dst_type , src_type >( dst , src );
}
}
}
}
/** \brief Deep copy arrays of equal dimensions, in the same space, which
 * have different layouts or specializations.
*/
template< class ExecSpace ,
class DT , class DL , class DD , class DM , class DS ,
class ST , class SL , class SD , class SM , class SS >
inline
void deep_copy( const ExecSpace& ,
const View< DT, DL, DD, DM, DS > & dst ,
const View< ST, SL, SD, SM, SS > & src ,
const typename Impl::enable_if<(
// Same type and destination is not constant:
Impl::is_same< typename View<DT,DL,DD,DM,DS>::value_type ,
typename View<DT,DL,DD,DM,DS>::non_const_value_type >::value
&&
// Source memory space is accessible to destination memory space
Impl::VerifyExecutionCanAccessMemorySpace< typename View<DT,DL,DD,DM,DS>::memory_space
, typename View<ST,SL,SD,SM,SS>::memory_space >::value
&&
// Same non-zero rank
( unsigned( View<DT,DL,DD,DM,DS>::rank ) ==
unsigned( View<ST,SL,SD,SM,SS>::rank ) )
&&
( 0 < unsigned( View<DT,DL,DD,DM,DS>::rank ) )
&&
// Different layout or different specialization:
( ( ! Impl::is_same< typename View<DT,DL,DD,DM,DS>::array_layout ,
typename View<ST,SL,SD,SM,SS>::array_layout >::value )
||
( ! Impl::is_same< DS , SS >::value )
)
)>::type * = 0 )
{
typedef View< DT, DL, DD, DM, DS > dst_type ;
typedef View< ST, SL, SD, SM, SS > src_type ;
assert_shapes_equal_dimension( dst.shape() , src.shape() );
Impl::ViewRemap< dst_type , src_type >( dst , src );
}
}
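// --- Illustrative sketch (editorial addition): these overloads take an
// execution space instance so the copy is ordered within that instance's
// stream of work rather than the default one. Hypothetical usage:
inline void example_deep_copy_on_space()
{
  Kokkos::DefaultExecutionSpace exec ;
  Kokkos::View<int*> a( "a" , 100 );
  Kokkos::View<int*> b( "b" , 100 );
  Kokkos::deep_copy( exec , a , b );  // copy ordered within 'exec'
  exec.fence();                       // wait for the copy to complete
}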
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
template< class T , class L , class D , class M , class S >
typename Impl::enable_if<(
View<T,L,D,M,S>::is_managed &&
!Impl::is_same<L,LayoutStride>::value
), typename View<T,L,D,M,S>::HostMirror >::type
inline
create_mirror( const View<T,L,D,M,S> & src )
{
typedef View<T,L,D,M,S> view_type ;
typedef typename view_type::HostMirror host_view_type ;
// 'src' is managed, therefore we can allocate a
// compatible host_view through the ordinary constructor.
std::string label = src.tracker().label();
label.append("_mirror");
return host_view_type( label ,
src.dimension_0() ,
src.dimension_1() ,
src.dimension_2() ,
src.dimension_3() ,
src.dimension_4() ,
src.dimension_5() ,
src.dimension_6() ,
src.dimension_7() );
}
template< class T , class L , class D , class M , class S >
typename Impl::enable_if<(
View<T,L,D,M,S>::is_managed &&
Impl::is_same<L,LayoutStride>::value
), typename View<T,L,D,M,S>::HostMirror >::type
inline
create_mirror( const View<T,L,D,M,S> & src )
{
typedef View<T,L,D,M,S> view_type ;
typedef typename view_type::HostMirror host_view_type ;
// 'src' is managed, therefore we can allocate a
// compatible host_view through the ordinary constructor.
std::string label = src.tracker().label();
label.append("_mirror");
LayoutStride layout;
src.stride(layout.stride);
layout.dimension[0] = src.dimension_0();
layout.dimension[1] = src.dimension_1();
layout.dimension[2] = src.dimension_2();
layout.dimension[3] = src.dimension_3();
layout.dimension[4] = src.dimension_4();
layout.dimension[5] = src.dimension_5();
layout.dimension[6] = src.dimension_6();
layout.dimension[7] = src.dimension_7();
return host_view_type( label , layout );
}
template< class T , class L , class D , class M , class S >
typename Impl::enable_if<(
View<T,L,D,M,S>::is_managed &&
Impl::ViewAssignable< typename View<T,L,D,M,S>::HostMirror , View<T,L,D,M,S> >::value
), typename View<T,L,D,M,S>::HostMirror >::type
inline
create_mirror_view( const View<T,L,D,M,S> & src )
{
return src ;
}
template< class T , class L , class D , class M , class S >
typename Impl::enable_if<(
View<T,L,D,M,S>::is_managed &&
! Impl::ViewAssignable< typename View<T,L,D,M,S>::HostMirror , View<T,L,D,M,S> >::value
), typename View<T,L,D,M,S>::HostMirror >::type
inline
create_mirror_view( const View<T,L,D,M,S> & src )
{
return create_mirror( src );
}
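// --- Illustrative sketch (editorial addition): the canonical host/device
// round trip built from the functions above. create_mirror_view() returns
// 'd' itself when the HostMirror type is assignable from it, so the extra
// allocation and copy are skipped for host-resident views.
inline void example_mirror_round_trip()
{
  Kokkos::View<double*> d( "d" , 100 );   // device- or host-resident view
  Kokkos::View<double*>::HostMirror h = Kokkos::create_mirror_view( d );
  // ... fill h on the host ...
  Kokkos::deep_copy( d , h );             // push the host data to 'd'
}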
//----------------------------------------------------------------------------
/** \brief Resize a view, copying old data to the new allocation at the corresponding indices. */
template< class T , class L , class D , class M , class S >
inline
void resize( View<T,L,D,M,S> & v ,
const typename Impl::enable_if< ViewTraits<T,L,D,M>::is_managed , size_t >::type n0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 )
{
typedef View<T,L,D,M,S> view_type ;
const std::string label = v.tracker().label();
view_type v_resized( label, n0, n1, n2, n3, n4, n5, n6, n7 );
Impl::ViewRemap< view_type , view_type >( v_resized , v );
+ view_type::execution_space::fence();
+
v = v_resized ;
}
/** \brief Reallocate a view without copying old data to the new allocation. */
template< class T , class L , class D , class M , class S >
inline
void realloc( View<T,L,D,M,S> & v ,
const typename Impl::enable_if< ViewTraits<T,L,D,M>::is_managed , size_t >::type n0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 )
{
typedef View<T,L,D,M,S> view_type ;
// Query the current label and reuse it.
const std::string label = v.tracker().label();
v = view_type(); // deallocate first, if this is the only view referencing the memory.
v = view_type( label, n0, n1, n2, n3, n4, n5, n6, n7 );
}
} // namespace Kokkos
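// --- Illustrative sketch (editorial addition): resize() preserves element
// values at surviving indices via ViewRemap, while realloc() discards them.
inline void example_resize_vs_realloc()
{
  Kokkos::View<int*> v( "v" , 10 );
  Kokkos::resize ( v , 20 );   // v(0)..v(9) keep their old values
  Kokkos::realloc( v , 40 );   // old contents are not carried over
}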
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
template< class D , class A1 , class A2 , class A3 , class S ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , ArgType6 , ArgType7
>::type
subview( const View<D,A1,A2,A3,S> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 ,
const ArgType4 & arg4 ,
const ArgType5 & arg5 ,
const ArgType6 & arg6 ,
const ArgType7 & arg7 )
{
typedef typename
Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , ArgType6 , ArgType7
>::type
DstViewType ;
return DstViewType( src, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 );
}
template< class D , class A1 , class A2 , class A3 , class S ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
class ArgType4 , class ArgType5 , class ArgType6 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , ArgType6 , void
>::type
subview( const View<D,A1,A2,A3,S> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 ,
const ArgType4 & arg4 ,
const ArgType5 & arg5 ,
const ArgType6 & arg6 )
{
typedef typename
Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , ArgType6 , void
>::type
DstViewType ;
return DstViewType( src, arg0, arg1, arg2, arg3, arg4, arg5, arg6 );
}
template< class D , class A1 , class A2 , class A3 , class S ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
class ArgType4 , class ArgType5 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , void , void
>::type
subview( const View<D,A1,A2,A3,S> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 ,
const ArgType4 & arg4 ,
const ArgType5 & arg5 )
{
typedef typename
Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , void , void
>::type
DstViewType ;
return DstViewType( src, arg0, arg1, arg2, arg3, arg4, arg5 );
}
template< class D , class A1 , class A2 , class A3 , class S ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
class ArgType4 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , void , void , void
>::type
subview( const View<D,A1,A2,A3,S> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 ,
const ArgType4 & arg4 )
{
typedef typename
Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , void , void , void
>::type
DstViewType ;
return DstViewType( src, arg0, arg1, arg2, arg3, arg4 );
}
template< class D , class A1 , class A2 , class A3 , class S ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, void , void , void , void
>::type
subview( const View<D,A1,A2,A3,S> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 )
{
typedef typename
Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, void , void , void , void
>::type
DstViewType ;
return DstViewType( src, arg0, arg1, arg2, arg3 );
}
template< class D , class A1 , class A2 , class A3 , class S ,
class ArgType0 , class ArgType1 , class ArgType2 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , ArgType2 , void
, void , void , void , void
>::type
subview( const View<D,A1,A2,A3,S> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 )
{
typedef typename
Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , ArgType2 , void
, void , void , void , void
>::type
DstViewType ;
return DstViewType( src, arg0, arg1, arg2 );
}
template< class D , class A1 , class A2 , class A3 , class S ,
class ArgType0 , class ArgType1 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , void , void
, void , void , void , void
>::type
subview( const View<D,A1,A2,A3,S> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 )
{
typedef typename
Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , ArgType1 , void , void
, void , void , void , void
>::type
DstViewType ;
return DstViewType( src, arg0, arg1 );
}
template< class D , class A1 , class A2 , class A3 , class S ,
class ArgType0 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , void , void , void
, void , void , void , void
>::type
subview( const View<D,A1,A2,A3,S> & src ,
const ArgType0 & arg0 )
{
typedef typename
Impl::ViewSubview< View<D,A1,A2,A3,S>
, ArgType0 , void , void , void
, void , void , void , void
>::type
DstViewType ;
return DstViewType( src, arg0 );
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#include <impl/Kokkos_ViewDefault.hpp>
#include <impl/Kokkos_Atomic_View.hpp>
#include <impl/Kokkos_ViewOffset.hpp>
#include <impl/Kokkos_ViewSupport.hpp>
namespace Kokkos {
/** \brief Tag denoting that a subview should capture all of a dimension */
struct ALL { KOKKOS_INLINE_FUNCTION ALL(){} };
}
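// --- Illustrative sketch (editorial addition): the subview() overloads
// above accept an index, a range, or ALL per dimension. 'm' is hypothetical;
// std::pair ranges assume <utility> is reachable through the Kokkos headers.
inline void example_subview()
{
  Kokkos::View<double**> m( "m" , 10 , 20 );
  // Row 3 as a rank-1 view of extent 20:
  auto row = Kokkos::subview( m , 3 , Kokkos::ALL() );
  // Columns [4,8) of every row as a rank-2 view:
  auto cols = Kokkos::subview( m , Kokkos::ALL() , std::pair<int,int>( 4 , 8 ) );
  (void) row ; (void) cols ;
}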
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
-#include <KokkosExp_View.hpp>
-
-#else
-
-// Must define before including <impl/Kokkos_ViewOffset.hpp>
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-struct ALL_t ;
-}
-}
-using ALL = Experimental::Impl::ALL_t ;
-}
-
-#include <impl/Kokkos_ViewOffset.hpp>
-#include <impl/Kokkos_ViewSupport.hpp>
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
#include <KokkosExp_View.hpp>
-#endif /* #if defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
-
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif
diff --git a/lib/kokkos/core/src/Kokkos_hwloc.hpp b/lib/kokkos/core/src/Kokkos_hwloc.hpp
index a0b007f64..ff713c952 100644
--- a/lib/kokkos/core/src/Kokkos_hwloc.hpp
+++ b/lib/kokkos/core/src/Kokkos_hwloc.hpp
@@ -1,140 +1,144 @@
/*
//@HEADER
// ************************************************************************
-//
+//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
-//
+//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
-//
+//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
-//
+//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_HWLOC_HPP
#define KOKKOS_HWLOC_HPP
#include <utility>
namespace Kokkos {
/** \brief Minimal subset of logical 'hwloc' functionality available
* from http://www.open-mpi.org/projects/hwloc/.
*
* The calls are NOT thread safe in order to avoid mutexes,
* memory allocations, or other actions which could give the
* runtime system an opportunity to migrate the threads or
* touch allocated memory during the function calls.
*
* All calls to these functions should be performed by a thread
* when it has guaranteed exclusive access; e.g., for OpenMP
* within a 'critical' region.
*/
namespace hwloc {
/** \brief Query if hwloc is available */
bool available();
/** \brief Query number of available NUMA regions.
* This will be less than the hardware capacity
* if the MPI process is pinned to a NUMA region.
*/
unsigned get_available_numa_count();
/** \brief Query number of available cores per NUMA region.
* This will be less than the hardware capacity
* if the MPI process is pinned to a set of cores.
*/
unsigned get_available_cores_per_numa();
/** \brief Query number of available "hard" threads per core; i.e., hyperthreads */
unsigned get_available_threads_per_core();
} /* namespace hwloc */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Internal functions for binding persistent spawned threads.
namespace Kokkos {
namespace hwloc {
/** \brief Recommend mapping of threads onto cores.
*
* If thread_count == 0 then choose and set a value.
* If use_numa_count == 0 then choose and set a value.
* If use_cores_per_numa == 0 then choose and set a value.
*
* Return 0 if asynchronous;
* return 1 if synchronous and threads_coord[0] is the process core.
*/
unsigned thread_mapping( const char * const label ,
const bool allow_async ,
unsigned & thread_count ,
unsigned & use_numa_count ,
unsigned & use_cores_per_numa ,
std::pair<unsigned,unsigned> threads_coord[] );
/** \brief Query core-coordinate of the current thread
* with respect to the core_topology.
*
- * As long as the thread is running within the
+ * As long as the thread is running within the
* process binding the following condition holds.
*
* core_coordinate.first < core_topology.first
* core_coordinate.second < core_topology.second
*/
std::pair<unsigned,unsigned> get_this_thread_coordinate();
/** \brief Bind the current thread to a core. */
bool bind_this_thread( const std::pair<unsigned,unsigned> );
+
+/** \brief Can hwloc bind threads? */
+bool can_bind_threads();
+
/** \brief Bind the current thread to one of the cores in the list.
* Set that entry to (~0,~0) and return the index.
* If binding fails return ~0.
*/
unsigned bind_this_thread( const unsigned coordinate_count ,
std::pair<unsigned,unsigned> coordinate[] );
/** \brief Unbind the current thread back to the original process binding */
bool unbind_this_thread();
} /* namespace hwloc */
} /* namespace Kokkos */
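// --- Illustrative sketch (editorial addition): a typical topology query
// before spawning threads; each count shrinks if the process is pinned.
inline unsigned example_hwloc_capacity()
{
  if ( ! Kokkos::hwloc::available() ) return 0 ;
  const unsigned numa    = Kokkos::hwloc::get_available_numa_count();
  const unsigned cores   = Kokkos::hwloc::get_available_cores_per_numa();
  const unsigned threads = Kokkos::hwloc::get_available_threads_per_core();
  return numa * cores * threads ;  // hardware threads visible to the process
}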
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #define KOKKOS_HWLOC_HPP */
diff --git a/lib/kokkos/core/src/Makefile b/lib/kokkos/core/src/Makefile
index 8bb350859..e7dc1ebee 100644
--- a/lib/kokkos/core/src/Makefile
+++ b/lib/kokkos/core/src/Makefile
@@ -1,127 +1,131 @@
KOKKOS_PATH = ../..
PREFIX ?= /usr/local/lib/kokkos
default: messages build-lib
echo "End Build"
-
+
include $(KOKKOS_PATH)/Makefile.kokkos
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
- CXX = nvcc_wrapper
+ CXX = $(NVCC_WRAPPER)
CXXFLAGS ?= -O3
- LINK = nvcc_wrapper
+ LINK = $(NVCC_WRAPPER)
LINKFLAGS ?=
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= g++
LINKFLAGS ?=
endif
PWD = $(shell pwd)
KOKKOS_HEADERS_INCLUDE = $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
KOKKOS_HEADERS_INCLUDE_IMPL = $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
KOKKOS_HEADERS_INCLUDE_IMPL += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)
CONDITIONAL_COPIES =
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
CONDITIONAL_COPIES += copy-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
CONDITIONAL_COPIES += copy-threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
KOKKOS_HEADERS_QTHREAD += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp)
CONDITIONAL_COPIES += copy-qthread
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
CONDITIONAL_COPIES += copy-openmp
endif
messages:
echo "Start Build"
build-makefile-kokkos:
rm -f Makefile.kokkos
echo "#Global Settings used to generate this library" >> Makefile.kokkos
echo "KOKKOS_PATH = $(PREFIX)" >> Makefile.kokkos
echo "KOKKOS_DEVICES = $(KOKKOS_DEVICES)" >> Makefile.kokkos
echo "KOKKOS_ARCH = $(KOKKOS_ARCH)" >> Makefile.kokkos
echo "KOKKOS_DEBUG = $(KOKKOS_DEBUG)" >> Makefile.kokkos
echo "KOKKOS_USE_TPLS = $(KOKKOS_USE_TPLS)" >> Makefile.kokkos
echo "KOKKOS_CXX_STANDARD = $(KOKKOS_CXX_STANDARD)" >> Makefile.kokkos
+ echo "KOKKOS_OPTIONS = $(KOKKOS_OPTIONS)" >> Makefile.kokkos
echo "KOKKOS_CUDA_OPTIONS = $(KOKKOS_CUDA_OPTIONS)" >> Makefile.kokkos
echo "CXX ?= $(CXX)" >> Makefile.kokkos
+ echo "NVCC_WRAPPER ?= $(PREFIX)/bin/nvcc_wrapper" >> Makefile.kokkos
echo "" >> Makefile.kokkos
echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> Makefile.kokkos
echo "KOKKOS_HEADERS = $(KOKKOS_HEADERS)" >> Makefile.kokkos
echo "KOKKOS_SRC = $(KOKKOS_SRC)" >> Makefile.kokkos
echo "" >> Makefile.kokkos
echo "#Variables used in application Makefiles" >> Makefile.kokkos
echo "KOKKOS_CPP_DEPENDS = $(KOKKOS_CPP_DEPENDS)" >> Makefile.kokkos
echo "KOKKOS_CXXFLAGS = $(KOKKOS_CXXFLAGS)" >> Makefile.kokkos
echo "KOKKOS_CPPFLAGS = $(KOKKOS_CPPFLAGS)" >> Makefile.kokkos
echo "KOKKOS_LINK_DEPENDS = $(KOKKOS_LINK_DEPENDS)" >> Makefile.kokkos
echo "KOKKOS_LIBS = $(KOKKOS_LIBS)" >> Makefile.kokkos
echo "KOKKOS_LDFLAGS = $(KOKKOS_LDFLAGS)" >> Makefile.kokkos
sed \
-e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \
-e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \
-e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \
-e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \
-e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \
-e 's|= KokkosCore_config.h|= $(PREFIX)/include/KokkosCore_config.h|g' Makefile.kokkos \
> Makefile.kokkos.tmp
mv -f Makefile.kokkos.tmp Makefile.kokkos
build-lib: build-makefile-kokkos $(KOKKOS_LINK_DEPENDS)
mkdir:
mkdir -p $(PREFIX)
+ mkdir -p $(PREFIX)/bin
mkdir -p $(PREFIX)/include
mkdir -p $(PREFIX)/lib
mkdir -p $(PREFIX)/include/impl
copy-cuda: mkdir
mkdir -p $(PREFIX)/include/Cuda
cp $(KOKKOS_HEADERS_CUDA) $(PREFIX)/include/Cuda
-
+
copy-threads: mkdir
mkdir -p $(PREFIX)/include/Threads
cp $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads
copy-qthread: mkdir
mkdir -p $(PREFIX)/include/Qthread
cp $(KOKKOS_HEADERS_QTHREAD) $(PREFIX)/include/Qthread
copy-openmp: mkdir
mkdir -p $(PREFIX)/include/OpenMP
cp $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP
install: mkdir $(CONDITIONAL_COPIES) build-lib
+ cp $(NVCC_WRAPPER) $(PREFIX)/bin
cp $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include
cp $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl
cp Makefile.kokkos $(PREFIX)
cp libkokkos.a $(PREFIX)/lib
cp KokkosCore_config.h $(PREFIX)/include
-
+
clean: kokkos-clean
rm Makefile.kokkos
diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp
index f8393611e..f1a8397e9 100644
--- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp
+++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp
@@ -1,496 +1,579 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_OPENMP_PARALLEL_HPP
#define KOKKOS_OPENMP_PARALLEL_HPP
#include <omp.h>
#include <Kokkos_Parallel.hpp>
#include <OpenMP/Kokkos_OpenMPexec.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelFor< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > >
+class ParallelFor< FunctorType
+ , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP >
+ >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > Policy ;
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::WorkRange WorkRange ;
+ typedef typename Policy::member_type Member ;
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< Impl::is_same< typename PType::work_tag , void >::value ,
- const FunctorType & >::type functor
- , const PType & range )
+ const FunctorType m_functor ;
+ const Policy m_policy ;
+
+ template< class TagType >
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member ibeg , const Member iend )
{
- const typename PType::member_type work_end = range.end();
- for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
+ #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
+ #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
+ #pragma ivdep
+ #endif
+ #endif
+ for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) {
functor( iwork );
}
}
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< ! Impl::is_same< typename PType::work_tag , void >::value ,
- const FunctorType & >::type functor
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member ibeg , const Member iend )
{
- const typename PType::member_type work_end = range.end();
- for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
- functor( typename PType::work_tag() , iwork );
+ const TagType t{} ;
+ #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
+ #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
+ #pragma ivdep
+ #endif
+ #endif
+ for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) {
+ functor( t , iwork );
}
}
public:
inline
- ParallelFor( const FunctorType & functor
- , const Policy & policy )
+ void execute() const
{
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for");
OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for");
#pragma omp parallel
{
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
- driver( functor , typename Policy::WorkRange( policy , exec.pool_rank() , exec.pool_size() ) );
+
+ const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() );
+
+ ParallelFor::template exec_range< WorkTag >( m_functor , range.begin() , range.end() );
}
/* END #pragma omp parallel */
}
+
+ inline
+ ParallelFor( const FunctorType & arg_functor
+ , const Policy & arg_policy )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ {}
};
} // namespace Impl
} // namespace Kokkos
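// --- Illustrative sketch (editorial addition): the WorkTag dispatch in
// exec_range() above selects between functor(i) and functor(Tag(),i).
// 'ExampleScale' is a hypothetical tagged functor.
struct ExampleScale {
  struct Double {};                    // work tag
  double * a ;
  KOKKOS_INLINE_FUNCTION
  void operator()( const Double & , const int i ) const { a[i] *= 2.0 ; }
};
// Dispatch (with 'n' a placeholder extent):
// Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::OpenMP , ExampleScale::Double >( 0 , n )
//                     , ExampleScale{ a } );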
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelReduce< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > >
+class ParallelReduce< FunctorType
+ , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP >
+ >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > Policy ;
- typedef typename Policy::work_tag WorkTag ;
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
- typedef Kokkos::Impl::FunctorValueJoin< FunctorType , WorkTag > ValueJoin ;
+
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::WorkRange WorkRange ;
+ typedef typename Policy::member_type Member ;
+
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ;
+ typedef Kokkos::Impl::FunctorValueJoin< FunctorType, WorkTag > ValueJoin ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< Impl::is_same< typename PType::work_tag , void >::value ,
- const FunctorType & >::type functor
- , reference_type update
- , const PType & range )
+ const FunctorType m_functor ;
+ const Policy m_policy ;
+ const pointer_type m_result_ptr ;
+
+ template< class TagType >
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member ibeg , const Member iend
+ , reference_type update )
{
- const typename PType::member_type work_end = range.end();
- for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
+ #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
+ #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
+ #pragma ivdep
+ #endif
+ #endif
+ for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) {
functor( iwork , update );
}
}
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< ! Impl::is_same< typename PType::work_tag , void >::value ,
- const FunctorType & >::type functor
- , reference_type update
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member ibeg , const Member iend
+ , reference_type update )
{
- const typename PType::member_type work_end = range.end();
- for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
- functor( typename PType::work_tag() , iwork , update );
+ const TagType t{} ;
+ #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
+ #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
+ #pragma ivdep
+ #endif
+ #endif
+ for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) {
+ functor( t , iwork , update );
}
}
public:
- //----------------------------------------
-
- template< class ViewType >
inline
- ParallelReduce( typename Impl::enable_if<
- ( Impl::is_view< ViewType >::value &&
- Impl::is_same< typename ViewType::memory_space , HostSpace >::value
- ), const FunctorType & >::type functor
- , const Policy & policy
- , const ViewType & result_view )
- {
- OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce");
- OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce");
-
- OpenMPexec::resize_scratch( ValueTraits::value_size( functor ) , 0 );
-
-#pragma omp parallel
+ void execute() const
{
- OpenMPexec & exec = * OpenMPexec::get_thread_omp();
+ OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce");
+ OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce");
- driver( functor
- , ValueInit::init( functor , exec.scratch_reduce() )
- , typename Policy::WorkRange( policy , exec.pool_rank() , exec.pool_size() )
- );
- }
+ OpenMPexec::resize_scratch( ValueTraits::value_size( m_functor ) , 0 );
+
+#pragma omp parallel
+ {
+ OpenMPexec & exec = * OpenMPexec::get_thread_omp();
+ const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() );
+ ParallelReduce::template exec_range< WorkTag >
+ ( m_functor , range.begin() , range.end()
+ , ValueInit::init( m_functor , exec.scratch_reduce() ) );
+ }
/* END #pragma omp parallel */
- {
+ // Reduction:
+
const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() );
for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) {
- ValueJoin::join( functor , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() );
+ ValueJoin::join( m_functor , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() );
}
- Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , ptr );
+ Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( m_functor , ptr );
- if ( result_view.ptr_on_device() ) {
- const int n = ValueTraits::value_count( functor );
+ if ( m_result_ptr ) {
+ const int n = ValueTraits::value_count( m_functor );
- for ( int j = 0 ; j < n ; ++j ) { result_view.ptr_on_device()[j] = ptr[j] ; }
+ for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; }
}
}
- }
+
+ //----------------------------------------
+
+ template< class ViewType >
+ inline
+ ParallelReduce( const FunctorType & arg_functor
+ , const Policy & arg_policy
+ , const ViewType & arg_result_view )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ , m_result_ptr( arg_result_view.ptr_on_device() )
+ {
+ static_assert( Kokkos::is_view< ViewType >::value
+ , "Reduction result on Kokkos::OpenMP must be a Kokkos::View" );
+
+ static_assert( std::is_same< typename ViewType::memory_space
+ , Kokkos::HostSpace >::value
+ , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );
+ }
};
} // namespace Impl
} // namespace Kokkos
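// --- Illustrative sketch (editorial addition): a reduction functor matching
// the ParallelReduce driver above; per the static_asserts, the result target
// must be host-accessible. 'ExampleSum' is hypothetical.
struct ExampleSum {
  const double * a ;
  KOKKOS_INLINE_FUNCTION
  void operator()( const int i , double & update ) const { update += a[i] ; }
};
// Dispatch (with 'n' a placeholder extent):
// double sum = 0 ;
// Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::OpenMP >( 0 , n )
//                        , ExampleSum{ a } , sum );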
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelScan< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > >
+class ParallelScan< FunctorType
+ , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP >
+ >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > Policy ;
- typedef typename Policy::work_tag WorkTag ;
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
- typedef Kokkos::Impl::FunctorValueJoin< FunctorType , WorkTag > ValueJoin ;
- typedef Kokkos::Impl::FunctorValueOps< FunctorType , WorkTag > ValueOps ;
+
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::WorkRange WorkRange ;
+ typedef typename Policy::member_type Member ;
+
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ;
+ typedef Kokkos::Impl::FunctorValueJoin< FunctorType, WorkTag > ValueJoin ;
+ typedef Kokkos::Impl::FunctorValueOps< FunctorType, WorkTag > ValueOps ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< Impl::is_same< typename PType::work_tag , void >::value ,
- const FunctorType & >::type functor
- , reference_type update
- , const PType & range
- , const bool final )
+ const FunctorType m_functor ;
+ const Policy m_policy ;
+
+ template< class TagType >
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member ibeg , const Member iend
+ , reference_type update , const bool final )
{
- const typename PType::member_type work_end = range.end();
- for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
+ #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
+ #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
+ #pragma ivdep
+ #endif
+ #endif
+ for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) {
functor( iwork , update , final );
}
}
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< ! Impl::is_same< typename PType::work_tag , void >::value ,
- const FunctorType & >::type functor
- , reference_type update
- , const PType & range
- , const bool final )
+ template< class TagType >
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member ibeg , const Member iend
+ , reference_type update , const bool final )
{
- const typename PType::member_type work_end = range.end();
- for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
- functor( typename PType::work_tag() , iwork , update , final );
+ const TagType t{} ;
+ #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
+ #ifdef KOKKOS_HAVE_PRAGMA_IVDEP
+ #pragma ivdep
+ #endif
+ #endif
+ for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) {
+ functor( t , iwork , update , final );
}
}
public:
- //----------------------------------------
-
inline
- ParallelScan( const FunctorType & functor
- , const Policy & policy )
- {
- OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_scan");
- OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_scan");
+ void execute() const
+ {
+ OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_scan");
+ OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_scan");
- OpenMPexec::resize_scratch( 2 * ValueTraits::value_size( functor ) , 0 );
+ OpenMPexec::resize_scratch( 2 * ValueTraits::value_size( m_functor ) , 0 );
#pragma omp parallel
- {
- OpenMPexec & exec = * OpenMPexec::get_thread_omp();
-
- driver( functor
- , ValueInit::init( functor , pointer_type( exec.scratch_reduce() ) + ValueTraits::value_count( functor ) )
- , typename Policy::WorkRange( policy , exec.pool_rank() , exec.pool_size() )
- , false );
- }
+ {
+ OpenMPexec & exec = * OpenMPexec::get_thread_omp();
+ const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() );
+ const pointer_type ptr =
+ pointer_type( exec.scratch_reduce() ) +
+ ValueTraits::value_count( m_functor );
+ ParallelScan::template exec_range< WorkTag >
+ ( m_functor , range.begin() , range.end()
+ , ValueInit::init( m_functor , ptr ) , false );
+ }
/* END #pragma omp parallel */
- {
- const unsigned thread_count = OpenMPexec::pool_size();
- const unsigned value_count = ValueTraits::value_count( functor );
+ {
+ const unsigned thread_count = OpenMPexec::pool_size();
+ const unsigned value_count = ValueTraits::value_count( m_functor );
- pointer_type ptr_prev = 0 ;
+ pointer_type ptr_prev = 0 ;
- for ( unsigned rank_rev = thread_count ; rank_rev-- ; ) {
+ for ( unsigned rank_rev = thread_count ; rank_rev-- ; ) {
- pointer_type ptr = pointer_type( OpenMPexec::pool_rev(rank_rev)->scratch_reduce() );
+ pointer_type ptr = pointer_type( OpenMPexec::pool_rev(rank_rev)->scratch_reduce() );
- if ( ptr_prev ) {
- for ( unsigned i = 0 ; i < value_count ; ++i ) { ptr[i] = ptr_prev[ i + value_count ] ; }
- ValueJoin::join( functor , ptr + value_count , ptr );
- }
- else {
- ValueInit::init( functor , ptr );
- }
+ if ( ptr_prev ) {
+ for ( unsigned i = 0 ; i < value_count ; ++i ) { ptr[i] = ptr_prev[ i + value_count ] ; }
+ ValueJoin::join( m_functor , ptr + value_count , ptr );
+ }
+ else {
+ ValueInit::init( m_functor , ptr );
+ }
- ptr_prev = ptr ;
+ ptr_prev = ptr ;
+ }
}
- }
#pragma omp parallel
- {
- OpenMPexec & exec = * OpenMPexec::get_thread_omp();
-
- driver( functor
- , ValueOps::reference( pointer_type( exec.scratch_reduce() ) )
- , typename Policy::WorkRange( policy , exec.pool_rank() , exec.pool_size() )
- , true );
- }
+ {
+ OpenMPexec & exec = * OpenMPexec::get_thread_omp();
+ const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() );
+ const pointer_type ptr = pointer_type( exec.scratch_reduce() );
+ ParallelScan::template exec_range< WorkTag >
+ ( m_functor , range.begin() , range.end()
+ , ValueOps::reference( ptr ) , true );
+ }
/* END #pragma omp parallel */
+ }
- }
+ //----------------------------------------
+
+ inline
+ ParallelScan( const FunctorType & arg_functor
+ , const Policy & arg_policy )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ {}
//----------------------------------------
};
} // namespace Impl
} // namespace Kokkos
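As a hedged sketch of the functor contract consumed by this ParallelScan (not in the diff; `data` and `n` are hypothetical): execute() first calls exec_range() with final==false to accumulate per-thread partial sums, the serial inter-thread loop converts them into exclusive offsets, and a second pass with final==true writes results.

    struct ExclusiveSum {
      typedef long value_type ;
      Kokkos::View< long* , Kokkos::HostSpace > data ;   // hypothetical input/output
      KOKKOS_INLINE_FUNCTION
      void operator()( const int i , long & update , const bool final ) const
      {
        if ( final ) { data(i) = update ; }  // exclusive prefix before own contribution
        update += 1 ;                        // contribution of index i
      }
    };
    // Kokkos::parallel_scan( Kokkos::RangePolicy<Kokkos::OpenMP>(0,n) , ExclusiveSum{ data } );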
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 >
-class ParallelFor< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > >
+class ParallelFor< FunctorType
+ , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP >
+ >
{
private:
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > Policy ;
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::member_type Member ;
+
+ const FunctorType m_functor ;
+ const Policy m_policy ;
+ const int m_shmem_size ;
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
- const FunctorType & >::type functor
- , const typename Policy::member_type & member )
- { functor( member ); }
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_team( const FunctorType & functor , Member member )
+ {
+ for ( ; member.valid() ; member.next() ) {
+ functor( member );
+ }
+ }
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
- const FunctorType & >::type functor
- , const typename Policy::member_type & member )
- { functor( TagType() , member ); }
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_team( const FunctorType & functor , Member member )
+ {
+ const TagType t{} ;
+ for ( ; member.valid() ; member.next() ) {
+ functor( t , member );
+ }
+ }
public:
inline
- ParallelFor( const FunctorType & functor ,
- const Policy & policy )
- {
- OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for");
- OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for");
+ void execute() const
+ {
+ OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for");
+ OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for");
- const size_t team_reduce_size = Policy::member_type::team_reduce_size();
- const size_t team_shmem_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
+ const size_t team_reduce_size = Policy::member_type::team_reduce_size();
- OpenMPexec::resize_scratch( 0 , team_reduce_size + team_shmem_size );
+ OpenMPexec::resize_scratch( 0 , team_reduce_size + m_shmem_size );
#pragma omp parallel
- {
- typename Policy::member_type member( * OpenMPexec::get_thread_omp() , policy , team_shmem_size );
-
- for ( ; member.valid() ; member.next() ) {
- ParallelFor::template driver< typename Policy::work_tag >( functor , member );
+ {
+ ParallelFor::template exec_team< WorkTag >
+ ( m_functor
+ , Member( * OpenMPexec::get_thread_omp(), m_policy, m_shmem_size) );
}
- }
/* END #pragma omp parallel */
- }
+ }
- void wait() {}
+ inline
+ ParallelFor( const FunctorType & arg_functor ,
+ const Policy & arg_policy )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ , m_shmem_size( arg_policy.scratch_size() + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) )
+ {}
};
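A hedged usage sketch for this team-policy ParallelFor (not in the diff; `league_size` and `team_size` are hypothetical). The league iteration is hidden behind member.valid()/member.next() inside exec_team():

    struct TeamHello {
      typedef Kokkos::TeamPolicy< Kokkos::OpenMP > policy_type ;
      KOKKOS_INLINE_FUNCTION
      void operator()( const policy_type::member_type & member ) const
      {
        (void) member.league_rank() ;  // one call per league slot this thread owns
      }
    };
    // Kokkos::parallel_for( TeamHello::policy_type( league_size , team_size ) , TeamHello() );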
template< class FunctorType , class Arg0 , class Arg1 >
-class ParallelReduce< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > >
+class ParallelReduce< FunctorType
+ , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP >
+ >
{
private:
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > Policy ;
- typedef typename Policy::work_tag WorkTag ;
+
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::member_type Member ;
+
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , WorkTag > ValueJoin ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
+ const FunctorType m_functor ;
+ const Policy m_policy ;
+ const pointer_type m_result_ptr ;
+ const int m_shmem_size ;
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< Impl::is_same< typename PType::work_tag , void >::value ,
- const FunctorType & >::type functor
- , const typename PType::member_type & member
- , reference_type update )
- { functor( member , update ); }
-
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if< ! Impl::is_same< typename PType::work_tag , void >::value ,
- const FunctorType & >::type functor
- , const typename PType::member_type & member
- , reference_type update )
- { functor( typename PType::work_tag() , member , update ); }
-
-public:
-
- inline
- ParallelReduce( const FunctorType & functor ,
- const Policy & policy )
- {
- OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce");
-
- const size_t team_reduce_size = Policy::member_type::team_reduce_size();
- const size_t team_shmem_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
-
- OpenMPexec::resize_scratch( ValueTraits::value_size( functor ) , team_reduce_size + team_shmem_size );
-
-#pragma omp parallel
+ template< class TagType >
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_team( const FunctorType & functor , Member member , reference_type update )
{
- OpenMPexec & exec = * OpenMPexec::get_thread_omp();
-
- reference_type update = ValueInit::init( functor , exec.scratch_reduce() );
-
- for ( typename Policy::member_type member( exec , policy , team_shmem_size ); member.valid() ; member.next() ) {
- ParallelReduce::template driver< Policy >( functor , member , update );
+ for ( ; member.valid() ; member.next() ) {
+ functor( member , update );
}
}
-/* END #pragma omp parallel */
+ template< class TagType >
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_team( const FunctorType & functor , Member member , reference_type update )
{
- typedef Kokkos::Impl::FunctorValueJoin< FunctorType , WorkTag , reference_type > Join ;
-
- const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() );
-
- for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) {
- Join::join( functor , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() );
+ const TagType t{} ;
+ for ( ; member.valid() ; member.next() ) {
+ functor( t , member , update );
}
-
- Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , ptr );
}
- }
- template< class ViewType >
+public:
+
inline
- ParallelReduce( const FunctorType & functor ,
- const Policy & policy ,
- const ViewType & result )
- {
- OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce");
+ void execute() const
+ {
+ OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce");
- const size_t team_reduce_size = Policy::member_type::team_reduce_size();
- const size_t team_shmem_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
+ const size_t team_reduce_size = Policy::member_type::team_reduce_size();
- OpenMPexec::resize_scratch( ValueTraits::value_size( functor ) , team_reduce_size + team_shmem_size );
+ OpenMPexec::resize_scratch( ValueTraits::value_size( m_functor ) , team_reduce_size + m_shmem_size );
#pragma omp parallel
- {
- OpenMPexec & exec = * OpenMPexec::get_thread_omp();
-
- reference_type update = ValueInit::init( functor , exec.scratch_reduce() );
+ {
+ OpenMPexec & exec = * OpenMPexec::get_thread_omp();
- for ( typename Policy::member_type member( exec , policy , team_shmem_size ); member.valid() ; member.next() ) {
- ParallelReduce::template driver< Policy >( functor , member , update );
+ ParallelReduce::template exec_team< WorkTag >
+ ( m_functor
+ , Member( exec , m_policy , m_shmem_size )
+ , ValueInit::init( m_functor , exec.scratch_reduce() ) );
}
- }
/* END #pragma omp parallel */
- {
- const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() );
+ {
+ const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() );
- for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) {
- ValueJoin::join( functor , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() );
- }
+ int max_active_threads = OpenMPexec::pool_size();
+      if ( max_active_threads > m_policy.league_size() * m_policy.team_size() )
+        max_active_threads = m_policy.league_size() * m_policy.team_size();
+
+ for ( int i = 1 ; i < max_active_threads ; ++i ) {
+ ValueJoin::join( m_functor , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() );
+ }
- Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , ptr );
+ Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( m_functor , ptr );
- const int n = ValueTraits::value_count( functor );
+ if ( m_result_ptr ) {
+ const int n = ValueTraits::value_count( m_functor );
- for ( int j = 0 ; j < n ; ++j ) { result.ptr_on_device()[j] = ptr[j] ; }
+ for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; }
+ }
+ }
}
- }
- void wait() {}
+ template< class ViewType >
+ inline
+ ParallelReduce( const FunctorType & arg_functor ,
+ const Policy & arg_policy ,
+ const ViewType & arg_result )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ , m_result_ptr( arg_result.ptr_on_device() )
+ , m_shmem_size( arg_policy.scratch_size() + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) )
+ {}
};
} // namespace Impl
} // namespace Kokkos
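Note that execute() above joins at most league_size()*team_size() per-thread contributions, presumably so that pool threads beyond the active team set are never read. A hedged usage sketch (sizes and label hypothetical):

    struct TeamSum {
      typedef Kokkos::TeamPolicy< Kokkos::OpenMP > policy_type ;
      typedef double value_type ;
      KOKKOS_INLINE_FUNCTION
      void operator()( const policy_type::member_type & member , double & update ) const
        { update += double( member.league_rank() ) ; }
    };
    // Kokkos::View< double , Kokkos::HostSpace > total( "total" );
    // Kokkos::parallel_reduce( TeamSum::policy_type( league_size , team_size ) , TeamSum() , total );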
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* KOKKOS_OPENMP_PARALLEL_HPP */
diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp
index ed98fd2f9..3e0fc42a6 100644
--- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp
+++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp
@@ -1,364 +1,410 @@
/*
//@HEADER
// ************************************************************************
-//
+//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
-//
+//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
-//
+//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
-//
+//
// ************************************************************************
//@HEADER
*/
#include <stdio.h>
#include <limits>
#include <iostream>
#include <vector>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
#include <iostream>
#ifdef KOKKOS_HAVE_OPENMP
namespace Kokkos {
namespace Impl {
namespace {
KOKKOS_INLINE_FUNCTION
int kokkos_omp_in_parallel();
int kokkos_omp_in_critical_region = ( Kokkos::HostSpace::register_in_parallel( kokkos_omp_in_parallel ) , 0 );
KOKKOS_INLINE_FUNCTION
int kokkos_omp_in_parallel()
{
#ifndef __CUDA_ARCH__
return omp_in_parallel() && ! kokkos_omp_in_critical_region ;
#else
return 0;
#endif
}
bool s_using_hwloc = false;
} // namespace
} // namespace Impl
} // namespace Kokkos
namespace Kokkos {
namespace Impl {
int OpenMPexec::m_map_rank[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 };
int OpenMPexec::m_pool_topo[ 4 ] = { 0 };
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
OpenMPexec::Pool OpenMPexec::m_pool;
+#else
+
+OpenMPexec * OpenMPexec::m_pool[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 };
+
+#endif
+
void OpenMPexec::verify_is_process( const char * const label )
{
if ( omp_in_parallel() ) {
std::string msg( label );
msg.append( " ERROR: in parallel" );
Kokkos::Impl::throw_runtime_exception( msg );
}
}
void OpenMPexec::verify_initialized( const char * const label )
{
if ( 0 == m_pool[0] ) {
std::string msg( label );
msg.append( " ERROR: not initialized" );
Kokkos::Impl::throw_runtime_exception( msg );
}
+
+ if ( omp_get_max_threads() != Kokkos::OpenMP::thread_pool_size(0) ) {
+ std::string msg( label );
+ msg.append( " ERROR: Initialized but threads modified inappropriately" );
+ Kokkos::Impl::throw_runtime_exception( msg );
+ }
+
}
void OpenMPexec::clear_scratch()
{
#pragma omp parallel
{
const int rank_rev = m_map_rank[ omp_get_thread_num() ];
+#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+ typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ;
+ if ( m_pool[ rank_rev ] ) {
+ Record * const r = Record::get_record( m_pool[ rank_rev ] );
+ m_pool[ rank_rev ] = 0 ;
+ Record::decrement( r );
+ }
+#else
m_pool.at(rank_rev).clear();
+#endif
}
/* END #pragma omp parallel */
}
void OpenMPexec::resize_scratch( size_t reduce_size , size_t thread_size )
{
enum { ALIGN_MASK = Kokkos::Impl::MEMORY_ALIGNMENT - 1 };
enum { ALLOC_EXEC = ( sizeof(OpenMPexec) + ALIGN_MASK ) & ~ALIGN_MASK };
const size_t old_reduce_size = m_pool[0] ? m_pool[0]->m_scratch_reduce_end : 0 ;
const size_t old_thread_size = m_pool[0] ? m_pool[0]->m_scratch_thread_end - m_pool[0]->m_scratch_reduce_end : 0 ;
reduce_size = ( reduce_size + ALIGN_MASK ) & ~ALIGN_MASK ;
thread_size = ( thread_size + ALIGN_MASK ) & ~ALIGN_MASK ;
// Requesting allocation and old allocation is too small:
const bool allocate = ( old_reduce_size < reduce_size ) ||
( old_thread_size < thread_size );
if ( allocate ) {
if ( reduce_size < old_reduce_size ) { reduce_size = old_reduce_size ; }
if ( thread_size < old_thread_size ) { thread_size = old_thread_size ; }
}
const size_t alloc_size = allocate ? ALLOC_EXEC + reduce_size + thread_size : 0 ;
const int pool_size = m_pool_topo[0] ;
if ( allocate ) {
clear_scratch();
#pragma omp parallel
{
const int rank_rev = m_map_rank[ omp_get_thread_num() ];
const int rank = pool_size - ( rank_rev + 1 );
- m_pool.at(rank_rev) = HostSpace::allocate_and_track( "openmp_scratch", alloc_size );
+#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
+ typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ;
+
+ Record * const r = Record::allocate( Kokkos::HostSpace()
+ , "openmp_scratch"
+ , alloc_size );
+
+ Record::increment( r );
+
+ m_pool[ rank_rev ] = reinterpret_cast<OpenMPexec*>( r->data() );
+
+#else
+
+ #pragma omp critical
+ {
+ m_pool.at(rank_rev) = HostSpace::allocate_and_track( "openmp_scratch", alloc_size );
+ }
+
+#endif
+
new ( m_pool[ rank_rev ] ) OpenMPexec( rank , ALLOC_EXEC , reduce_size , thread_size );
}
/* END #pragma omp parallel */
}
}
} // namespace Impl
} // namespace Kokkos
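A worked sketch of the alignment rounding used by resize_scratch() above, assuming (hypothetically) that Kokkos::Impl::MEMORY_ALIGNMENT is 64; the actual value comes from the build configuration:

    #include <cstddef>

    enum { ALIGN = 64 , MASK = ALIGN - 1 };        // assumed 64-byte alignment

    constexpr std::size_t round_up( std::size_t n )
      { return ( n + MASK ) & ~std::size_t( MASK ); }  // same idiom as above

    static_assert( round_up( 100 ) == 128 , "100 rounds up to the next multiple of 64" );
    static_assert( round_up( 128 ) == 128 , "already-aligned sizes are unchanged" );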
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
//----------------------------------------------------------------------------
int OpenMP::is_initialized()
{ return 0 != Impl::OpenMPexec::m_pool[0]; }
void OpenMP::initialize( unsigned thread_count ,
unsigned use_numa_count ,
unsigned use_cores_per_numa )
{
// Before any other call to OMP query the maximum number of threads
// and save the value for re-initialization unit testing.
//Using omp_get_max_threads() is problematic in conjunction with
//hwloc on Intel: an initial call into the OpenMP runtime without a
//parallel region sets a process mask for a single core. On entering
//the first parallel region the runtime will then bind threads to
//other cores and make the process mask the aggregate of the thread
//masks. The intent seems to be to make serial code run fast if you
//compile with OpenMP enabled but don't actually use parallel regions.
//static int omp_max_threads = omp_get_max_threads();
int nthreads = 0;
#pragma omp parallel
{
#pragma omp atomic
nthreads++;
}
static int omp_max_threads = nthreads;
const bool is_initialized = 0 != Impl::OpenMPexec::m_pool[0] ;
bool thread_spawn_failed = false ;
if ( ! is_initialized ) {
// Use hwloc thread pinning if concerned with locality:
// if spreading threads across multiple NUMA regions,
// or if hyperthreading is enabled.
Impl::s_using_hwloc = hwloc::available() && (
( 1 < Kokkos::hwloc::get_available_numa_count() ) ||
( 1 < Kokkos::hwloc::get_available_threads_per_core() ) );
std::pair<unsigned,unsigned> threads_coord[ Impl::OpenMPexec::MAX_THREAD_COUNT ];
// If hwloc is available then use its maximum value.
if ( thread_count == 0 ) {
thread_count = Impl::s_using_hwloc
? Kokkos::hwloc::get_available_numa_count() *
Kokkos::hwloc::get_available_cores_per_numa() *
Kokkos::hwloc::get_available_threads_per_core()
: omp_max_threads ;
}
if(Impl::s_using_hwloc)
hwloc::thread_mapping( "Kokkos::OpenMP::initialize" ,
false /* do not allow asynchronous */ ,
thread_count ,
use_numa_count ,
use_cores_per_numa ,
threads_coord );
// Spawn threads:
omp_set_num_threads( thread_count );
// Verify OMP interaction:
if ( int(thread_count) != omp_get_max_threads() ) {
thread_spawn_failed = true ;
}
// Verify spawning and bind threads:
#pragma omp parallel
{
#pragma omp critical
{
if ( int(thread_count) != omp_get_num_threads() ) {
thread_spawn_failed = true ;
}
// Call to 'bind_this_thread' is not thread safe so place this whole block in a critical region.
// Call to 'new' may not be thread safe as well.
// Reverse the rank for threads so that the scan operation reduces to the highest rank thread.
const unsigned omp_rank = omp_get_thread_num();
- const unsigned thread_r = Impl::s_using_hwloc ? Kokkos::hwloc::bind_this_thread( thread_count , threads_coord ) : omp_rank ;
+ const unsigned thread_r = Impl::s_using_hwloc && Kokkos::hwloc::can_bind_threads()
+ ? Kokkos::hwloc::bind_this_thread( thread_count , threads_coord )
+ : omp_rank ;
Impl::OpenMPexec::m_map_rank[ omp_rank ] = thread_r ;
}
/* END #pragma omp critical */
}
/* END #pragma omp parallel */
if ( ! thread_spawn_failed ) {
Impl::OpenMPexec::m_pool_topo[0] = thread_count ;
Impl::OpenMPexec::m_pool_topo[1] = Impl::s_using_hwloc ? thread_count / use_numa_count : thread_count;
Impl::OpenMPexec::m_pool_topo[2] = Impl::s_using_hwloc ? thread_count / ( use_numa_count * use_cores_per_numa ) : 1;
Impl::OpenMPexec::resize_scratch( 1024 , 1024 );
}
}
if ( is_initialized || thread_spawn_failed ) {
std::string msg("Kokkos::OpenMP::initialize ERROR");
if ( is_initialized ) { msg.append(" : already initialized"); }
if ( thread_spawn_failed ) { msg.append(" : failed spawning threads"); }
Kokkos::Impl::throw_runtime_exception(msg);
}
// Init the array for used for arbitrarily sized atomics
Impl::init_lock_array_host_space();
}
//----------------------------------------------------------------------------
void OpenMP::finalize()
{
Impl::OpenMPexec::verify_initialized( "OpenMP::finalize" );
Impl::OpenMPexec::verify_is_process( "OpenMP::finalize" );
Impl::OpenMPexec::clear_scratch();
Impl::OpenMPexec::m_pool_topo[0] = 0 ;
Impl::OpenMPexec::m_pool_topo[1] = 0 ;
Impl::OpenMPexec::m_pool_topo[2] = 0 ;
omp_set_num_threads(1);
- if ( Impl::s_using_hwloc ) {
+ if ( Impl::s_using_hwloc && Kokkos::hwloc::can_bind_threads() ) {
hwloc::unbind_this_thread();
}
}
//----------------------------------------------------------------------------
void OpenMP::print_configuration( std::ostream & s , const bool detail )
{
Impl::OpenMPexec::verify_is_process( "OpenMP::print_configuration" );
s << "Kokkos::OpenMP" ;
#if defined( KOKKOS_HAVE_OPENMP )
s << " KOKKOS_HAVE_OPENMP" ;
#endif
#if defined( KOKKOS_HAVE_HWLOC )
const unsigned numa_count_ = Kokkos::hwloc::get_available_numa_count();
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
s << " hwloc[" << numa_count_ << "x" << cores_per_numa << "x" << threads_per_core << "]"
<< " hwloc_binding_" << ( Impl::s_using_hwloc ? "enabled" : "disabled" )
;
#endif
const bool is_initialized = 0 != Impl::OpenMPexec::m_pool[0] ;
if ( is_initialized ) {
const int numa_count = Kokkos::Impl::OpenMPexec::m_pool_topo[0] / Kokkos::Impl::OpenMPexec::m_pool_topo[1] ;
const int core_per_numa = Kokkos::Impl::OpenMPexec::m_pool_topo[1] / Kokkos::Impl::OpenMPexec::m_pool_topo[2] ;
const int thread_per_core = Kokkos::Impl::OpenMPexec::m_pool_topo[2] ;
s << " thread_pool_topology[ " << numa_count
<< " x " << core_per_numa
<< " x " << thread_per_core
<< " ]"
<< std::endl ;
if ( detail ) {
std::vector< std::pair<unsigned,unsigned> > coord( Kokkos::Impl::OpenMPexec::m_pool_topo[0] );
#pragma omp parallel
{
#pragma omp critical
{
coord[ omp_get_thread_num() ] = hwloc::get_this_thread_coordinate();
}
/* END #pragma omp critical */
}
/* END #pragma omp parallel */
for ( unsigned i = 0 ; i < coord.size() ; ++i ) {
s << " thread omp_rank[" << i << "]"
<< " kokkos_rank[" << Impl::OpenMPexec::m_map_rank[ i ] << "]"
<< " hwloc_coord[" << coord[i].first << "." << coord[i].second << "]"
<< std::endl ;
}
}
}
else {
s << " not initialized" << std::endl ;
}
}
} // namespace Kokkos
#endif //KOKKOS_HAVE_OPENMP
diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp
index 1ab08f648..d0086a243 100644
--- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp
+++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp
@@ -1,767 +1,823 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_OPENMPEXEC_HPP
#define KOKKOS_OPENMPEXEC_HPP
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_spinwait.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <Kokkos_Atomic.hpp>
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief Data for OpenMP thread execution */
class OpenMPexec {
public:
enum { MAX_THREAD_COUNT = 4096 };
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
struct Pool
{
Pool() : m_trackers() {}
AllocationTracker m_trackers[ MAX_THREAD_COUNT ];
OpenMPexec * operator[](int i)
{
return reinterpret_cast<OpenMPexec *>(m_trackers[i].alloc_ptr());
}
AllocationTracker & at(int i)
{
return m_trackers[i];
}
};
+
+private:
+
+ static Pool m_pool; // Indexed by: m_pool_rank_rev
+
+#else
+
private:
+ static OpenMPexec * m_pool[ MAX_THREAD_COUNT ]; // Indexed by: m_pool_rank_rev
+
+#endif
+
static int m_pool_topo[ 4 ];
static int m_map_rank[ MAX_THREAD_COUNT ];
- static Pool m_pool; // Indexed by: m_pool_rank_rev
friend class Kokkos::OpenMP ;
int const m_pool_rank ;
int const m_pool_rank_rev ;
int const m_scratch_exec_end ;
int const m_scratch_reduce_end ;
int const m_scratch_thread_end ;
int volatile m_barrier_state ;
OpenMPexec();
OpenMPexec( const OpenMPexec & );
OpenMPexec & operator = ( const OpenMPexec & );
static void clear_scratch();
public:
// Topology of a cache coherent thread pool:
// TOTAL = NUMA x GRAIN
// pool_size( depth = 0 )
// pool_size(0) = total number of threads
// pool_size(1) = number of threads per NUMA
// pool_size(2) = number of threads sharing finest grain memory hierarchy
inline static
int pool_size( int depth = 0 ) { return m_pool_topo[ depth ]; }
inline static
OpenMPexec * pool_rev( int pool_rank_rev ) { return m_pool[ pool_rank_rev ]; }
inline int pool_rank() const { return m_pool_rank ; }
inline int pool_rank_rev() const { return m_pool_rank_rev ; }
inline void * scratch_reduce() const { return ((char *) this) + m_scratch_exec_end ; }
inline void * scratch_thread() const { return ((char *) this) + m_scratch_reduce_end ; }
inline
void state_wait( int state )
{ Impl::spinwait( m_barrier_state , state ); }
inline
void state_set( int state ) { m_barrier_state = state ; }
~OpenMPexec() {}
OpenMPexec( const int poolRank
, const int scratch_exec_size
, const int scratch_reduce_size
, const int scratch_thread_size )
: m_pool_rank( poolRank )
, m_pool_rank_rev( pool_size() - ( poolRank + 1 ) )
, m_scratch_exec_end( scratch_exec_size )
, m_scratch_reduce_end( m_scratch_exec_end + scratch_reduce_size )
, m_scratch_thread_end( m_scratch_reduce_end + scratch_thread_size )
, m_barrier_state(0)
{}
static void finalize();
static void initialize( const unsigned team_count ,
const unsigned threads_per_team ,
const unsigned numa_count ,
const unsigned cores_per_numa );
static void verify_is_process( const char * const );
static void verify_initialized( const char * const );
static void resize_scratch( size_t reduce_size , size_t thread_size );
inline static
OpenMPexec * get_thread_omp() { return m_pool[ m_map_rank[ omp_get_thread_num() ] ]; }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
class OpenMPexecTeamMember {
private:
enum { TEAM_REDUCE_SIZE = 512 };
/** \brief Thread states for team synchronization */
enum { Active = 0 , Rendezvous = 1 };
typedef Kokkos::OpenMP execution_space ;
typedef execution_space::scratch_memory_space scratch_memory_space ;
Impl::OpenMPexec & m_exec ;
scratch_memory_space m_team_shared ;
int m_team_shmem ;
int m_team_base_rev ;
int m_team_rank_rev ;
int m_team_rank ;
int m_team_size ;
int m_league_rank ;
int m_league_end ;
int m_league_size ;
// Fan-in team threads, root of the fan-in which does not block returns true
inline
bool team_fan_in() const
{
+ memory_fence();
for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! ( m_team_rank_rev & n ) ; n <<= 1 ) {
m_exec.pool_rev( m_team_base_rev + j )->state_wait( Active );
}
if ( m_team_rank_rev ) {
m_exec.state_set( Rendezvous );
+ memory_fence();
m_exec.state_wait( Rendezvous );
}
return 0 == m_team_rank_rev ;
}
inline
void team_fan_out() const
{
+ memory_fence();
for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! ( m_team_rank_rev & n ) ; n <<= 1 ) {
m_exec.pool_rev( m_team_base_rev + j )->state_set( Active );
+ memory_fence();
}
}
public:
KOKKOS_INLINE_FUNCTION
const execution_space::scratch_memory_space & team_shmem() const
{ return m_team_shared ; }
KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; }
KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; }
KOKKOS_INLINE_FUNCTION void team_barrier() const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{}
#else
{
if ( 1 < m_team_size ) {
team_fan_in();
team_fan_out();
}
}
#endif
template<class ValueType>
KOKKOS_INLINE_FUNCTION
void team_broadcast(ValueType& value, const int& thread_id) const
{
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ }
#else
// Make sure there is enough scratch space:
typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE
, ValueType , void >::type type ;
type * const local_value = ((type*) m_exec.scratch_thread());
if(team_rank() == thread_id)
*local_value = value;
memory_fence();
team_barrier();
value = *local_value;
#endif
}
#ifdef KOKKOS_HAVE_CXX11
template< class ValueType, class JoinOp >
KOKKOS_INLINE_FUNCTION ValueType
team_reduce( const ValueType & value
, const JoinOp & op_in ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return ValueType(); }
#else
{
+ memory_fence();
typedef ValueType value_type;
const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
#endif
#else // KOKKOS_HAVE_CXX11
template< class JoinOp >
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
team_reduce( const typename JoinOp::value_type & value
, const JoinOp & op ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return typename JoinOp::value_type(); }
#else
{
typedef typename JoinOp::value_type value_type;
#endif
#endif // KOKKOS_HAVE_CXX11
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// Make sure there is enough scratch space:
typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
, value_type , void >::type type ;
type * const local_value = ((type*) m_exec.scratch_thread());
// Set this thread's contribution
*local_value = value ;
// Fence to make sure the base team member has access:
memory_fence();
if ( team_fan_in() ) {
// The last thread to synchronize returns true, all other threads wait for team_fan_out()
type * const team_value = ((type*) m_exec.pool_rev( m_team_base_rev )->scratch_thread());
// Join to the team value:
for ( int i = 1 ; i < m_team_size ; ++i ) {
op.join( *team_value , *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()) );
}
+ memory_fence();
// The base team member may "lap" the other team members,
// copy to their local value before proceeding.
for ( int i = 1 ; i < m_team_size ; ++i ) {
*((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()) = *team_value ;
}
// Fence to make sure all team members have access
memory_fence();
}
team_fan_out();
return *((type volatile const *)local_value);
}
#endif
/** \brief Intra-team exclusive prefix sum with team_rank() ordering
 * and non-deterministic inter-team accumulation ordering.
*
* The global inter-team accumulation value will, at the end of the
* league's parallel execution, be the scan's total.
* Parallel execution ordering of the league's teams is non-deterministic.
* As such the base value for each team's scan operation is similarly
* non-deterministic.
*/
template< typename ArgType >
KOKKOS_INLINE_FUNCTION ArgType team_scan( const ArgType & value , ArgType * const global_accum ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return ArgType(); }
#else
{
// Make sure there is enough scratch space:
typedef typename if_c< sizeof(ArgType) < TEAM_REDUCE_SIZE , ArgType , void >::type type ;
volatile type * const work_value = ((type*) m_exec.scratch_thread());
*work_value = value ;
memory_fence();
if ( team_fan_in() ) {
// The last thread to synchronize returns true, all other threads wait for team_fan_out()
// m_team_base[0] == highest ranking team member
// m_team_base[ m_team_size - 1 ] == lowest ranking team member
//
// 1) copy from lower to higher rank, initialize lowest rank to zero
// 2) prefix sum from lowest to highest rank, skipping lowest rank
type accum = 0 ;
if ( global_accum ) {
for ( int i = m_team_size ; i-- ; ) {
type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread());
accum += val ;
}
accum = atomic_fetch_add( global_accum , accum );
}
for ( int i = m_team_size ; i-- ; ) {
type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread());
const type offset = accum ;
accum += val ;
val = offset ;
}
memory_fence();
}
team_fan_out();
return *work_value ;
}
#endif
/** \brief Intra-team exclusive prefix sum with team_rank() ordering.
*
* The highest rank thread can compute the reduction total as
* reduction_total = dev.team_scan( value ) + value ;
*/
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const
{ return this-> template team_scan<Type>( value , 0 ); }
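Per the comment above, a caller can recover the reduction total from the exclusive scan; a hedged sketch with a hypothetical per-thread `my_count`:

    const int offset = member.team_scan( my_count );  // exclusive prefix over the team
    const int total  = offset + my_count ;            // equals the team total on the
                                                      // highest team_rank() thread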
//----------------------------------------
// Private for the driver
private:
typedef execution_space::scratch_memory_space space ;
public:
template< class Arg0 , class Arg1 >
inline
OpenMPexecTeamMember( Impl::OpenMPexec & exec
, const TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > & team
, const int shmem_size
)
: m_exec( exec )
, m_team_shared(0,0)
, m_team_shmem( shmem_size )
, m_team_base_rev(0)
, m_team_rank_rev(0)
, m_team_rank(0)
, m_team_size( team.team_size() )
, m_league_rank(0)
, m_league_end(0)
, m_league_size( team.league_size() )
{
const int pool_rank_rev = m_exec.pool_rank_rev();
const int pool_team_rank_rev = pool_rank_rev % team.team_alloc();
const int pool_league_rank_rev = pool_rank_rev / team.team_alloc();
const int league_iter_end = team.league_size() - pool_league_rank_rev * team.team_iter();
if ( pool_team_rank_rev < m_team_size && 0 < league_iter_end ) {
m_team_base_rev = team.team_alloc() * pool_league_rank_rev ;
m_team_rank_rev = pool_team_rank_rev ;
m_team_rank = m_team_size - ( m_team_rank_rev + 1 );
m_league_end = league_iter_end ;
m_league_rank = league_iter_end > team.team_iter() ? league_iter_end - team.team_iter() : 0 ;
new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_shmem );
}
}
bool valid() const
{ return m_league_rank < m_league_end ; }
void next()
{
if ( ++m_league_rank < m_league_end ) {
team_barrier();
new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_shmem );
}
}
static inline int team_reduce_size() { return TEAM_REDUCE_SIZE ; }
};
} // namespace Impl
template< class Arg0 , class Arg1 >
class TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP >
{
public:
//! Tag this class as a kokkos execution policy
typedef TeamPolicy execution_policy ;
//! Execution space of this execution policy.
typedef Kokkos::OpenMP execution_space ;
typedef typename
Impl::if_c< ! Impl::is_same< Kokkos::OpenMP , Arg0 >::value , Arg0 , Arg1 >::type
work_tag ;
//----------------------------------------
template< class FunctorType >
inline static
int team_size_max( const FunctorType & )
{ return execution_space::thread_pool_size(1); }
template< class FunctorType >
inline static
int team_size_recommended( const FunctorType & )
{ return execution_space::thread_pool_size(2); }
template< class FunctorType >
inline static
int team_size_recommended( const FunctorType &, const int& )
{ return execution_space::thread_pool_size(2); }
//----------------------------------------
private:
int m_league_size ;
int m_team_size ;
int m_team_alloc ;
int m_team_iter ;
+ size_t m_scratch_size;
+
inline void init( const int league_size_request
, const int team_size_request )
{
const int pool_size = execution_space::thread_pool_size(0);
const int team_max = execution_space::thread_pool_size(1);
const int team_grain = execution_space::thread_pool_size(2);
m_league_size = league_size_request ;
m_team_size = team_size_request < team_max ?
team_size_request : team_max ;
// Round team size up to a multiple of 'team_grain'
const int team_size_grain = team_grain * ( ( m_team_size + team_grain - 1 ) / team_grain );
const int team_count = pool_size / team_size_grain ;
// Constraint : pool_size = m_team_alloc * team_count
m_team_alloc = pool_size / team_count ;
// Maximum number of iterations each team will take:
m_team_iter = ( m_league_size + team_count - 1 ) / team_count ;
}
public:
inline int team_size() const { return m_team_size ; }
inline int league_size() const { return m_league_size ; }
+ inline size_t scratch_size() const { return m_scratch_size ; }
/** \brief Specify league size, request team size */
- TeamPolicy( execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1)
- { init( league_size_request , team_size_request ); (void) vector_length_request; }
-
- TeamPolicy( int league_size_request , int team_size_request , int vector_length_request = 1 )
- { init( league_size_request , team_size_request ); (void) vector_length_request; }
+ TeamPolicy( execution_space &
+ , int league_size_request
+ , int team_size_request
+ , int /* vector_length_request */ = 1 )
+ : m_scratch_size ( 0 )
+ { init( league_size_request , team_size_request ); }
+
+ TeamPolicy( execution_space &
+ , int league_size_request
+ , const Kokkos::AUTO_t & /* team_size_request */
+ , int /* vector_length_request */ = 1)
+ : m_scratch_size ( 0 )
+ { init( league_size_request , execution_space::thread_pool_size(2) ); }
+
+ TeamPolicy( int league_size_request
+ , int team_size_request
+ , int /* vector_length_request */ = 1 )
+ : m_scratch_size ( 0 )
+ { init( league_size_request , team_size_request ); }
+
+ TeamPolicy( int league_size_request
+ , const Kokkos::AUTO_t & /* team_size_request */
+ , int /* vector_length_request */ = 1 )
+ : m_scratch_size ( 0 )
+ { init( league_size_request , execution_space::thread_pool_size(2) ); }
+
+ template<class MemorySpace>
+ TeamPolicy( int league_size_request
+ , int team_size_request
+ , const Experimental::TeamScratchRequest<MemorySpace> & scratch_request )
+ : m_scratch_size(scratch_request.total(team_size_request))
+ { init(league_size_request,team_size_request); }
+
+
+ template<class MemorySpace>
+ TeamPolicy( int league_size_request
+ , const Kokkos::AUTO_t & /* team_size_request */
+ , const Experimental::TeamScratchRequest<MemorySpace> & scratch_request )
+ : m_scratch_size(scratch_request.total(execution_space::thread_pool_size(2)))
+ { init(league_size_request,execution_space::thread_pool_size(2)); }
inline int team_alloc() const { return m_team_alloc ; }
inline int team_iter() const { return m_team_iter ; }
typedef Impl::OpenMPexecTeamMember member_type ;
};
} // namespace Kokkos
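Hedged construction sketch for the overload set above (`league_size` and `team_size` are hypothetical; the TeamScratchRequest constructor is not shown in this diff, so its use is only indicated):

    typedef Kokkos::TeamPolicy< Kokkos::OpenMP > policy_type ;

    policy_type p_explicit( league_size , team_size );    // caller-chosen team size
    policy_type p_auto    ( league_size , Kokkos::AUTO ); // thread_pool_size(2) recommendation
    // A Kokkos::Experimental::TeamScratchRequest<MemorySpace> may be passed as the
    // third argument to reserve per-team scratch; m_scratch_size then becomes
    // scratch_request.total( team_size ).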
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
inline
int OpenMP::thread_pool_size( int depth )
{
return Impl::OpenMPexec::pool_size(depth);
}
KOKKOS_INLINE_FUNCTION
int OpenMP::thread_pool_rank()
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
return Impl::OpenMPexec::m_map_rank[ omp_get_thread_num() ];
#else
return -1 ;
#endif
}
} // namespace Kokkos
namespace Kokkos {
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>
TeamThreadRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) {
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>(thread,count);
}
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>
TeamThreadRange(const Impl::OpenMPexecTeamMember& thread, const iType& begin, const iType& end) {
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>(thread,begin,end);
}
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >
ThreadVectorRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) {
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >(thread,count);
}
KOKKOS_INLINE_FUNCTION
Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember> PerTeam(const Impl::OpenMPexecTeamMember& thread) {
return Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>(thread);
}
KOKKOS_INLINE_FUNCTION
Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember> PerThread(const Impl::OpenMPexecTeamMember& thread) {
return Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>(thread);
}
} // namespace Kokkos
namespace Kokkos {
/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
*
 * The range i=0..N-1 is mapped to all threads of the calling thread team.
* This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries, const Lambda& lambda) {
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
lambda(i);
}
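Hedged usage sketch (inside a team functor; `member` is the team handle and `n` is hypothetical; requires the C++11 lambda support noted above):

    Kokkos::parallel_for( Kokkos::TeamThreadRange( member , n ) ,
      [&]( const int i ) {
        // each index i is executed by exactly one thread of the team
      } );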
/** \brief Inter-thread parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
 *
 * The range i=0..N-1 is mapped to all threads of the calling thread team and a summation of
* val is performed and put into result. This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries,
const Lambda & lambda, ValueType& result) {
result = ValueType();
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
result+=tmp;
}
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
}
/** \brief Inter-thread parallel_reduce with a join operation. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
 *
 * The range i=0..N-1 is mapped to all threads of the calling thread team and a reduction of
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
* '1 for *'). This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries,
const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
join(result,tmp);
}
init_result = loop_boundaries.thread.team_reduce(result,join);
}
} //namespace Kokkos
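Hedged sketch of the join-based overload (inside a team functor; `member`, `n`, and factor() are hypothetical; assumes the C++11 lambda-join path of this header):

    double prod = 1.0 ;  // init_result doubles as the join's neutral element
    Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member , n )
      , [&]( const int i , double & tmp ) { tmp *= factor(i) ; }
      , [&]( double & dst , const double & src ) { dst *= src ; }  // JoinType
      , prod );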
namespace Kokkos {
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
*
 * The range i=0..N-1 is mapped to all vector lanes of the calling thread.
* This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
lambda(i);
}
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
 * The range i=0..N-1 is mapped to all vector lanes of the calling thread and a summation of
* val is performed and put into result. This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
result+=tmp;
}
}
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
 * The range i=0..N-1 is mapped to all vector lanes of the calling thread and a reduction of
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
* '1 for *'). This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
join(result,tmp);
}
init_result = result;
}
/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final)
* for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed.
* Depending on the target execution space the operator might be called twice: once with final=false
* and once with final=true. When final==true val contains the prefix sum value. The contribution of this
* "i" needs to be added to val no matter whether final==true or not. In a serial execution
 * (i.e. team_size==1) the operator is only called once with final==true. scan_val will be set
* to the final sum value over all vector lanes.
* This functionality requires C++11 support.*/
template< typename iType, class FunctorType >
KOKKOS_INLINE_FUNCTION
void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
loop_boundaries, const FunctorType & lambda) {
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
typedef typename ValueTraits::value_type value_type ;
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
lambda(i,scan_val,true);
}
}
} // namespace Kokkos
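Hedged sketch of the vector-range scan contract (inside a team functor; `member`, `n`, `in`, and `out` are hypothetical). Per the comment above, the contribution is added whether or not final is set, and val is consumed only when final==true:

    Kokkos::parallel_scan( Kokkos::ThreadVectorRange( member , n ) ,
      [&]( const int i , float & val , const bool final ) {
        if ( final ) { out(i) = val ; }  // exclusive prefix sum at lane i
        val += in(i) ;                   // contribution of lane i, always added
      } );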
namespace Kokkos {
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda) {
lambda();
}
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda) {
if(single_struct.team_member.team_rank()==0) lambda();
}
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) {
lambda(val);
}
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) {
if(single_struct.team_member.team_rank()==0) {
lambda(val);
}
single_struct.team_member.team_broadcast(val,0);
}
}
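Hedged sketch for the broadcasting single() overload above (inside a team functor; compute_once() is hypothetical):

    double shared_value = 0 ;
    Kokkos::single( Kokkos::PerTeam( member ) , [&]( double & v ) {
        v = compute_once() ;            // executed by team_rank()==0 only
      } , shared_value );
    // team_broadcast() hands the value to every member of the team.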
#endif /* #ifndef KOKKOS_OPENMPEXEC_HPP */
diff --git a/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp b/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp
index d772aee2b..e3702167e 100644
--- a/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp
+++ b/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp
@@ -1,617 +1,617 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_QTHREADEXEC_HPP
#define KOKKOS_QTHREADEXEC_HPP
#include <impl/Kokkos_spinwait.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
class QthreadExec ;
typedef void (*QthreadExecFunctionPointer)( QthreadExec & , const void * );
class QthreadExec {
private:
enum { Inactive = 0 , Active = 1 };
const QthreadExec * const * m_worker_base ;
const QthreadExec * const * m_shepherd_base ;
void * m_scratch_alloc ; ///< Scratch memory [ reduce , team , shared ]
int m_reduce_end ; ///< End of scratch reduction memory
int m_shepherd_rank ;
int m_shepherd_size ;
int m_shepherd_worker_rank ;
int m_shepherd_worker_size ;
/*
* m_worker_rank = m_shepherd_rank * m_shepherd_worker_size + m_shepherd_worker_rank
* m_worker_size = m_shepherd_size * m_shepherd_worker_size
*/
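// Example: with m_shepherd_size == 4 and m_shepherd_worker_size == 8,
// the worker with m_shepherd_rank == 2 and m_shepherd_worker_rank == 3
// has m_worker_rank == 2 * 8 + 3 == 19 and m_worker_size == 4 * 8 == 32.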
int m_worker_rank ;
int m_worker_size ;
int mutable volatile m_worker_state ;
friend class Kokkos::Qthread ;
~QthreadExec();
QthreadExec( const QthreadExec & );
QthreadExec & operator = ( const QthreadExec & );
public:
QthreadExec();
/** Execute the input function on all available Qthread workers */
static void exec_all( Qthread & , QthreadExecFunctionPointer , const void * );
//----------------------------------------
/** Barrier across all workers participating in the 'exec_all' */
void exec_all_barrier() const
{
const int rev_rank = m_worker_size - ( m_worker_rank + 1 );
int n , j ;
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
Impl::spinwait( m_worker_base[j]->m_worker_state , QthreadExec::Active );
}
if ( rev_rank ) {
m_worker_state = QthreadExec::Inactive ;
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
}
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
m_worker_base[j]->m_worker_state = QthreadExec::Active ;
}
}
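/* The loops above form a binary fan-in / fan-out tree on the reversed
 * rank: a worker waits on rev_rank + 1, rev_rank + 2, rev_rank + 4, ...
 * while the corresponding low bits of rev_rank are zero, then goes
 * inactive until released on the way back down. For example, with
 * m_worker_size == 8: rev_rank 0 (the root) waits on 1, 2, and 4;
 * rev_rank 4 waits on 5 and 6; every odd rev_rank waits on no one and
 * immediately blocks until reactivated in the release sweep.
 */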
/** Barrier across workers within the shepherd with rank < team_rank */
void shepherd_barrier( const int team_size ) const
{
if ( m_shepherd_worker_rank < team_size ) {
const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 );
int n , j ;
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active );
}
if ( rev_rank ) {
m_worker_state = QthreadExec::Inactive ;
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
}
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
m_shepherd_base[j]->m_worker_state = QthreadExec::Active ;
}
}
}
//----------------------------------------
/** Reduce across all workers participating in the 'exec_all' */
template< class FunctorType , class ArgTag >
inline
void exec_all_reduce( const FunctorType & func ) const
{
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
const int rev_rank = m_worker_size - ( m_worker_rank + 1 );
int n , j ;
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
const QthreadExec & fan = *m_worker_base[j];
Impl::spinwait( fan.m_worker_state , QthreadExec::Active );
ValueJoin::join( func , m_scratch_alloc , fan.m_scratch_alloc );
}
if ( rev_rank ) {
m_worker_state = QthreadExec::Inactive ;
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
}
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
m_worker_base[j]->m_worker_state = QthreadExec::Active ;
}
}
//----------------------------------------
/** Scan across all workers participating in the 'exec_all' */
template< class FunctorType , class ArgTag >
inline
void exec_all_scan( const FunctorType & func ) const
{
typedef Kokkos::Impl::FunctorValueInit< FunctorType , ArgTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
typedef Kokkos::Impl::FunctorValueOps< FunctorType , ArgTag > ValueOps ;
const int rev_rank = m_worker_size - ( m_worker_rank + 1 );
int n , j ;
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
Impl::spinwait( m_worker_base[j]->m_worker_state , QthreadExec::Active );
}
if ( rev_rank ) {
m_worker_state = QthreadExec::Inactive ;
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
}
else {
// Root thread scans across values before releasing threads
// Worker data is in reverse order, so m_worker_base[0] is the
// highest ranking thread.
// Copy from lower ranking to higher ranking worker.
for ( int i = 1 ; i < m_worker_size ; ++i ) {
ValueOps::copy( func
, m_worker_base[i-1]->m_scratch_alloc
, m_worker_base[i]->m_scratch_alloc
);
}
ValueInit::init( func , m_worker_base[m_worker_size-1]->m_scratch_alloc );
// Join from lower ranking to higher ranking worker.
// Value at m_worker_base[n-1] is zero so skip adding it to m_worker_base[n-2].
- for ( int i = m_worker_size - 1 ; --i ; ) {
+ for ( int i = m_worker_size - 1 ; --i > 0 ; ) {
ValueJoin::join( func , m_worker_base[i-1]->m_scratch_alloc , m_worker_base[i]->m_scratch_alloc );
}
}
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
m_worker_base[j]->m_worker_state = QthreadExec::Active ;
}
}
//----------------------------------------
template< class Type>
inline
volatile Type * shepherd_team_scratch_value() const
{ return (volatile Type*)(((unsigned char *) m_scratch_alloc) + m_reduce_end); }
template< class Type >
inline
void shepherd_broadcast( Type & value , const int team_size , const int team_rank ) const
{
if ( m_shepherd_base ) {
Type * const shared_value = m_shepherd_base[0]->shepherd_team_scratch_value<Type>();
if ( m_shepherd_worker_rank == team_rank ) { *shared_value = value ; }
memory_fence();
shepherd_barrier( team_size );
value = *shared_value ;
}
}
template< class Type >
inline
Type shepherd_reduce( const int team_size , const Type & value ) const
{
*shepherd_team_scratch_value<Type>() = value ;
memory_fence();
const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 );
int n , j ;
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active );
}
if ( rev_rank ) {
m_worker_state = QthreadExec::Inactive ;
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
}
else {
Type & accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>();
// Accumulate over the team; bound the loops by 'team_size',
// not by the fan-in counter 'n' left over from the loop above.
for ( int i = 1 ; i < team_size ; ++i ) {
accum += * m_shepherd_base[i]->shepherd_team_scratch_value<Type>();
}
for ( int i = 1 ; i < team_size ; ++i ) {
* m_shepherd_base[i]->shepherd_team_scratch_value<Type>() = accum ;
}
memory_fence();
}
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
m_shepherd_base[j]->m_worker_state = QthreadExec::Active ;
}
return *shepherd_team_scratch_value<Type>();
}
template< class JoinOp >
inline
typename JoinOp::value_type
shepherd_reduce( const int team_size
, const typename JoinOp::value_type & value
, const JoinOp & op ) const
{
typedef typename JoinOp::value_type Type ;
*shepherd_team_scratch_value<Type>() = value ;
memory_fence();
const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 );
int n , j ;
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active );
}
if ( rev_rank ) {
m_worker_state = QthreadExec::Inactive ;
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
}
else {
volatile Type & accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>();
for ( int i = 1 ; i < team_size ; ++i ) {
op.join( accum , * m_shepherd_base[i]->shepherd_team_scratch_value<Type>() );
}
for ( int i = 1 ; i < team_size ; ++i ) {
* m_shepherd_base[i]->shepherd_team_scratch_value<Type>() = accum ;
}
memory_fence();
}
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
m_shepherd_base[j]->m_worker_state = QthreadExec::Active ;
}
return *shepherd_team_scratch_value<Type>();
}
template< class Type >
inline
Type shepherd_scan( const int team_size
, const Type & value
, Type * const global_value = 0 ) const
{
*shepherd_team_scratch_value<Type>() = value ;
memory_fence();
const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 );
int n , j ;
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active );
}
if ( rev_rank ) {
m_worker_state = QthreadExec::Inactive ;
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
}
else {
// Root thread scans across values before releasing threads
// Worker data is in reverse order, so m_shepherd_base[0] is the
// highest ranking thread.
// Copy from lower ranking to higher ranking worker.
Type accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>();
for ( int i = 1 ; i < team_size ; ++i ) {
const Type tmp = * m_shepherd_base[i]->shepherd_team_scratch_value<Type>();
accum += tmp ;
* m_shepherd_base[i-1]->shepherd_team_scratch_value<Type>() = tmp ;
}
* m_shepherd_base[team_size-1]->shepherd_team_scratch_value<Type>() =
global_value ? atomic_fetch_add( global_value , accum ) : 0 ;
// Join from lower ranking to higher ranking worker.
for ( int i = team_size ; --i ; ) {
* m_shepherd_base[i-1]->shepherd_team_scratch_value<Type>() += * m_shepherd_base[i]->shepherd_team_scratch_value<Type>();
}
memory_fence();
}
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
m_shepherd_base[j]->m_worker_state = QthreadExec::Active ;
}
return *shepherd_team_scratch_value<Type>();
}
//----------------------------------------
static inline
int align_alloc( int size )
{
enum { ALLOC_GRAIN = 1 << 6 /* power of two, 64 bytes */ };
enum { ALLOC_GRAIN_MASK = ALLOC_GRAIN - 1 };
return ( size + ALLOC_GRAIN_MASK ) & ~ALLOC_GRAIN_MASK ;
}
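// Example: align_alloc(100) == 128 and align_alloc(64) == 64; every
// scratch size is rounded up to a whole 64-byte allocation grain.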
void shared_reset( Qthread::scratch_memory_space & );
void * exec_all_reduce_value() const { return m_scratch_alloc ; }
static void * exec_all_reduce_result();
static void resize_worker_scratch( const int reduce_size , const int shared_size );
static void clear_workers();
//----------------------------------------
inline int worker_rank() const { return m_worker_rank ; }
inline int worker_size() const { return m_worker_size ; }
inline int shepherd_worker_rank() const { return m_shepherd_worker_rank ; }
inline int shepherd_worker_size() const { return m_shepherd_worker_size ; }
inline int shepherd_rank() const { return m_shepherd_rank ; }
inline int shepherd_size() const { return m_shepherd_size ; }
static int worker_per_shepherd();
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
class QthreadTeamPolicyMember {
private:
typedef Kokkos::Qthread execution_space ;
typedef execution_space::scratch_memory_space scratch_memory_space ;
Impl::QthreadExec & m_exec ;
scratch_memory_space m_team_shared ;
const int m_team_size ;
const int m_team_rank ;
const int m_league_size ;
const int m_league_end ;
int m_league_rank ;
public:
KOKKOS_INLINE_FUNCTION
const scratch_memory_space & team_shmem() const { return m_team_shared ; }
KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; }
KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; }
KOKKOS_INLINE_FUNCTION void team_barrier() const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{}
#else
{ m_exec.shepherd_barrier( m_team_size ); }
#endif
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_broadcast( const Type & value , int rank ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return Type(); }
#else
{ return m_exec.template shepherd_broadcast<Type>( value , m_team_size , rank ); }
#endif
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_reduce( const Type & value ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return Type(); }
#else
{ return m_exec.template shepherd_reduce<Type>( m_team_size , value ); }
#endif
template< typename JoinOp >
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
team_reduce( const typename JoinOp::value_type & value
, const JoinOp & op ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return typename JoinOp::value_type(); }
#else
{ return m_exec.template shepherd_reduce<JoinOp>( m_team_size , value , op ); }
#endif
/** \brief Intra-team exclusive prefix sum with team_rank() ordering.
*
* The highest rank thread can compute the reduction total as
* reduction_total = dev.team_scan( value ) + value ;
*/
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return Type(); }
#else
{ return m_exec.template shepherd_scan<Type>( m_team_size , value ); }
#endif
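/* A small worked example of the exclusive scan (values assumed for
 * illustration): if team ranks 0..3 pass {3, 1, 4, 1} to team_scan,
 * the returned prefixes are {0, 3, 4, 8}, and the highest rank can
 * recover the total as team_scan( 1 ) + 1 == 9.
 */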
/** \brief Intra-team exclusive prefix sum with team_rank() ordering
* with intra-team non-deterministic ordering accumulation.
*
* The global inter-team accumulation value will, at the end of the
* league's parallel execution, be the scan's total.
* Parallel execution ordering of the league's teams is non-deterministic.
* As such the base value for each team's scan operation is similarly
* non-deterministic.
*/
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return Type(); }
#else
{ return m_exec.template shepherd_scan<Type>( m_team_size , value , global_accum ); }
#endif
//----------------------------------------
// Private driver for task-team parallel
struct TaskTeam {};
QthreadTeamPolicyMember();
explicit QthreadTeamPolicyMember( const TaskTeam & );
//----------------------------------------
// Private for the driver: for ( member_type i(exec,team) ; i ; i.next_team() ) { ... }
// Initialize
template< class Arg0 , class Arg1 >
QthreadTeamPolicyMember( Impl::QthreadExec & exec , const TeamPolicy<Arg0,Arg1,Qthread> & team )
: m_exec( exec )
, m_team_shared(0,0)
, m_team_size( team.m_team_size )
, m_team_rank( exec.shepherd_worker_rank() )
, m_league_size( team.m_league_size )
, m_league_end( team.m_league_size - team.m_shepherd_iter * ( exec.shepherd_size() - ( exec.shepherd_rank() + 1 ) ) )
, m_league_rank( m_league_end > team.m_shepherd_iter ? m_league_end - team.m_shepherd_iter : 0 )
{
m_exec.shared_reset( m_team_shared );
}
// Continue
operator bool () const { return m_league_rank < m_league_end ; }
// iterate
void next_team() { ++m_league_rank ; m_exec.shared_reset( m_team_shared ); }
};
} // namespace Impl
template< class Arg0 , class Arg1 >
class TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread >
{
private:
const int m_league_size ;
const int m_team_size ;
const int m_shepherd_iter ;
public:
//! Tag this class as a kokkos execution policy
typedef TeamPolicy execution_policy ;
typedef Qthread execution_space ;
typedef typename
Impl::if_c< ! Impl::is_same< Kokkos::Qthread , Arg0 >::value , Arg0 , Arg1 >::type
work_tag ;
//----------------------------------------
template< class FunctorType >
inline static
int team_size_max( const FunctorType & )
{ return Qthread::instance().shepherd_worker_size(); }
template< class FunctorType >
static int team_size_recommended( const FunctorType & f )
{ return team_size_max( f ); }
template< class FunctorType >
inline static
int team_size_recommended( const FunctorType & f , const int& )
{ return team_size_max( f ); }
//----------------------------------------
inline int team_size() const { return m_team_size ; }
inline int league_size() const { return m_league_size ; }
// One active team per shepherd
TeamPolicy( Kokkos::Qthread & q
, const int league_size
, const int team_size
)
: m_league_size( league_size )
, m_team_size( team_size < q.shepherd_worker_size()
? team_size : q.shepherd_worker_size() )
, m_shepherd_iter( ( league_size + q.shepherd_size() - 1 ) / q.shepherd_size() )
{
}
// One active team per shepherd
TeamPolicy( const int league_size
, const int team_size
)
: m_league_size( league_size )
, m_team_size( team_size < Qthread::instance().shepherd_worker_size()
? team_size : Qthread::instance().shepherd_worker_size() )
, m_shepherd_iter( ( league_size + Qthread::instance().shepherd_size() - 1 ) / Qthread::instance().shepherd_size() )
{
}
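// Example: league_size == 10 on 4 shepherds gives
// m_shepherd_iter == ( 10 + 4 - 1 ) / 4 == 3, i.e. each shepherd
// steps through at most 3 of the league's teams.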
typedef Impl::QthreadTeamPolicyMember member_type ;
friend class Impl::QthreadTeamPolicyMember ;
};
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_QTHREADEXEC_HPP */
diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp
index dc76a0c42..50e2a058c 100644
--- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp
+++ b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp
@@ -1,643 +1,696 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_QTHREAD_PARALLEL_HPP
#define KOKKOS_QTHREAD_PARALLEL_HPP
#include <vector>
#include <Kokkos_Parallel.hpp>
#include <impl/Kokkos_StaticAssert.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#include <Qthread/Kokkos_QthreadExec.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelFor< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > >
+class ParallelFor< FunctorType
+ , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread >
+ >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > Policy ;
- const FunctorType m_func ;
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::member_type Member ;
+ typedef typename Policy::WorkRange WorkRange ;
+
+ const FunctorType m_functor ;
const Policy m_policy ;
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if<
- ( Impl::is_same< typename PType::work_tag , void >::value )
- , const FunctorType & >::type functor
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor , const Member ibeg , const Member iend )
{
- const typename PType::member_type e = range.end();
- for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
+ for ( Member i = ibeg ; i < iend ; ++i ) {
functor( i );
}
}
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if<
- ( ! Impl::is_same< typename PType::work_tag , void >::value )
- , const FunctorType & >::type functor
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor , const Member ibeg , const Member iend )
{
- const typename PType::member_type e = range.end();
- for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
- functor( typename PType::work_tag() , i );
+ const TagType t{} ;
+ for ( Member i = ibeg ; i < iend ; ++i ) {
+ functor( t , i );
}
}
// Function is called once by every concurrent thread.
- static void execute( QthreadExec & exec , const void * arg )
+ static void exec( QthreadExec & exec , const void * arg )
{
const ParallelFor & self = * ((const ParallelFor *) arg );
- driver( self.m_func , typename Policy::WorkRange( self.m_policy , exec.worker_rank() , exec.worker_size() ) );
+ const WorkRange range( self.m_policy, exec.worker_rank(), exec.worker_size() );
+
+ ParallelFor::template exec_range< WorkTag > ( self.m_functor , range.begin() , range.end() );
// All threads wait for completion.
exec.exec_all_barrier();
}
public:
- ParallelFor( const FunctorType & functor
- , const Policy & policy
- )
- : m_func( functor )
- , m_policy( policy )
+ inline
+ void execute() const
{
- Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::execute , this );
+ Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::exec , this );
+
}
+
+ ParallelFor( const FunctorType & arg_functor
+ , const Policy & arg_policy
+ )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ { }
};
//----------------------------------------------------------------------------
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelReduce< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > >
+class ParallelReduce< FunctorType
+ , Kokkos::RangePolicy< Arg0, Arg1, Arg2, Kokkos::Qthread >
+ >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > Policy ;
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
+
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::member_type Member ;
+ typedef typename Policy::WorkRange WorkRange ;
+
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
- const FunctorType m_func ;
+ const FunctorType m_functor ;
const Policy m_policy ;
+ const pointer_type m_result_ptr ;
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if<
- ( Impl::is_same< typename PType::work_tag , void >::value )
- , const FunctorType & >::type functor
- , reference_type update
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member ibeg , const Member iend
+ , reference_type update )
{
- const typename PType::member_type e = range.end();
- for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
+ for ( Member i = ibeg ; i < iend ; ++i ) {
functor( i , update );
}
}
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if<
- ( ! Impl::is_same< typename PType::work_tag , void >::value )
- , const FunctorType & >::type functor
- , reference_type update
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member ibeg , const Member iend
+ , reference_type update )
{
- const typename PType::member_type e = range.end();
- for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
- functor( typename PType::work_tag() , i , update );
+ const TagType t{} ;
+ for ( Member i = ibeg ; i < iend ; ++i ) {
+ functor( t , i , update );
}
}
- static void execute( QthreadExec & exec , const void * arg )
+ static void exec( QthreadExec & exec , const void * arg )
{
const ParallelReduce & self = * ((const ParallelReduce *) arg );
- driver( self.m_func
- , ValueInit::init( self.m_func , exec.exec_all_reduce_value() )
- , typename Policy::WorkRange( self.m_policy , exec.worker_rank() , exec.worker_size() )
- );
+ const WorkRange range( self.m_policy, exec.worker_rank(), exec.worker_size() );
+
+ ParallelReduce::template exec_range< WorkTag >(
+ self.m_functor, range.begin(), range.end(),
+ ValueInit::init( self.m_functor , exec.exec_all_reduce_value() ) );
- exec.template exec_all_reduce<FunctorType, typename Policy::work_tag >( self.m_func );
+ exec.template exec_all_reduce<FunctorType, WorkTag >( self.m_functor );
}
public:
- template< class HostViewType >
- ParallelReduce( const FunctorType & functor
- , const Policy & policy
- , const HostViewType & result_view )
- : m_func( functor )
- , m_policy( policy )
+ inline
+ void execute() const
{
- QthreadExec::resize_worker_scratch( ValueTraits::value_size( m_func ) , 0 );
-
- Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::execute , this );
+ QthreadExec::resize_worker_scratch( ValueTraits::value_size( m_functor ) , 0 );
+ Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::exec , this );
const pointer_type data = (pointer_type) QthreadExec::exec_all_reduce_result();
- Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( m_func , data );
+ Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( m_functor , data );
- if ( result_view.ptr_on_device() ) {
- const unsigned n = ValueTraits::value_count( m_func );
- for ( unsigned i = 0 ; i < n ; ++i ) { result_view.ptr_on_device()[i] = data[i]; }
+ if ( m_result_ptr ) {
+ const unsigned n = ValueTraits::value_count( m_functor );
+ for ( unsigned i = 0 ; i < n ; ++i ) { m_result_ptr[i] = data[i]; }
}
}
+
+ template< class HostViewType >
+ ParallelReduce( const FunctorType & arg_functor
+ , const Policy & arg_policy
+ , const HostViewType & arg_result_view )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ , m_result_ptr( arg_result_view.ptr_on_device() )
+ { }
};
//----------------------------------------------------------------------------
template< class FunctorType , class Arg0 , class Arg1 >
class ParallelFor< FunctorType , TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread > >
{
private:
typedef TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread > Policy ;
+ typedef typename Policy::member_type Member ;
+ typedef typename Policy::work_tag WorkTag ;
- const FunctorType m_func ;
- const Policy m_team ;
+ const FunctorType m_functor ;
+ const Policy m_policy ;
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION
- void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
- const typename Policy::member_type & >::type member ) const
- { m_func( member ); }
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_team( const FunctorType & functor , Member member )
+ {
+ while ( member ) {
+ functor( member );
+ member.team_barrier();
+ member.next_team();
+ }
+ }
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION
- void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
- const typename Policy::member_type & >::type member ) const
- { m_func( TagType() , member ); }
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_team( const FunctorType & functor , Member member )
+ {
+ const TagType t{} ;
+ while ( member ) {
+ functor( t , member );
+ member.team_barrier();
+ member.next_team();
+ }
+ }
- static void execute( QthreadExec & exec , const void * arg )
+ static void exec( QthreadExec & exec , const void * arg )
{
const ParallelFor & self = * ((const ParallelFor *) arg );
- typename Policy::member_type member( exec , self.m_team );
-
- while ( member ) {
- self.ParallelFor::template driver< typename Policy::work_tag >( member );
- member.team_barrier();
- member.next_team();
- }
+ ParallelFor::template exec_team< WorkTag >
+ ( self.m_functor , Member( exec , self.m_policy ) );
exec.exec_all_barrier();
}
public:
- ParallelFor( const FunctorType & functor ,
- const Policy & policy )
- : m_func( functor )
- , m_team( policy )
+ inline
+ void execute() const
{
QthreadExec::resize_worker_scratch
( /* reduction memory */ 0
- , /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() ) );
-
- Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::execute , this );
+ , /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( m_functor , m_policy.team_size() ) );
+ Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::exec , this );
}
+
+ ParallelFor( const FunctorType & arg_functor ,
+ const Policy & arg_policy )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ { }
};
//----------------------------------------------------------------------------
template< class FunctorType , class Arg0 , class Arg1 >
class ParallelReduce< FunctorType , TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread > >
{
private:
typedef TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread > Policy ;
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::member_type Member ;
+
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
- const FunctorType m_func ;
- const Policy m_team ;
+ const FunctorType m_functor ;
+ const Policy m_policy ;
+ const pointer_type m_result_ptr ;
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION
- void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
- const typename Policy::member_type & >::type member
- , reference_type update ) const
- { m_func( member , update ); }
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_team( const FunctorType & functor , Member member , reference_type update )
+ {
+ while ( member ) {
+ functor( member , update );
+ member.team_barrier();
+ member.next_team();
+ }
+ }
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION
- void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
- const typename Policy::member_type & >::type member
- , reference_type update ) const
- { m_func( TagType() , member , update ); }
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_team( const FunctorType & functor , Member member , reference_type update )
+ {
+ const TagType t{} ;
+ while ( member ) {
+ functor( t , member , update );
+ member.team_barrier();
+ member.next_team();
+ }
+ }
- static void execute( QthreadExec & exec , const void * arg )
+ static void exec( QthreadExec & exec , const void * arg )
{
const ParallelReduce & self = * ((const ParallelReduce *) arg );
- // Initialize thread-local value
- reference_type update = ValueInit::init( self.m_func , exec.exec_all_reduce_value() );
-
- typename Policy::member_type member( exec , self.m_team );
-
- while ( member ) {
- self.ParallelReduce::template driver< typename Policy::work_tag >( member , update );
- member.team_barrier();
- member.next_team();
- }
+ ParallelReduce::template exec_team< WorkTag >
+ ( self.m_functor
+ , Member( exec , self.m_policy )
+ , ValueInit::init( self.m_functor , exec.exec_all_reduce_value() ) );
- exec.template exec_all_reduce< FunctorType , typename Policy::work_tag >( self.m_func );
+ exec.template exec_all_reduce< FunctorType , WorkTag >( self.m_functor );
}
public:
- template< class ViewType >
- ParallelReduce( const FunctorType & functor ,
- const Policy & policy ,
- const ViewType & result )
- : m_func( functor )
- , m_team( policy )
+ inline
+ void execute() const
{
QthreadExec::resize_worker_scratch
- ( /* reduction memory */ ValueTraits::value_size( functor )
- , /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() ) );
+ ( /* reduction memory */ ValueTraits::value_size( m_functor )
+ , /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( m_functor , m_policy.team_size() ) );
- Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::execute , this );
+ Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::exec , this );
const pointer_type data = (pointer_type) QthreadExec::exec_all_reduce_result();
- Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( m_func , data );
+ Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( m_functor , data );
- const unsigned n = ValueTraits::value_count( m_func );
- for ( unsigned i = 0 ; i < n ; ++i ) { result.ptr_on_device()[i] = data[i]; }
+ if ( m_result_ptr ) {
+ const unsigned n = ValueTraits::value_count( m_functor );
+ for ( unsigned i = 0 ; i < n ; ++i ) { m_result_ptr[i] = data[i]; }
+ }
}
+
+ template< class ViewType >
+ ParallelReduce( const FunctorType & arg_functor ,
+ const Policy & arg_policy ,
+ const ViewType & arg_result )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ , m_result_ptr( arg_result.ptr_on_device() )
+ { }
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelScan< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > >
+class ParallelScan< FunctorType
+ , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread >
+ >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > Policy ;
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
+
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::member_type Member ;
+ typedef typename Policy::WorkRange WorkRange ;
+
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
- const FunctorType m_func ;
+ const FunctorType m_functor ;
const Policy m_policy ;
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if<
- ( Impl::is_same< typename PType::work_tag , void >::value )
- , const FunctorType & >::type functor
- , reference_type update
- , const bool final
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member ibeg , const Member iend
+ , reference_type update , const bool final )
{
- const typename PType::member_type e = range.end();
- for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
+ for ( Member i = ibeg ; i < iend ; ++i ) {
functor( i , update , final );
}
}
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if<
- ( ! Impl::is_same< typename PType::work_tag , void >::value )
- , const FunctorType & >::type functor
- , reference_type update
- , const bool final
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member ibeg , const Member iend
+ , reference_type update , const bool final )
{
- const typename PType::member_type e = range.end();
- for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
- functor( typename PType::work_tag() , i , update , final );
+ const TagType t{} ;
+ for ( Member i = ibeg ; i < iend ; ++i ) {
+ functor( t , i , update , final );
}
}
- static void execute( QthreadExec & exec , const void * arg )
+ static void exec( QthreadExec & exec , const void * arg )
{
const ParallelScan & self = * ((const ParallelScan *) arg );
- const typename Policy::WorkRange range( self.m_policy , exec.worker_rank() , exec.worker_size() );
+ const WorkRange range( self.m_policy , exec.worker_rank() , exec.worker_size() );
// Initialize thread-local value
- reference_type update = ValueInit::init( self.m_func , exec.exec_all_reduce_value() );
+ reference_type update = ValueInit::init( self.m_functor , exec.exec_all_reduce_value() );
- driver( self.m_func , update , false , range );
+ ParallelScan::template exec_range< WorkTag >( self.m_functor, range.begin() , range.end() , update , false );
- exec.template exec_all_scan< FunctorType , typename Policy::work_tag >( self.m_func );
+ exec.template exec_all_scan< FunctorType , typename Policy::work_tag >( self.m_functor );
- driver( self.m_func , update , true , range );
+ ParallelScan::template exec_range< WorkTag >( self.m_functor , range.begin() , range.end() , update , true );
exec.exec_all_barrier();
}
public:
- ParallelScan( const FunctorType & functor
- , const Policy & policy
- )
- : m_func( functor )
- , m_policy( policy )
+ inline
+ void execute() const
{
- QthreadExec::resize_worker_scratch( ValueTraits::value_size( m_func ) , 0 );
+ QthreadExec::resize_worker_scratch( ValueTraits::value_size( m_functor ) , 0 );
+ Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelScan::exec , this );
+ }
- Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelScan::execute , this );
+ ParallelScan( const FunctorType & arg_functor
+ , const Policy & arg_policy
+ )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ {
}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>
TeamThreadRange(const Impl::QthreadTeamPolicyMember& thread, const iType& count)
{
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>(thread,count);
}
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>
TeamThreadRange( const Impl::QthreadTeamPolicyMember& thread
, const iType & begin
, const iType & end
)
{
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>(thread,begin,end);
}
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >
ThreadVectorRange(const Impl::QthreadTeamPolicyMember& thread, const iType& count) {
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >(thread,count);
}
KOKKOS_INLINE_FUNCTION
Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember> PerTeam(const Impl::QthreadTeamPolicyMember& thread) {
return Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>(thread);
}
KOKKOS_INLINE_FUNCTION
Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember> PerThread(const Impl::QthreadTeamPolicyMember& thread) {
return Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>(thread);
}
} // namespace Kokkos
namespace Kokkos {
/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
*
 * The range i=0..N-1 is mapped to all threads of the calling thread team.
* This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries, const Lambda& lambda) {
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
lambda(i);
}
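/* A minimal usage sketch (assumed names, not code from this file): the
 * nested call sits inside a team-level functor, where 'team' is the
 * policy's member_type, 'n' an extent, and 'y' a View captured by the
 * lambda:
 *
 *   Kokkos::parallel_for( Kokkos::TeamThreadRange( team , n ) ,
 *     [&]( const int i ) { y(i) = 2 * y(i) ; } );
 */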
/** \brief Inter-thread parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
 *
 * The range i=0..N-1 is mapped to all threads of the calling thread team and a summation of
* val is performed and put into result. This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries,
const Lambda & lambda, ValueType& result) {
result = ValueType();
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
result+=tmp;
}
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
}
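/* A minimal usage sketch of the summation form (assumed names 'team',
 * 'n', and View 'x'): each thread accumulates a partial sum, then
 * team_reduce combines the partials so every member receives the total.
 *
 *   double sum = 0 ;
 *   Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team , n ) ,
 *     [&]( const int i , double & partial ) { partial += x(i) ; } , sum );
 */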
#if defined( KOKKOS_HAVE_CXX11 )
/** \brief Inter-thread parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
 *
 * The range i=0..N-1 is mapped to all threads of the calling thread team and a reduction of
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
* '1 for *'). This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries,
const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
join(result,tmp);
}
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
}
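/* A minimal usage sketch with a custom join (assumed names; note that the
 * per-iteration temporaries are value-initialized, so this max reduction
 * is only sound if x(i) is known to be non-negative):
 *
 *   double team_max = 0 ;
 *   Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team , n ) ,
 *     [&]( const int i , double & v ) { if ( x(i) > v ) v = x(i) ; } ,
 *     []( double & dst , const double & src ) { if ( src > dst ) dst = src ; } ,
 *     team_max );
 */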
#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */
} // namespace Kokkos
namespace Kokkos {
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
*
 * The range i=0..N-1 is mapped to all vector lanes of the calling thread.
* This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
lambda(i);
}
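/* A minimal usage sketch (assumed names): inside a per-thread section of
 * a team functor, the vector range distributes iterations across the
 * thread's vector lanes (and requests vectorization via the ivdep pragma
 * above):
 *
 *   Kokkos::parallel_for( Kokkos::ThreadVectorRange( team , n ) ,
 *     [&]( const int i ) { y(i) += x(i) ; } );
 */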
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
 * The range i=0..N-1 is mapped to all vector lanes of the calling thread and a summation of
* val is performed and put into result. This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
result+=tmp;
}
}
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
 * The range i=0..N-1 is mapped to all vector lanes of the calling thread and a reduction of
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
* '1 for *'). This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
join(result,tmp);
}
init_result = result;
}
/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final)
* for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed.
* Depending on the target execution space the operator might be called twice: once with final=false
* and once with final=true. When final==true val contains the prefix sum value. The contribution of this
* "i" needs to be added to val no matter whether final==true or not. In a serial execution
* (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set
* to the final sum value over all vector lanes.
* This functionality requires C++11 support.*/
template< typename iType, class FunctorType >
KOKKOS_INLINE_FUNCTION
void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
loop_boundaries, const FunctorType & lambda) {
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
typedef typename ValueTraits::value_type value_type ;
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
lambda(i,scan_val,true);
}
}
} // namespace Kokkos
namespace Kokkos {
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda) {
lambda();
}
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda) {
if(single_struct.team_member.team_rank()==0) lambda();
}
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda, ValueType& val) {
lambda(val);
}
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda, ValueType& val) {
if(single_struct.team_member.team_rank()==0) {
lambda(val);
}
single_struct.team_member.team_broadcast(val,0);
}
} // namespace Kokkos
#endif /* #ifndef KOKKOS_QTHREAD_PARALLEL_HPP */
diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
index 9de9748de..4f0ad49fa 100644
--- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
+++ b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
@@ -1,483 +1,488 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_QTHREAD )
#include <stdio.h>
#include <stdlib.h>
#include <stdexcept>
#include <iostream>
#include <sstream>
#include <string>
#include <Kokkos_Atomic.hpp>
#include <Qthread/Kokkos_Qthread_TaskPolicy.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
typedef TaskMember< Kokkos::Qthread , void , void > Task ;
namespace {
inline
unsigned padded_sizeof_derived( unsigned sizeof_derived )
{
return sizeof_derived +
( sizeof_derived % sizeof(Task*) ? sizeof(Task*) - sizeof_derived % sizeof(Task*) : 0 );
}
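// Example: with sizeof(Task*) == 8, padded_sizeof_derived(100) == 104,
// so the Task* dependence array appended directly after the derived
// task object begins on a pointer-aligned boundary.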
// int lock_alloc_dealloc = 0 ;
} // namespace
void Task::deallocate( void * ptr )
{
// Counting on 'free' thread safety so lock/unlock not required.
// However, isolate calls here to mitigate future need to introduce lock/unlock.
// lock
// while ( ! Kokkos::atomic_compare_exchange_strong( & lock_alloc_dealloc , 0 , 1 ) );
free( ptr );
// unlock
// Kokkos::atomic_compare_exchange_strong( & lock_alloc_dealloc , 1 , 0 );
}
void * Task::allocate( const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity )
{
// Counting on 'malloc' thread safety so lock/unlock not required.
// However, isolate calls here to mitigate future need to introduce lock/unlock.
// lock
// while ( ! Kokkos::atomic_compare_exchange_strong( & lock_alloc_dealloc , 0 , 1 ) );
void * const ptr = malloc( padded_sizeof_derived( arg_sizeof_derived ) + arg_dependence_capacity * sizeof(Task*) );
// unlock
// Kokkos::atomic_compare_exchange_strong( & lock_alloc_dealloc , 1 , 0 );
return ptr ;
}
Task::~TaskMember()
{
}
Task::TaskMember( const function_verify_type arg_verify
, const function_dealloc_type arg_dealloc
, const function_apply_single_type arg_apply_single
, const function_apply_team_type arg_apply_team
, volatile int & arg_active_count
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
)
: m_dealloc( arg_dealloc )
, m_verify( arg_verify )
, m_apply_single( arg_apply_single )
, m_apply_team( arg_apply_team )
, m_active_count( & arg_active_count )
, m_qfeb(0)
, m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) )
, m_dep_capacity( arg_dependence_capacity )
, m_dep_size( 0 )
, m_ref_count( 0 )
, m_state( Kokkos::Experimental::TASK_STATE_CONSTRUCTING )
{
qthread_empty( & m_qfeb ); // Set to full when complete
for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ;
}
Task::TaskMember( const function_dealloc_type arg_dealloc
, const function_apply_single_type arg_apply_single
, const function_apply_team_type arg_apply_team
, volatile int & arg_active_count
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
)
: m_dealloc( arg_dealloc )
, m_verify( & Task::verify_type<void> )
, m_apply_single( arg_apply_single )
, m_apply_team( arg_apply_team )
, m_active_count( & arg_active_count )
, m_qfeb(0)
, m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) )
, m_dep_capacity( arg_dependence_capacity )
, m_dep_size( 0 )
, m_ref_count( 0 )
, m_state( Kokkos::Experimental::TASK_STATE_CONSTRUCTING )
{
qthread_empty( & m_qfeb ); // Set to full when complete
for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ;
}
//----------------------------------------------------------------------------
void Task::throw_error_add_dependence() const
{
std::cerr << "TaskMember< Qthread >::add_dependence ERROR"
<< " state(" << m_state << ")"
<< " dep_size(" << m_dep_size << ")"
<< std::endl ;
throw std::runtime_error("TaskMember< Qthread >::add_dependence ERROR");
}
void Task::throw_error_verify_type()
{
throw std::runtime_error("TaskMember< Qthread >::verify_type ERROR");
}
//----------------------------------------------------------------------------
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw )
{
static const char msg_error_header[] = "Kokkos::Impl::TaskManager<Kokkos::Qthread>::assign ERROR" ;
static const char msg_error_count[] = ": negative reference count" ;
static const char msg_error_complete[] = ": destroy task that is not complete" ;
static const char msg_error_dependences[] = ": destroy task that has dependences" ;
static const char msg_error_exception[] = ": caught internal exception" ;
if ( rhs ) { Kokkos::atomic_fetch_add( & (*rhs).m_ref_count , 1 ); }
Task * const lhs_val = Kokkos::atomic_exchange( lhs , rhs );
if ( lhs_val ) {
const int count = Kokkos::atomic_fetch_add( & (*lhs_val).m_ref_count , -1 );
const char * msg_error = 0 ;
try {
if ( 1 == count ) {
// Reference count at zero, delete it
// Should only be deallocating a completed task
if ( (*lhs_val).m_state == Kokkos::Experimental::TASK_STATE_COMPLETE ) {
// A completed task should not have dependences...
for ( int i = 0 ; i < (*lhs_val).m_dep_size && 0 == msg_error ; ++i ) {
if ( (*lhs_val).m_dep[i] ) msg_error = msg_error_dependences ;
}
}
else {
msg_error = msg_error_complete ;
}
if ( 0 == msg_error ) {
// Get deletion function and apply it
const Task::function_dealloc_type d = (*lhs_val).m_dealloc ;
(*d)( lhs_val );
}
}
else if ( count <= 0 ) {
msg_error = msg_error_count ;
}
}
catch( ... ) {
if ( 0 == msg_error ) msg_error = msg_error_exception ;
}
if ( 0 != msg_error ) {
if ( no_throw ) {
std::cerr << msg_error_header << msg_error << std::endl ;
std::cerr.flush();
}
else {
std::string msg(msg_error_header);
msg.append(msg_error);
throw std::runtime_error( msg );
}
}
}
}
#endif
//----------------------------------------------------------------------------
+void Task::closeout()
+{
+ enum { RESPAWN = int( Kokkos::Experimental::TASK_STATE_WAITING ) |
+ int( Kokkos::Experimental::TASK_STATE_EXECUTING ) };
+
+#if 0
+fprintf( stdout
+ , "worker(%d.%d) task 0x%.12lx %s\n"
+ , qthread_shep()
+ , qthread_worker_local(NULL)
+ , reinterpret_cast<unsigned long>(this)
+ , ( m_state == RESPAWN ? "respawn" : "complete" )
+ );
+fflush(stdout);
+#endif
+
+ // When dependent tasks run there would be a race
+ // condition between destroying this task and
+ // querying the active count pointer from this task.
+ int volatile * const active_count = m_active_count ;
+
+ if ( m_state == RESPAWN ) {
+ // Task requests respawn, set state to waiting and reschedule the task
+ m_state = Kokkos::Experimental::TASK_STATE_WAITING ;
+ schedule();
+ }
+ else {
+
+ // Task did not respawn, is complete
+ m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ;
+
+ // Release dependences before allowing dependent tasks to run.
+ // Otherwise there is a thread race condition for removing dependences.
+ for ( int i = 0 ; i < m_dep_size ; ++i ) {
+ assign( & m_dep[i] , 0 );
+ }
+
+ // Set qthread FEB to full so that dependent tasks are allowed to execute.
+ // This 'task' may be deleted immediately following this function call.
+ qthread_fill( & m_qfeb );
+
+ // The dependent task could now complete and destroy 'this' task
+ // before the call to 'qthread_fill' returns. Therefore, for
+ // thread safety assume that 'this' task has now been destroyed.
+ }
+
+ // Decrement active task count before returning.
+ Kokkos::atomic_decrement( active_count );
+}
+
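// For illustration: the qthread full/empty bit (FEB) is what sequences
// dependent tasks.  A task is spawned with its dependences' FEBs as
// preconditions, so qthread will not run it until all of them are full:
//
//   qthread_empty( & m_qfeb );  // at construction: dependents must wait
//   qthread_fill( & m_qfeb );   // at completion:   dependents may run
//
// Because a dependent can run -- and destroy this task -- the moment the
// FEB fills, closeout() captures m_active_count in a local first.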
aligned_t Task::qthread_func( void * arg )
{
Task * const task = reinterpret_cast< Task * >(arg);
// The first member of the team changes the state to executing.
// Use compare-exchange to avoid a race with a concurrent respawn.
Kokkos::atomic_compare_exchange_strong( & task->m_state
, int(Kokkos::Experimental::TASK_STATE_WAITING)
, int(Kokkos::Experimental::TASK_STATE_EXECUTING)
);
// It is a single thread's responsibility to close out
// this task's execution.
- bool close_out = false ;
if ( task->m_apply_team && ! task->m_apply_single ) {
const Kokkos::Impl::QthreadTeamPolicyMember::TaskTeam task_team_tag ;
// Initialize team size and rank with shepherd info
Kokkos::Impl::QthreadTeamPolicyMember member( task_team_tag );
(*task->m_apply_team)( task , member );
#if 0
fprintf( stdout
, "worker(%d.%d) task 0x%.12lx executed by member(%d:%d)\n"
, qthread_shep()
, qthread_worker_local(NULL)
, reinterpret_cast<unsigned long>(task)
, member.team_rank()
, member.team_size()
);
fflush(stdout);
#endif
member.team_barrier();
-
- close_out = member.team_rank() == 0 ;
+ if ( member.team_rank() == 0 ) task->closeout();
+ member.team_barrier();
}
else if ( task->m_apply_team && task->m_apply_single == reinterpret_cast<function_apply_single_type>(1) ) {
// Team hard-wired to one, no cloning
Kokkos::Impl::QthreadTeamPolicyMember member ;
(*task->m_apply_team)( task , member );
- close_out = true ;
+ task->closeout();
}
else {
(*task->m_apply_single)( task );
-
- close_out = true ;
- }
-
- if ( close_out ) {
-
- // When dependent tasks run there would be a race
- // condition between destroying this task and
- // querying the active count pointer from this task.
- int volatile * active_count = task->m_active_count ;
-
- if ( task->m_state == ( Kokkos::Experimental::TASK_STATE_WAITING | Kokkos::Experimental::TASK_STATE_EXECUTING ) ) {
-
-#if 0
-fprintf( stdout
- , "worker(%d.%d) task 0x%.12lx respawn\n"
- , qthread_shep()
- , qthread_worker_local(NULL)
- , reinterpret_cast<unsigned long>(task)
- );
-fflush(stdout);
-#endif
-
- // Task respawned, set state to waiting and reschedule the task
- task->m_state = Kokkos::Experimental::TASK_STATE_WAITING ;
- task->schedule();
- }
- else {
-
- // Task did not respawn, is complete
- task->m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ;
-
- // Release dependences before allowing dependent tasks to run.
- // Otherwise there is a thread race condition for removing dependences.
- for ( int i = 0 ; i < task->m_dep_size ; ++i ) {
- assign( & task->m_dep[i] , 0 );
- }
-
- // Set qthread FEB to full so that dependent tasks are allowed to execute.
- // This 'task' may be deleted immediately following this function call.
- qthread_fill( & task->m_qfeb );
- }
-
- // Decrement active task count before returning.
- Kokkos::atomic_decrement( active_count );
+ task->closeout();
}
#if 0
fprintf( stdout
, "worker(%d.%d) task 0x%.12lx return\n"
, qthread_shep()
, qthread_worker_local(NULL)
, reinterpret_cast<unsigned long>(task)
);
fflush(stdout);
#endif
return 0 ;
}
void Task::respawn()
{
// Change state from pure executing to ( waiting | executing )
// to avoid confusion with simply waiting.
Kokkos::atomic_compare_exchange_strong( & m_state
, int(Kokkos::Experimental::TASK_STATE_EXECUTING)
, int(Kokkos::Experimental::TASK_STATE_WAITING |
Kokkos::Experimental::TASK_STATE_EXECUTING)
);
}
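// For illustration: m_state transitions are driven from three places, and
// the two compare-exchanges keep them unambiguous --
//
//   qthread_func : WAITING             -> EXECUTING            (begin)
//   respawn      : EXECUTING           -> WAITING | EXECUTING  (request)
//   closeout     : WAITING | EXECUTING -> WAITING   (reschedule)
//                  EXECUTING           -> COMPLETE  (fill the FEB)
//
// A plain store here could be lost against the begin-of-execution exchange;
// the compare-exchange only marks a task that is currently executing.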
void Task::schedule()
{
// Is waiting for execution
// Increment active task count before spawning.
Kokkos::atomic_increment( m_active_count );
// Spawn in qthread. The precondition array must be malloc'ed and handed to
// qthread, which eventually frees the allocation, so no memory is leaked.
// Open concern: is malloc thread safe here, or does this call need a guard?
aligned_t ** qprecon = (aligned_t **) malloc( ( m_dep_size + 1 ) * sizeof(aligned_t *) );
qprecon[0] = reinterpret_cast<aligned_t *>( uintptr_t(m_dep_size) );
for ( int i = 0 ; i < m_dep_size ; ++i ) {
qprecon[i+1] = & m_dep[i]->m_qfeb ; // Qthread precondition flag
}
if ( m_apply_team && ! m_apply_single ) {
// If more than one shepherd spawn on a shepherd other than this shepherd
const int num_shepherd = qthread_num_shepherds();
const int num_worker_per_shepherd = qthread_num_workers_local(NO_SHEPHERD);
const int this_shepherd = qthread_shep();
int spawn_shepherd = ( this_shepherd + 1 ) % num_shepherd ;
#if 0
fprintf( stdout
, "worker(%d.%d) task 0x%.12lx spawning on shepherd(%d) clone(%d)\n"
, qthread_shep()
, qthread_worker_local(NULL)
, reinterpret_cast<unsigned long>(this)
, spawn_shepherd
, num_worker_per_shepherd - 1
);
fflush(stdout);
#endif
qthread_spawn_cloneable
( & Task::qthread_func
, this
, 0
, NULL
, m_dep_size , qprecon /* dependences */
, spawn_shepherd
- // , unsigned( QTHREAD_SPAWN_SIMPLE | QTHREAD_SPAWN_LOCAL_PRIORITY )
- , unsigned( QTHREAD_SPAWN_LOCAL_PRIORITY )
+ , unsigned( QTHREAD_SPAWN_SIMPLE | QTHREAD_SPAWN_LOCAL_PRIORITY )
, num_worker_per_shepherd - 1
);
}
else {
qthread_spawn( & Task::qthread_func /* function */
, this /* function argument */
, 0
, NULL
, m_dep_size , qprecon /* dependences */
, NO_SHEPHERD
, QTHREAD_SPAWN_SIMPLE /* allows optimization for non-blocking task */
);
}
}
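// For illustration: the precondition array handed to qthread above is
//
//   qprecon[0]   == (aligned_t *) uintptr_t( m_dep_size )  // count, smuggled as a pointer
//   qprecon[i+1] == & m_dep[i]->m_qfeb                     // i-th dependence FEB
//
// qthread takes ownership of the malloc'ed block and frees it itself,
// which is why no matching free() appears here.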
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
namespace Kokkos {
namespace Experimental {
TaskPolicy< Kokkos::Qthread >::
TaskPolicy( const unsigned arg_default_dependence_capacity
, const unsigned arg_team_size )
: m_default_dependence_capacity( arg_default_dependence_capacity )
, m_team_size( arg_team_size != 0 ? arg_team_size : unsigned(qthread_num_workers_local(NO_SHEPHERD)) )
, m_active_count_root(0)
, m_active_count( m_active_count_root )
{
const unsigned num_worker_per_shepherd = unsigned( qthread_num_workers_local(NO_SHEPHERD) );
if ( m_team_size != 1 && m_team_size != num_worker_per_shepherd ) {
std::ostringstream msg ;
msg << "Kokkos::Experimental::TaskPolicy< Kokkos::Qthread >( "
<< "default_depedence = " << arg_default_dependence_capacity
<< " , team_size = " << arg_team_size
<< " ) ERROR, valid team_size arguments are { (omitted) , 1 , " << num_worker_per_shepherd << " }" ;
Kokkos::Impl::throw_runtime_exception(msg.str());
}
}
TaskPolicy< Kokkos::Qthread >::member_type &
TaskPolicy< Kokkos::Qthread >::member_single()
{
static member_type s ;
return s ;
}
void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > & policy )
{
volatile int * const active_task_count = & policy.m_active_count ;
while ( *active_task_count ) qthread_yield();
}
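// For illustration only, a minimal usage sketch of this interface;
// 'MyFunctor' is a hypothetical user functor providing
// 'typedef void value_type' and the apply operator expected by apply_single:
//
//   Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > policy ;
//
//   Kokkos::Experimental::Future< void , Kokkos::Qthread > f =
//     policy.spawn( policy.create( MyFunctor() ) ); // allocate, then schedule
//
//   Kokkos::Experimental::wait( policy ); // yields until the active count is zero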
} // namespace Experimental
} // namespace Kokkos
#endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */
diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp
index 1f4a622eb..9ff27de37 100644
--- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp
+++ b/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp
@@ -1,642 +1,643 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_QTHREAD_TASKPOLICY_HPP
#define KOKKOS_QTHREAD_TASKPOLICY_HPP
#include <string>
#include <typeinfo>
#include <stdexcept>
//----------------------------------------------------------------------------
// Defines to enable experimental Qthread functionality
#define QTHREAD_LOCAL_PRIORITY
#define CLONED_TASKS
#include <qthread.h>
#undef QTHREAD_LOCAL_PRIORITY
#undef CLONED_TASKS
//----------------------------------------------------------------------------
#include <Kokkos_Qthread.hpp>
#include <Kokkos_TaskPolicy.hpp>
#include <Kokkos_View.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
template<>
class TaskMember< Kokkos::Qthread , void , void >
{
public:
typedef void (* function_apply_single_type) ( TaskMember * );
typedef void (* function_apply_team_type) ( TaskMember * , Kokkos::Impl::QthreadTeamPolicyMember & );
typedef void (* function_dealloc_type)( TaskMember * );
typedef TaskMember * (* function_verify_type) ( TaskMember * );
private:
const function_dealloc_type m_dealloc ; ///< Deallocation
const function_verify_type m_verify ; ///< Result type verification
const function_apply_single_type m_apply_single ; ///< Apply function
const function_apply_team_type m_apply_team ; ///< Apply function
int volatile * const m_active_count ; ///< Count of active tasks on this policy
aligned_t m_qfeb ; ///< Qthread full/empty bit
TaskMember ** const m_dep ; ///< Dependences
const int m_dep_capacity ; ///< Capacity of dependences
int m_dep_size ; ///< Actual count of dependences
int m_ref_count ; ///< Reference count
int m_state ; ///< State of the task
TaskMember() /* = delete */ ;
TaskMember( const TaskMember & ) /* = delete */ ;
TaskMember & operator = ( const TaskMember & ) /* = delete */ ;
static aligned_t qthread_func( void * arg );
static void * allocate( const unsigned arg_sizeof_derived , const unsigned arg_dependence_capacity );
static void deallocate( void * );
void throw_error_add_dependence() const ;
static void throw_error_verify_type();
template < class DerivedTaskType >
static
void deallocate( TaskMember * t )
{
DerivedTaskType * ptr = static_cast< DerivedTaskType * >(t);
ptr->~DerivedTaskType();
deallocate( (void *) ptr );
}
void schedule();
+ void closeout();
protected :
~TaskMember();
// Used by TaskMember< Qthread , ResultType , void >
TaskMember( const function_verify_type arg_verify
, const function_dealloc_type arg_dealloc
, const function_apply_single_type arg_apply_single
, const function_apply_team_type arg_apply_team
, volatile int & arg_active_count
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
);
// Used for TaskMember< Qthread , void , void >
TaskMember( const function_dealloc_type arg_dealloc
, const function_apply_single_type arg_apply_single
, const function_apply_team_type arg_apply_team
, volatile int & arg_active_count
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
);
public:
template< typename ResultType >
KOKKOS_FUNCTION static
TaskMember * verify_type( TaskMember * t )
{
enum { check_type = ! Kokkos::Impl::is_same< ResultType , void >::value };
if ( check_type && t != 0 ) {
// Verify that t->m_verify is this function
const function_verify_type self = & TaskMember::template verify_type< ResultType > ;
if ( t->m_verify != self ) {
t = 0 ;
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
throw_error_verify_type();
#endif
}
}
return t ;
}
//----------------------------------------
/* Inheritance requirements on task types:
* typedef FunctorType::value_type value_type ;
* class DerivedTaskType
* : public TaskMember< Qthread , value_type , FunctorType >
* { ... };
* class TaskMember< Qthread , value_type , FunctorType >
* : public TaskMember< Qthread , value_type , void >
* , public Functor
* { ... };
* If value_type != void
* class TaskMember< Qthread , value_type , void >
* : public TaskMember< Qthread , void , void >
*
* Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ]
*
*/
/** \brief Allocate and construct a single-thread task */
template< class DerivedTaskType >
static
TaskMember * create_single( const typename DerivedTaskType::functor_type & arg_functor
, volatile int & arg_active_count
, const unsigned arg_dependence_capacity )
{
typedef typename DerivedTaskType::functor_type functor_type ;
typedef typename functor_type::value_type value_type ;
DerivedTaskType * const task =
new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
, & TaskMember::template apply_single< functor_type , value_type >
, 0
, arg_active_count
, sizeof(DerivedTaskType)
, arg_dependence_capacity
, arg_functor );
return static_cast< TaskMember * >( task );
}
/** \brief Allocate and construct a team-thread task */
template< class DerivedTaskType >
static
TaskMember * create_team( const typename DerivedTaskType::functor_type & arg_functor
, volatile int & arg_active_count
, const unsigned arg_dependence_capacity
, const bool arg_is_team )
{
typedef typename DerivedTaskType::functor_type functor_type ;
typedef typename functor_type::value_type value_type ;
const function_apply_single_type flag = reinterpret_cast<function_apply_single_type>( arg_is_team ? 0 : 1 );
DerivedTaskType * const task =
new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) )
DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType >
, flag
, & TaskMember::template apply_team< functor_type , value_type >
, arg_active_count
, sizeof(DerivedTaskType)
, arg_dependence_capacity
, arg_functor );
return static_cast< TaskMember * >( task );
}
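// For illustration: create_team() encodes the team mode in m_apply_single
// instead of a separate flag --
//
//   arg_is_team == true  : m_apply_single == 0 -> cloned, full-team execution
//   arg_is_team == false : m_apply_single == 1 -> team hard-wired to one
//
// qthread_func() tests exactly these two encodings, so the value 1 is never
// actually called through as a function pointer.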
void respawn();
void spawn()
{
m_state = Kokkos::Experimental::TASK_STATE_WAITING ;
schedule();
}
//----------------------------------------
typedef FutureValueTypeIsVoidError get_result_type ;
KOKKOS_INLINE_FUNCTION
get_result_type get() const { return get_result_type() ; }
KOKKOS_INLINE_FUNCTION
Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); }
//----------------------------------------
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
static
void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false );
#else
KOKKOS_INLINE_FUNCTION static
void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ) {}
#endif
KOKKOS_INLINE_FUNCTION
TaskMember * get_dependence( int i ) const
{ return ( Kokkos::Experimental::TASK_STATE_EXECUTING == m_state && 0 <= i && i < m_dep_size ) ? m_dep[i] : (TaskMember*) 0 ; }
KOKKOS_INLINE_FUNCTION
int get_dependence() const
{ return m_dep_size ; }
KOKKOS_INLINE_FUNCTION
void clear_dependence()
{
for ( int i = 0 ; i < m_dep_size ; ++i ) assign( m_dep + i , 0 );
m_dep_size = 0 ;
}
KOKKOS_INLINE_FUNCTION
void add_dependence( TaskMember * before )
{
if ( ( Kokkos::Experimental::TASK_STATE_CONSTRUCTING == m_state ||
Kokkos::Experimental::TASK_STATE_EXECUTING == m_state ) &&
m_dep_size < m_dep_capacity ) {
assign( m_dep + m_dep_size , before );
++m_dep_size ;
}
else {
throw_error_add_dependence();
}
}
//----------------------------------------
template< class FunctorType , class ResultType >
KOKKOS_INLINE_FUNCTION static
void apply_single( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
{
typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ;
// TaskMember< Kokkos::Qthread , ResultType , FunctorType >
// : public TaskMember< Kokkos::Qthread , ResultType , void >
// , public FunctorType
// { ... };
derived_type & m = * static_cast< derived_type * >( t );
Kokkos::Impl::FunctorApply< FunctorType , void , ResultType & >::apply( (FunctorType &) m , & m.m_result );
}
template< class FunctorType , class ResultType >
KOKKOS_INLINE_FUNCTION static
void apply_single( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t )
{
typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ;
// TaskMember< Kokkos::Qthread , ResultType , FunctorType >
// : public TaskMember< Kokkos::Qthread , ResultType , void >
// , public FunctorType
// { ... };
derived_type & m = * static_cast< derived_type * >( t );
Kokkos::Impl::FunctorApply< FunctorType , void , void >::apply( (FunctorType &) m );
}
//----------------------------------------
template< class FunctorType , class ResultType >
KOKKOS_INLINE_FUNCTION static
void apply_team( typename Kokkos::Impl::enable_if< ! Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t
, Kokkos::Impl::QthreadTeamPolicyMember & member )
{
typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ;
derived_type & m = * static_cast< derived_type * >( t );
m.FunctorType::apply( member , m.m_result );
}
template< class FunctorType , class ResultType >
KOKKOS_INLINE_FUNCTION static
void apply_team( typename Kokkos::Impl::enable_if< Kokkos::Impl::is_same< ResultType , void >::value , TaskMember * >::type t
, Kokkos::Impl::QthreadTeamPolicyMember & member )
{
typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ;
derived_type & m = * static_cast< derived_type * >( t );
m.FunctorType::apply( member );
}
};
//----------------------------------------------------------------------------
/** \brief Base class for tasks with a result value in the Qthread execution space.
*
* The FunctorType must be void because this class is accessed by the
* Future class for the task and result value.
*
* Must be derived from TaskMember<S,void,void> 'root class' so the Future class
* can correctly static_cast from the 'root class' to this class.
*/
template < class ResultType >
class TaskMember< Kokkos::Qthread , ResultType , void >
: public TaskMember< Kokkos::Qthread , void , void >
{
public:
ResultType m_result ;
typedef const ResultType & get_result_type ;
KOKKOS_INLINE_FUNCTION
get_result_type get() const { return m_result ; }
protected:
typedef TaskMember< Kokkos::Qthread , void , void > task_root_type ;
typedef task_root_type::function_dealloc_type function_dealloc_type ;
typedef task_root_type::function_apply_single_type function_apply_single_type ;
typedef task_root_type::function_apply_team_type function_apply_team_type ;
inline
TaskMember( const function_dealloc_type arg_dealloc
, const function_apply_single_type arg_apply_single
, const function_apply_team_type arg_apply_team
, volatile int & arg_active_count
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
)
: task_root_type( & task_root_type::template verify_type< ResultType >
, arg_dealloc
, arg_apply_single
, arg_apply_team
, arg_active_count
, arg_sizeof_derived
, arg_dependence_capacity )
, m_result()
{}
};
template< class ResultType , class FunctorType >
class TaskMember< Kokkos::Qthread , ResultType , FunctorType >
: public TaskMember< Kokkos::Qthread , ResultType , void >
, public FunctorType
{
public:
typedef FunctorType functor_type ;
typedef TaskMember< Kokkos::Qthread , void , void > task_root_type ;
typedef TaskMember< Kokkos::Qthread , ResultType , void > task_base_type ;
typedef task_root_type::function_dealloc_type function_dealloc_type ;
typedef task_root_type::function_apply_single_type function_apply_single_type ;
typedef task_root_type::function_apply_team_type function_apply_team_type ;
inline
TaskMember( const function_dealloc_type arg_dealloc
, const function_apply_single_type arg_apply_single
, const function_apply_team_type arg_apply_team
, volatile int & arg_active_count
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
, const functor_type & arg_functor
)
: task_base_type( arg_dealloc
, arg_apply_single
, arg_apply_team
, arg_active_count
, arg_sizeof_derived
, arg_dependence_capacity )
, functor_type( arg_functor )
{}
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
void wait( TaskPolicy< Kokkos::Qthread > & );
template<>
class TaskPolicy< Kokkos::Qthread >
{
public:
typedef Kokkos::Qthread execution_space ;
typedef Kokkos::Impl::QthreadTeamPolicyMember member_type ;
private:
typedef Impl::TaskMember< execution_space , void , void > task_root_type ;
TaskPolicy & operator = ( const TaskPolicy & ) /* = delete */ ;
template< class FunctorType >
static inline
const task_root_type * get_task_root( const FunctorType * f )
{
typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
return static_cast< const task_root_type * >( static_cast< const task_type * >(f) );
}
template< class FunctorType >
static inline
task_root_type * get_task_root( FunctorType * f )
{
typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ;
return static_cast< task_root_type * >( static_cast< task_type * >(f) );
}
const unsigned m_default_dependence_capacity ;
const unsigned m_team_size ;
volatile int m_active_count_root ;
volatile int & m_active_count ;
public:
explicit
TaskPolicy( const unsigned arg_default_dependence_capacity = 4
, const unsigned arg_team_size = 0 /* assign default */ );
KOKKOS_INLINE_FUNCTION
TaskPolicy( const TaskPolicy & rhs )
: m_default_dependence_capacity( rhs.m_default_dependence_capacity )
- , m_team_size( m_team_size )
+ , m_team_size( rhs.m_team_size )
, m_active_count_root(0)
, m_active_count( rhs.m_active_count )
{}
KOKKOS_INLINE_FUNCTION
TaskPolicy( const TaskPolicy & rhs
, const unsigned arg_default_dependence_capacity )
: m_default_dependence_capacity( arg_default_dependence_capacity )
- , m_team_size( m_team_size )
+ , m_team_size( rhs.m_team_size )
, m_active_count_root(0)
, m_active_count( rhs.m_active_count )
{}
//----------------------------------------
template< class ValueType >
const Future< ValueType , execution_space > &
spawn( const Future< ValueType , execution_space > & f ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
f.m_task->spawn();
#endif
return f ;
}
// Create single-thread task
template< class FunctorType >
Future< typename FunctorType::value_type , execution_space >
create( const FunctorType & functor
, const unsigned dependence_capacity = ~0u ) const
{
typedef typename FunctorType::value_type value_type ;
typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;
return Future< value_type , execution_space >(
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
task_root_type::create_single< task_type >
( functor
, m_active_count
, ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity )
)
#endif
);
}
// Create thread-team task
template< class FunctorType >
KOKKOS_INLINE_FUNCTION
Future< typename FunctorType::value_type , execution_space >
create_team( const FunctorType & functor
, const unsigned dependence_capacity = ~0u ) const
{
typedef typename FunctorType::value_type value_type ;
typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ;
return Future< value_type , execution_space >(
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
task_root_type::create_team< task_type >
( functor
, m_active_count
, ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity )
, 1 < m_team_size
)
#endif
);
}
// Add dependence
template< class A1 , class A2 , class A3 , class A4 >
void add_dependence( const Future<A1,A2> & after
, const Future<A3,A4> & before
, typename Kokkos::Impl::enable_if
< Kokkos::Impl::is_same< typename Future<A1,A2>::execution_space , execution_space >::value
&&
Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
>::type * = 0
)
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
after.m_task->add_dependence( before.m_task );
#endif
}
//----------------------------------------
// Functions for an executing task functor to query dependences,
// set new dependences, and respawn itself.
template< class FunctorType >
Future< void , execution_space >
get_dependence( const FunctorType * task_functor , int i ) const
{
return Future<void,execution_space>(
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
get_task_root(task_functor)->get_dependence(i)
#endif
);
}
template< class FunctorType >
int get_dependence( const FunctorType * task_functor ) const
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return get_task_root(task_functor)->get_dependence(); }
#else
{ return 0 ; }
#endif
template< class FunctorType >
void clear_dependence( FunctorType * task_functor ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
get_task_root(task_functor)->clear_dependence();
#endif
}
template< class FunctorType , class A3 , class A4 >
void add_dependence( FunctorType * task_functor
, const Future<A3,A4> & before
, typename Kokkos::Impl::enable_if
< Kokkos::Impl::is_same< typename Future<A3,A4>::execution_space , execution_space >::value
>::type * = 0
)
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
get_task_root(task_functor)->add_dependence( before.m_task );
#endif
}
template< class FunctorType >
void respawn( FunctorType * task_functor ) const
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ get_task_root(task_functor)->respawn(); }
#else
{}
#endif
static member_type & member_single();
friend void wait( TaskPolicy< Kokkos::Qthread > & );
};
} /* namespace Experimental */
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_QTHREAD_TASKPOLICY_HPP */
diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
index 99553fccb..078cc658b 100644
--- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
+++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
@@ -1,758 +1,839 @@
/*
//@HEADER
// ************************************************************************
-//
+//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
-//
+//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
-//
+//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
-//
+//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_WINTHREAD )
#include <stdint.h>
#include <limits>
#include <utility>
#include <iostream>
#include <sstream>
-#include <Kokkos_Threads.hpp>
-#include <Kokkos_hwloc.hpp>
-#include <Kokkos_Atomic.hpp>
+#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
namespace {
ThreadsExec s_threads_process ;
ThreadsExec * s_threads_exec[ ThreadsExec::MAX_THREAD_COUNT ] = { 0 };
pthread_t s_threads_pid[ ThreadsExec::MAX_THREAD_COUNT ] = { 0 };
std::pair<unsigned,unsigned> s_threads_coord[ ThreadsExec::MAX_THREAD_COUNT ];
int s_thread_pool_size[3] = { 0 , 0 , 0 };
unsigned s_current_reduce_size = 0 ;
unsigned s_current_shared_size = 0 ;
void (* volatile s_current_function)( ThreadsExec & , const void * );
const void * volatile s_current_function_arg = 0 ;
struct Sentinel {
Sentinel()
{
HostSpace::register_in_parallel( ThreadsExec::in_parallel );
}
~Sentinel()
{
if ( s_thread_pool_size[0] ||
s_thread_pool_size[1] ||
s_thread_pool_size[2] ||
s_current_reduce_size ||
s_current_shared_size ||
s_current_function ||
s_current_function_arg ||
s_threads_exec[0] ) {
std::cerr << "ERROR : Process exiting without calling Kokkos::Threads::terminate()" << std::endl ;
}
}
};
inline
unsigned fan_size( const unsigned rank , const unsigned size )
{
const unsigned rank_rev = size - ( rank + 1 );
unsigned count = 0 ;
for ( unsigned n = 1 ; ( rank_rev + n < size ) && ! ( rank_rev & n ) ; n <<= 1 ) { ++count ; }
return count ;
}
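// For illustration, a worked example of fan_size(): with size = 8 the
// reversed ranks pair up in powers of two, so
//
//   rank = 7 (rank_rev = 0) : fans in from rank_rev 1, 2, 4 -> count = 3
//   rank = 6 (rank_rev = 1) : low bit set, nothing fans in  -> count = 0
//   rank = 5 (rank_rev = 2) : fans in from rank_rev 3       -> count = 1
//
// i.e. the thread with the highest pool rank (rank_rev == 0) is the root.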
} // namespace
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
void execute_function_noop( ThreadsExec & , const void * ) {}
void ThreadsExec::driver(void)
{
ThreadsExec this_thread ;
while ( ThreadsExec::Active == this_thread.m_pool_state ) {
(*s_current_function)( this_thread , s_current_function_arg );
// Deactivate thread and wait for reactivation
this_thread.m_pool_state = ThreadsExec::Inactive ;
wait_yield( this_thread.m_pool_state , ThreadsExec::Inactive );
}
}
ThreadsExec::ThreadsExec()
: m_pool_base(0)
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
, m_scratch()
+#else
+ , m_scratch(0)
+#endif
, m_scratch_reduce_end(0)
, m_scratch_thread_end(0)
, m_numa_rank(0)
, m_numa_core_rank(0)
, m_pool_rank(0)
, m_pool_size(0)
, m_pool_fan_size(0)
, m_pool_state( ThreadsExec::Terminating )
{
if ( & s_threads_process != this ) {
// A spawned thread
ThreadsExec * const nil = 0 ;
// Which entry in 's_threads_exec', possibly determined from hwloc binding
const int entry = ((size_t)s_current_function_arg) < size_t(s_thread_pool_size[0])
? ((size_t)s_current_function_arg)
: size_t(Kokkos::hwloc::bind_this_thread( s_thread_pool_size[0] , s_threads_coord ));
// Given a good entry set this thread in the 's_threads_exec' array
if ( entry < s_thread_pool_size[0] &&
nil == atomic_compare_exchange( s_threads_exec + entry , nil , this ) ) {
const std::pair<unsigned,unsigned> coord = Kokkos::hwloc::get_this_thread_coordinate();
m_numa_rank = coord.first ;
m_numa_core_rank = coord.second ;
m_pool_base = s_threads_exec ;
m_pool_rank = s_thread_pool_size[0] - ( entry + 1 );
m_pool_size = s_thread_pool_size[0] ;
m_pool_fan_size = fan_size( m_pool_rank , m_pool_size );
m_pool_state = ThreadsExec::Active ;
s_threads_pid[ m_pool_rank ] = pthread_self();
// Inform spawning process that the threads_exec entry has been set.
s_threads_process.m_pool_state = ThreadsExec::Active ;
}
else {
// Inform spawning process that the threads_exec entry could not be set.
s_threads_process.m_pool_state = ThreadsExec::Terminating ;
}
}
else {
// Enables 'parallel_for' to execute on uninitialized Threads device
m_pool_rank = 0 ;
m_pool_size = 1 ;
m_pool_state = ThreadsExec::Inactive ;
s_threads_pid[ m_pool_rank ] = pthread_self();
}
}
ThreadsExec::~ThreadsExec()
{
const unsigned entry = m_pool_size - ( m_pool_rank + 1 );
- m_pool_base = 0 ;
+#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
+ typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ;
+
+ if ( m_scratch ) {
+ Record * const r = Record::get_record( m_scratch );
+
+ m_scratch = 0 ;
+
+ Record::decrement( r );
+ }
+
+#else
+
m_scratch.clear();
+
+#endif
+
+ m_pool_base = 0 ;
m_scratch_reduce_end = 0 ;
m_scratch_thread_end = 0 ;
m_numa_rank = 0 ;
m_numa_core_rank = 0 ;
m_pool_rank = 0 ;
m_pool_size = 0 ;
m_pool_fan_size = 0 ;
m_pool_state = ThreadsExec::Terminating ;
if ( & s_threads_process != this && entry < MAX_THREAD_COUNT ) {
ThreadsExec * const nil = 0 ;
atomic_compare_exchange( s_threads_exec + entry , this , nil );
s_threads_process.m_pool_state = ThreadsExec::Terminating ;
}
}
int ThreadsExec::get_thread_count()
{
return s_thread_pool_size[0] ;
}
ThreadsExec * ThreadsExec::get_thread( const int init_thread_rank )
{
ThreadsExec * const th =
init_thread_rank < s_thread_pool_size[0]
? s_threads_exec[ s_thread_pool_size[0] - ( init_thread_rank + 1 ) ] : 0 ;
if ( 0 == th || th->m_pool_rank != init_thread_rank ) {
std::ostringstream msg ;
msg << "Kokkos::Impl::ThreadsExec::get_thread ERROR : "
<< "thread " << init_thread_rank << " of " << s_thread_pool_size[0] ;
if ( 0 == th ) {
msg << " does not exist" ;
}
else {
msg << " has wrong thread_rank " << th->m_pool_rank ;
}
Kokkos::Impl::throw_runtime_exception( msg.str() );
}
return th ;
}
//----------------------------------------------------------------------------
void ThreadsExec::execute_sleep( ThreadsExec & exec , const void * )
{
ThreadsExec::global_lock();
ThreadsExec::global_unlock();
const int n = exec.m_pool_fan_size ;
const int rank_rev = exec.m_pool_size - ( exec.m_pool_rank + 1 );
for ( int i = 0 ; i < n ; ++i ) {
Impl::spinwait( exec.m_pool_base[ rank_rev + (1<<i) ]->m_pool_state , ThreadsExec::Active );
}
exec.m_pool_state = ThreadsExec::Inactive ;
}
}
}
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
void ThreadsExec::verify_is_process( const std::string & name , const bool initialized )
{
if ( ! is_process() ) {
std::string msg( name );
msg.append( " FAILED : Called by a worker thread, can only be called by the master process." );
Kokkos::Impl::throw_runtime_exception( msg );
}
if ( initialized && 0 == s_thread_pool_size[0] ) {
std::string msg( name );
msg.append( " FAILED : Threads not initialized." );
Kokkos::Impl::throw_runtime_exception( msg );
}
}
int ThreadsExec::in_parallel()
{
// A thread function is in execution and
// the function argument is not the special threads process argument and
// the master process is itself a pool worker, or the caller is not the master process.
return s_current_function &&
( & s_threads_process != s_current_function_arg ) &&
( s_threads_process.m_pool_base || ! is_process() );
}
// Wait for root thread to become inactive
void ThreadsExec::fence()
{
if ( s_thread_pool_size[0] ) {
// Wait for the root thread to complete:
Impl::spinwait( s_threads_exec[0]->m_pool_state , ThreadsExec::Active );
}
s_current_function = 0 ;
s_current_function_arg = 0 ;
+
+ // Make sure function and arguments are cleared before
+ // potentially re-activating threads with a subsequent launch.
+ memory_fence();
}
/** \brief Begin execution of the asynchronous functor */
void ThreadsExec::start( void (*func)( ThreadsExec & , const void * ) , const void * arg )
{
verify_is_process("ThreadsExec::start" , true );
if ( s_current_function || s_current_function_arg ) {
Kokkos::Impl::throw_runtime_exception( std::string( "ThreadsExec::start() FAILED : already executing" ) );
}
s_current_function = func ;
s_current_function_arg = arg ;
+ // Make sure function and arguments are written before activating threads.
+ memory_fence();
+
// Activate threads:
for ( int i = s_thread_pool_size[0] ; 0 < i-- ; ) {
s_threads_exec[i]->m_pool_state = ThreadsExec::Active ;
}
if ( s_threads_process.m_pool_size ) {
// Master process is the root thread, run it:
(*func)( s_threads_process , arg );
s_threads_process.m_pool_state = ThreadsExec::Inactive ;
}
}
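// For illustration: the fences added in start() / fence() implement a
// publish-then-activate handshake with driver() --
//
//   master:                               worker (driver):
//     s_current_function     = func ;       spin until state == Active ;
//     s_current_function_arg = arg ;        (*s_current_function)( self , arg );
//     memory_fence();                       state = Inactive ;
//     worker state = Active ;
//
// Without the fence a worker could observe Active before the function
// pointer and argument stores become visible.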
//----------------------------------------------------------------------------
bool ThreadsExec::sleep()
{
verify_is_process("ThreadsExec::sleep", true );
if ( & execute_sleep == s_current_function ) return false ;
fence();
ThreadsExec::global_lock();
s_current_function = & execute_sleep ;
// Activate threads:
for ( unsigned i = s_thread_pool_size[0] ; 0 < i ; ) {
s_threads_exec[--i]->m_pool_state = ThreadsExec::Active ;
}
return true ;
}
bool ThreadsExec::wake()
{
verify_is_process("ThreadsExec::wake", true );
if ( & execute_sleep != s_current_function ) return false ;
ThreadsExec::global_unlock();
if ( s_threads_process.m_pool_base ) {
execute_sleep( s_threads_process , 0 );
s_threads_process.m_pool_state = ThreadsExec::Inactive ;
}
fence();
return true ;
}
//----------------------------------------------------------------------------
void ThreadsExec::execute_serial( void (*func)( ThreadsExec & , const void * ) )
{
s_current_function = func ;
s_current_function_arg = & s_threads_process ;
+ // Make sure function and arguments are written before activating threads.
+ memory_fence();
+
const unsigned begin = s_threads_process.m_pool_base ? 1 : 0 ;
for ( unsigned i = s_thread_pool_size[0] ; begin < i ; ) {
ThreadsExec & th = * s_threads_exec[ --i ];
th.m_pool_state = ThreadsExec::Active ;
wait_yield( th.m_pool_state , ThreadsExec::Active );
}
if ( s_threads_process.m_pool_base ) {
s_threads_process.m_pool_state = ThreadsExec::Active ;
(*func)( s_threads_process , 0 );
s_threads_process.m_pool_state = ThreadsExec::Inactive ;
}
s_current_function_arg = 0 ;
s_current_function = 0 ;
+
+ // Make sure function and arguments are cleared before proceeding.
+ memory_fence();
}
//----------------------------------------------------------------------------
void * ThreadsExec::root_reduce_scratch()
{
return s_threads_process.reduce_memory();
}
void ThreadsExec::execute_resize_scratch( ThreadsExec & exec , const void * )
{
+#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
+ typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ;
+
+ if ( exec.m_scratch ) {
+ Record * const r = Record::get_record( exec.m_scratch );
+
+ exec.m_scratch = 0 ;
+
+ Record::decrement( r );
+ }
+
+#else
+
exec.m_scratch.clear();
+#endif
+
exec.m_scratch_reduce_end = s_threads_process.m_scratch_reduce_end ;
exec.m_scratch_thread_end = s_threads_process.m_scratch_thread_end ;
if ( s_threads_process.m_scratch_thread_end ) {
+#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
+ // Allocate tracked memory:
+ {
+ Record * const r = Record::allocate( Kokkos::HostSpace() , "thread_scratch" , s_threads_process.m_scratch_thread_end );
+
+ Record::increment( r );
+
+ exec.m_scratch = r->data();
+ }
+
+ unsigned * ptr = reinterpret_cast<unsigned *>( exec.m_scratch );
+
+#else
+
exec.m_scratch =
HostSpace::allocate_and_track( "thread_scratch" , s_threads_process.m_scratch_thread_end );
unsigned * ptr = reinterpret_cast<unsigned *>( exec.m_scratch.alloc_ptr() );
+
+#endif
+
unsigned * const end = ptr + s_threads_process.m_scratch_thread_end / sizeof(unsigned);
// First-touch the pages on this thread so the allocation is local to it
while ( ptr < end ) *ptr++ = 0 ;
}
}
void * ThreadsExec::resize_scratch( size_t reduce_size , size_t thread_size )
{
enum { ALIGN_MASK = Kokkos::Impl::MEMORY_ALIGNMENT - 1 };
fence();
const size_t old_reduce_size = s_threads_process.m_scratch_reduce_end ;
const size_t old_thread_size = s_threads_process.m_scratch_thread_end - s_threads_process.m_scratch_reduce_end ;
reduce_size = ( reduce_size + ALIGN_MASK ) & ~ALIGN_MASK ;
thread_size = ( thread_size + ALIGN_MASK ) & ~ALIGN_MASK ;
// Increase size or deallocate completely.
if ( ( old_reduce_size < reduce_size ) ||
( old_thread_size < thread_size ) ||
( ( reduce_size == 0 && thread_size == 0 ) &&
( old_reduce_size != 0 || old_thread_size != 0 ) ) ) {
verify_is_process( "ThreadsExec::resize_scratch" , true );
s_threads_process.m_scratch_reduce_end = reduce_size ;
s_threads_process.m_scratch_thread_end = reduce_size + thread_size ;
execute_serial( & execute_resize_scratch );
s_threads_process.m_scratch = s_threads_exec[0]->m_scratch ;
}
+#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+ return s_threads_process.m_scratch ;
+#else
return s_threads_process.m_scratch.alloc_ptr() ;
+#endif
}
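// For illustration, the alignment arithmetic above rounds a size up to the
// next multiple of Kokkos::Impl::MEMORY_ALIGNMENT.  E.g. with an alignment
// of 64 (ALIGN_MASK = 63):
//
//   ( 100 + 63 ) & ~63  ==  163 & ~63  ==  128
//   ( 128 + 63 ) & ~63  ==  191 & ~63  ==  128   // already aligned: unchanged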
//----------------------------------------------------------------------------
void ThreadsExec::print_configuration( std::ostream & s , const bool detail )
{
verify_is_process("ThreadsExec::print_configuration",false);
fence();
const unsigned numa_count = Kokkos::hwloc::get_available_numa_count();
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
// Forestall compiler warnings for unused variables.
(void) numa_count;
(void) cores_per_numa;
(void) threads_per_core;
s << "Kokkos::Threads" ;
#if defined( KOKKOS_HAVE_PTHREAD )
s << " KOKKOS_HAVE_PTHREAD" ;
#endif
#if defined( KOKKOS_HAVE_HWLOC )
s << " hwloc[" << numa_count << "x" << cores_per_numa << "x" << threads_per_core << "]" ;
#endif
if ( s_thread_pool_size[0] ) {
s << " threads[" << s_thread_pool_size[0] << "]"
<< " threads_per_numa[" << s_thread_pool_size[1] << "]"
<< " threads_per_core[" << s_thread_pool_size[2] << "]"
;
if ( 0 == s_threads_process.m_pool_base ) { s << " Asynchronous" ; }
s << " ReduceScratch[" << s_current_reduce_size << "]"
<< " SharedScratch[" << s_current_shared_size << "]" ;
s << std::endl ;
if ( detail ) {
for ( int i = 0 ; i < s_thread_pool_size[0] ; ++i ) {
ThreadsExec * const th = s_threads_exec[i] ;
if ( th ) {
const int rank_rev = th->m_pool_size - ( th->m_pool_rank + 1 );
s << " Thread[ " << th->m_pool_rank << " : "
<< th->m_numa_rank << "." << th->m_numa_core_rank << " ]" ;
s << " Fan{" ;
for ( int j = 0 ; j < th->m_pool_fan_size ; ++j ) {
ThreadsExec * const thfan = th->m_pool_base[rank_rev+(1<<j)] ;
s << " [ " << thfan->m_pool_rank << " : "
<< thfan->m_numa_rank << "." << thfan->m_numa_core_rank << " ]" ;
}
s << " }" ;
if ( th == & s_threads_process ) {
s << " is_process" ;
}
}
s << std::endl ;
}
}
}
else {
s << " not initialized" << std::endl ;
}
}
//----------------------------------------------------------------------------
int ThreadsExec::is_initialized()
{ return 0 != s_threads_exec[0] ; }
void ThreadsExec::initialize( unsigned thread_count ,
unsigned use_numa_count ,
unsigned use_cores_per_numa ,
bool allow_asynchronous_threadpool )
{
static const Sentinel sentinel ;
const bool is_initialized = 0 != s_thread_pool_size[0] ;
unsigned thread_spawn_failed = 0 ;
for ( int i = 0; i < ThreadsExec::MAX_THREAD_COUNT ; i++)
s_threads_exec[i] = NULL;
if ( ! is_initialized ) {
// If thread_count, use_numa_count, or use_cores_per_numa are zero
// then they are given default values based upon hwloc detection
// and upon whether asynchronous execution is allowed.
- const bool hwloc_avail = hwloc::available();
+ const bool hwloc_avail = Kokkos::hwloc::available();
+ const bool hwloc_can_bind = hwloc_avail && Kokkos::hwloc::can_bind_threads();
if ( thread_count == 0 ) {
thread_count = hwloc_avail
? Kokkos::hwloc::get_available_numa_count() *
Kokkos::hwloc::get_available_cores_per_numa() *
Kokkos::hwloc::get_available_threads_per_core()
: 1 ;
}
const unsigned thread_spawn_begin =
hwloc::thread_mapping( "Kokkos::Threads::initialize" ,
allow_asynchronous_threadpool ,
thread_count ,
use_numa_count ,
use_cores_per_numa ,
s_threads_coord );
const std::pair<unsigned,unsigned> proc_coord = s_threads_coord[0] ;
if ( thread_spawn_begin ) {
// Synchronous with s_threads_coord[0] as the process core
// Claim entry #0 for binding the process core.
s_threads_coord[0] = std::pair<unsigned,unsigned>(~0u,~0u);
}
s_thread_pool_size[0] = thread_count ;
s_thread_pool_size[1] = s_thread_pool_size[0] / use_numa_count ;
s_thread_pool_size[2] = s_thread_pool_size[1] / use_cores_per_numa ;
s_current_function = & execute_function_noop ; // Initialization work function
for ( unsigned ith = thread_spawn_begin ; ith < thread_count ; ++ith ) {
s_threads_process.m_pool_state = ThreadsExec::Inactive ;
// If hwloc is available then the spawned thread will
// choose its own entry in 's_threads_coord'
// otherwise specify the entry.
- s_current_function_arg = (void*)static_cast<uintptr_t>( hwloc_avail ? ~0u : ith );
+ s_current_function_arg = (void*)static_cast<uintptr_t>( hwloc_can_bind ? ~0u : ith );
+
+ // Make sure all outstanding memory writes are complete
+ // before spawning the new thread.
+ memory_fence();
// Spawn thread executing the 'driver()' function.
// Wait until spawned thread has attempted to initialize.
// If spawning and initialization are successful then
// an entry in 's_threads_exec' will be assigned.
if ( ThreadsExec::spawn() ) {
wait_yield( s_threads_process.m_pool_state , ThreadsExec::Inactive );
}
if ( s_threads_process.m_pool_state == ThreadsExec::Terminating ) break ;
}
// Wait for all spawned threads to deactivate before zeroing the function.
for ( unsigned ith = thread_spawn_begin ; ith < thread_count ; ++ith ) {
// Try to protect against cache coherency failure by casting to volatile.
ThreadsExec * const th = ((ThreadsExec * volatile *)s_threads_exec)[ith] ;
if ( th ) {
wait_yield( th->m_pool_state , ThreadsExec::Active );
}
else {
++thread_spawn_failed ;
}
}
s_current_function = 0 ;
s_current_function_arg = 0 ;
s_threads_process.m_pool_state = ThreadsExec::Inactive ;
+ memory_fence();
+
if ( ! thread_spawn_failed ) {
// Bind process to the core on which it was located before spawning occurred
- Kokkos::hwloc::bind_this_thread( proc_coord );
+ if (hwloc_can_bind) {
+ Kokkos::hwloc::bind_this_thread( proc_coord );
+ }
if ( thread_spawn_begin ) { // Include process in pool.
const std::pair<unsigned,unsigned> coord = Kokkos::hwloc::get_this_thread_coordinate();
s_threads_exec[0] = & s_threads_process ;
s_threads_process.m_numa_rank = coord.first ;
s_threads_process.m_numa_core_rank = coord.second ;
s_threads_process.m_pool_base = s_threads_exec ;
s_threads_process.m_pool_rank = thread_count - 1 ; // Reversed for scan-compatible reductions
s_threads_process.m_pool_size = thread_count ;
s_threads_process.m_pool_fan_size = fan_size( s_threads_process.m_pool_rank , s_threads_process.m_pool_size );
s_threads_pid[ s_threads_process.m_pool_rank ] = pthread_self();
}
else {
s_threads_process.m_pool_base = 0 ;
s_threads_process.m_pool_rank = 0 ;
s_threads_process.m_pool_size = 0 ;
s_threads_process.m_pool_fan_size = 0 ;
}
// Initial allocations:
ThreadsExec::resize_scratch( 1024 , 1024 );
}
else {
s_thread_pool_size[0] = 0 ;
s_thread_pool_size[1] = 0 ;
s_thread_pool_size[2] = 0 ;
}
}
if ( is_initialized || thread_spawn_failed ) {
std::ostringstream msg ;
msg << "Kokkos::Threads::initialize ERROR" ;
if ( is_initialized ) {
msg << " : already initialized" ;
}
if ( thread_spawn_failed ) {
msg << " : failed to spawn " << thread_spawn_failed << " threads" ;
}
Kokkos::Impl::throw_runtime_exception( msg.str() );
}
// Init the lock array used for arbitrarily sized atomics
Impl::init_lock_array_host_space();
}
//----------------------------------------------------------------------------
void ThreadsExec::finalize()
{
verify_is_process("ThreadsExec::finalize",false);
fence();
resize_scratch(0,0);
const unsigned begin = s_threads_process.m_pool_base ? 1 : 0 ;
for ( unsigned i = s_thread_pool_size[0] ; begin < i-- ; ) {
if ( s_threads_exec[i] ) {
s_threads_exec[i]->m_pool_state = ThreadsExec::Terminating ;
wait_yield( s_threads_process.m_pool_state , ThreadsExec::Inactive );
s_threads_process.m_pool_state = ThreadsExec::Inactive ;
}
s_threads_pid[i] = 0 ;
}
if ( s_threads_process.m_pool_base ) {
( & s_threads_process )->~ThreadsExec();
s_threads_exec[0] = 0 ;
}
- Kokkos::hwloc::unbind_this_thread();
+ if (Kokkos::hwloc::can_bind_threads() ) {
+ Kokkos::hwloc::unbind_this_thread();
+ }
s_thread_pool_size[0] = 0 ;
s_thread_pool_size[1] = 0 ;
s_thread_pool_size[2] = 0 ;
// Reset master thread to run solo.
s_threads_process.m_numa_rank = 0 ;
s_threads_process.m_numa_core_rank = 0 ;
s_threads_process.m_pool_base = 0 ;
s_threads_process.m_pool_rank = 0 ;
s_threads_process.m_pool_size = 1 ;
s_threads_process.m_pool_fan_size = 0 ;
s_threads_process.m_pool_state = ThreadsExec::Inactive ;
}
//----------------------------------------------------------------------------
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
Threads & Threads::instance(int)
{
static Threads t ;
return t ;
}
int Threads::thread_pool_size( int depth )
{
return Impl::s_thread_pool_size[depth];
}
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
int Threads::thread_pool_rank()
{
const pthread_t pid = pthread_self();
int i = 0;
while ( ( i < Impl::s_thread_pool_size[0] ) && ( pid != Impl::s_threads_pid[i] ) ) { ++i ; }
return i ;
}
#endif
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_WINTHREAD ) */
diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp
index 382069797..684eac8b7 100644
--- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp
+++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp
@@ -1,465 +1,479 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_THREADSEXEC_HPP
#define KOKKOS_THREADSEXEC_HPP
#include <stdio.h>
#include <utility>
#include <impl/Kokkos_spinwait.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <Kokkos_Atomic.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
class ThreadsExec {
public:
// Fan array has log_2(NT) reduction threads plus 2 scan threads
// Currently limited to 16k threads.
enum { MAX_FAN_COUNT = 16 };
enum { MAX_THREAD_COUNT = 1 << ( MAX_FAN_COUNT - 2 ) };
enum { VECTOR_LENGTH = 8 };
/** \brief States of a worker thread */
enum { Terminating ///< Termination in progress
, Inactive ///< Exists, waiting for work
, Active ///< Exists, performing work
, Rendezvous ///< Exists, waiting in a barrier or reduce
, ScanCompleted
, ScanAvailable
, ReductionAvailable
};
private:
friend class Kokkos::Threads ;
// Fan-in operations' root is the highest ranking thread
// to place the 'scan' reduction intermediate values on
// the threads that need them.
// For a simple reduction the thread location is arbitrary.
ThreadsExec * const * m_pool_base ; ///< Base for pool fan-in
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
Impl::AllocationTracker m_scratch ;
+#else
+ void * m_scratch ;
+#endif
int m_scratch_reduce_end ;
int m_scratch_thread_end ;
int m_numa_rank ;
int m_numa_core_rank ;
int m_pool_rank ;
int m_pool_size ;
int m_pool_fan_size ;
int volatile m_pool_state ; ///< State for global synchronizations
static void global_lock();
static void global_unlock();
static bool spawn();
static void execute_resize_scratch( ThreadsExec & , const void * );
static void execute_sleep( ThreadsExec & , const void * );
ThreadsExec( const ThreadsExec & );
ThreadsExec & operator = ( const ThreadsExec & );
static void execute_serial( void (*)( ThreadsExec & , const void * ) );
public:
KOKKOS_INLINE_FUNCTION int pool_size() const { return m_pool_size ; }
KOKKOS_INLINE_FUNCTION int pool_rank() const { return m_pool_rank ; }
KOKKOS_INLINE_FUNCTION int numa_rank() const { return m_numa_rank ; }
KOKKOS_INLINE_FUNCTION int numa_core_rank() const { return m_numa_core_rank ; }
static int get_thread_count();
static ThreadsExec * get_thread( const int init_thread_rank );
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
inline void * reduce_memory() const { return reinterpret_cast<unsigned char *>(m_scratch.alloc_ptr()); }
KOKKOS_INLINE_FUNCTION void * scratch_memory() const { return reinterpret_cast<unsigned char *>(m_scratch.alloc_ptr()) + m_scratch_reduce_end ; }
+#else
+
+ inline void * reduce_memory() const { return m_scratch ; }
+ KOKKOS_INLINE_FUNCTION void * scratch_memory() const
+ { return reinterpret_cast<unsigned char *>(m_scratch) + m_scratch_reduce_end ; }
+
+#endif
+
KOKKOS_INLINE_FUNCTION int volatile & state() { return m_pool_state ; }
KOKKOS_INLINE_FUNCTION ThreadsExec * const * pool_base() const { return m_pool_base ; }
static void driver(void);
~ThreadsExec();
ThreadsExec();
static void * resize_scratch( size_t reduce_size , size_t thread_size );
static void * root_reduce_scratch();
static bool is_process();
static void verify_is_process( const std::string & , const bool initialized );
static int is_initialized();
static void initialize( unsigned thread_count ,
unsigned use_numa_count ,
unsigned use_cores_per_numa ,
bool allow_asynchronous_threadpool );
static void finalize();
/* Given a requested team size, return valid team size */
static unsigned team_size_valid( unsigned );
static void print_configuration( std::ostream & , const bool detail = false );
//------------------------------------
static void wait_yield( volatile int & , const int );
//------------------------------------
// All-thread functions:
inline
int all_reduce( const int value )
{
// Write this thread's contribution into its reduce scratch memory:
const int rev_rank = m_pool_size - ( m_pool_rank + 1 );
*((volatile int*) reduce_memory()) = value ;
memory_fence();
// Fan-in reduction with highest ranking thread as the root
for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
// Wait: Active -> Rendezvous
Impl::spinwait( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Active );
}
if ( rev_rank ) {
m_pool_state = ThreadsExec::Rendezvous ;
// Wait: Rendezvous -> Active
Impl::spinwait( m_pool_state , ThreadsExec::Rendezvous );
}
else {
// Root thread does the reduction and broadcast
int accum = 0 ;
for ( int rank = 0 ; rank < m_pool_size ; ++rank ) {
accum += *((volatile int *) get_thread( rank )->reduce_memory());
}
for ( int rank = 0 ; rank < m_pool_size ; ++rank ) {
*((volatile int *) get_thread( rank )->reduce_memory()) = accum ;
}
memory_fence();
for ( int rank = 0 ; rank < m_pool_size ; ++rank ) {
get_thread( rank )->m_pool_state = ThreadsExec::Active ;
}
}
return *((volatile int*) reduce_memory());
}
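// Illustrative sketch (not part of this header): all_reduce is invoked
// from a driver function handed to ThreadsExec::start. The driver below
// is hypothetical; each pool thread contributes its own value and every
// thread observes the same total on return.
//
//   void example_driver( ThreadsExec & exec , const void * )
//   {
//     const int total = exec.all_reduce( exec.pool_rank() + 1 );
//     // For a pool of size N every thread now holds N*(N+1)/2 in 'total'.
//     (void) total ;
//   }
//   // ThreadsExec::start( & example_driver , 0 ); ThreadsExec::fence();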
//------------------------------------
// All-thread functions:
template< class FunctorType , class ArgTag >
inline
void fan_in_reduce( const FunctorType & f ) const
{
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > Join ;
typedef Kokkos::Impl::FunctorFinal< FunctorType , ArgTag > Final ;
const int rev_rank = m_pool_size - ( m_pool_rank + 1 );
for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
ThreadsExec & fan = *m_pool_base[ rev_rank + ( 1 << i ) ] ;
Impl::spinwait( fan.m_pool_state , ThreadsExec::Active );
Join::join( f , reduce_memory() , fan.reduce_memory() );
}
if ( ! rev_rank ) {
Final::final( f , reduce_memory() );
}
}
inline
void fan_in() const
{
const int rev_rank = m_pool_size - ( m_pool_rank + 1 );
for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
Impl::spinwait( m_pool_base[rev_rank+(1<<i)]->m_pool_state , ThreadsExec::Active );
}
}
template< class FunctorType , class ArgTag >
inline
void scan_large( const FunctorType & f )
{
// Sequence of states:
// 0) Active : entry and exit state
// 1) ReductionAvailable : reduction value available
// 2) ScanAvailable : inclusive scan value available
// 3) Rendezvous : All threads inclusive scan value are available
// 4) ScanCompleted : exclusive scan value copied
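// Example trace for a non-root thread (following the numbered states above):
//   Active -> ReductionAvailable -> ScanAvailable -> Rendezvous
//     -> ScanCompleted -> Active
// The root (highest ranking) thread stays Active and releases the others.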
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , ArgTag > Traits ;
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > Join ;
typedef Kokkos::Impl::FunctorValueInit< FunctorType , ArgTag > Init ;
typedef typename Traits::value_type scalar_type ;
const int rev_rank = m_pool_size - ( m_pool_rank + 1 );
const unsigned count = Traits::value_count( f );
scalar_type * const work_value = (scalar_type *) reduce_memory();
//--------------------------------
// Fan-in reduction with highest ranking thread as the root
for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
ThreadsExec & fan = *m_pool_base[ rev_rank + (1<<i) ];
// Wait: Active -> ReductionAvailable (or ScanAvailable)
Impl::spinwait( fan.m_pool_state , ThreadsExec::Active );
Join::join( f , work_value , fan.reduce_memory() );
}
// Copy reduction value to scan value before releasing from this phase.
for ( unsigned i = 0 ; i < count ; ++i ) { work_value[i+count] = work_value[i] ; }
if ( rev_rank ) {
// Set: Active -> ReductionAvailable
m_pool_state = ThreadsExec::ReductionAvailable ;
// Wait for contributing threads' scan value to be available.
if ( ( 1 << m_pool_fan_size ) < ( m_pool_rank + 1 ) ) {
ThreadsExec & th = *m_pool_base[ rev_rank + ( 1 << m_pool_fan_size ) ] ;
// Wait: Active -> ReductionAvailable
// Wait: ReductionAvailable -> ScanAvailable
Impl::spinwait( th.m_pool_state , ThreadsExec::Active );
Impl::spinwait( th.m_pool_state , ThreadsExec::ReductionAvailable );
Join::join( f , work_value + count , ((scalar_type *)th.reduce_memory()) + count );
}
// This thread has completed inclusive scan
// Set: ReductionAvailable -> ScanAvailable
m_pool_state = ThreadsExec::ScanAvailable ;
// Wait for all threads to complete inclusive scan
// Wait: ScanAvailable -> Rendezvous
Impl::spinwait( m_pool_state , ThreadsExec::ScanAvailable );
}
//--------------------------------
for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
ThreadsExec & fan = *m_pool_base[ rev_rank + (1<<i) ];
// Wait: ReductionAvailable -> ScanAvailable
Impl::spinwait( fan.m_pool_state , ThreadsExec::ReductionAvailable );
// Set: ScanAvailable -> Rendezvous
fan.m_pool_state = ThreadsExec::Rendezvous ;
}
// All threads have completed the inclusive scan.
// All non-root threads are in the Rendezvous state.
// Threads are free to overwrite their reduction value.
//--------------------------------
if ( ( rev_rank + 1 ) < m_pool_size ) {
// Exclusive scan: copy the previous thread's inclusive scan value
ThreadsExec & th = *m_pool_base[ rev_rank + 1 ] ; // Not the root thread
const scalar_type * const src_value = ((scalar_type *)th.reduce_memory()) + count ;
for ( unsigned j = 0 ; j < count ; ++j ) { work_value[j] = src_value[j]; }
}
else {
(void) Init::init( f , work_value );
}
//--------------------------------
// Wait for all threads to copy previous thread's inclusive scan value
// Wait for all threads: Rendezvous -> ScanCompleted
for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
Impl::spinwait( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Rendezvous );
}
if ( rev_rank ) {
// Set: ScanAvailable -> ScanCompleted
m_pool_state = ThreadsExec::ScanCompleted ;
// Wait: ScanCompleted -> Active
Impl::spinwait( m_pool_state , ThreadsExec::ScanCompleted );
}
// Set: ScanCompleted -> Active
for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
m_pool_base[ rev_rank + (1<<i) ]->m_pool_state = ThreadsExec::Active ;
}
}
template< class FunctorType , class ArgTag >
inline
void scan_small( const FunctorType & f )
{
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , ArgTag > Traits ;
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > Join ;
typedef Kokkos::Impl::FunctorValueInit< FunctorType , ArgTag > Init ;
typedef typename Traits::value_type scalar_type ;
const int rev_rank = m_pool_size - ( m_pool_rank + 1 );
const unsigned count = Traits::value_count( f );
scalar_type * const work_value = (scalar_type *) reduce_memory();
//--------------------------------
// Fan-in reduction with highest ranking thread as the root
for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
// Wait: Active -> Rendezvous
Impl::spinwait( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Active );
}
for ( unsigned i = 0 ; i < count ; ++i ) { work_value[i+count] = work_value[i]; }
if ( rev_rank ) {
m_pool_state = ThreadsExec::Rendezvous ;
// Wait: Rendezvous -> Active
Impl::spinwait( m_pool_state , ThreadsExec::Rendezvous );
}
else {
// Root thread does the thread-scan before releasing threads
scalar_type * ptr_prev = 0 ;
for ( int rank = 0 ; rank < m_pool_size ; ++rank ) {
scalar_type * const ptr = (scalar_type *) get_thread( rank )->reduce_memory();
if ( rank ) {
for ( unsigned i = 0 ; i < count ; ++i ) { ptr[i] = ptr_prev[ i + count ]; }
Join::join( f , ptr + count , ptr );
}
else {
(void) Init::init( f , ptr );
}
ptr_prev = ptr ;
}
}
for ( int i = 0 ; i < m_pool_fan_size ; ++i ) {
m_pool_base[ rev_rank + (1<<i) ]->m_pool_state = ThreadsExec::Active ;
}
}
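// Worked example for the scans above (hypothetical 3-thread pool, integer
// sum): contributions { 5 , 7 , 2 } in thread-rank order produce inclusive
// scan values { 5 , 12 , 14 } and exclusive results { 0 , 5 , 12 }, where
// rank 0 receives the identity from Init::init and 14 is the grand total.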
//------------------------------------
/** \brief Wait for previous asynchronous functor to
* complete and release the Threads device.
* Acquire the Threads device and start this functor.
*/
static void start( void (*)( ThreadsExec & , const void * ) , const void * );
static int in_parallel();
static void fence();
static bool sleep();
static bool wake();
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
inline int Threads::in_parallel()
{ return Impl::ThreadsExec::in_parallel(); }
inline int Threads::is_initialized()
{ return Impl::ThreadsExec::is_initialized(); }
inline void Threads::initialize(
unsigned threads_count ,
unsigned use_numa_count ,
unsigned use_cores_per_numa ,
bool allow_asynchronous_threadpool )
{
Impl::ThreadsExec::initialize( threads_count , use_numa_count , use_cores_per_numa , allow_asynchronous_threadpool );
}
inline void Threads::finalize()
{
Impl::ThreadsExec::finalize();
}
inline void Threads::print_configuration( std::ostream & s , const bool detail )
{
Impl::ThreadsExec::print_configuration( s , detail );
}
inline bool Threads::sleep()
{ return Impl::ThreadsExec::sleep() ; }
inline bool Threads::wake()
{ return Impl::ThreadsExec::wake() ; }
inline void Threads::fence()
{ Impl::ThreadsExec::fence() ; }
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #define KOKKOS_THREADSEXEC_HPP */
diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp
index 40d5efd0f..ce0924867 100644
--- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp
+++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp
@@ -1,254 +1,255 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core_fwd.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_PTHREAD )
/* Standard 'C' Linux libraries */
#include <pthread.h>
#include <sched.h>
#include <errno.h>
/* Standard C++ libraries */
#include <cstdlib>
#include <string>
#include <iostream>
#include <stdexcept>
#include <Kokkos_Threads.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
namespace {
pthread_mutex_t host_internal_pthread_mutex = PTHREAD_MUTEX_INITIALIZER ;
// Pthreads compatible driver.
// Recovery from an exception would require constant intra-thread health
// verification, which would negatively impact runtime. As such, simply
// abort the process.
void * internal_pthread_driver( void * )
{
try {
ThreadsExec::driver();
}
catch( const std::exception & x ) {
std::cerr << "Exception thrown from worker thread: " << x.what() << std::endl ;
std::cerr.flush();
std::abort();
}
catch( ... ) {
std::cerr << "Exception thrown from worker thread" << std::endl ;
std::cerr.flush();
std::abort();
}
return NULL ;
}
} // namespace
//----------------------------------------------------------------------------
// Spawn a thread
bool ThreadsExec::spawn()
{
bool result = false ;
pthread_attr_t attr ;
// All three attribute calls must succeed before spawning the thread.
if ( 0 == pthread_attr_init( & attr ) &&
0 == pthread_attr_setscope( & attr, PTHREAD_SCOPE_SYSTEM ) &&
0 == pthread_attr_setdetachstate( & attr, PTHREAD_CREATE_DETACHED ) ) {
pthread_t pt ;
result = 0 == pthread_create( & pt, & attr, internal_pthread_driver, 0 );
}
pthread_attr_destroy( & attr );
return result ;
}
//----------------------------------------------------------------------------
bool ThreadsExec::is_process()
{
static const pthread_t master_pid = pthread_self();
return pthread_equal( master_pid , pthread_self() );
}
void ThreadsExec::global_lock()
{
pthread_mutex_lock( & host_internal_pthread_mutex );
}
void ThreadsExec::global_unlock()
{
pthread_mutex_unlock( & host_internal_pthread_mutex );
}
//----------------------------------------------------------------------------
void ThreadsExec::wait_yield( volatile int & flag , const int value )
{
while ( value == flag ) { sched_yield(); }
}
} // namespace Impl
} // namespace Kokkos
/* end #if defined( KOKKOS_HAVE_PTHREAD ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#elif defined( KOKKOS_HAVE_WINTHREAD )
/* Windows libraries */
+#include <winsock2.h>
#include <windows.h>
#include <process.h>
/* Standard C++ libraries */
#include <cstdlib>
#include <string>
#include <iostream>
#include <stdexcept>
#include <Kokkos_Threads.hpp>
//----------------------------------------------------------------------------
// Driver for each created pthread
namespace Kokkos {
namespace Impl {
namespace {
unsigned WINAPI internal_winthread_driver( void * arg )
{
ThreadsExec::driver();
return 0 ;
}
class ThreadLockWindows {
private:
CRITICAL_SECTION m_handle ;
ThreadLockWindows()
{ InitializeCriticalSection( & m_handle ); }
~ThreadLockWindows()
{ DeleteCriticalSection( & m_handle ); }
ThreadLockWindows( const ThreadLockWindows & );
ThreadLockWindows & operator = ( const ThreadLockWindows & );
public:
static ThreadLockWindows & singleton();
void lock()
{ EnterCriticalSection( & m_handle ); }
void unlock()
{ LeaveCriticalSection( & m_handle ); }
};
ThreadLockWindows & ThreadLockWindows::singleton()
{ static ThreadLockWindows self ; return self ; }
} // namespace
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Spawn this thread
bool ThreadsExec::spawn()
{
unsigned Win32ThreadID = 0 ;
HANDLE handle = (HANDLE)
_beginthreadex(0,0,internal_winthread_driver,0,0, & Win32ThreadID );
// A null handle indicates that thread creation failed.
return 0 != handle ;
}
bool ThreadsExec::is_process() { return true ; }
void ThreadsExec::global_lock()
{ ThreadLockWindows::singleton().lock(); }
void ThreadsExec::global_unlock()
{ ThreadLockWindows::singleton().unlock(); }
void ThreadsExec::wait_yield( volatile int & flag , const int value )
{
while ( value == flag ) { Sleep(0); }
}
} // namespace Impl
} // namespace Kokkos
#endif /* end #elif defined( KOKKOS_HAVE_WINTHREAD ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
index 53b5eb01d..b69d72d78 100644
--- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
+++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp
@@ -1,730 +1,781 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_THREADSTEAM_HPP
#define KOKKOS_THREADSTEAM_HPP
#include <stdio.h>
#include <utility>
#include <impl/Kokkos_spinwait.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#include <Kokkos_Atomic.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
template< class > struct ThreadsExecAdapter ;
//----------------------------------------------------------------------------
class ThreadsExecTeamMember {
private:
enum { TEAM_REDUCE_SIZE = 512 };
typedef Kokkos::Threads execution_space ;
typedef execution_space::scratch_memory_space space ;
ThreadsExec * const m_exec ;
ThreadsExec * const * m_team_base ; ///< Base for team fan-in
space m_team_shared ;
int m_team_shared_size ;
int m_team_size ;
int m_team_rank ;
int m_team_rank_rev ;
int m_league_size ;
int m_league_end ;
int m_league_rank ;
inline
void set_team_shared()
{ new( & m_team_shared ) space( ((char *) (*m_team_base)->scratch_memory()) + TEAM_REDUCE_SIZE , m_team_shared_size ); }
public:
// Fan-in and wait until the matching fan-out is called.
// The root thread which does not wait will return true.
// All other threads will return false during the fan-out.
KOKKOS_INLINE_FUNCTION bool team_fan_in() const
{
int n , j ;
// Wait for fan-in threads
for ( n = 1 ; ( ! ( m_team_rank_rev & n ) ) && ( ( j = m_team_rank_rev + n ) < m_team_size ) ; n <<= 1 ) {
Impl::spinwait( m_team_base[j]->state() , ThreadsExec::Active );
}
// If not root then wait for release
if ( m_team_rank_rev ) {
m_exec->state() = ThreadsExec::Rendezvous ;
Impl::spinwait( m_exec->state() , ThreadsExec::Rendezvous );
}
return ! m_team_rank_rev ;
}
KOKKOS_INLINE_FUNCTION void team_fan_out() const
{
int n , j ;
for ( n = 1 ; ( ! ( m_team_rank_rev & n ) ) && ( ( j = m_team_rank_rev + n ) < m_team_size ) ; n <<= 1 ) {
m_team_base[j]->state() = ThreadsExec::Active ;
}
}
public:
KOKKOS_INLINE_FUNCTION static int team_reduce_size() { return TEAM_REDUCE_SIZE ; }
KOKKOS_INLINE_FUNCTION
const execution_space::scratch_memory_space & team_shmem() const
{ return m_team_shared ; }
KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; }
KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; }
KOKKOS_INLINE_FUNCTION void team_barrier() const
{
team_fan_in();
team_fan_out();
}
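// Illustrative sketch: team_barrier() composes the two halves above. A
// thread publishing data to teammates writes, fences, then barriers, e.g.
// (hypothetical shared buffer):
//
//   shared[ m_team_rank ] = partial ;
//   memory_fence();
//   member.team_barrier();   // all prior writes now visible to the team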
template<class ValueType>
KOKKOS_INLINE_FUNCTION
void team_broadcast(ValueType& value, const int& thread_id) const
{
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ }
#else
// Make sure there is enough scratch space:
typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE
, ValueType , void >::type type ;
if ( m_team_base ) {
type * const local_value = ((type*) m_team_base[0]->scratch_memory());
if(team_rank() == thread_id) *local_value = value;
memory_fence();
team_barrier();
value = *local_value;
}
#endif
}
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_reduce( const Type & value ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return Type(); }
#else
{
// Make sure there is enough scratch space:
typedef typename if_c< sizeof(Type) < TEAM_REDUCE_SIZE , Type , void >::type type ;
if ( 0 == m_exec ) return value ;
*((volatile type*) m_exec->scratch_memory() ) = value ;
memory_fence();
type & accum = *((type *) m_team_base[0]->scratch_memory() );
if ( team_fan_in() ) {
for ( int i = 1 ; i < m_team_size ; ++i ) {
accum += *((type *) m_team_base[i]->scratch_memory() );
}
memory_fence();
}
team_fan_out();
return accum ;
}
#endif
#ifdef KOKKOS_HAVE_CXX11
template< class ValueType, class JoinOp >
KOKKOS_INLINE_FUNCTION ValueType
team_reduce( const ValueType & value
, const JoinOp & op_in ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return ValueType(); }
#else
{
typedef ValueType value_type;
const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
#endif
#else // KOKKOS_HAVE_CXX11
template< class JoinOp >
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
team_reduce( const typename JoinOp::value_type & value
, const JoinOp & op ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return typename JoinOp::value_type(); }
#else
{
typedef typename JoinOp::value_type value_type;
#endif
#endif // KOKKOS_HAVE_CXX11
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// Make sure there is enough scratch space:
typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
, value_type , void >::type type ;
if ( 0 == m_exec ) return value ;
type * const local_value = ((type*) m_exec->scratch_memory());
// Set this thread's contribution
*local_value = value ;
// Fence to make sure the base team member has access:
memory_fence();
if ( team_fan_in() ) {
// The last thread to synchronize returns true, all other threads wait for team_fan_out()
type * const team_value = ((type*) m_team_base[0]->scratch_memory());
// Join to the team value:
for ( int i = 1 ; i < m_team_size ; ++i ) {
op.join( *team_value , *((type*) m_team_base[i]->scratch_memory()) );
}
// Team base thread may "lap" member threads so copy out to their local value.
for ( int i = 1 ; i < m_team_size ; ++i ) {
*((type*) m_team_base[i]->scratch_memory()) = *team_value ;
}
// Fence to make sure all team members have access
memory_fence();
}
team_fan_out();
// Value was changed by the team base
return *((type volatile const *) local_value);
}
#endif
/** \brief Intra-team exclusive prefix sum with team_rank() ordering
* and non-deterministic inter-team accumulation ordering.
*
* The global inter-team accumulation value will, at the end of the
* league's parallel execution, be the scan's total.
* Parallel execution ordering of the league's teams is non-deterministic.
* As such the base value for each team's scan operation is similarly
* non-deterministic.
*/
template< typename ArgType >
KOKKOS_INLINE_FUNCTION ArgType team_scan( const ArgType & value , ArgType * const global_accum ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return ArgType(); }
#else
{
// Make sure there is enough scratch space:
typedef typename if_c< sizeof(ArgType) < TEAM_REDUCE_SIZE , ArgType , void >::type type ;
if ( 0 == m_exec ) return type(0);
volatile type * const work_value = ((type*) m_exec->scratch_memory());
*work_value = value ;
memory_fence();
if ( team_fan_in() ) {
// The last thread to synchronize returns true, all other threads wait for team_fan_out()
// m_team_base[0] == highest ranking team member
// m_team_base[ m_team_size - 1 ] == lowest ranking team member
//
// 1) copy from lower to higher rank, initialize lowest rank to zero
// 2) prefix sum from lowest to highest rank, skipping lowest rank
type accum = 0 ;
if ( global_accum ) {
for ( int i = m_team_size ; i-- ; ) {
type & val = *((type*) m_team_base[i]->scratch_memory());
accum += val ;
}
accum = atomic_fetch_add( global_accum , accum );
}
for ( int i = m_team_size ; i-- ; ) {
type & val = *((type*) m_team_base[i]->scratch_memory());
const type offset = accum ;
accum += val ;
val = offset ;
}
memory_fence();
}
team_fan_out();
return *work_value ;
}
#endif
/** \brief Intra-team exclusive prefix sum with team_rank() ordering.
*
* The highest rank thread can compute the reduction total as
* reduction_total = dev.team_scan( value ) + value ;
*/
template< typename ArgType >
KOKKOS_INLINE_FUNCTION ArgType team_scan( const ArgType & value ) const
{ return this-> template team_scan<ArgType>( value , 0 ); }
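// Worked example (hypothetical): a team of 4 threads each passing value == 1
// receives { 0 , 1 , 2 , 3 } in team_rank() order, and the highest rank
// recovers the total as
//   reduction_total = dev.team_scan( value ) + value ;   // == 4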
//----------------------------------------
// Private for the driver
template< class Arg0 , class Arg1 >
ThreadsExecTeamMember( Impl::ThreadsExec * exec
, const TeamPolicy< Arg0 , Arg1 , Kokkos::Threads > & team
, const int shared_size )
: m_exec( exec )
, m_team_base(0)
, m_team_shared(0,0)
, m_team_shared_size( shared_size )
, m_team_size(0)
, m_team_rank(0)
, m_team_rank_rev(0)
, m_league_size(0)
, m_league_end(0)
, m_league_rank(0)
{
if ( team.league_size() ) {
// Execution is using device-team interface:
const int pool_rank_rev = m_exec->pool_size() - ( m_exec->pool_rank() + 1 );
const int team_rank_rev = pool_rank_rev % team.team_alloc();
// May be using fewer threads per team than a multiple of threads per core,
// some threads will idle.
if ( team_rank_rev < team.team_size() ) {
const size_t pool_league_size = m_exec->pool_size() / team.team_alloc() ;
const size_t pool_league_rank_rev = pool_rank_rev / team.team_alloc() ;
const size_t pool_league_rank = pool_league_size - ( pool_league_rank_rev + 1 );
m_team_base = m_exec->pool_base() + team.team_alloc() * pool_league_rank_rev ;
m_team_size = team.team_size() ;
m_team_rank = team.team_size() - ( team_rank_rev + 1 );
m_team_rank_rev = team_rank_rev ;
m_league_size = team.league_size();
m_league_rank = ( team.league_size() * pool_league_rank ) / pool_league_size ;
m_league_end = ( team.league_size() * (pool_league_rank+1) ) / pool_league_size ;
set_team_shared();
}
}
}
ThreadsExecTeamMember()
: m_exec(0)
, m_team_base(0)
, m_team_shared(0,0)
, m_team_shared_size(0)
, m_team_size(1)
, m_team_rank(0)
, m_team_rank_rev(0)
, m_league_size(1)
, m_league_end(0)
, m_league_rank(0)
{}
inline
ThreadsExec & threads_exec_team_base() const { return m_team_base ? **m_team_base : *m_exec ; }
bool valid() const
{ return m_league_rank < m_league_end ; }
void next()
{
if ( ++m_league_rank < m_league_end ) {
team_barrier();
set_team_shared();
}
}
void set_league_shmem( const int arg_league_rank
, const int arg_league_size
, const int arg_shmem_size
)
{
m_league_rank = arg_league_rank ;
m_league_size = arg_league_size ;
m_team_shared_size = arg_shmem_size ;
set_team_shared();
}
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
template< class Arg0 , class Arg1 >
class TeamPolicy< Arg0 , Arg1 , Kokkos::Threads >
{
private:
int m_league_size ;
int m_team_size ;
int m_team_alloc ;
+ size_t m_scratch_size;
+
inline
void init( const int league_size_request
, const int team_size_request )
{
const int pool_size = execution_space::thread_pool_size(0);
const int team_max = execution_space::thread_pool_size(1);
const int team_grain = execution_space::thread_pool_size(2);
m_league_size = league_size_request ;
m_team_size = team_size_request < team_max ?
team_size_request : team_max ;
// Round team size up to a multiple of 'team_grain'
const int team_size_grain = team_grain * ( ( m_team_size + team_grain - 1 ) / team_grain );
const int team_count = pool_size / team_size_grain ;
// Constraint : pool_size = m_team_alloc * team_count
m_team_alloc = pool_size / team_count ;
}
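// Worked example of the rounding above (hypothetical sizes): with
// pool_size = 16, team_grain = 4 and team_size_request = 6,
// m_team_size = 6, team_size_grain = 4 * ((6+4-1)/4) = 8, team_count = 2,
// and m_team_alloc = 8; each team is allocated 8 threads, 6 active, 2 idle.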
public:
//! Tag this class as a Kokkos execution policy
typedef TeamPolicy execution_policy ;
typedef Kokkos::Threads execution_space ;
typedef typename
Impl::if_c< ! Impl::is_same< Kokkos::Threads , Arg0 >::value , Arg0 , Arg1 >::type
work_tag ;
//----------------------------------------
template< class FunctorType >
inline static
int team_size_max( const FunctorType & )
{ return execution_space::thread_pool_size(1); }
template< class FunctorType >
static int team_size_recommended( const FunctorType & )
{ return execution_space::thread_pool_size(2); }
template< class FunctorType >
inline static
int team_size_recommended( const FunctorType &, const int& )
{ return execution_space::thread_pool_size(2); }
//----------------------------------------
inline int team_size() const { return m_team_size ; }
inline int team_alloc() const { return m_team_alloc ; }
inline int league_size() const { return m_league_size ; }
+ inline size_t scratch_size() const { return m_scratch_size ; }
/** \brief Specify league size, request team size */
- TeamPolicy( execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1 )
+ TeamPolicy( execution_space &
+ , int league_size_request
+ , int team_size_request
+ , int vector_length_request = 1 )
: m_league_size(0)
, m_team_size(0)
, m_team_alloc(0)
+ , m_scratch_size ( 0 )
{ init(league_size_request,team_size_request); (void) vector_length_request; }
- TeamPolicy( int league_size_request , int team_size_request , int vector_length_request = 1 )
+ /** \brief Specify league size, request team size */
+ TeamPolicy( execution_space &
+ , int league_size_request
+ , const Kokkos::AUTO_t & /* team_size_request */
+ , int /* vector_length_request */ = 1 )
: m_league_size(0)
, m_team_size(0)
, m_team_alloc(0)
- { init(league_size_request,team_size_request); (void) vector_length_request; }
+ , m_scratch_size ( 0 )
+ { init(league_size_request,execution_space::thread_pool_size(2)); }
+
+ TeamPolicy( int league_size_request
+ , int team_size_request
+ , int /* vector_length_request */ = 1 )
+ : m_league_size(0)
+ , m_team_size(0)
+ , m_team_alloc(0)
+ , m_scratch_size ( 0 )
+ { init(league_size_request,team_size_request); }
+
+ TeamPolicy( int league_size_request
+ , const Kokkos::AUTO_t & /* team_size_request */
+ , int /* vector_length_request */ = 1 )
+ : m_league_size(0)
+ , m_team_size(0)
+ , m_team_alloc(0)
+ , m_scratch_size ( 0 )
+ { init(league_size_request,execution_space::thread_pool_size(2)); }
+
+ template<class MemorySpace>
+ TeamPolicy( int league_size_request
+ , int team_size_request
+ , const Experimental::TeamScratchRequest<MemorySpace> & scratch_request )
+ : m_league_size(0)
+ , m_team_size(0)
+ , m_team_alloc(0)
+ , m_scratch_size(scratch_request.total(team_size_request))
+ { init(league_size_request,team_size_request); }
+
+
+ template<class MemorySpace>
+ TeamPolicy( int league_size_request
+ , const Kokkos::AUTO_t & /* team_size_request */
+ , const Experimental::TeamScratchRequest<MemorySpace> & scratch_request )
+ : m_league_size(0)
+ , m_team_size(0)
+ , m_team_alloc(0)
+ , m_scratch_size(scratch_request.total(execution_space::thread_pool_size(2)))
+ { init(league_size_request,execution_space::thread_pool_size(2)); }
typedef Impl::ThreadsExecTeamMember member_type ;
friend class Impl::ThreadsExecTeamMember ;
};
} /* namespace Kokkos */
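/* Illustrative sketch (not part of this header), assuming the usual
 * TeamPolicy default template arguments and a functor MyFunctor defined
 * elsewhere. The constructors above accept an explicit team size or
 * Kokkos::AUTO, optionally with a scratch request:
 *
 *   typedef Kokkos::TeamPolicy< Kokkos::Threads > policy_type ;
 *   policy_type p1( 100 , 4 );            // 100 teams, 4 threads per team
 *   policy_type p2( 100 , Kokkos::AUTO ); // recommended team size
 *   // Kokkos::parallel_for( p1 , MyFunctor() );
 */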
namespace Kokkos {
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>
TeamThreadRange(const Impl::ThreadsExecTeamMember& thread, const iType& count)
{
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>(thread,count);
}
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>
TeamThreadRange( const Impl::ThreadsExecTeamMember& thread
, const iType & begin
, const iType & end
)
{
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>(thread,begin,end);
}
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >
ThreadVectorRange(const Impl::ThreadsExecTeamMember& thread, const iType& count) {
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >(thread,count);
}
KOKKOS_INLINE_FUNCTION
Impl::ThreadSingleStruct<Impl::ThreadsExecTeamMember> PerTeam(const Impl::ThreadsExecTeamMember& thread) {
return Impl::ThreadSingleStruct<Impl::ThreadsExecTeamMember>(thread);
}
KOKKOS_INLINE_FUNCTION
Impl::VectorSingleStruct<Impl::ThreadsExecTeamMember> PerThread(const Impl::ThreadsExecTeamMember& thread) {
return Impl::VectorSingleStruct<Impl::ThreadsExecTeamMember>(thread);
}
} // namespace Kokkos
namespace Kokkos {
/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all threads of the calling thread team.
* This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>& loop_boundaries, const Lambda& lambda) {
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
lambda(i);
}
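// Illustrative sketch (hypothetical views x, y and scalar a): inside a
// team functor, 'member' is the ThreadsExecTeamMember argument and the
// iterations 0..N-1 are divided among the team's threads:
//
//   Kokkos::parallel_for( Kokkos::TeamThreadRange( member , N ) ,
//     [&] ( const int i ) { y(i) = a * x(i); } );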
/** \brief Inter-thread parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all threads of the calling thread team and a summation of
* val is performed and put into result. This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>& loop_boundaries,
const Lambda & lambda, ValueType& result) {
result = ValueType();
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
result+=tmp;
}
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
}
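// Illustrative sketch (hypothetical views x, y): a team-wide dot product.
// Because the result is passed through team_reduce, every thread of the
// team holds the same value afterwards:
//
//   double dot = 0 ;
//   Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member , N ) ,
//     [&] ( const int i , double & val ) { val += x(i) * y(i); } , dot );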
#if defined( KOKKOS_HAVE_CXX11 )
/** \brief Inter-thread parallel_reduce with a custom join. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all threads of the calling thread team and a reduction of
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
* The input value of init_result is used as the initializer for temporary variables of ValueType. Therefore
* the input value should be the neutral element with respect to the join operation (e.g. 0 for addition or
* 1 for multiplication). This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>& loop_boundaries,
const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
join(result,tmp);
}
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
}
#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */
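// Illustrative sketch of the join-based overload above (hypothetical view
// x): a team-wide maximum, initialized with the neutral element of the
// max-join:
//
//   double team_max = -DBL_MAX ;   // neutral element for max
//   Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member , N ) ,
//     [&] ( const int i , double & val ) { if ( x(i) > val ) val = x(i); } ,
//     [] ( double & dst , const double & src ) { if ( src > dst ) dst = src ; } ,
//     team_max );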
} //namespace Kokkos
namespace Kokkos {
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the calling thread.
* This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
lambda(i);
}
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the calling thread and a summation of
* val is performed and put into result. This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
result+=tmp;
}
}
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes of the calling thread and a reduction of
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
* The input value of init_result is used as the initializer for temporary variables of ValueType. Therefore
* the input value should be the neutral element with respect to the join operation (e.g. 0 for addition or
* 1 for multiplication). This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >&
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
ValueType tmp = ValueType();
lambda(i,tmp);
join(result,tmp);
}
init_result = result;
}
/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final)
* for each i=0..N-1.
*
* The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed.
* Depending on the target execution space the operator might be called twice: once with final=false
* and once with final=true. When final==true, val contains the prefix sum value. The contribution of this
* "i" needs to be added to val whether final==true or not. In a serial execution
* (i.e. team_size==1) the operator is only called once, with final==true. scan_val will be set
* to the final sum value over all vector lanes.
* This functionality requires C++11 support.*/
template< typename iType, class FunctorType >
KOKKOS_INLINE_FUNCTION
void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >&
loop_boundaries, const FunctorType & lambda) {
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
typedef typename ValueTraits::value_type value_type ;
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
lambda(i,scan_val,true);
}
}
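// Illustrative sketch (hypothetical views x, y): exclusive prefix sum of
// x into y. The contribution is added whether or not 'final' is set:
//
//   Kokkos::parallel_scan( Kokkos::ThreadVectorRange( member , N ) ,
//     [&] ( const int i , double & val , const bool final ) {
//       const double x_i = x(i);
//       if ( final ) y(i) = val ;   // val is the exclusive prefix sum at i
//       val += x_i ;
//     } );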
} // namespace Kokkos
namespace Kokkos {
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::VectorSingleStruct<Impl::ThreadsExecTeamMember>& single_struct, const FunctorType& lambda) {
lambda();
}
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::ThreadSingleStruct<Impl::ThreadsExecTeamMember>& single_struct, const FunctorType& lambda) {
if(single_struct.team_member.team_rank()==0) lambda();
}
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::VectorSingleStruct<Impl::ThreadsExecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) {
lambda(val);
}
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::ThreadSingleStruct<Impl::ThreadsExecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) {
if(single_struct.team_member.team_rank()==0) {
lambda(val);
}
single_struct.team_member.team_broadcast(val,0);
}
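// Illustrative sketch (hypothetical view A, index k): compute a value once
// per team and share it; the team_broadcast above guarantees every thread
// of the team sees the result:
//
//   double pivot = 0 ;
//   Kokkos::single( Kokkos::PerTeam( member ) ,
//     [&] ( double & val ) { val = A(k,k); } , pivot );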
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #define KOKKOS_THREADSTEAM_HPP */
diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp
index 4b2a16912..9e3b0acd3 100644
--- a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp
+++ b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp
@@ -1,427 +1,505 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_THREADS_PARALLEL_HPP
#define KOKKOS_THREADS_PARALLEL_HPP
#include <vector>
+#include <iostream>
#include <Kokkos_Parallel.hpp>
#include <impl/Kokkos_StaticAssert.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
+/* ParallelFor Kokkos::Threads with RangePolicy */
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelFor< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Threads > >
+class ParallelFor< FunctorType
+ , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Threads >
+ >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Threads > Policy ;
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::WorkRange WorkRange ;
+ typedef typename Policy::member_type Member ;
- const FunctorType m_func ;
+ const FunctorType m_functor ;
const Policy m_policy ;
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if<
- ( Impl::is_same< typename PType::work_tag , void >::value )
- , const FunctorType & >::type functor
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member ibeg , const Member iend )
{
- const typename PType::member_type e = range.end();
- for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
+ #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
+ defined( KOKKOS_HAVE_PRAGMA_IVDEP )
+ #pragma ivdep
+ #endif
+ for ( Member i = ibeg ; i < iend ; ++i ) {
functor( i );
}
}
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if<
- ( ! Impl::is_same< typename PType::work_tag , void >::value )
- , const FunctorType & >::type functor
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member ibeg , const Member iend )
{
- const typename PType::member_type e = range.end();
- for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
- functor( typename PType::work_tag() , i );
+ const TagType t{} ;
+ #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
+ defined( KOKKOS_HAVE_PRAGMA_IVDEP )
+ #pragma ivdep
+ #endif
+ for ( Member i = ibeg ; i < iend ; ++i ) {
+ functor( t , i );
}
}
- static void execute( ThreadsExec & exec , const void * arg )
+ static void exec( ThreadsExec & exec , const void * arg )
{
const ParallelFor & self = * ((const ParallelFor *) arg );
- driver( self.m_func , typename Policy::WorkRange( self.m_policy , exec.pool_rank() , exec.pool_size() ) );
+ WorkRange range( self.m_policy , exec.pool_rank() , exec.pool_size() );
+
+ ParallelFor::template exec_range< WorkTag >
+ ( self.m_functor , range.begin() , range.end() );
exec.fan_in();
}
public:
- ParallelFor( const FunctorType & functor
- , const Policy & policy )
- : m_func( functor )
- , m_policy( policy )
+ inline
+ void execute() const
{
- ThreadsExec::start( & ParallelFor::execute , this );
-
+ ThreadsExec::start( & ParallelFor::exec , this );
ThreadsExec::fence();
}
+
+ ParallelFor( const FunctorType & arg_functor
+ , const Policy & arg_policy )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ {}
};
+//----------------------------------------------------------------------------
+/* ParallelFor Kokkos::Threads with TeamPolicy */
+
template< class FunctorType , class Arg0 , class Arg1 >
-class ParallelFor< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Threads > >
+class ParallelFor< FunctorType
+ , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Threads >
+ >
{
private:
typedef TeamPolicy< Arg0 , Arg1 , Kokkos::Threads > Policy ;
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::member_type Member ;
- const FunctorType m_func ;
+ const FunctorType m_functor ;
const Policy m_policy ;
const int m_shared ;
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION
- void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
- const typename Policy::member_type & >::type member ) const
- { m_func( member ); }
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_team( const FunctorType & functor , Member member )
+ {
+ for ( ; member.valid() ; member.next() ) {
+ functor( member );
+ }
+ }
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION
- void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
- const typename Policy::member_type & >::type member ) const
- { m_func( TagType() , member ); }
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_team( const FunctorType & functor , Member member )
+ {
+ const TagType t{} ;
+ for ( ; member.valid() ; member.next() ) {
+ functor( t , member );
+ }
+ }
- static void execute( ThreadsExec & exec , const void * arg )
+ static void exec( ThreadsExec & exec , const void * arg )
{
const ParallelFor & self = * ((const ParallelFor *) arg );
- typename Policy::member_type member( & exec , self.m_policy , self.m_shared );
-
- for ( ; member.valid() ; member.next() ) {
- self.ParallelFor::template driver< typename Policy::work_tag >( member );
- }
+ ParallelFor::exec_team< WorkTag >
+ ( self.m_functor , Member( & exec , self.m_policy , self.m_shared ) );
exec.fan_in();
}
public:
- ParallelFor( const FunctorType & functor
- , const Policy & policy )
- : m_func( functor )
- , m_policy( policy )
- , m_shared( FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() ) )
+ inline
+ void execute() const
{
ThreadsExec::resize_scratch( 0 , Policy::member_type::team_reduce_size() + m_shared );
- ThreadsExec::start( & ParallelFor::execute , this );
+ ThreadsExec::start( & ParallelFor::exec , this );
ThreadsExec::fence();
}
-};
-
+ ParallelFor( const FunctorType & arg_functor
+ , const Policy & arg_policy )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ , m_shared( arg_policy.scratch_size() + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) )
+ { }
+};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
+/* ParallelReduce with Kokkos::Threads and RangePolicy */
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelReduce< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Threads > >
+class ParallelReduce< FunctorType
+ , Kokkos::RangePolicy< Arg0, Arg1, Arg2, Kokkos::Threads >
+ >
{
private:
- typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Threads > Policy ;
- typedef typename Policy::work_tag work_tag ;
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , work_tag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , work_tag > ValueInit ;
+ typedef Kokkos::RangePolicy< Arg0 , Arg1, Arg2, Kokkos::Threads > Policy ;
+
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::WorkRange WorkRange ;
+ typedef typename Policy::member_type Member ;
+
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
- const FunctorType m_func ;
+ const FunctorType m_functor ;
const Policy m_policy ;
+ const pointer_type m_result_ptr ;
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if<
- ( Impl::is_same< typename PType::work_tag , void >::value )
- , const FunctorType & >::type functor
- , reference_type update
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member & ibeg , const Member & iend
+ , reference_type update )
{
- const typename PType::member_type e = range.end();
- for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
+ #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
+ defined( KOKKOS_HAVE_PRAGMA_IVDEP )
+ #pragma ivdep
+ #endif
+ for ( Member i = ibeg ; i < iend ; ++i ) {
functor( i , update );
}
}
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if<
- ( ! Impl::is_same< typename PType::work_tag , void >::value )
- , const FunctorType & >::type functor
- , reference_type update
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member & ibeg , const Member & iend
+ , reference_type update )
{
- const typename PType::member_type e = range.end();
- for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
- functor( typename PType::work_tag() , i , update );
+ const TagType t{} ;
+ #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
+ defined( KOKKOS_HAVE_PRAGMA_IVDEP )
+ #pragma ivdep
+ #endif
+ for ( Member i = ibeg ; i < iend ; ++i ) {
+ functor( t , i , update );
}
}
- static void execute( ThreadsExec & exec , const void * arg )
+ static void exec( ThreadsExec & exec , const void * arg )
{
const ParallelReduce & self = * ((const ParallelReduce *) arg );
+ const WorkRange range( self.m_policy, exec.pool_rank(), exec.pool_size() );
- driver( self.m_func
- , ValueInit::init( self.m_func , exec.reduce_memory() )
- , typename Policy::WorkRange( self.m_policy , exec.pool_rank() , exec.pool_size() )
- );
+ ParallelReduce::template exec_range< WorkTag >
+ ( self.m_functor , range.begin() , range.end()
+ , ValueInit::init( self.m_functor , exec.reduce_memory() ) );
- exec.template fan_in_reduce< FunctorType , work_tag >( self.m_func );
+ exec.template fan_in_reduce< FunctorType , WorkTag >( self.m_functor );
}
public:
- template< class HostViewType >
- ParallelReduce( const FunctorType & functor ,
- const Policy & policy ,
- const HostViewType & result_view )
- : m_func( functor )
- , m_policy( policy )
+ inline
+ void execute() const
{
- ThreadsExec::resize_scratch( ValueTraits::value_size( m_func ) , 0 );
+ ThreadsExec::resize_scratch( ValueTraits::value_size( m_functor ) , 0 );
- ThreadsExec::start( & ParallelReduce::execute , this );
-
- const pointer_type data = (pointer_type) ThreadsExec::root_reduce_scratch();
+ ThreadsExec::start( & ParallelReduce::exec , this );
ThreadsExec::fence();
- if ( result_view.ptr_on_device() ) {
- const unsigned n = ValueTraits::value_count( m_func );
- for ( unsigned i = 0 ; i < n ; ++i ) { result_view.ptr_on_device()[i] = data[i]; }
+ if ( m_result_ptr ) {
+
+ const pointer_type data =
+ (pointer_type) ThreadsExec::root_reduce_scratch();
+
+ const unsigned n = ValueTraits::value_count( m_functor );
+ for ( unsigned i = 0 ; i < n ; ++i ) { m_result_ptr[i] = data[i]; }
}
}
+
+ template< class HostViewType >
+ ParallelReduce( const FunctorType & arg_functor ,
+ const Policy & arg_policy ,
+ const HostViewType & arg_result_view )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ , m_result_ptr( arg_result_view.ptr_on_device() )
+ {
+ static_assert( Kokkos::is_view< HostViewType >::value
+ , "Kokkos::Threads reduce result must be a View" );
+
+ static_assert( std::is_same< typename HostViewType::memory_space , HostSpace >::value
+ , "Kokkos::Threads reduce result must be a View in HostSpace" );
+ }
};
//----------------------------------------------------------------------------
+/* ParallelReduce with Kokkos::Threads and TeamPolicy */
template< class FunctorType , class Arg0 , class Arg1 >
-class ParallelReduce< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Threads > >
+class ParallelReduce< FunctorType
+ , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Threads >
+ >
{
private:
- typedef TeamPolicy< Arg0 , Arg1 , Kokkos::Threads > Policy ;
- typedef typename Policy::work_tag work_tag ;
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , work_tag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , work_tag > ValueInit ;
+ typedef TeamPolicy< Arg0 , Arg1 , Kokkos::Threads > Policy ;
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::member_type Member ;
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
- const FunctorType m_func ;
+ const FunctorType m_functor ;
const Policy m_policy ;
+ const pointer_type m_result_ptr ;
const int m_shared ;
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION
- void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
- const typename Policy::member_type & >::type member
- , reference_type update ) const
- { m_func( member , update ); }
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_team( const FunctorType & functor , Member member , reference_type update )
+ {
+ for ( ; member.valid() ; member.next() ) {
+ functor( member , update );
+ }
+ }
template< class TagType >
- KOKKOS_FORCEINLINE_FUNCTION
- void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
- const typename Policy::member_type & >::type member
- , reference_type update ) const
- { m_func( TagType() , member , update ); }
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_team( const FunctorType & functor , Member member , reference_type update )
+ {
+ const TagType t{} ;
+ for ( ; member.valid() ; member.next() ) {
+ functor( t , member , update );
+ }
+ }
- static void execute( ThreadsExec & exec , const void * arg )
+ static void exec( ThreadsExec & exec , const void * arg )
{
const ParallelReduce & self = * ((const ParallelReduce *) arg );
- // Initialize thread-local value
- reference_type update = ValueInit::init( self.m_func , exec.reduce_memory() );
-
- typename Policy::member_type member( & exec , self.m_policy , self.m_shared );
- for ( ; member.valid() ; member.next() ) {
- self.ParallelReduce::template driver< work_tag >( member , update );
- }
+ ParallelReduce::template exec_team< WorkTag >
+ ( self.m_functor , Member( & exec , self.m_policy , self.m_shared )
+ , ValueInit::init( self.m_functor , exec.reduce_memory() ) );
- exec.template fan_in_reduce< FunctorType , work_tag >( self.m_func );
+ exec.template fan_in_reduce< FunctorType , WorkTag >( self.m_functor );
}
public:
- ParallelReduce( const FunctorType & functor
- , const Policy & policy )
- : m_func( functor )
- , m_policy( policy )
- , m_shared( FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() ) )
+ inline
+ void execute() const
{
- ThreadsExec::resize_scratch( ValueTraits::value_size( m_func ) , Policy::member_type::team_reduce_size() + m_shared );
+ ThreadsExec::resize_scratch( ValueTraits::value_size( m_functor ) , Policy::member_type::team_reduce_size() + m_shared );
- ThreadsExec::start( & ParallelReduce::execute , this );
+ ThreadsExec::start( & ParallelReduce::exec , this );
ThreadsExec::fence();
- }
-
- template< class ViewType >
- ParallelReduce( const FunctorType & functor
- , const Policy & policy
- , const ViewType & result )
- : m_func( functor )
- , m_policy( policy )
- , m_shared( FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() ) )
- {
- ThreadsExec::resize_scratch( ValueTraits::value_size( m_func ) , Policy::member_type::team_reduce_size() + m_shared );
- ThreadsExec::start( & ParallelReduce::execute , this );
+ if ( m_result_ptr ) {
- const pointer_type data = (pointer_type) ThreadsExec::root_reduce_scratch();
+ const pointer_type data = (pointer_type) ThreadsExec::root_reduce_scratch();
- ThreadsExec::fence();
-
- const unsigned n = ValueTraits::value_count( m_func );
- for ( unsigned i = 0 ; i < n ; ++i ) { result.ptr_on_device()[i] = data[i]; }
+ const unsigned n = ValueTraits::value_count( m_functor );
+ for ( unsigned i = 0 ; i < n ; ++i ) { m_result_ptr[i] = data[i]; }
+ }
}
+
+ template< class ViewType >
+ ParallelReduce( const FunctorType & arg_functor
+ , const Policy & arg_policy
+ , const ViewType & arg_result )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ , m_result_ptr( arg_result.ptr_on_device() )
+ , m_shared( arg_policy.scratch_size() + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) )
+ { }
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
+/* ParallelScan with Kokkos::Threads and RangePolicy */
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
-class ParallelScan< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Threads > >
+class ParallelScan< FunctorType
+ , Kokkos::RangePolicy< Arg0, Arg1, Arg2, Kokkos::Threads >
+ >
{
private:
- typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Threads > Policy ;
- typedef typename Policy::work_tag work_tag ;
- typedef Kokkos::Impl::FunctorValueTraits< FunctorType , work_tag > ValueTraits ;
- typedef Kokkos::Impl::FunctorValueInit< FunctorType , work_tag > ValueInit ;
+ typedef Kokkos::RangePolicy< Arg0, Arg1, Arg2, Kokkos::Threads > Policy ;
+ typedef typename Policy::WorkRange WorkRange ;
+ typedef typename Policy::work_tag WorkTag ;
+ typedef typename Policy::member_type Member ;
+ typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ;
+ typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
- const FunctorType m_func ;
+ const FunctorType m_functor ;
const Policy m_policy ;
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if<
- ( Impl::is_same< typename PType::work_tag , void >::value )
- , const FunctorType & >::type functor
- , reference_type update
- , const bool final
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member & ibeg , const Member & iend
+ , reference_type update , const bool final )
{
- const typename PType::member_type e = range.end();
- for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
+ #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
+ defined( KOKKOS_HAVE_PRAGMA_IVDEP )
+ #pragma ivdep
+ #endif
+ for ( Member i = ibeg ; i < iend ; ++i ) {
functor( i , update , final );
}
}
- template< class PType >
- KOKKOS_FORCEINLINE_FUNCTION static
- void driver( typename Impl::enable_if<
- ( ! Impl::is_same< typename PType::work_tag , void >::value )
- , const FunctorType & >::type functor
- , reference_type update
- , const bool final
- , const PType & range )
+ template< class TagType >
+ inline static
+ typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+ exec_range( const FunctorType & functor
+ , const Member & ibeg , const Member & iend
+ , reference_type update , const bool final )
{
- const typename PType::member_type e = range.end();
- for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
- functor( typename PType::work_tag() , i , update , final );
+ const TagType t{} ;
+ #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
+ defined( KOKKOS_HAVE_PRAGMA_IVDEP )
+ #pragma ivdep
+ #endif
+ for ( Member i = ibeg ; i < iend ; ++i ) {
+ functor( t , i , update , final );
}
}
- static void execute( ThreadsExec & exec , const void * arg )
+ static void exec( ThreadsExec & exec , const void * arg )
{
const ParallelScan & self = * ((const ParallelScan *) arg );
- const typename Policy::WorkRange range( self.m_policy , exec.pool_rank() , exec.pool_size() );
+ const WorkRange range( self.m_policy, exec.pool_rank(), exec.pool_size() );
- reference_type update = ValueInit::init( self.m_func , exec.reduce_memory() );
+ reference_type update =
+ ValueInit::init( self.m_functor , exec.reduce_memory() );
- driver( self.m_func , update , false , range );
+ ParallelScan::template exec_range< WorkTag >
+ ( self.m_functor , range.begin(), range.end(), update, false );
- // exec.<FunctorType,work_tag>scan_large( self.m_func );
- exec.template scan_small<FunctorType,work_tag>( self.m_func );
+ // exec.template scan_large<FunctorType,WorkTag>( self.m_functor );
+ exec.template scan_small<FunctorType,WorkTag>( self.m_functor );
- driver( self.m_func , update , true , range );
+ ParallelScan::template exec_range< WorkTag >
+ ( self.m_functor , range.begin(), range.end(), update, true );
exec.fan_in();
}
public:
- ParallelScan( const FunctorType & functor , const Policy & policy )
- : m_func( functor )
- , m_policy( policy )
+ inline
+ void execute() const
{
- ThreadsExec::resize_scratch( 2 * ValueTraits::value_size( m_func ) , 0 );
- ThreadsExec::start( & ParallelScan::execute , this );
+ ThreadsExec::resize_scratch( 2 * ValueTraits::value_size( m_functor ) , 0 );
+ ThreadsExec::start( & ParallelScan::exec , this );
ThreadsExec::fence();
}
+
+ ParallelScan( const FunctorType & arg_functor
+ , const Policy & arg_policy )
+ : m_functor( arg_functor )
+ , m_policy( arg_policy )
+ { }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #define KOKKOS_THREADS_PARALLEL_HPP */
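The exec_range and exec_team overloads above use a standard tag-dispatch idiom: two function templates guarded by complementary std::enable_if conditions on whether the policy's work tag is void, so exactly one overload participates in overload resolution for any given TagType. A minimal, self-contained sketch of the same idiom follows; run, Work, and MyTag are illustrative names, not Kokkos APIs.

// Tag dispatch via complementary std::enable_if guards,
// as used by exec_range above. Illustrative names only.
#include <cstdio>
#include <type_traits>

struct MyTag {};

// Selected only when TagType is void: call the functor without a tag.
template< class TagType , class Functor >
typename std::enable_if< std::is_same< TagType , void >::value >::type
run( const Functor & f , int ibeg , int iend )
{
  for ( int i = ibeg ; i < iend ; ++i ) f( i );
}

// Selected only when TagType is non-void: prepend a default-constructed tag.
template< class TagType , class Functor >
typename std::enable_if< ! std::is_same< TagType , void >::value >::type
run( const Functor & f , int ibeg , int iend )
{
  const TagType t{} ;
  for ( int i = ibeg ; i < iend ; ++i ) f( t , i );
}

struct Work {
  void operator()( int i ) const { std::printf( "untagged %d\n" , i ); }
  void operator()( MyTag , int i ) const { std::printf( "tagged %d\n" , i ); }
};

int main()
{
  run< void  >( Work() , 0 , 2 );  // resolves to the untagged overload
  run< MyTag >( Work() , 0 , 2 );  // resolves to the tagged overload
}

The same mechanism lets one functor type carry several tagged operators and be reused across differently tagged policies with no runtime branching.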
diff --git a/lib/kokkos/core/src/impl/CMakeLists.txt b/lib/kokkos/core/src/impl/CMakeLists.txt
new file mode 100644
index 000000000..c543194de
--- /dev/null
+++ b/lib/kokkos/core/src/impl/CMakeLists.txt
@@ -0,0 +1,18 @@
+
+SET(HEADERS "")
+SET(SOURCES "")
+
+FILE(GLOB HEADERS *.hpp)
+FILE(GLOB SOURCES *.cpp)
+
+TRIBITS_ADD_LIBRARY(
+ kokkoscore_impl
+ NOINSTALLHEADERS ${HEADERS}
+ SOURCES ${SOURCES}
+ DEPLIBS
+ )
+
+SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
+
+INSTALL(FILES ${HEADERS} DESTINATION ${TRILINOS_INCDIR}/impl/)
+
diff --git a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp b/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp
index 50168fe3c..e14929d16 100644
--- a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp
+++ b/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp
@@ -1,275 +1,327 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
namespace Kokkos {
namespace Experimental {
namespace Impl {
+int SharedAllocationRecord< void , void >::s_tracking_enabled = 1 ;
+
+void SharedAllocationRecord< void , void >::tracking_claim_and_disable()
+{
+ // A host thread claims and disables the tracking flag
+
+ while ( ! Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 1, 0 ) );
+}
+
+void SharedAllocationRecord< void , void >::tracking_release_and_enable()
+{
+ // The host thread that claimed and disabled the tracking flag
+ // now releases and enables tracking.
+
+ if ( ! Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 0, 1 ) ){
+ Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord<>::tracking_release_and_enable FAILED, this host process thread did not hold the lock" );
+ }
+}
+
+//----------------------------------------------------------------------------
+
bool
SharedAllocationRecord< void , void >::
is_sane( SharedAllocationRecord< void , void > * arg_record )
{
constexpr static SharedAllocationRecord * zero = 0 ;
SharedAllocationRecord * const root = arg_record ? arg_record->m_root : 0 ;
bool ok = root != 0 && root->m_count == 0 ;
if ( ok ) {
SharedAllocationRecord * root_next = 0 ;
// Lock the list:
- while ( ( root_next = Kokkos::atomic_exchange( & root->m_next , zero ) ) == 0 );
+ while ( ( root_next = Kokkos::atomic_exchange( & root->m_next , zero ) ) == zero );
for ( SharedAllocationRecord * rec = root_next ; ok && rec != root ; rec = rec->m_next ) {
const bool ok_non_null = rec && rec->m_prev && ( rec == root || rec->m_next );
const bool ok_root = ok_non_null && rec->m_root == root ;
const bool ok_prev_next = ok_non_null && ( rec->m_prev != root ? rec->m_prev->m_next == rec : root_next == rec );
const bool ok_next_prev = ok_non_null && rec->m_next->m_prev == rec ;
const bool ok_count = ok_non_null && 0 <= rec->m_count ;
ok = ok_root && ok_prev_next && ok_next_prev && ok_count ;
if ( ! ok ) {
- fprintf(stderr,"Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n"
- , reinterpret_cast< unsigned long >( rec )
+ // Formatting depends on sizeof(uintptr_t)
+ const char * format_string;
+
+ if (sizeof(uintptr_t) == sizeof(unsigned long)) {
+ format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n";
+ }
+ else { // otherwise assume 'unsigned long long' width; a plain 'else' guarantees format_string is initialized
+ format_string = "Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12llx){ m_count(%d) m_root(0x%.12llx) m_next(0x%.12llx) m_prev(0x%.12llx) m_next->m_prev(0x%.12llx) m_prev->m_next(0x%.12llx) }\n";
+ }
+
+ fprintf(stderr
+ , format_string
+ , reinterpret_cast< uintptr_t >( rec )
, rec->m_count
- , reinterpret_cast< unsigned long >( rec->m_root )
- , reinterpret_cast< unsigned long >( rec->m_next )
- , reinterpret_cast< unsigned long >( rec->m_prev )
- , reinterpret_cast< unsigned long >( rec->m_next->m_prev )
- , reinterpret_cast< unsigned long >( rec->m_prev != rec->m_root ? rec->m_prev->m_next : root_next )
+ , reinterpret_cast< uintptr_t >( rec->m_root )
+ , reinterpret_cast< uintptr_t >( rec->m_next )
+ , reinterpret_cast< uintptr_t >( rec->m_prev )
+ , reinterpret_cast< uintptr_t >( rec->m_next->m_prev )
+ , reinterpret_cast< uintptr_t >( rec->m_prev != rec->m_root ? rec->m_prev->m_next : root_next )
);
}
}
if ( zero != Kokkos::atomic_exchange( & root->m_next , root_next ) ) {
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed is_sane unlocking");
}
}
return ok ;
}
SharedAllocationRecord<void,void> *
SharedAllocationRecord<void,void>::find( SharedAllocationRecord<void,void> * const arg_root , void * const arg_data_ptr )
{
constexpr static SharedAllocationRecord * zero = 0 ;
SharedAllocationRecord * root_next = 0 ;
// Lock the list:
- while ( ( root_next = Kokkos::atomic_exchange( & arg_root->m_next , 0 ) ) == 0 );
+ while ( ( root_next = Kokkos::atomic_exchange( & arg_root->m_next , zero ) ) == zero );
// Iterate searching for the record with this data pointer
SharedAllocationRecord * r = root_next ;
while ( ( r != arg_root ) && ( r->data() != arg_data_ptr ) ) { r = r->m_next ; }
if ( r == arg_root ) { r = 0 ; }
if ( zero != Kokkos::atomic_exchange( & arg_root->m_next , root_next ) ) {
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
}
return r ;
}
/**\brief Construct and insert into 'arg_root' tracking set.
* use_count is zero.
*/
SharedAllocationRecord< void , void >::
SharedAllocationRecord( SharedAllocationRecord<void,void> * arg_root
, SharedAllocationHeader * arg_alloc_ptr
, size_t arg_alloc_size
, SharedAllocationRecord< void , void >::function_type arg_dealloc
)
: m_alloc_ptr( arg_alloc_ptr )
, m_alloc_size( arg_alloc_size )
, m_dealloc( arg_dealloc )
, m_root( arg_root )
, m_prev( 0 )
, m_next( 0 )
, m_count( 0 )
{
constexpr static SharedAllocationRecord * zero = 0 ;
// Insert into the root double-linked list for tracking
//
// before: arg_root->m_next == next ; next->m_prev == arg_root
// after: arg_root->m_next == this ; this->m_prev == arg_root ;
// this->m_next == next ; next->m_prev == this
m_prev = m_root ;
// Read root->m_next and lock by setting to zero
- while ( ( m_next = Kokkos::atomic_exchange( & m_root->m_next , zero ) ) == 0 );
+ while ( ( m_next = Kokkos::atomic_exchange( & m_root->m_next , zero ) ) == zero );
m_next->m_prev = this ;
if ( zero != Kokkos::atomic_exchange( & m_root->m_next , this ) ) {
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed locking/unlocking");
}
}
void
SharedAllocationRecord< void , void >::
increment( SharedAllocationRecord< void , void > * arg_record )
{
const int old_count = Kokkos::atomic_fetch_add( & arg_record->m_count , 1 );
if ( old_count < 0 ) { // Error
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed increment");
}
}
SharedAllocationRecord< void , void > *
SharedAllocationRecord< void , void >::
decrement( SharedAllocationRecord< void , void > * arg_record )
{
constexpr static SharedAllocationRecord * zero = 0 ;
const int old_count = Kokkos::atomic_fetch_add( & arg_record->m_count , -1 );
if ( old_count == 1 ) {
// before: arg_record->m_prev->m_next == arg_record &&
// arg_record->m_next->m_prev == arg_record
//
// after: arg_record->m_prev->m_next == arg_record->m_next &&
// arg_record->m_next->m_prev == arg_record->m_prev
SharedAllocationRecord * root_next = 0 ;
// Lock the list:
- while ( ( root_next = Kokkos::atomic_exchange( & arg_record->m_root->m_next , 0 ) ) == 0 );
+ while ( ( root_next = Kokkos::atomic_exchange( & arg_record->m_root->m_next , zero ) ) == zero );
arg_record->m_next->m_prev = arg_record->m_prev ;
if ( root_next != arg_record ) {
arg_record->m_prev->m_next = arg_record->m_next ;
}
else {
// before: arg_record->m_root == arg_record->m_prev
// after: arg_record->m_root == arg_record->m_next
root_next = arg_record->m_next ;
}
// Unlock the list:
if ( zero != Kokkos::atomic_exchange( & arg_record->m_root->m_next , root_next ) ) {
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement unlocking");
}
arg_record->m_next = 0 ;
arg_record->m_prev = 0 ;
function_type d = arg_record->m_dealloc ;
(*d)( arg_record );
arg_record = 0 ;
}
else if ( old_count < 1 ) { // Error
Kokkos::Impl::throw_runtime_exception("Kokkos::Experimental::Impl::SharedAllocationRecord failed decrement count");
}
return arg_record ;
}
void
SharedAllocationRecord< void , void >::
print_host_accessible_records( std::ostream & s
, const char * const space_name
, const SharedAllocationRecord * const root
, const bool detail )
{
const SharedAllocationRecord< void , void > * r = root ;
char buffer[256] ;
if ( detail ) {
do {
+ // Formatting depends on sizeof(uintptr_t)
+ const char * format_string;
- snprintf( buffer , 256 , "%s addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"
+ if (sizeof(uintptr_t) == sizeof(unsigned long)) {
+ format_string = "%s addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n";
+ }
+ else { // otherwise assume 'unsigned long long' width; a plain 'else' guarantees format_string is initialized
+ format_string = "%s addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ 0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n";
+ }
+
+ snprintf( buffer , 256
+ , format_string
, space_name
- , reinterpret_cast<unsigned long>( r )
- , reinterpret_cast<unsigned long>( r->m_prev )
- , reinterpret_cast<unsigned long>( r->m_next )
- , reinterpret_cast<unsigned long>( r->m_alloc_ptr )
+ , reinterpret_cast<uintptr_t>( r )
+ , reinterpret_cast<uintptr_t>( r->m_prev )
+ , reinterpret_cast<uintptr_t>( r->m_next )
+ , reinterpret_cast<uintptr_t>( r->m_alloc_ptr )
, r->m_alloc_size
, r->m_count
- , reinterpret_cast<unsigned long>( r->m_dealloc )
+ , reinterpret_cast<uintptr_t>( r->m_dealloc )
, r->m_alloc_ptr->m_label
);
std::cout << buffer ;
r = r->m_next ;
} while ( r != root );
}
else {
do {
if ( r->m_alloc_ptr ) {
-
- snprintf( buffer , 256 , "%s [ 0x%.12lx + %ld ] %s\n"
+ // Formatting depends on sizeof(uintptr_t)
+ const char * format_string;
+
+ if (sizeof(uintptr_t) == sizeof(unsigned long)) {
+ format_string = "%s [ 0x%.12lx + %ld ] %s\n";
+ }
+ else { // otherwise assume 'unsigned long long' width; a plain 'else' guarantees format_string is initialized
+ format_string = "%s [ 0x%.12llx + %ld ] %s\n";
+ }
+
+ snprintf( buffer , 256
+ , format_string
, space_name
- , reinterpret_cast< unsigned long >( r->data() )
+ , reinterpret_cast< uintptr_t >( r->data() )
, r->size()
, r->m_alloc_ptr->m_label
);
}
else {
snprintf( buffer , 256 , "%s [ 0 + 0 ]\n" , space_name );
}
std::cout << buffer ;
r = r->m_next ;
} while ( r != root );
}
}
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
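is_sane, find, the inserting constructor, and decrement above all serialize access to the circular doubly-linked record list with the same protocol: atomically exchange root->m_next with zero and spin until a non-zero pointer comes back (zero means another thread holds the lock), then unlock by exchanging the saved pointer back in and verifying the slot was still zero. Below is a freestanding sketch of that lock-by-exchange protocol, using std::atomic in place of Kokkos' atomics; Node, head_next, and with_locked_list are hypothetical names.

// Lock-by-exchange on a list head, as in the record-list code above.
#include <atomic>
#include <cstdio>

struct Node { Node * next ; };

// Stands in for the root record's m_next slot; nullptr means "locked".
static std::atomic< Node * > head_next { nullptr };

template< class Fn >
void with_locked_list( Fn fn )
{
  Node * saved = nullptr ;
  // Lock: spin until a non-null pointer is exchanged out.
  while ( ( saved = head_next.exchange( nullptr ) ) == nullptr ) { /* spin */ }

  fn( saved );  // traverse or splice while competing threads spin

  // Unlock: restore the saved pointer; the slot must still be null.
  if ( head_next.exchange( saved ) != nullptr ) {
    std::fprintf( stderr , "lock-by-exchange invariant violated\n" );
  }
}

int main()
{
  static Node sentinel { & sentinel };  // empty circular list points at itself
  head_next.store( sentinel.next );     // publish the list, initially unlocked

  with_locked_list( []( Node * first ){
    std::printf( "head while locked: %p\n" , (void *) first );
  } );
}

The spin loop doubles as lock acquisition and the read of the protected head pointer, which is why the real code needs no separate mutex word per record list.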
diff --git a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp b/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp
index c8c553731..f6fbe0b37 100644
--- a/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp
+++ b/lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp
@@ -1,292 +1,388 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
+#ifndef KOKKOS_SHARED_ALLOC_HPP_
+#define KOKKOS_SHARED_ALLOC_HPP_
+
namespace Kokkos {
namespace Experimental {
namespace Impl {
template< class MemorySpace = void , class DestroyFunctor = void >
class SharedAllocationRecord ;
class SharedAllocationHeader {
private:
typedef SharedAllocationRecord<void,void> Record ;
static constexpr unsigned maximum_label_length = ( 1u << 7 /* 128 */ ) - sizeof(Record*);
template< class , class > friend class SharedAllocationRecord ;
Record * m_record ;
char m_label[ maximum_label_length ];
public:
/* Given user memory get pointer to the header */
KOKKOS_INLINE_FUNCTION static
const SharedAllocationHeader * get_header( void * alloc_ptr )
{ return reinterpret_cast<SharedAllocationHeader*>( reinterpret_cast<char*>(alloc_ptr) - sizeof(SharedAllocationHeader) ); }
};
template<>
class SharedAllocationRecord< void , void > {
protected:
static_assert( sizeof(SharedAllocationHeader) == ( 1u << 7 /* 128 */ ) , "sizeof(SharedAllocationHeader) != 128" );
template< class , class > friend class SharedAllocationRecord ;
typedef void (* function_type )( SharedAllocationRecord<void,void> * );
+ static int s_tracking_enabled ;
+
SharedAllocationHeader * const m_alloc_ptr ;
size_t const m_alloc_size ;
function_type const m_dealloc ;
SharedAllocationRecord * const m_root ;
SharedAllocationRecord * m_prev ;
SharedAllocationRecord * m_next ;
int m_count ;
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
/**\brief Construct and insert into 'arg_root' tracking set.
* use_count is zero.
*/
SharedAllocationRecord( SharedAllocationRecord * arg_root
, SharedAllocationHeader * arg_alloc_ptr
, size_t arg_alloc_size
, function_type arg_dealloc
);
public:
+ static int tracking_enabled() { return s_tracking_enabled ; }
+
+ /**\brief A host process thread claims and disables the
+ * shared allocation tracking flag.
+ */
+ static void tracking_claim_and_disable();
+
+ /**\brief A host process thread releases and enables the
+ * shared allocation tracking flag.
+ */
+ static void tracking_release_and_enable();
+
~SharedAllocationRecord() = default ;
constexpr SharedAllocationRecord()
: m_alloc_ptr( 0 )
, m_alloc_size( 0 )
, m_dealloc( 0 )
, m_root( this )
, m_prev( this )
, m_next( this )
, m_count( 0 )
{}
static constexpr unsigned maximum_label_length = SharedAllocationHeader::maximum_label_length ;
KOKKOS_INLINE_FUNCTION
const SharedAllocationHeader * head() const { return m_alloc_ptr ; }
/* User's memory begins at the end of the header */
KOKKOS_INLINE_FUNCTION
void * data() const { return reinterpret_cast<void*>( m_alloc_ptr + 1 ); }
/* User's memory begins at the end of the header */
constexpr size_t size() const { return m_alloc_size - sizeof(SharedAllocationHeader) ; }
/* Cannot be 'constexpr' because 'm_count' is volatile */
int use_count() const { return m_count ; }
/* Increment use count */
static void increment( SharedAllocationRecord * );
/* Decrement use count. If 1->0 then remove from the tracking list and invoke m_dealloc */
static SharedAllocationRecord * decrement( SharedAllocationRecord * );
/* Given a root record and data pointer find the record */
static SharedAllocationRecord * find( SharedAllocationRecord * const , void * const );
/* Sanity check for the whole set of records to which the input record belongs.
* Locks the set's insert/erase operations until the sanity check is complete.
*/
static bool is_sane( SharedAllocationRecord * );
/* Print host-accessible records */
static void print_host_accessible_records( std::ostream &
, const char * const space_name
, const SharedAllocationRecord * const root
, const bool detail );
};
+namespace {
+
+/* The address of this function is taken, so it must be unique per template instantiation */
+template < class MemorySpace , class DestroyFunctor >
+void deallocate( SharedAllocationRecord<void,void> * record_ptr )
+{
+ typedef SharedAllocationRecord< MemorySpace , void > base_type ;
+ typedef SharedAllocationRecord< MemorySpace , DestroyFunctor > this_type ;
+
+ this_type * const ptr = static_cast< this_type * >(
+ static_cast< base_type * >( record_ptr ) );
+
+ ptr->m_destroy.destroy_shared_allocation();
+
+ delete ptr ;
+}
+
+}
+
/*
* Memory space specialization of SharedAllocationRecord< Space , void > requires :
*
* SharedAllocationRecord< Space , void > : public SharedAllocationRecord< void , void >
* {
* // delete allocated user memory via static_cast to this type.
* static void deallocate( const SharedAllocationRecord<void,void> * );
* Space m_space ;
* }
*/
-
template< class MemorySpace , class DestroyFunctor >
class SharedAllocationRecord : public SharedAllocationRecord< MemorySpace , void >
{
private:
- static void deallocate( SharedAllocationRecord<void,void> * record_ptr )
- { delete static_cast<SharedAllocationRecord<MemorySpace,DestroyFunctor>*>(record_ptr); }
-
SharedAllocationRecord( const MemorySpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc
)
/* Allocate user memory as [ SharedAllocationHeader , user_memory ] */
- : SharedAllocationRecord< MemorySpace , void >( arg_space , arg_label , arg_alloc , & deallocate )
+ : SharedAllocationRecord< MemorySpace , void >( arg_space , arg_label , arg_alloc , & Kokkos::Experimental::Impl::deallocate< MemorySpace , DestroyFunctor > )
, m_destroy()
{}
- ~SharedAllocationRecord() { m_destroy.destroy_shared_allocation(); }
+ SharedAllocationRecord() = delete ;
+ SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
+ SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
public:
DestroyFunctor m_destroy ;
// Allocate with a zero use count. Incrementing the use count from zero to one
// inserts the record into the tracking list. Decrementing the count from one to zero
// removes it from the tracking list and deallocates.
KOKKOS_INLINE_FUNCTION static
SharedAllocationRecord * allocate( const MemorySpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc
)
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc );
#else
return (SharedAllocationRecord *) 0 ;
#endif
}
};
union SharedAllocationTracker {
private:
typedef SharedAllocationRecord<void,void> Record ;
- enum : unsigned long {
- DO_NOT_DEREF_FLAG = 0x01ul
- };
+ enum : uintptr_t { DO_NOT_DEREF_FLAG = 0x01ul };
// The allocation record resides in Host memory space
- Record * m_record ;
- unsigned long m_record_bits;
+ Record * m_record ;
+ uintptr_t m_record_bits ;
- KOKKOS_INLINE_FUNCTION
- static Record * disable( Record * rec )
- { return reinterpret_cast<Record*>( reinterpret_cast<unsigned long>( rec ) & DO_NOT_DEREF_FLAG ); }
+public:
- KOKKOS_INLINE_FUNCTION
- void increment() const
- {
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
- if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::increment( m_record );
-#endif
- }
+ // Use macros instead of inline functions to reduce
+ // pressure on compiler optimization by reducing
+ // the number of symbols and inline functions.
- KOKKOS_INLINE_FUNCTION
- void decrement() const
- {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
- if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::decrement( m_record );
-#endif
- }
-public:
+#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED \
+ Record::tracking_enabled()
- KOKKOS_INLINE_FUNCTION
- constexpr SharedAllocationTracker() : m_record_bits( DO_NOT_DEREF_FLAG ) {}
+#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT \
+ if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::increment( m_record );
+
+#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT \
+ if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::decrement( m_record );
+
+#else
+
+#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED 0
+
+#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
+
+#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
+
+#endif
+
+ /** \brief Assign a specialized record */
+ inline
+ void assign_allocated_record_to_uninitialized( Record * arg_record )
+ { Record::increment( m_record = arg_record ); }
template< class MemorySpace >
constexpr
- SharedAllocationRecord< MemorySpace , void > & get_record() const
+ SharedAllocationRecord< MemorySpace , void > &
+ get_record() const
{ return * static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record ); }
template< class MemorySpace >
std::string get_label() const
{
return ( m_record_bits & DO_NOT_DEREF_FLAG )
? std::string()
: static_cast< SharedAllocationRecord< MemorySpace , void > * >( m_record )->get_label()
;
}
KOKKOS_INLINE_FUNCTION
- SharedAllocationTracker( Record * arg_record )
- : m_record( arg_record ) { increment(); }
+ int use_count() const
+ {
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+ Record * const tmp = reinterpret_cast<Record*>( m_record_bits & ~DO_NOT_DEREF_FLAG );
+ return ( tmp ? tmp->use_count() : 0 );
+#else
+ return 0 ;
+#endif
+ }
- KOKKOS_INLINE_FUNCTION
- ~SharedAllocationTracker() { decrement(); }
+ KOKKOS_FORCEINLINE_FUNCTION
+ ~SharedAllocationTracker()
+ { KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT }
- KOKKOS_INLINE_FUNCTION
- SharedAllocationTracker( const SharedAllocationTracker & rhs )
- : m_record( rhs.m_record ) { increment(); }
+ KOKKOS_FORCEINLINE_FUNCTION
+ constexpr SharedAllocationTracker()
+ : m_record_bits( DO_NOT_DEREF_FLAG ) {}
- KOKKOS_INLINE_FUNCTION
+ // Move:
+
+ KOKKOS_FORCEINLINE_FUNCTION
SharedAllocationTracker( SharedAllocationTracker && rhs )
- : m_record( rhs.m_record ) { rhs.m_record_bits = DO_NOT_DEREF_FLAG ; }
+ : m_record_bits( rhs.m_record_bits )
+ { rhs.m_record_bits = DO_NOT_DEREF_FLAG ; }
- KOKKOS_INLINE_FUNCTION
- SharedAllocationTracker & operator = ( const SharedAllocationTracker & rhs )
+ KOKKOS_FORCEINLINE_FUNCTION
+ SharedAllocationTracker & operator = ( SharedAllocationTracker && rhs )
{
- decrement();
- m_record = rhs.m_record ;
- increment();
+ // If this is tracking then must decrement
+ KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
+ // Move and reset RHS to default constructed value.
+ m_record_bits = rhs.m_record_bits ;
+ rhs.m_record_bits = DO_NOT_DEREF_FLAG ;
return *this ;
}
- KOKKOS_INLINE_FUNCTION
- SharedAllocationTracker & operator = ( SharedAllocationTracker && rhs )
+ // Copy:
+
+ KOKKOS_FORCEINLINE_FUNCTION
+ SharedAllocationTracker( const SharedAllocationTracker & rhs )
+ : m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
+ ? rhs.m_record_bits
+ : rhs.m_record_bits | DO_NOT_DEREF_FLAG )
{
- m_record = rhs.m_record ;
- rhs.m_record_bits = DO_NOT_DEREF_FLAG ;
+ KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
+ }
+
+ /** \brief Copy construction may disable tracking. */
+ KOKKOS_FORCEINLINE_FUNCTION
+ SharedAllocationTracker( const SharedAllocationTracker & rhs
+ , const bool enable_tracking )
+ : m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
+ && enable_tracking
+ ? rhs.m_record_bits
+ : rhs.m_record_bits | DO_NOT_DEREF_FLAG )
+ { KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT }
+
+ KOKKOS_FORCEINLINE_FUNCTION
+ SharedAllocationTracker & operator = ( const SharedAllocationTracker & rhs )
+ {
+ // If this is tracking then must decrement
+ KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
+ m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
+ ? rhs.m_record_bits
+ : rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
+ KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
return *this ;
}
+
+ /** \brief Copy assignment may disable tracking */
+ KOKKOS_FORCEINLINE_FUNCTION
+ void assign( const SharedAllocationTracker & rhs
+ , const bool enable_tracking )
+ {
+ KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
+ m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
+ && enable_tracking
+ ? rhs.m_record_bits
+ : rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
+ KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
+ }
+
+#undef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
+#undef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
+#undef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
+
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
-
+#endif
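SharedAllocationTracker packs the record pointer and the tracking state into a single word: allocation records are at least pointer-aligned, so bit 0 of the address is always zero and is free to carry DO_NOT_DEREF_FLAG, with m_record and m_record_bits aliasing through the union. A minimal sketch of this low-bit pointer-tagging technique follows; Record and the helper functions are illustrative names, not the Kokkos API.

// Low-bit pointer tagging, as in SharedAllocationTracker above.
#include <cstdint>
#include <cstdio>

struct Record { int use_count ; };

enum : std::uintptr_t { DO_NOT_DEREF_FLAG = 0x01u };

// Pack: alignof(Record) >= 2, so bit 0 of a valid pointer is clear
// and can carry the "do not dereference" flag.
std::uintptr_t make_tracked( Record * r )
{ return reinterpret_cast< std::uintptr_t >( r ); }

std::uintptr_t make_untracked( Record * r )
{ return reinterpret_cast< std::uintptr_t >( r ) | DO_NOT_DEREF_FLAG ; }

// Unpack: mask the flag off before dereferencing.
Record * record_of( std::uintptr_t bits )
{ return reinterpret_cast< Record * >( bits & ~DO_NOT_DEREF_FLAG ); }

bool tracking( std::uintptr_t bits )
{ return ! ( bits & DO_NOT_DEREF_FLAG ); }

int main()
{
  Record rec { 3 };
  const std::uintptr_t a = make_tracked( & rec );
  const std::uintptr_t b = make_untracked( & rec );
  std::printf( "a tracks %d, b tracks %d, count %d\n"
             , (int) tracking( a ) , (int) tracking( b )
             , record_of( b )->use_count );
}

Keeping the flag in the pointer word holds the tracker to the size of a single pointer, which matters because one tracker is embedded in every View.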
diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewAllocProp.hpp b/lib/kokkos/core/src/impl/KokkosExp_ViewAllocProp.hpp
index 348ccaf5e..d571a1ea0 100644
--- a/lib/kokkos/core/src/impl/KokkosExp_ViewAllocProp.hpp
+++ b/lib/kokkos/core/src/impl/KokkosExp_ViewAllocProp.hpp
@@ -1,416 +1,438 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_IMPL_VIEW_ALLOC_PROP_HPP
#define KOKKOS_EXPERIMENTAL_IMPL_VIEW_ALLOC_PROP_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
+#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
+namespace Kokkos {
+
+/* For backward compatibility */
+
+struct ViewAllocateWithoutInitializing {
+
+ const std::string label ;
+
+ ViewAllocateWithoutInitializing() : label() {}
+ ViewAllocateWithoutInitializing( const std::string & arg_label ) : label( arg_label ) {}
+ ViewAllocateWithoutInitializing( const char * const arg_label ) : label( arg_label ) {}
+};
+
+} /* namespace Kokkos */
+
+#endif
+
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
namespace Kokkos {
namespace Experimental {
namespace Impl {
struct WithoutInitializing_t {};
struct AllowPadding_t {};
template< class ... Parameters >
struct ViewAllocProp ;
template<>
struct ViewAllocProp<> {
struct NullSpace {};
typedef std::false_type allow_padding_t ;
typedef std::true_type initialize_t ;
typedef NullSpace memory_space ;
typedef NullSpace execution_space ;
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
ViewAllocProp()
: label()
, memory()
, execution()
, allow_padding()
, initialize()
{}
ViewAllocProp( const std::string & arg_label )
: label( arg_label )
, memory()
, execution()
, allow_padding()
, initialize()
{}
};
template< class ... Parameters >
struct ViewAllocProp< const char * , Parameters ... >
{
typedef ViewAllocProp< Parameters ... > base_prop_type ;
typedef typename base_prop_type::allow_padding_t allow_padding_t ;
typedef typename base_prop_type::initialize_t initialize_t ;
typedef typename base_prop_type::memory_space memory_space ;
typedef typename base_prop_type::execution_space execution_space ;
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
ViewAllocProp( const char * const arg_label , Parameters ... arg_param )
: label( arg_label )
, memory( base_prop_type( arg_param ... ).memory )
, execution( base_prop_type( arg_param ... ).execution )
, allow_padding()
, initialize()
{}
};
template< class ... Parameters >
struct ViewAllocProp< std::string , Parameters ... >
{
typedef ViewAllocProp< Parameters ... > base_prop_type ;
typedef typename base_prop_type::allow_padding_t allow_padding_t ;
typedef typename base_prop_type::initialize_t initialize_t ;
typedef typename base_prop_type::memory_space memory_space ;
typedef typename base_prop_type::execution_space execution_space ;
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
ViewAllocProp( const std::string & arg_label , Parameters ... arg_param )
: label( arg_label )
, memory( base_prop_type( arg_param ... ).memory )
, execution( base_prop_type( arg_param ... ).execution )
, allow_padding()
, initialize()
{}
};
template< class ... Parameters >
struct ViewAllocProp< WithoutInitializing_t , Parameters ... >
{
typedef ViewAllocProp< Parameters ... > base_prop_type ;
typedef typename base_prop_type::allow_padding_t allow_padding_t ;
typedef std::false_type initialize_t ;
typedef typename base_prop_type::memory_space memory_space ;
typedef typename base_prop_type::execution_space execution_space ;
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
ViewAllocProp( const WithoutInitializing_t & , Parameters ... arg_param )
: label( base_prop_type( arg_param ... ).label )
, memory( base_prop_type( arg_param ... ).memory )
, execution( base_prop_type( arg_param ... ).execution )
, allow_padding()
, initialize()
{}
};
template< class ... Parameters >
struct ViewAllocProp< AllowPadding_t , Parameters ... >
{
typedef ViewAllocProp< Parameters ... > base_prop_type ;
typedef std::true_type allow_padding_t ;
typedef typename base_prop_type::initialize_t initialize_t ;
typedef typename base_prop_type::memory_space memory_space ;
typedef typename base_prop_type::execution_space execution_space ;
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
ViewAllocProp( const AllowPadding_t & , Parameters ... arg_param )
: label( base_prop_type( arg_param ... ).label )
, memory( base_prop_type( arg_param ... ).memory )
, execution( base_prop_type( arg_param ... ).execution )
, allow_padding()
, initialize()
{}
};
template< class Space , class ... Parameters >
struct ViewAllocProp< Space , Parameters ... >
{
enum { is_exec = Kokkos::Impl::is_execution_space< Space >::value };
enum { is_mem = Kokkos::Impl::is_memory_space< Space >::value };
static_assert( is_exec || is_mem , "View allocation given unknown parameter" );
typedef ViewAllocProp< Parameters ... > base_prop_type ;
typedef typename base_prop_type::allow_padding_t allow_padding_t ;
typedef typename base_prop_type::initialize_t initialize_t ;
typedef typename std::conditional< is_mem , Space , typename base_prop_type::memory_space >::type memory_space ;
typedef typename std::conditional< is_exec , Space , typename base_prop_type::execution_space >::type execution_space ;
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
// Templated so that 'base_prop_type( args ... ).execution'
// is not used unless arg_space == memory_space.
template< class ... Args >
ViewAllocProp( const memory_space & arg_space , Args ... args )
: label( base_prop_type( args ... ).label )
, memory( arg_space )
, execution( base_prop_type( args ... ).execution )
, allow_padding()
, initialize()
{}
// Templated so that 'base_prop_type( args ... ).memory'
// is not used unless arg_space == execution_space.
template< class ... Args >
ViewAllocProp( const execution_space & arg_space , Args ... args )
: label( base_prop_type( args ... ).label )
, memory( base_prop_type( args ... ).memory )
, execution( arg_space )
, allow_padding()
, initialize()
{}
};
template< class ExecSpace , class MemSpace >
struct ViewAllocProp< Kokkos::Device< ExecSpace , MemSpace > , std::string >
{
typedef ViewAllocProp<> base_prop_type ;
typedef typename base_prop_type::allow_padding_t allow_padding_t ;
typedef typename base_prop_type::initialize_t initialize_t ;
typedef MemSpace memory_space ;
typedef ExecSpace execution_space ;
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
ViewAllocProp( const std::string & arg_label )
: label( arg_label )
, memory()
, execution()
, allow_padding()
, initialize()
{}
};
template< class ExecSpace , class MemSpace , unsigned N >
struct ViewAllocProp< Kokkos::Device< ExecSpace , MemSpace > , char[N] >
{
typedef ViewAllocProp<> base_prop_type ;
typedef typename base_prop_type::allow_padding_t allow_padding_t ;
typedef typename base_prop_type::initialize_t initialize_t ;
typedef MemSpace memory_space ;
typedef ExecSpace execution_space ;
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
ViewAllocProp( const char * const arg_label )
: label( arg_label )
, memory()
, execution()
, allow_padding()
, initialize()
{}
};
// Deprecate in favor of view_alloc( Kokkos::WithoutInitializing )
template< class ExecSpace , class MemSpace >
struct ViewAllocProp< Kokkos::Device< ExecSpace , MemSpace >
, Kokkos::ViewAllocateWithoutInitializing
>
{
typedef ViewAllocProp<> base_prop_type ;
typedef typename base_prop_type::allow_padding_t allow_padding_t ;
typedef std::false_type initialize_t ;
typedef MemSpace memory_space ;
typedef ExecSpace execution_space ;
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
ViewAllocProp( const Kokkos::ViewAllocateWithoutInitializing & arg )
: label( arg.label )
, memory()
, execution()
, allow_padding()
, initialize()
{}
};
template< class ExecSpace , class MemSpace , class ... Parameters >
struct ViewAllocProp< Kokkos::Device< ExecSpace , MemSpace >
, ViewAllocProp< Parameters ... >
>
{
typedef ViewAllocProp< Parameters ... > base_prop_type ;
typedef typename base_prop_type::allow_padding_t allow_padding_t ;
typedef typename base_prop_type::initialize_t initialize_t ;
typedef MemSpace memory_space ;
typedef
typename std::conditional
< Kokkos::Impl::is_execution_space< typename base_prop_type::execution_space >::value
, typename base_prop_type::execution_space
, ExecSpace
>::type execution_space ;
static_assert( std::is_same< typename base_prop_type::memory_space , ViewAllocProp<>::NullSpace >::value ||
std::is_same< typename base_prop_type::memory_space , memory_space >::value
, "View allocation given incompatible memory space" );
static_assert( Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename execution_space::memory_space
, memory_space >::value
, "View allocation given incompatible execution space" );
const std::string label ;
const memory_space memory ;
const execution_space execution ;
const allow_padding_t allow_padding ;
const initialize_t initialize ;
// If the input properties have a memory or execution space then copy construct those spaces
// otherwise default construct those spaces.
template< class P >
ViewAllocProp( const P & arg_prop
, typename std::enable_if
< std::is_same< P , base_prop_type >::value &&
Kokkos::Impl::is_memory_space< typename P::memory_space >::value &&
Kokkos::Impl::is_execution_space< typename P::memory_space >::value
>::type * = 0 )
: label( arg_prop.label )
, memory( arg_prop.memory )
, execution( arg_prop.execution )
, allow_padding()
, initialize()
{}
template< class P >
ViewAllocProp( const P & arg_prop
, typename std::enable_if
< std::is_same< P , base_prop_type >::value &&
Kokkos::Impl::is_memory_space< typename P::memory_space >::value &&
! Kokkos::Impl::is_execution_space< typename P::execution_space >::value
>::type * = 0 )
: label( arg_prop.label )
, memory( arg_prop.memory )
, execution()
, allow_padding()
, initialize()
{}
template< class P >
ViewAllocProp( const P & arg_prop
, typename std::enable_if
< std::is_same< P , base_prop_type >::value &&
! Kokkos::Impl::is_memory_space< typename P::memory_space >::value &&
Kokkos::Impl::is_execution_space< typename P::execution_space >::value
>::type * = 0 )
: label( arg_prop.label )
, memory()
, execution( arg_prop.execution )
, allow_padding()
, initialize()
{}
template< class P >
ViewAllocProp( const P & arg_prop
, typename std::enable_if
< std::is_same< P , base_prop_type >::value &&
! Kokkos::Impl::is_memory_space< typename P::memory_space >::value &&
! Kokkos::Impl::is_execution_space< typename P::execution_space >::value
>::type * = 0 )
: label( arg_prop.label )
, memory()
, execution()
, allow_padding()
, initialize()
{}
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif
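Each ViewAllocProp specialization above peels one leading argument off the parameter pack, reconstructs the remainder as base_prop_type, and overrides only the typedefs that argument controls (WithoutInitializing_t forces initialize_t to std::false_type, a space argument replaces the corresponding space typedef, and so on). Below is a reduced sketch of this peel-one-parameter pattern; Props and WithoutInit are invented names for illustration.

// Peel-one-parameter property parsing, as in ViewAllocProp above.
#include <iostream>
#include <string>
#include <type_traits>

struct WithoutInit {};  // stands in for WithoutInitializing_t

template< class ... P > struct Props ;

// Base case: nothing left to peel, everything defaulted.
template<>
struct Props<> {
  typedef std::true_type initialize_t ;
  const std::string label ;
  Props() : label() {}
};

// Peel a label; inherit initialize_t from the remainder.
template< class ... Rest >
struct Props< std::string , Rest ... > {
  typedef Props< Rest ... > base ;
  typedef typename base::initialize_t initialize_t ;
  const std::string label ;
  Props( const std::string & l , Rest ... ) : label( l ) {}
};

// Peel WithoutInit; override initialize_t, delegate the rest for the label.
template< class ... Rest >
struct Props< WithoutInit , Rest ... > {
  typedef Props< Rest ... > base ;
  typedef std::false_type initialize_t ;
  const std::string label ;
  Props( const WithoutInit & , Rest ... rest )
    : label( base( rest ... ).label ) {}
};

int main()
{
  typedef Props< WithoutInit , std::string > P ;
  P p( WithoutInit() , std::string( "myview" ) );
  std::cout << p.label << " initialize=" << P::initialize_t::value << "\n";
}

Because each specialization re-derives only the typedefs its own argument affects, arguments compose in either order and new properties can be added without touching existing specializations.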
diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp b/lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp
index 6f49c57b3..432d29ab3 100644
--- a/lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp
+++ b/lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp
@@ -1,616 +1,602 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP
#define KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP
#include <Kokkos_Array.hpp>
namespace Kokkos {
namespace Experimental {
namespace Impl {
-template< class DataType , class V , long N , class P , class ArrayLayout >
-struct ViewDataAnalysis< DataType , Kokkos::Array<V,N,P> , ArrayLayout >
+template< class DataType , class ArrayLayout , class V , size_t N , class P >
+struct ViewDataAnalysis< DataType , ArrayLayout , Kokkos::Array<V,N,P> >
{
private:
typedef ViewArrayAnalysis<DataType> array_analysis ;
static_assert( std::is_same<P,void>::value , "" );
static_assert( std::is_same<typename array_analysis::non_const_value_type , Kokkos::Array<V,N,P> >::value , "" );
static_assert( std::is_scalar<V>::value , "View of Array type must be of a scalar type" );
public:
typedef Kokkos::Array<> specialize ;
typedef typename array_analysis::dimension dimension ;
private:
enum { is_const = std::is_same< typename array_analysis::value_type
, typename array_analysis::const_value_type
>::value };
- typedef ViewDimension< ( dimension::rank == 0 ? N : dimension::arg_N0 )
- , ( dimension::rank == 1 ? N : dimension::arg_N1 )
- , ( dimension::rank == 2 ? N : dimension::arg_N2 )
- , ( dimension::rank == 3 ? N : dimension::arg_N3 )
- , ( dimension::rank == 4 ? N : dimension::arg_N4 )
- , ( dimension::rank == 5 ? N : dimension::arg_N5 )
- , ( dimension::rank == 6 ? N : dimension::arg_N6 )
- , ( dimension::rank == 7 ? N : dimension::arg_N7 )
- > array_scalar_dimension ;
+ typedef typename dimension::template append<N>::type array_scalar_dimension ;
typedef typename std::conditional< is_const , const V , V >::type scalar_type ;
typedef V non_const_scalar_type ;
typedef const V const_scalar_type ;
public:
typedef typename array_analysis::value_type value_type ;
typedef typename array_analysis::const_value_type const_value_type ;
typedef typename array_analysis::non_const_value_type non_const_value_type ;
typedef typename ViewDataType< value_type , dimension >::type type ;
typedef typename ViewDataType< const_value_type , dimension >::type const_type ;
typedef typename ViewDataType< non_const_value_type , dimension >::type non_const_type ;
typedef typename ViewDataType< scalar_type , array_scalar_dimension >::type array_scalar_type ;
typedef typename ViewDataType< const_scalar_type , array_scalar_dimension >::type const_array_scalar_type ;
typedef typename ViewDataType< non_const_scalar_type , array_scalar_dimension >::type non_const_array_scalar_type ;
};
}}} // namespace Kokkos::Experimental::Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** \brief View mapping for non-specialized data type and standard layout */
template< class Traits >
-class ViewMapping< Traits , void ,
- typename std::enable_if<( std::is_same< typename Traits::specialize , Kokkos::Array<> >::value &&
- ( std::is_same< typename Traits::array_layout , Kokkos::LayoutLeft >::value ||
- std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ||
- std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value )
- )>::type >
+class ViewMapping< Traits ,
+ typename std::enable_if<(
+ std::is_same< typename Traits::specialize , Kokkos::Array<> >::value &&
+ ( std::is_same< typename Traits::array_layout , Kokkos::LayoutLeft >::value ||
+ std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ||
+ std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value )
+ )>::type >
{
private:
- template< class , class , typename > friend class ViewMapping ;
- template< class , bool , bool , bool , bool , bool , bool , bool , bool , class > friend struct SubviewMapping ;
- template< class , class , class , class > friend class Kokkos::Experimental::View ;
+ template< class , class ... > friend class ViewMapping ;
+ template< class , class ... > friend class Kokkos::Experimental::View ;
typedef ViewOffset< typename Traits::dimension
, typename Traits::array_layout
, void
> offset_type ;
typedef typename Traits::value_type::pointer handle_type ;
handle_type m_handle ;
offset_type m_offset ;
size_t m_stride ;
typedef typename Traits::value_type::value_type scalar_type ;
typedef Kokkos::Array< scalar_type , ~size_t(0) , Kokkos::Array<>::contiguous > contiguous_reference ;
typedef Kokkos::Array< scalar_type , ~size_t(0) , Kokkos::Array<>::strided > strided_reference ;
enum { is_contiguous_reference =
( Traits::rank == 0 ) || ( std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ) };
enum { Array_N = Traits::value_type::size() };
enum { Array_S = is_contiguous_reference ? Array_N : 1 };
KOKKOS_INLINE_FUNCTION
ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset )
: m_handle( arg_handle )
, m_offset( arg_offset )
, m_stride( is_contiguous_reference ? 0 : arg_offset.span() )
{}
public:
//----------------------------------------
// Domain dimensions
enum { Rank = Traits::dimension::rank };
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); }
// Is a regular layout with uniform striding for each index.
using is_regular = typename offset_type::is_regular ;
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); }
//----------------------------------------
// Range span
/** \brief Span of the mapped range */
- KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_offset.span(); }
+ KOKKOS_INLINE_FUNCTION constexpr size_t span() const
+ { return m_offset.span() * Array_N ; }
/** \brief Is the mapped range span contiguous */
- KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_offset.span_is_contiguous(); }
+ KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
+ { return m_offset.span_is_contiguous(); }
typedef typename std::conditional< is_contiguous_reference , contiguous_reference , strided_reference >::type reference_type ;
+ typedef handle_type pointer_type ;
+
/** \brief If data references are lvalue_reference then can query pointer to memory */
- KOKKOS_INLINE_FUNCTION constexpr typename Traits::value_type * data() const
- { return (typename Traits::value_type *) 0 ; }
+ KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
+ { return m_handle ; }
//----------------------------------------
// The View class performs all rank and bounds checking before
// calling these element reference methods.
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference() const { return reference_type( m_handle + 0 , Array_N , 0 ); }
template< typename I0 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type
reference( const I0 & i0 ) const
{ return reference_type( m_handle + m_offset(i0) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 ) const
{ return reference_type( m_handle + m_offset(i0,i1) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6) * Array_S , Array_N , m_stride ); }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 , typename I7 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
{ return reference_type( m_handle + m_offset(i0,i1,i2,i3,i4,i5,i6,i7) * Array_S , Array_N , m_stride ); }
//----------------------------------------
private:
enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ };
- enum { MemorySpanSize = sizeof(typename Traits::value_type) };
+ enum { MemorySpanSize = sizeof(scalar_type) };
public:
/** \brief Span, in bytes, of the referenced memory */
KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const
{
- return ( m_stride * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask);
+ return ( m_offset.span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
}
/** \brief Span, in bytes, of the required memory */
template< bool AllowPadding >
KOKKOS_INLINE_FUNCTION
static constexpr size_t memory_span( const std::integral_constant<bool,AllowPadding> &
, const size_t N0 , const size_t N1 , const size_t N2 , const size_t N3
, const size_t N4 , const size_t N5 , const size_t N6 , const size_t N7 )
{
typedef std::integral_constant< unsigned , AllowPadding ? MemorySpanSize : 0 > padding ;
- return ( offset_type( padding(), N0, N1, N2, N3, N4, N5, N6, N7 ).span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
+ return ( offset_type( padding(), N0, N1, N2, N3, N4, N5, N6, N7 ).span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
}
/** \brief Span, in bytes, of the required memory */
template< bool AllowPadding >
KOKKOS_INLINE_FUNCTION
static constexpr size_t memory_span( const std::integral_constant<bool,AllowPadding> &
, const typename Traits::array_layout & layout )
{
- return ( offset_type( layout ).span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
+ return ( offset_type( layout ).span() * Array_N * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION ~ViewMapping() {}
KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset(), m_stride(0) {}
KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs )
: m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {}
KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs )
{ m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; return *this ; }
KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs )
: m_handle( rhs.m_handle ), m_offset( rhs.m_offset ), m_stride( rhs.m_stride ) {}
KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs )
{ m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; m_stride = rhs.m_stride ; return *this ; }
template< bool AllowPadding >
KOKKOS_INLINE_FUNCTION
- ViewMapping( void * ptr
+ ViewMapping( pointer_type ptr
, const std::integral_constant<bool,AllowPadding> &
, const size_t N0 , const size_t N1 , const size_t N2 , const size_t N3
, const size_t N4 , const size_t N5 , const size_t N6 , const size_t N7 )
- : m_handle( reinterpret_cast< handle_type >( ptr ) )
+ : m_handle( ptr )
, m_offset( std::integral_constant< unsigned , AllowPadding ? sizeof(typename Traits::value_type) : 0 >()
, N0, N1, N2, N3, N4, N5, N6, N7 )
, m_stride( m_offset.span() )
{}
template< bool AllowPadding >
KOKKOS_INLINE_FUNCTION
- ViewMapping( void * ptr
+ ViewMapping( pointer_type ptr
, const std::integral_constant<bool,AllowPadding> &
, const typename Traits::array_layout & layout )
- : m_handle( reinterpret_cast< handle_type >( ptr ) )
+ : m_handle( ptr )
, m_offset( layout )
, m_stride( m_offset.span() )
{}
//----------------------------------------
// If the View is to construct or destroy the elements.
KOKKOS_FORCEINLINE_FUNCTION
void operator()( const size_t i ) const
{
reference_type ref( m_handle + i * Array_S , Array_N , m_stride );
for ( size_t j = 0 ; j < Array_N ; ++j ) ref[j] = 0 ;
}
template< class ExecSpace >
void construct( const ExecSpace & space ) const
{
typedef Kokkos::RangePolicy< ExecSpace , size_t > Policy ;
- (void) Kokkos::Impl::ParallelFor< ViewMapping , Policy >( *this , Policy( 0 , m_stride ) );
+ const Kokkos::Impl::ParallelFor< ViewMapping , Policy > closure( *this , Policy( 0 , m_stride ) );
+ closure.execute();
ExecSpace::fence();
}
template< class ExecSpace >
void destroy( const ExecSpace & ) const {}
};
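// Illustrative example (added for exposition, not part of the patch):
// a LayoutRight View< Kokkos::Array<double,3> * > of extent 10 spans
// 10 * 3 scalars and hands out contiguous references; with LayoutLeft
// the Array members of one entry are separated by m_stride and a
// strided_reference is returned instead.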
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** \brief Assign compatible default mappings */
template< class DstTraits , class SrcTraits >
class ViewMapping< DstTraits , SrcTraits ,
typename std::enable_if<(
std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value
&&
std::is_same< typename DstTraits::specialize , Kokkos::Array<> >::value
&&
(
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
)
&&
std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
&&
(
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
)
)>::type >
{
public:
enum { is_assignable = true };
typedef Kokkos::Experimental::Impl::SharedAllocationTracker TrackType ;
- typedef ViewMapping< DstTraits , void , void > DstType ;
- typedef ViewMapping< SrcTraits , void , void > SrcType ;
+ typedef ViewMapping< DstTraits , void > DstType ;
+ typedef ViewMapping< SrcTraits , void > SrcType ;
KOKKOS_INLINE_FUNCTION
static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
{
static_assert( std::is_same< typename DstTraits::value_type , typename SrcTraits::value_type >::value ||
std::is_same< typename DstTraits::value_type , typename SrcTraits::const_value_type >::value
, "View assignment must have same value type or const = non-const" );
static_assert( ViewDimensionAssignable< typename DstTraits::dimension , typename SrcTraits::dimension >::value
, "View assignment must have compatible dimensions" );
static_assert( std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value ||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value ||
( DstTraits::dimension::rank == 0 ) ||
( DstTraits::dimension::rank == 1 && DstTraits::dimension::rank_dynamic == 1 )
, "View assignment must have compatible layout or have rank <= 1" );
typedef typename DstType::offset_type dst_offset_type ;
dst.m_offset = dst_offset_type( src.m_offset );
dst.m_handle = src.m_handle ;
dst.m_stride = src.m_stride ;
}
};
/** \brief Assign Array to non-Array */
template< class DstTraits , class SrcTraits >
class ViewMapping< DstTraits , SrcTraits ,
typename std::enable_if<(
std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value
&&
std::is_same< typename DstTraits::specialize , void >::value
&&
(
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
)
&&
std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
&&
(
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
)
)>::type >
{
public:
// Can only convert to View::array_type
enum { is_assignable = std::is_same< typename DstTraits::data_type , typename SrcTraits::array_scalar_type >::value &&
std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value };
typedef Kokkos::Experimental::Impl::SharedAllocationTracker TrackType ;
- typedef ViewMapping< DstTraits , void , void > DstType ;
- typedef ViewMapping< SrcTraits , void , void > SrcType ;
+ typedef ViewMapping< DstTraits , void > DstType ;
+ typedef ViewMapping< SrcTraits , void > SrcType ;
KOKKOS_INLINE_FUNCTION
static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
{
static_assert( is_assignable , "Can only convert to array_type" );
typedef typename DstType::offset_type dst_offset_type ;
// Array dimension becomes the last dimension.
// Arguments beyond the destination rank are ignored.
if ( src.span_is_contiguous() ) { // not padded
dst.m_offset = dst_offset_type( std::integral_constant<unsigned,0>()
+ , ( 0 < SrcType::Rank ? src.dimension_0() : SrcTraits::value_type::size() )
, ( 1 < SrcType::Rank ? src.dimension_1() : SrcTraits::value_type::size() )
, ( 2 < SrcType::Rank ? src.dimension_2() : SrcTraits::value_type::size() )
, ( 3 < SrcType::Rank ? src.dimension_3() : SrcTraits::value_type::size() )
, ( 4 < SrcType::Rank ? src.dimension_4() : SrcTraits::value_type::size() )
, ( 5 < SrcType::Rank ? src.dimension_5() : SrcTraits::value_type::size() )
, ( 6 < SrcType::Rank ? src.dimension_6() : SrcTraits::value_type::size() )
, ( 7 < SrcType::Rank ? src.dimension_7() : SrcTraits::value_type::size() )
);
}
else { // is padded
typedef std::integral_constant<unsigned,sizeof(typename SrcTraits::value_type::value_type)> padded ;
dst.m_offset = dst_offset_type( padded()
, ( 0 < SrcType::Rank ? src.dimension_0() : SrcTraits::value_type::size() )
, ( 1 < SrcType::Rank ? src.dimension_1() : SrcTraits::value_type::size() )
, ( 2 < SrcType::Rank ? src.dimension_2() : SrcTraits::value_type::size() )
, ( 3 < SrcType::Rank ? src.dimension_3() : SrcTraits::value_type::size() )
, ( 4 < SrcType::Rank ? src.dimension_4() : SrcTraits::value_type::size() )
, ( 5 < SrcType::Rank ? src.dimension_5() : SrcTraits::value_type::size() )
, ( 6 < SrcType::Rank ? src.dimension_6() : SrcTraits::value_type::size() )
, ( 7 < SrcType::Rank ? src.dimension_7() : SrcTraits::value_type::size() )
);
}
dst.m_handle = src.m_handle ;
}
};
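// Illustrative example (added for exposition, not part of the patch):
// a View< Kokkos::Array<double,3> * , LayoutRight > may be assigned to
// its array_type View< double*[3] , LayoutRight >; the Array extent
// becomes the last dimension, and the scalar-sized padding branch is
// taken when the source span is not contiguous.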
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
-/** \brief View mapping for non-specialized data type and standard layout */
-template< class Traits , bool R0 , bool R1 , bool R2 , bool R3 , bool R4 , bool R5 , bool R6 , bool R7 >
-struct SubviewMapping< Traits, R0, R1, R2, R3, R4, R5, R6, R7 ,
- typename std::enable_if<(
- std::is_same< typename Traits::specialize , Kokkos::Array<> >::value
- &&
- (
- std::is_same< typename Traits::array_layout , Kokkos::LayoutLeft >::value ||
- std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ||
- std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value
- )
- )>::type >
+template< class SrcTraits , class ... Args >
+struct ViewMapping
+ < typename std::enable_if<(
+ std::is_same< typename SrcTraits::specialize , Kokkos::Array<> >::value
+ &&
+ (
+ std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
+ std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
+ std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
+ )
+ )>::type
+ , SrcTraits
+ , Args ... >
{
private:
- // Subview's rank
+ static_assert( SrcTraits::rank == sizeof...(Args) , "" );
+
+ enum : bool
+ { R0 = is_integral_extent<0,Args...>::value
+ , R1 = is_integral_extent<1,Args...>::value
+ , R2 = is_integral_extent<2,Args...>::value
+ , R3 = is_integral_extent<3,Args...>::value
+ , R4 = is_integral_extent<4,Args...>::value
+ , R5 = is_integral_extent<5,Args...>::value
+ , R6 = is_integral_extent<6,Args...>::value
+ , R7 = is_integral_extent<7,Args...>::value
+ };
+
enum { rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
// Whether right-most rank is a range.
- enum { R0_rev = 0 == Traits::rank ? false : (
- 1 == Traits::rank ? R0 : (
- 2 == Traits::rank ? R1 : (
- 3 == Traits::rank ? R2 : (
- 4 == Traits::rank ? R3 : (
- 5 == Traits::rank ? R4 : (
- 6 == Traits::rank ? R5 : (
- 7 == Traits::rank ? R6 : R7 ))))))) };
+ enum { R0_rev = 0 == SrcTraits::rank ? false : (
+ 1 == SrcTraits::rank ? R0 : (
+ 2 == SrcTraits::rank ? R1 : (
+ 3 == SrcTraits::rank ? R2 : (
+ 4 == SrcTraits::rank ? R3 : (
+ 5 == SrcTraits::rank ? R4 : (
+ 6 == SrcTraits::rank ? R5 : (
+ 7 == SrcTraits::rank ? R6 : R7 ))))))) };
// Subview's layout
typedef typename std::conditional<
( /* Same array layout IF */
( rank == 0 ) /* output rank zero */
||
// OutputRank 1 or 2, InputLayout Left, Interval 0
// because single stride one or second index has a stride.
- ( rank <= 2 && R0 && std::is_same< typename Traits::array_layout , Kokkos::LayoutLeft >::value )
+ ( rank <= 2 && R0 && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value )
||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
// because single stride one or second index has a stride.
- ( rank <= 2 && R0_rev && std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value )
- ), typename Traits::array_layout , Kokkos::LayoutStride
+ ( rank <= 2 && R0_rev && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value )
+ ), typename SrcTraits::array_layout , Kokkos::LayoutStride
>::type array_layout ;
- typedef typename Traits::value_type value_type ;
+ typedef typename SrcTraits::value_type value_type ;
typedef typename std::conditional< rank == 0 , value_type ,
typename std::conditional< rank == 1 , value_type * ,
typename std::conditional< rank == 2 , value_type ** ,
typename std::conditional< rank == 3 , value_type *** ,
typename std::conditional< rank == 4 , value_type **** ,
typename std::conditional< rank == 5 , value_type ***** ,
typename std::conditional< rank == 6 , value_type ****** ,
typename std::conditional< rank == 7 , value_type ******* ,
value_type ********
>::type >::type >::type >::type >::type >::type >::type >::type
data_type ;
public:
- typedef
- Kokkos::Experimental::ViewTraits< data_type , array_layout
- , typename Traits::device_type
- , typename Traits::memory_traits > traits_type ;
+ typedef Kokkos::Experimental::ViewTraits
+ < data_type
+ , array_layout
+ , typename SrcTraits::device_type
+ , typename SrcTraits::memory_traits > traits_type ;
- typedef Kokkos::Experimental::View< data_type
- , array_layout
- , typename Traits::device_type
- , typename Traits::memory_traits > type ;
+ typedef Kokkos::Experimental::View
+ < data_type
+ , array_layout
+ , typename SrcTraits::device_type
+ , typename SrcTraits::memory_traits > type ;
- template< class T0 , class T1 , class T2 , class T3
- , class T4 , class T5 , class T6 , class T7 >
KOKKOS_INLINE_FUNCTION
- static void assign( ViewMapping< traits_type , void , void > & dst
- , ViewMapping< Traits , void , void > const & src
- , T0 const & arg0
- , T1 const & arg1
- , T2 const & arg2
- , T3 const & arg3
- , T4 const & arg4
- , T5 const & arg5
- , T6 const & arg6
- , T7 const & arg7
- )
+ static void assign( ViewMapping< traits_type , void > & dst
+ , ViewMapping< SrcTraits , void > const & src
+ , Args ... args )
{
- typedef ViewMapping< traits_type , void , void > DstType ;
+ typedef ViewMapping< traits_type , void > DstType ;
typedef typename DstType::offset_type dst_offset_type ;
typedef typename DstType::handle_type dst_handle_type ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T0> V0 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T1> V1 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T2> V2 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T3> V3 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T4> V4 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T5> V5 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T6> V6 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T7> V7 ;
-
- dst.m_offset = dst_offset_type
- ( src.m_offset
- , V0::dimension( src.m_offset.dimension_0() , arg0 )
- , V1::dimension( src.m_offset.dimension_1() , arg1 )
- , V2::dimension( src.m_offset.dimension_2() , arg2 )
- , V3::dimension( src.m_offset.dimension_3() , arg3 )
- , V4::dimension( src.m_offset.dimension_4() , arg4 )
- , V5::dimension( src.m_offset.dimension_5() , arg5 )
- , V6::dimension( src.m_offset.dimension_6() , arg6 )
- , V7::dimension( src.m_offset.dimension_7() , arg7 )
- );
+ const SubviewExtents< SrcTraits::rank , rank >
+ extents( src.m_offset.m_dim , args... );
+ dst.m_offset = dst_offset_type( src.m_offset , extents );
dst.m_handle = dst_handle_type( src.m_handle +
- src.m_offset( V0::begin( arg0 )
- , V1::begin( arg1 )
- , V2::begin( arg2 )
- , V3::begin( arg3 )
- , V4::begin( arg4 )
- , V5::begin( arg5 )
- , V6::begin( arg6 )
- , V7::begin( arg7 )
+ src.m_offset( extents.domain_offset(0)
+ , extents.domain_offset(1)
+ , extents.domain_offset(2)
+ , extents.domain_offset(3)
+ , extents.domain_offset(4)
+ , extents.domain_offset(5)
+ , extents.domain_offset(6)
+ , extents.domain_offset(7)
) );
}
};
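// Illustrative example (added for exposition, Args... deduced from the
// subview call): subview( a , Kokkos::pair<int,int>(2,7) , 4 ) of a
// rank-2 Array view selects one range and one index, so rank == 1 and
// SubviewExtents computes both the result extents and the offset of
// the first referenced element.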
}}} // namespace Kokkos::Experimental::Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP */
diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp b/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp
index 5fa1bb715..5ec003222 100644
--- a/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp
+++ b/lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp
@@ -1,2830 +1,2723 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP
#define KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP
#include <type_traits>
#include <initializer_list>
#include <Kokkos_Pair.hpp>
#include <Kokkos_Layout.hpp>
+#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Atomic_View.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class ExecPolicy > class ParallelFor ;
}} /* namespace Kokkos::Impl */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
-template< long sN0 = -1
- , long sN1 = -1
- , long sN2 = -1
- , long sN3 = -1
- , long sN4 = -1
- , long sN5 = -1
- , long sN6 = -1
- , long sN7 = -1
- >
-struct ViewDimension {
-
- enum { arg_N0 = sN0 };
- enum { arg_N1 = sN1 };
- enum { arg_N2 = sN2 };
- enum { arg_N3 = sN3 };
- enum { arg_N4 = sN4 };
- enum { arg_N5 = sN5 };
- enum { arg_N6 = sN6 };
- enum { arg_N7 = sN7 };
-
- enum { rank = ( sN0 < 0 ? 0 :
- ( sN1 < 0 ? 1 :
- ( sN2 < 0 ? 2 :
- ( sN3 < 0 ? 3 :
- ( sN4 < 0 ? 4 :
- ( sN5 < 0 ? 5 :
- ( sN6 < 0 ? 6 :
- ( sN7 < 0 ? 7 : 8 )))))))) };
- enum { rank_dynamic = 0 };
-
- enum { N0 = 0 < sN0 ? sN0 : 1 };
- enum { N1 = 0 < sN1 ? sN1 : 1 };
- enum { N2 = 0 < sN2 ? sN2 : 1 };
- enum { N3 = 0 < sN3 ? sN3 : 1 };
- enum { N4 = 0 < sN4 ? sN4 : 1 };
- enum { N5 = 0 < sN5 ? sN5 : 1 };
- enum { N6 = 0 < sN6 ? sN6 : 1 };
- enum { N7 = 0 < sN7 ? sN7 : 1 };
+template< unsigned I , size_t ... Args >
+struct variadic_size_t
+ { enum { value = ~size_t(0) }; };
- ViewDimension() = default ;
- ViewDimension( const ViewDimension & ) = default ;
- ViewDimension & operator = ( const ViewDimension & ) = default ;
+template< size_t Val , size_t ... Args >
+struct variadic_size_t< 0 , Val , Args ... >
+ { enum { value = Val }; };
- KOKKOS_INLINE_FUNCTION
- constexpr ViewDimension( size_t , unsigned , unsigned , unsigned
- , unsigned , unsigned , unsigned , unsigned ) {}
-};
+template< unsigned I , size_t Val , size_t ... Args >
+struct variadic_size_t< I , Val , Args ... >
+ { enum { value = variadic_size_t< I - 1 , Args ... >::value }; };
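+// Illustrative checks (added for exposition, not part of the patch):
+// variadic_size_t<I,Vals...> selects the I'th value, or ~size_t(0)
+// when I is past the end of the pack.
+static_assert( variadic_size_t< 1 , 2 , 3 , 4 >::value == 3 , "" );
+static_assert( variadic_size_t< 5 , 2 , 3 , 4 >::value == ~size_t(0) , "" );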
-template< long sN1
- , long sN2
- , long sN3
- , long sN4
- , long sN5
- , long sN6
- , long sN7
- >
-struct ViewDimension< 0, sN1, sN2, sN3, sN4, sN5, sN6, sN7 > {
-
- enum { arg_N0 = 0 };
- enum { arg_N1 = sN1 };
- enum { arg_N2 = sN2 };
- enum { arg_N3 = sN3 };
- enum { arg_N4 = sN4 };
- enum { arg_N5 = sN5 };
- enum { arg_N6 = sN6 };
- enum { arg_N7 = sN7 };
-
- enum { rank = ( sN1 < 0 ? 1 :
- ( sN2 < 0 ? 2 :
- ( sN3 < 0 ? 3 :
- ( sN4 < 0 ? 4 :
- ( sN5 < 0 ? 5 :
- ( sN6 < 0 ? 6 :
- ( sN7 < 0 ? 7 : 8 ))))))) };
- enum { rank_dynamic = 1 };
-
- size_t N0 ; /* When 1 == rank_dynamic allow N0 >= 2^32 */
- enum { N1 = 0 < sN1 ? sN1 : 1 };
- enum { N2 = 0 < sN2 ? sN2 : 1 };
- enum { N3 = 0 < sN3 ? sN3 : 1 };
- enum { N4 = 0 < sN4 ? sN4 : 1 };
- enum { N5 = 0 < sN5 ? sN5 : 1 };
- enum { N6 = 0 < sN6 ? sN6 : 1 };
- enum { N7 = 0 < sN7 ? sN7 : 1 };
+template< size_t ... Args >
+struct rank_dynamic ;
- ViewDimension() = default ;
- ViewDimension( const ViewDimension & ) = default ;
- ViewDimension & operator = ( const ViewDimension & ) = default ;
+template<>
+struct rank_dynamic<> { enum { value = 0 }; };
- KOKKOS_INLINE_FUNCTION
- constexpr ViewDimension( size_t aN0 , unsigned , unsigned , unsigned
- , unsigned , unsigned , unsigned , unsigned )
- : N0( aN0 ) {}
+template< size_t Val , size_t ... Args >
+struct rank_dynamic< Val , Args... >
+{
+ enum { value = ( Val == 0 ? 1 : 0 ) + rank_dynamic< Args... >::value };
};
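+// Illustrative check (added for exposition): a zero entry marks a
+// runtime dimension, so e.g. double**[3] maps to ViewDimension<0,0,3>.
+static_assert( rank_dynamic< 0 , 0 , 3 >::value == 2 , "" );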
-template< long sN2
- , long sN3
- , long sN4
- , long sN5
- , long sN6
- , long sN7
- >
-struct ViewDimension< 0, 0, sN2, sN3, sN4, sN5, sN6, sN7 > {
-
- enum { arg_N0 = 0 };
- enum { arg_N1 = 0 };
- enum { arg_N2 = sN2 };
- enum { arg_N3 = sN3 };
- enum { arg_N4 = sN4 };
- enum { arg_N5 = sN5 };
- enum { arg_N6 = sN6 };
- enum { arg_N7 = sN7 };
-
- enum { rank = ( sN2 < 0 ? 2 :
- ( sN3 < 0 ? 3 :
- ( sN4 < 0 ? 4 :
- ( sN5 < 0 ? 5 :
- ( sN6 < 0 ? 6 :
- ( sN7 < 0 ? 7 : 8 )))))) };
- enum { rank_dynamic = 2 };
-
- size_t N0 ; /* When 2 == rank_dynamic allow N0 >= 2^32 */
- size_t N1 ; /* When 2 == rank_dynamic allow N1 >= 2^32 */
- enum { N2 = 0 < sN2 ? sN2 : 1 };
- enum { N3 = 0 < sN3 ? sN3 : 1 };
- enum { N4 = 0 < sN4 ? sN4 : 1 };
- enum { N5 = 0 < sN5 ? sN5 : 1 };
- enum { N6 = 0 < sN6 ? sN6 : 1 };
- enum { N7 = 0 < sN7 ? sN7 : 1 };
+#define KOKKOS_IMPL_VIEW_DIMENSION( R ) \
+ template< size_t V , unsigned > struct ViewDimension ## R \
+ { \
+ enum { ArgN ## R = ( V != ~size_t(0) ? V : 1 ) }; \
+ enum { N ## R = ( V != ~size_t(0) ? V : 1 ) }; \
+ KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t ) {} \
+ ViewDimension ## R () = default ; \
+ ViewDimension ## R ( const ViewDimension ## R & ) = default ; \
+ ViewDimension ## R & operator = ( const ViewDimension ## R & ) = default ; \
+ }; \
+ template< unsigned RD > struct ViewDimension ## R < 0 , RD > \
+ { \
+ enum { ArgN ## R = 0 }; \
+ typename std::conditional<( RD < 3 ), size_t , unsigned >::type N ## R ; \
+ ViewDimension ## R () = default ; \
+ ViewDimension ## R ( const ViewDimension ## R & ) = default ; \
+ ViewDimension ## R & operator = ( const ViewDimension ## R & ) = default ; \
+ KOKKOS_INLINE_FUNCTION explicit ViewDimension ## R ( size_t V ) : N ## R ( V ) {} \
+ };
+
+KOKKOS_IMPL_VIEW_DIMENSION( 0 )
+KOKKOS_IMPL_VIEW_DIMENSION( 1 )
+KOKKOS_IMPL_VIEW_DIMENSION( 2 )
+KOKKOS_IMPL_VIEW_DIMENSION( 3 )
+KOKKOS_IMPL_VIEW_DIMENSION( 4 )
+KOKKOS_IMPL_VIEW_DIMENSION( 5 )
+KOKKOS_IMPL_VIEW_DIMENSION( 6 )
+KOKKOS_IMPL_VIEW_DIMENSION( 7 )
+
+#undef KOKKOS_IMPL_VIEW_DIMENSION
+
+template< size_t ... Vals >
+struct ViewDimension
+ : public ViewDimension0< variadic_size_t<0,Vals...>::value
+ , rank_dynamic< Vals... >::value >
+ , public ViewDimension1< variadic_size_t<1,Vals...>::value
+ , rank_dynamic< Vals... >::value >
+ , public ViewDimension2< variadic_size_t<2,Vals...>::value
+ , rank_dynamic< Vals... >::value >
+ , public ViewDimension3< variadic_size_t<3,Vals...>::value
+ , rank_dynamic< Vals... >::value >
+ , public ViewDimension4< variadic_size_t<4,Vals...>::value
+ , rank_dynamic< Vals... >::value >
+ , public ViewDimension5< variadic_size_t<5,Vals...>::value
+ , rank_dynamic< Vals... >::value >
+ , public ViewDimension6< variadic_size_t<6,Vals...>::value
+ , rank_dynamic< Vals... >::value >
+ , public ViewDimension7< variadic_size_t<7,Vals...>::value
+ , rank_dynamic< Vals... >::value >
+{
+ typedef ViewDimension0< variadic_size_t<0,Vals...>::value
+ , rank_dynamic< Vals... >::value > D0 ;
+ typedef ViewDimension1< variadic_size_t<1,Vals...>::value
+ , rank_dynamic< Vals... >::value > D1 ;
+ typedef ViewDimension2< variadic_size_t<2,Vals...>::value
+ , rank_dynamic< Vals... >::value > D2 ;
+ typedef ViewDimension3< variadic_size_t<3,Vals...>::value
+ , rank_dynamic< Vals... >::value > D3 ;
+ typedef ViewDimension4< variadic_size_t<4,Vals...>::value
+ , rank_dynamic< Vals... >::value > D4 ;
+ typedef ViewDimension5< variadic_size_t<5,Vals...>::value
+ , rank_dynamic< Vals... >::value > D5 ;
+ typedef ViewDimension6< variadic_size_t<6,Vals...>::value
+ , rank_dynamic< Vals... >::value > D6 ;
+ typedef ViewDimension7< variadic_size_t<7,Vals...>::value
+ , rank_dynamic< Vals... >::value > D7 ;
+
+ using D0::ArgN0 ;
+ using D1::ArgN1 ;
+ using D2::ArgN2 ;
+ using D3::ArgN3 ;
+ using D4::ArgN4 ;
+ using D5::ArgN5 ;
+ using D6::ArgN6 ;
+ using D7::ArgN7 ;
+
+ using D0::N0 ;
+ using D1::N1 ;
+ using D2::N2 ;
+ using D3::N3 ;
+ using D4::N4 ;
+ using D5::N5 ;
+ using D6::N6 ;
+ using D7::N7 ;
+
+ enum { rank = sizeof...(Vals) };
+ enum { rank_dynamic = Impl::rank_dynamic< Vals... >::value };
ViewDimension() = default ;
ViewDimension( const ViewDimension & ) = default ;
ViewDimension & operator = ( const ViewDimension & ) = default ;
KOKKOS_INLINE_FUNCTION
- constexpr ViewDimension( size_t aN0 , unsigned aN1 , unsigned , unsigned
- , unsigned , unsigned , unsigned , unsigned )
- : N0( aN0 ) , N1( aN1 ) {}
-};
+ constexpr
+ ViewDimension( size_t n0 , size_t n1 , size_t n2 , size_t n3
+ , size_t n4 , size_t n5 , size_t n6 , size_t n7 )
+ : D0( n0 )
+ , D1( n1 )
+ , D2( n2 )
+ , D3( n3 )
+ , D4( n4 )
+ , D5( n5 )
+ , D6( n6 )
+ , D7( n7 )
+ {}
-template< long sN3
- , long sN4
- , long sN5
- , long sN6
- , long sN7
- >
-struct ViewDimension< 0, 0, 0, sN3, sN4, sN5, sN6, sN7 > {
-
- enum { arg_N0 = 0 };
- enum { arg_N1 = 0 };
- enum { arg_N2 = 0 };
- enum { arg_N3 = sN3 };
- enum { arg_N4 = sN4 };
- enum { arg_N5 = sN5 };
- enum { arg_N6 = sN6 };
- enum { arg_N7 = sN7 };
-
- enum { rank = ( sN3 < 0 ? 3 :
- ( sN4 < 0 ? 4 :
- ( sN5 < 0 ? 5 :
- ( sN6 < 0 ? 6 :
- ( sN7 < 0 ? 7 : 8 ))))) };
- enum { rank_dynamic = 3 };
-
- unsigned N0 ;
- unsigned N1 ;
- unsigned N2 ;
- enum { N3 = 0 < sN3 ? sN3 : 1 };
- enum { N4 = 0 < sN4 ? sN4 : 1 };
- enum { N5 = 0 < sN5 ? sN5 : 1 };
- enum { N6 = 0 < sN6 ? sN6 : 1 };
- enum { N7 = 0 < sN7 ? sN7 : 1 };
+ KOKKOS_INLINE_FUNCTION
+ constexpr size_t extent( const unsigned r ) const
+ {
+ return r == 0 ? N0 : (
+ r == 1 ? N1 : (
+ r == 2 ? N2 : (
+ r == 3 ? N3 : (
+ r == 4 ? N4 : (
+ r == 5 ? N5 : (
+ r == 6 ? N6 : (
+ r == 7 ? N7 : 0 )))))));
+ }
- ViewDimension() = default ;
- ViewDimension( const ViewDimension & ) = default ;
- ViewDimension & operator = ( const ViewDimension & ) = default ;
+ template< size_t N >
+ struct prepend { typedef ViewDimension< N , Vals... > type ; };
- KOKKOS_INLINE_FUNCTION
- constexpr ViewDimension( size_t aN0 , unsigned aN1 , unsigned aN2 , unsigned
- , unsigned , unsigned , unsigned , unsigned )
- : N0( aN0 ) , N1( aN1 ) , N2( aN2 ) {}
+ template< size_t N >
+ struct append { typedef ViewDimension< Vals... , N > type ; };
};
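+// Illustrative checks (added for exposition, not part of the patch):
+static_assert( ViewDimension<0,0,3>::rank == 3 , "" );
+static_assert( ViewDimension<0,0,3>::rank_dynamic == 2 , "" );
+static_assert( std::is_same< ViewDimension<0,0>::append<3>::type
+ , ViewDimension<0,0,3> >::value , "" );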
-template< long sN4
- , long sN5
- , long sN6
- , long sN7
- >
-struct ViewDimension< 0, 0, 0, 0, sN4, sN5, sN6, sN7 > {
-
- enum { arg_N0 = 0 };
- enum { arg_N1 = 0 };
- enum { arg_N2 = 0 };
- enum { arg_N3 = 0 };
- enum { arg_N4 = sN4 };
- enum { arg_N5 = sN5 };
- enum { arg_N6 = sN6 };
- enum { arg_N7 = sN7 };
-
- enum { rank = ( sN4 < 0 ? 4 :
- ( sN5 < 0 ? 5 :
- ( sN6 < 0 ? 6 :
- ( sN7 < 0 ? 7 : 8 )))) };
- enum { rank_dynamic = 4 };
-
- unsigned N0 ;
- unsigned N1 ;
- unsigned N2 ;
- unsigned N3 ;
- enum { N4 = 0 < sN4 ? sN4 : 1 };
- enum { N5 = 0 < sN5 ? sN5 : 1 };
- enum { N6 = 0 < sN6 ? sN6 : 1 };
- enum { N7 = 0 < sN7 ? sN7 : 1 };
+template< class A , class B >
+struct ViewDimensionJoin ;
- ViewDimension() = default ;
- ViewDimension( const ViewDimension & ) = default ;
- ViewDimension & operator = ( const ViewDimension & ) = default ;
-
- KOKKOS_INLINE_FUNCTION
- constexpr ViewDimension( size_t aN0 , unsigned aN1 , unsigned aN2 , unsigned aN3
- , unsigned , unsigned , unsigned , unsigned )
- : N0( aN0 ) , N1( aN1 ) , N2( aN2 ) , N3( aN3 ) {}
+template< size_t ... A , size_t ... B >
+struct ViewDimensionJoin< ViewDimension< A... > , ViewDimension< B... > > {
+ typedef ViewDimension< A... , B... > type ;
};
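+// Illustrative check (added for exposition): concatenation of dimensions.
+static_assert( std::is_same<
+ ViewDimensionJoin< ViewDimension<0,0> , ViewDimension<3> >::type
+ , ViewDimension<0,0,3> >::value , "" );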
-template< long sN5
- , long sN6
- , long sN7
- >
-struct ViewDimension< 0, 0, 0, 0, 0, sN5, sN6, sN7 > {
-
- enum { arg_N0 = 0 };
- enum { arg_N1 = 0 };
- enum { arg_N2 = 0 };
- enum { arg_N3 = 0 };
- enum { arg_N4 = 0 };
- enum { arg_N5 = sN5 };
- enum { arg_N6 = sN6 };
- enum { arg_N7 = sN7 };
-
- enum { rank = ( sN5 < 0 ? 5 :
- ( sN6 < 0 ? 6 :
- ( sN7 < 0 ? 7 : 8 ))) };
- enum { rank_dynamic = 5 };
-
- unsigned N0 ;
- unsigned N1 ;
- unsigned N2 ;
- unsigned N3 ;
- unsigned N4 ;
- enum { N5 = 0 < sN5 ? sN5 : 1 };
- enum { N6 = 0 < sN6 ? sN6 : 1 };
- enum { N7 = 0 < sN7 ? sN7 : 1 };
+//----------------------------------------------------------------------------
- ViewDimension() = default ;
- ViewDimension( const ViewDimension & ) = default ;
- ViewDimension & operator = ( const ViewDimension & ) = default ;
+template< class DstDim , class SrcDim >
+struct ViewDimensionAssignable ;
- KOKKOS_INLINE_FUNCTION
- constexpr ViewDimension( size_t aN0 , unsigned aN1 , unsigned aN2 , unsigned aN3
- , unsigned aN4 , unsigned , unsigned , unsigned )
- : N0( aN0 ) , N1( aN1 ) , N2( aN2 ) , N3( aN3 ) , N4( aN4 ) {}
+template< size_t ... DstArgs , size_t ... SrcArgs >
+struct ViewDimensionAssignable< ViewDimension< DstArgs ... >
+ , ViewDimension< SrcArgs ... > >
+{
+ typedef ViewDimension< DstArgs... > dst ;
+ typedef ViewDimension< SrcArgs... > src ;
+
+ enum { value =
+ dst::rank == src::rank &&
+ dst::rank_dynamic >= src::rank_dynamic &&
+ ( 0 < dst::rank_dynamic || size_t(dst::ArgN0) == size_t(src::ArgN0) ) &&
+ ( 1 < dst::rank_dynamic || size_t(dst::ArgN1) == size_t(src::ArgN1) ) &&
+ ( 2 < dst::rank_dynamic || size_t(dst::ArgN2) == size_t(src::ArgN2) ) &&
+ ( 3 < dst::rank_dynamic || size_t(dst::ArgN3) == size_t(src::ArgN3) ) &&
+ ( 4 < dst::rank_dynamic || size_t(dst::ArgN4) == size_t(src::ArgN4) ) &&
+ ( 5 < dst::rank_dynamic || size_t(dst::ArgN5) == size_t(src::ArgN5) ) &&
+ ( 6 < dst::rank_dynamic || size_t(dst::ArgN6) == size_t(src::ArgN6) ) &&
+ ( 7 < dst::rank_dynamic || size_t(dst::ArgN7) == size_t(src::ArgN7) ) };
};
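+// Illustrative checks (added for exposition): a static extent may be
+// assigned to a runtime extent of equal rank, but not the reverse.
+static_assert( ViewDimensionAssignable< ViewDimension<0> , ViewDimension<4> >::value , "" );
+static_assert( ! ViewDimensionAssignable< ViewDimension<4> , ViewDimension<0> >::value , "" );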
-template< long sN6
- , long sN7
- >
-struct ViewDimension< 0, 0, 0, 0, 0, 0, sN6, sN7 > {
-
- enum { arg_N0 = 0 };
- enum { arg_N1 = 0 };
- enum { arg_N2 = 0 };
- enum { arg_N3 = 0 };
- enum { arg_N4 = 0 };
- enum { arg_N5 = 0 };
- enum { arg_N6 = sN6 };
- enum { arg_N7 = sN7 };
-
- enum { rank = ( sN6 < 0 ? 6 :
- ( sN7 < 0 ? 7 : 8 )) };
- enum { rank_dynamic = 6 };
-
- unsigned N0 ;
- unsigned N1 ;
- unsigned N2 ;
- unsigned N3 ;
- unsigned N4 ;
- unsigned N5 ;
- enum { N6 = 0 < sN6 ? sN6 : 1 };
- enum { N7 = 0 < sN7 ? sN7 : 1 };
+}}} // namespace Kokkos::Experimental::Impl
- ViewDimension() = default ;
- ViewDimension( const ViewDimension & ) = default ;
- ViewDimension & operator = ( const ViewDimension & ) = default ;
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+struct ALL_t {
KOKKOS_INLINE_FUNCTION
- constexpr ViewDimension( size_t aN0 , unsigned aN1 , unsigned aN2 , unsigned aN3
- , unsigned aN4 , unsigned aN5 , unsigned , unsigned )
- : N0( aN0 ) , N1( aN1 ) , N2( aN2 ) , N3( aN3 ) , N4( aN4 ) , N5( aN5 ) {}
+ constexpr const ALL_t & operator()() const { return *this ; }
};
-template< long sN7 >
-struct ViewDimension< 0, 0, 0, 0, 0, 0, 0, sN7 > {
-
- enum { arg_N0 = 0 };
- enum { arg_N1 = 0 };
- enum { arg_N2 = 0 };
- enum { arg_N3 = 0 };
- enum { arg_N4 = 0 };
- enum { arg_N5 = 0 };
- enum { arg_N6 = 0 };
- enum { arg_N7 = sN7 };
-
- enum { rank = ( sN7 < 0 ? 7 : 8 ) };
- enum { rank_dynamic = 7 };
-
- unsigned N0 ;
- unsigned N1 ;
- unsigned N2 ;
- unsigned N3 ;
- unsigned N4 ;
- unsigned N5 ;
- unsigned N6 ;
- enum { N7 = 0 < sN7 ? sN7 : 1 };
+template< class T >
+struct is_integral_extent_type
+{ enum { value = std::is_same<T,Kokkos::Experimental::Impl::ALL_t>::value ? 1 : 0 }; };
- ViewDimension() = default ;
- ViewDimension( const ViewDimension & ) = default ;
- ViewDimension & operator = ( const ViewDimension & ) = default ;
+template< class iType >
+struct is_integral_extent_type< std::pair<iType,iType> >
+{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; };
- KOKKOS_INLINE_FUNCTION
- constexpr ViewDimension( size_t aN0 , unsigned aN1 , unsigned aN2 , unsigned aN3
- , unsigned aN4 , unsigned aN5 , unsigned aN6 , unsigned )
- : N0( aN0 ) , N1( aN1 ) , N2( aN2 ) , N3( aN3 ) , N4( aN4 ) , N5( aN5 ) , N6( aN6 ) {}
+template< class iType >
+struct is_integral_extent_type< Kokkos::pair<iType,iType> >
+{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; };
+
+// Assuming '2 == initializer_list<iType>::size()'
+template< class iType >
+struct is_integral_extent_type< std::initializer_list<iType> >
+{ enum { value = std::is_integral<iType>::value ? 1 : 0 }; };
+
+template < unsigned I , class ... Args >
+struct is_integral_extent
+{
+ // variadic_type is void when sizeof...(Args) <= I
+ typedef typename std::remove_cv<
+ typename std::remove_reference<
+ typename Kokkos::Impl::variadic_type<I,Args...
+ >::type >::type >::type type ;
+
+ enum { value = is_integral_extent_type<type>::value };
+
+ static_assert( value ||
+ std::is_integral<type>::value ||
+ std::is_same<type,void>::value
+ , "subview argument must be either integral or integral extent" );
};
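+// Illustrative checks (added for exposition): for subview arguments
+// ( int , Kokkos::pair ) the first is a single index, the second an
+// extent-producing range.
+static_assert( ! is_integral_extent< 0 , int , Kokkos::pair<int,int> >::value , "" );
+static_assert( is_integral_extent< 1 , int , Kokkos::pair<int,int> >::value , "" );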
-template<>
-struct ViewDimension< 0, 0, 0, 0, 0, 0, 0, 0 > {
-
- enum { arg_N0 = 0 };
- enum { arg_N1 = 0 };
- enum { arg_N2 = 0 };
- enum { arg_N3 = 0 };
- enum { arg_N4 = 0 };
- enum { arg_N5 = 0 };
- enum { arg_N6 = 0 };
- enum { arg_N7 = 0 };
-
- enum { rank = 8 };
- enum { rank_dynamic = 8 };
-
- unsigned N0 ;
- unsigned N1 ;
- unsigned N2 ;
- unsigned N3 ;
- unsigned N4 ;
- unsigned N5 ;
- unsigned N6 ;
- unsigned N7 ;
+template< unsigned DomainRank , unsigned RangeRank >
+struct SubviewExtents {
+private:
- ViewDimension() = default ;
- ViewDimension( const ViewDimension & ) = default ;
- ViewDimension & operator = ( const ViewDimension & ) = default ;
+ // Cannot declare zero-length arrays
+ enum { InternalRangeRank = RangeRank ? RangeRank : 1u };
+
+ size_t m_begin[ DomainRank ];
+ size_t m_length[ InternalRangeRank ];
+ unsigned m_index[ InternalRangeRank ];
+
+ template< size_t ... DimArgs >
+ KOKKOS_FORCEINLINE_FUNCTION
+ bool set( unsigned domain_rank
+ , unsigned range_rank
+ , const ViewDimension< DimArgs ... > & dim )
+ { return true ; }
+
+ template< class T , size_t ... DimArgs , class ... Args >
+ KOKKOS_FORCEINLINE_FUNCTION
+ bool set( unsigned domain_rank
+ , unsigned range_rank
+ , const ViewDimension< DimArgs ... > & dim
+ , const T & val
+ , Args ... args )
+ {
+ const size_t v = static_cast<size_t>(val);
+
+ m_begin[ domain_rank ] = v ;
+
+ return set( domain_rank + 1 , range_rank , dim , args... )
+#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
+ && ( v < dim.extent( domain_rank ) )
+#endif
+ ;
+ }
+
+ // ALL_t: take the full extent of this dimension
+ template< size_t ... DimArgs , class ... Args >
+ KOKKOS_FORCEINLINE_FUNCTION
+ bool set( unsigned domain_rank
+ , unsigned range_rank
+ , const ViewDimension< DimArgs ... > & dim
+ , const Kokkos::Experimental::Impl::ALL_t
+ , Args ... args )
+ {
+ m_begin[ domain_rank ] = 0 ;
+ m_length[ range_rank ] = dim.extent( domain_rank );
+ m_index[ range_rank ] = domain_rank ;
+
+ return set( domain_rank + 1 , range_rank + 1 , dim , args... );
+ }
+
+ // std::pair range
+ template< class T , size_t ... DimArgs , class ... Args >
+ KOKKOS_FORCEINLINE_FUNCTION
+ bool set( unsigned domain_rank
+ , unsigned range_rank
+ , const ViewDimension< DimArgs ... > & dim
+ , const std::pair<T,T> & val
+ , Args ... args )
+ {
+ const size_t b = static_cast<size_t>( val.first );
+ const size_t e = static_cast<size_t>( val.second );
+
+ m_begin[ domain_rank ] = b ;
+ m_length[ range_rank ] = e - b ;
+ m_index[ range_rank ] = domain_rank ;
+
+ return set( domain_rank + 1 , range_rank + 1 , dim , args... )
+#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
+ && ( e <= b + dim.extent( domain_rank ) )
+#endif
+ ;
+ }
+
+ // Kokkos::pair range
+ template< class T , size_t ... DimArgs , class ... Args >
+ KOKKOS_FORCEINLINE_FUNCTION
+ bool set( unsigned domain_rank
+ , unsigned range_rank
+ , const ViewDimension< DimArgs ... > & dim
+ , const Kokkos::pair<T,T> & val
+ , Args ... args )
+ {
+ const size_t b = static_cast<size_t>( val.first );
+ const size_t e = static_cast<size_t>( val.second );
+
+ m_begin[ domain_rank ] = b ;
+ m_length[ range_rank ] = e - b ;
+ m_index[ range_rank ] = domain_rank ;
+
+ return set( domain_rank + 1 , range_rank + 1 , dim , args... )
+#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
+ && ( e <= b + dim.extent( domain_rank ) )
+#endif
+ ;
+ }
+
+ // { begin , end } range
+ template< class T , size_t ... DimArgs , class ... Args >
+ KOKKOS_FORCEINLINE_FUNCTION
+ bool set( unsigned domain_rank
+ , unsigned range_rank
+ , const ViewDimension< DimArgs ... > & dim
+ , const std::initializer_list< T > & val
+ , Args ... args )
+ {
+ const size_t b = static_cast<size_t>( val.begin()[0] );
+ const size_t e = static_cast<size_t>( val.begin()[1] );
+
+ m_begin[ domain_rank ] = b ;
+ m_length[ range_rank ] = e - b ;
+ m_index[ range_rank ] = domain_rank ;
+
+ return set( domain_rank + 1 , range_rank + 1 , dim , args... )
+#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
+ && ( val.size() == 2 )
+ && ( e <= b + dim.extent( domain_rank ) )
+#endif
+ ;
+ }
+
+ //------------------------------
+
+#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
+
+ template< size_t ... DimArgs >
+ void error( char *
+ , int
+ , unsigned
+ , unsigned
+ , const ViewDimension< DimArgs ... > & ) const
+ {}
+
+ template< class T , size_t ... DimArgs , class ... Args >
+ void error( char * buf , int buf_len
+ , unsigned domain_rank
+ , unsigned range_rank
+ , const ViewDimension< DimArgs ... > & dim
+ , const T & val
+ , Args ... args ) const
+ {
+ const int n = std::min( buf_len ,
+ snprintf( buf , buf_len
+ , " %lu < %lu %c"
+ , static_cast<unsigned long>(val)
+ , static_cast<unsigned long>( dim.extent( domain_rank ) )
+ , int( sizeof...(Args) ? ',' : ')' ) ) );
+
+ error( buf+n, buf_len-n, domain_rank + 1 , range_rank , dim , args... );
+ }
+
+ // ALL_t: full extent of this dimension
+ template< size_t ... DimArgs , class ... Args >
+ void error( char * buf , int buf_len
+ , unsigned domain_rank
+ , unsigned range_rank
+ , const ViewDimension< DimArgs ... > & dim
+ , const Kokkos::Experimental::Impl::ALL_t
+ , Args ... args ) const
+ {
+ const int n = std::min( buf_len ,
+ snprintf( buf , buf_len
+ , " Kokkos::ALL %c"
+ , int( sizeof...(Args) ? ',' : ')' ) ) );
+
+ error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
+ }
+
+ // std::pair range
+ template< class T , size_t ... DimArgs , class ... Args >
+ void error( char * buf , int buf_len
+ , unsigned domain_rank
+ , unsigned range_rank
+ , const ViewDimension< DimArgs ... > & dim
+ , const std::pair<T,T> & val
+ , Args ... args ) const
+ {
+ // d <= e - b
+ const int n = std::min( buf_len ,
+ snprintf( buf , buf_len
+ , " %lu <= %lu - %lu %c"
+ , static_cast<unsigned long>( dim.extent( domain_rank ) )
+ , static_cast<unsigned long>( val.second )
+ , static_cast<unsigned long>( val.first )
+ , int( sizeof...(Args) ? ',' : ')' ) ) );
+
+ error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
+ }
+
+ // Kokkos::pair range
+ template< class T , size_t ... DimArgs , class ... Args >
+ void error( char * buf , int buf_len
+ , unsigned domain_rank
+ , unsigned range_rank
+ , const ViewDimension< DimArgs ... > & dim
+ , const Kokkos::pair<T,T> & val
+ , Args ... args ) const
+ {
+ // d <= e - b
+ const int n = std::min( buf_len ,
+ snprintf( buf , buf_len
+ , " %lu <= %lu - %lu %c"
+ , static_cast<unsigned long>( dim.extent( domain_rank ) )
+ , static_cast<unsigned long>( val.second )
+ , static_cast<unsigned long>( val.first )
+ , int( sizeof...(Args) ? ',' : ')' ) ) );
+
+ error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
+ }
+
+ // { begin , end } range
+ template< class T , size_t ... DimArgs , class ... Args >
+ void error( char * buf , int buf_len
+ , unsigned domain_rank
+ , unsigned range_rank
+ , const ViewDimension< DimArgs ... > & dim
+ , const std::initializer_list< T > & val
+ , Args ... args ) const
+ {
+ // d <= e - b
+ int n = 0 ;
+ if ( val.size() == 2 ) {
+ n = std::min( buf_len ,
+ snprintf( buf , buf_len
+ , " %lu <= %lu - %lu %c"
+ , static_cast<unsigned long>( dim.extent( domain_rank ) )
+ , static_cast<unsigned long>( val.begin()[1] )
+ , static_cast<unsigned long>( val.begin()[0] )
+ , int( sizeof...(Args) ? ',' : ')' ) ) );
+ }
+ else {
+ n = std::min( buf_len ,
+ snprintf( buf , buf_len
+ , " { ... }.size() == %u %c"
+ , unsigned(val.size())
+ , int( sizeof...(Args) ? ',' : ')' ) ) );
+ }
+
+ error( buf+n , buf_len-n , domain_rank + 1 , range_rank + 1 , dim , args... );
+ }
+
+ template< size_t ... DimArgs , class ... Args >
+ void error( const ViewDimension< DimArgs ... > & dim , Args ... args ) const
+ {
+#if defined( KOKKOS_ACTIVE_EXECUTION_SPACE_HOST )
+ enum { LEN = 1024 };
+ char buffer[ LEN ];
+
+ const int n = snprintf(buffer,LEN,"Kokkos::subview bounds error (");
+ error( buffer+n , LEN-n , 0 , 0 , dim , args... );
+
+ Kokkos::Impl::throw_runtime_exception(std::string(buffer));
+#else
+ Kokkos::abort("Kokkos::subview bounds error");
+#endif
+ }
+#else
+
+ template< size_t ... DimArgs , class ... Args >
+ KOKKOS_FORCEINLINE_FUNCTION
+ void error( const ViewDimension< DimArgs ... > & , Args ... ) const {}
+
+#endif
+
+public:
+
+ template< size_t ... DimArgs , class ... Args >
KOKKOS_INLINE_FUNCTION
- constexpr ViewDimension( size_t aN0 , unsigned aN1 , unsigned aN2 , unsigned aN3
- , unsigned aN4 , unsigned aN5 , unsigned aN6 , unsigned aN7 )
- : N0( aN0 ) , N1( aN1 ) , N2( aN2 ) , N3( aN3 ) , N4( aN4 ) , N5( aN5 ) , N6( aN6 ) , N7( aN7 ) {}
-};
+ SubviewExtents( const ViewDimension< DimArgs ... > & dim , Args ... args )
+ {
+ static_assert( DomainRank == sizeof...(DimArgs) , "" );
+ static_assert( DomainRank == sizeof...(Args) , "" );
+
+ // Verifies that all arguments, up to 8, are integral types,
+ // integral extents, or don't exist.
+ static_assert( RangeRank ==
+ unsigned( is_integral_extent<0,Args...>::value ) +
+ unsigned( is_integral_extent<1,Args...>::value ) +
+ unsigned( is_integral_extent<2,Args...>::value ) +
+ unsigned( is_integral_extent<3,Args...>::value ) +
+ unsigned( is_integral_extent<4,Args...>::value ) +
+ unsigned( is_integral_extent<5,Args...>::value ) +
+ unsigned( is_integral_extent<6,Args...>::value ) +
+ unsigned( is_integral_extent<7,Args...>::value ) , "" );
+
+ if ( RangeRank == 0 ) { m_length[0] = 0 ; m_index[0] = ~0u ; }
+
+ if ( ! set( 0 , 0 , dim , args... ) ) error( dim , args... );
+ }
-//----------------------------------------------------------------------------
+ template < typename iType >
+ KOKKOS_FORCEINLINE_FUNCTION
+ constexpr size_t domain_offset( const iType i ) const
+ { return unsigned(i) < DomainRank ? m_begin[i] : 0 ; }
-template< class DstDim , class SrcDim >
-struct ViewDimensionAssignable ;
+ template < typename iType >
+ KOKKOS_FORCEINLINE_FUNCTION
+ constexpr size_t range_extent( const iType i ) const
+ { return unsigned(i) < InternalRangeRank ? m_length[i] : 0 ; }
-template< long dN0 , long dN1 , long dN2 , long dN3 , long dN4 , long dN5 , long dN6 , long dN7
- , long sN0 , long sN1 , long sN2 , long sN3 , long sN4 , long sN5 , long sN6 , long sN7 >
-struct ViewDimensionAssignable< ViewDimension<dN0,dN1,dN2,dN3,dN4,dN5,dN6,dN7>
- , ViewDimension<sN0,sN1,sN2,sN3,sN4,sN5,sN6,sN7> >
-{
- typedef ViewDimension<dN0,dN1,dN2,dN3,dN4,dN5,dN6,dN7> dst ;
- typedef ViewDimension<sN0,sN1,sN2,sN3,sN4,sN5,sN6,sN7> src ;
-
- enum { value = dst::rank == src::rank &&
- dst::rank_dynamic >= src::rank_dynamic &&
- ( 0 < dst::rank_dynamic || dN0 == sN0 ) &&
- ( 1 < dst::rank_dynamic || dN1 == sN1 ) &&
- ( 2 < dst::rank_dynamic || dN2 == sN2 ) &&
- ( 3 < dst::rank_dynamic || dN3 == sN3 ) &&
- ( 4 < dst::rank_dynamic || dN4 == sN4 ) &&
- ( 5 < dst::rank_dynamic || dN5 == sN5 ) &&
- ( 6 < dst::rank_dynamic || dN6 == sN6 ) &&
- ( 7 < dst::rank_dynamic || dN7 == sN7 ) };
+ template < typename iType >
+ KOKKOS_FORCEINLINE_FUNCTION
+ constexpr unsigned range_index( const iType i ) const
+ { return unsigned(i) < InternalRangeRank ? m_index[i] : ~0u ; }
};
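+// Illustrative usage (added for exposition, assuming a rank-2 dimension
+// dim with runtime extents 10 and 20):
+// SubviewExtents<2,1> ext( dim , 5 , Kokkos::pair<int,int>(2,7) );
+// ext.domain_offset(0) == 5 , ext.domain_offset(1) == 2 ,
+// ext.range_extent(0) == 5 , ext.range_index(0) == 1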
}}} // namespace Kokkos::Experimental::Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** \brief Given a value type and dimension generate the View data type */
-template< class T , class Dim /* ViewDimension */ >
-struct ViewDataType {
- enum { R = Dim::rank };
- enum { RD = Dim::rank_dynamic };
-
- // Unused static dimensions are set to 1 (instead of 0 or -1L) to avoid compile errors
- // in the 'false' clauses of the std::conditional.
-
- enum { N0 = 0 < Dim::arg_N0 ? Dim::arg_N0 : 1 };
- enum { N1 = 0 < Dim::arg_N1 ? Dim::arg_N1 : 1 };
- enum { N2 = 0 < Dim::arg_N2 ? Dim::arg_N2 : 1 };
- enum { N3 = 0 < Dim::arg_N3 ? Dim::arg_N3 : 1 };
- enum { N4 = 0 < Dim::arg_N4 ? Dim::arg_N4 : 1 };
- enum { N5 = 0 < Dim::arg_N5 ? Dim::arg_N5 : 1 };
- enum { N6 = 0 < Dim::arg_N6 ? Dim::arg_N6 : 1 };
- enum { N7 = 0 < Dim::arg_N7 ? Dim::arg_N7 : 1 };
-
- typedef typename std::conditional< R == 0 , T ,
- typename std::conditional< R == 1 ,
- typename std::conditional< RD == 0 , T[N0] , T * >::type ,
-
- typename std::conditional< R == 2 ,
- typename std::conditional< RD == 0 , T[N0][N1] ,
- typename std::conditional< RD == 1 , T* [N1] ,
- T**
- >::type >::type ,
-
- typename std::conditional< R == 3 ,
- typename std::conditional< RD == 0 , T[N0][N1][N2] ,
- typename std::conditional< RD == 1 , T* [N1][N2] ,
- typename std::conditional< RD == 2 , T** [N2] ,
- T***
- >::type >::type >::type ,
-
- typename std::conditional< R == 4 ,
- typename std::conditional< RD == 0 , T[N0][N1][N2][N3] ,
- typename std::conditional< RD == 1 , T* [N1][N2][N3] ,
- typename std::conditional< RD == 2 , T** [N2][N3] ,
- typename std::conditional< RD == 3 , T*** [N3] ,
- T****
- >::type >::type >::type >::type ,
-
- typename std::conditional< R == 5 ,
- typename std::conditional< RD == 0 , T[N0][N1][N2][N3][N4] ,
- typename std::conditional< RD == 1 , T* [N1][N2][N3][N4] ,
- typename std::conditional< RD == 2 , T** [N2][N3][N4] ,
- typename std::conditional< RD == 3 , T*** [N3][N4] ,
- typename std::conditional< RD == 4 , T**** [N4] ,
- T*****
- >::type >::type >::type >::type >::type ,
-
- typename std::conditional< R == 6 ,
- typename std::conditional< RD == 0 , T[N0][N1][N2][N3][N4][N5] ,
- typename std::conditional< RD == 1 , T* [N1][N2][N3][N4][N5] ,
- typename std::conditional< RD == 2 , T** [N2][N3][N4][N5] ,
- typename std::conditional< RD == 3 , T*** [N3][N4][N5] ,
- typename std::conditional< RD == 4 , T**** [N4][N5] ,
- typename std::conditional< RD == 5 , T***** [N5] ,
- T******
- >::type >::type >::type >::type >::type >::type ,
-
- typename std::conditional< R == 7 ,
- typename std::conditional< RD == 0 , T[N0][N1][N2][N3][N4][N5][N6] ,
- typename std::conditional< RD == 1 , T* [N1][N2][N3][N4][N5][N6] ,
- typename std::conditional< RD == 2 , T** [N2][N3][N4][N5][N6] ,
- typename std::conditional< RD == 3 , T*** [N3][N4][N5][N6] ,
- typename std::conditional< RD == 4 , T**** [N4][N5][N6] ,
- typename std::conditional< RD == 5 , T***** [N5][N6] ,
- typename std::conditional< RD == 6 , T****** [N6] ,
- T*******
- >::type >::type >::type >::type >::type >::type >::type ,
-
- typename std::conditional< R == 8 ,
- typename std::conditional< RD == 0 , T[N0][N1][N2][N3][N4][N5][N6][N7] ,
- typename std::conditional< RD == 1 , T* [N1][N2][N3][N4][N5][N6][N7] ,
- typename std::conditional< RD == 2 , T** [N2][N3][N4][N5][N6][N7] ,
- typename std::conditional< RD == 3 , T*** [N3][N4][N5][N6][N7] ,
- typename std::conditional< RD == 4 , T**** [N4][N5][N6][N7] ,
- typename std::conditional< RD == 5 , T***** [N5][N6][N7] ,
- typename std::conditional< RD == 6 , T****** [N6][N7] ,
- typename std::conditional< RD == 7 , T******* [N7] ,
- T********
- >::type >::type >::type >::type >::type >::type >::type >::type ,
-
- void >::type >::type >::type >::type >::type >::type >::type >::type >::type
- type ;
+template< class T , class Dim >
+struct ViewDataType ;
+
+template< class T >
+struct ViewDataType< T , ViewDimension<> >
+{
+ typedef T type ;
+};
+
+template< class T , size_t ... Args >
+struct ViewDataType< T , ViewDimension< 0 , Args... > >
+{
+ typedef typename ViewDataType<T*,ViewDimension<Args...> >::type type ;
+};
+
+template< class T , size_t N , size_t ... Args >
+struct ViewDataType< T , ViewDimension< N , Args... > >
+{
+ typedef typename ViewDataType<T,ViewDimension<Args...> >::type type[N] ;
};
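// Illustrative sketch (not part of the original header): the three
// ViewDataType specializations above peel the dimension pack one entry
// at a time; a zero entry contributes a runtime '*' and a non-zero
// entry a static '[N]', so two dynamic extents followed by a static
// extent of 3 regenerate the declaration 'double**[3]':
//
//   static_assert(
//     std::is_same< ViewDataType< double , ViewDimension<0,0,3> >::type
//                 , double**[3] >::value , "" );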
/**\brief Analysis of View data type.
 *
 * The data type conforms to one of the following patterns:
 * {const} value_type [][#][#][#]
 * {const} value_type ***[#][#][#]
 * where the sum of the counts of '*' and '[#]' is at most ten.
 *
 * Provides typedefs for the ViewDimension<...> and value_type.
 */
template< class T >
-struct ViewArrayAnalysis
+struct ViewArrayAnalysis
{
-private:
- // std::rank<T>, std::extent<T,i>, and std::remove_all_extents<T>
- // consider "const value_type***" to be the type.
-
- // Strip away pointers and count them
- typedef typename std::remove_all_extents< T >::type t_0 ; // brackets removed
- typedef typename std::remove_pointer< t_0 >::type t_1 ;
- typedef typename std::remove_pointer< t_1 >::type t_2 ;
- typedef typename std::remove_pointer< t_2 >::type t_3 ;
- typedef typename std::remove_pointer< t_3 >::type t_4 ;
- typedef typename std::remove_pointer< t_4 >::type t_5 ;
- typedef typename std::remove_pointer< t_5 >::type t_6 ;
- typedef typename std::remove_pointer< t_6 >::type t_7 ;
- typedef typename std::remove_pointer< t_7 >::type t_8 ;
- typedef typename std::remove_pointer< t_8 >::type t_9 ;
- typedef typename std::remove_pointer< t_9 >::type t_10 ;
-
- enum { rank_pointer =
- ( ! std::is_pointer< t_0 >::value ? 0 :
- ( ! std::is_pointer< t_1 >::value ? 1 :
- ( ! std::is_pointer< t_2 >::value ? 2 :
- ( ! std::is_pointer< t_3 >::value ? 3 :
- ( ! std::is_pointer< t_4 >::value ? 4 :
- ( ! std::is_pointer< t_5 >::value ? 5 :
- ( ! std::is_pointer< t_6 >::value ? 6 :
- ( ! std::is_pointer< t_7 >::value ? 7 :
- ( ! std::is_pointer< t_8 >::value ? 8 :
- ( ! std::is_pointer< t_9 >::value ? 9 :
- ( ! std::is_pointer< t_10 >::value ? 10 : 0x7fffffff ))))))))))) };
-
- // The pointer-stripped type t_10 may have been an array typedef of the form 'type[#][#]...'
- // Append those dimensions.
-
- enum { rank_bracket = std::rank< T >::value };
- enum { rank_bracket_nested = std::rank< t_10 >::value };
- enum { rank_base = rank_pointer + rank_bracket };
- enum { rank = rank_pointer + rank_bracket + rank_bracket_nested };
-
- static_assert( rank <= 10 , "Maximum ten dimensional array" );
-
- enum { extent_0 = 0 < rank_base ? std::extent< T , rank_pointer <= 0 ? 0 - rank_pointer : 10 >::value
- : std::extent< t_10 , rank_base <= 0 ? 0 - rank_base : 10 >::value };
-
- enum { extent_1 = 1 < rank_base ? std::extent< T , rank_pointer <= 1 ? 1 - rank_pointer : 10 >::value
- : std::extent< t_10 , rank_base <= 1 ? 1 - rank_base : 10 >::value };
+ typedef T value_type ;
+ typedef typename std::add_const< T >::type const_value_type ;
+ typedef typename std::remove_const< T >::type non_const_value_type ;
+ typedef ViewDimension<> static_dimension ;
+ typedef ViewDimension<> dynamic_dimension ;
+ typedef ViewDimension<> dimension ;
+};
- enum { extent_2 = 2 < rank_base ? std::extent< T , rank_pointer <= 2 ? 2 - rank_pointer : 10 >::value
- : std::extent< t_10 , rank_base <= 2 ? 2 - rank_base : 10 >::value };
+template< class T , size_t N >
+struct ViewArrayAnalysis< T[N] >
+{
+private:
+ typedef ViewArrayAnalysis< T > nested ;
+public:
+ typedef typename nested::value_type value_type ;
+ typedef typename nested::const_value_type const_value_type ;
+ typedef typename nested::non_const_value_type non_const_value_type ;
- enum { extent_3 = 3 < rank_base ? std::extent< T , rank_pointer <= 3 ? 3 - rank_pointer : 10 >::value
- : std::extent< t_10 , rank_base <= 3 ? 3 - rank_base : 10 >::value };
+ typedef typename nested::static_dimension::template prepend<N>::type
+ static_dimension ;
- enum { extent_4 = 4 < rank_base ? std::extent< T , rank_pointer <= 4 ? 4 - rank_pointer : 10 >::value
- : std::extent< t_10 , rank_base <= 4 ? 4 - rank_base : 10 >::value };
+ typedef typename nested::dynamic_dimension dynamic_dimension ;
- enum { extent_5 = 5 < rank_base ? std::extent< T , rank_pointer <= 5 ? 5 - rank_pointer : 10 >::value
- : std::extent< t_10 , rank_base <= 5 ? 5 - rank_base : 10 >::value };
+ typedef typename
+ ViewDimensionJoin< dynamic_dimension , static_dimension >::type
+ dimension ;
+};
- enum { extent_6 = 6 < rank_base ? std::extent< T , rank_pointer <= 6 ? 6 - rank_pointer : 10 >::value
- : std::extent< t_10 , rank_base <= 6 ? 6 - rank_base : 10 >::value };
+template< class T >
+struct ViewArrayAnalysis< T[] >
+{
+private:
+ typedef ViewArrayAnalysis< T > nested ;
+ typedef typename nested::dimension nested_dimension ;
+public:
+ typedef typename nested::value_type value_type ;
+ typedef typename nested::const_value_type const_value_type ;
+ typedef typename nested::non_const_value_type non_const_value_type ;
- enum { extent_7 = 7 < rank_base ? std::extent< T , rank_pointer <= 7 ? 7 - rank_pointer : 10 >::value
- : std::extent< t_10 , rank_base <= 7 ? 7 - rank_base : 10 >::value };
+ typedef typename nested::dynamic_dimension::template prepend<0>::type
+ dynamic_dimension ;
- enum { extent_8 = 8 < rank_base ? std::extent< T , rank_pointer <= 8 ? 8 - rank_pointer : 10 >::value
- : std::extent< t_10 , rank_base <= 8 ? 8 - rank_base : 10 >::value };
+ typedef typename nested::static_dimension static_dimension ;
- enum { extent_9 = 9 < rank_base ? std::extent< T , rank_pointer <= 9 ? 9 - rank_pointer : 10 >::value
- : std::extent< t_10 , rank_base <= 9 ? 9 - rank_base : 10 >::value };
+ typedef typename
+ ViewDimensionJoin< dynamic_dimension , static_dimension >::type
+ dimension ;
+};
- typedef typename std::remove_all_extents< t_10 >::type base_type ;
+template< class T >
+struct ViewArrayAnalysis< T* >
+{
+private:
+ typedef ViewArrayAnalysis< T > nested ;
+public:
+ typedef typename nested::value_type value_type ;
+ typedef typename nested::const_value_type const_value_type ;
+ typedef typename nested::non_const_value_type non_const_value_type ;
- enum { rank_dynamic = rank_pointer ? rank_pointer : ( ( rank_bracket && extent_0 == 0 ) ? 1 : 0 ) };
+ typedef typename nested::dynamic_dimension::template prepend<0>::type
+ dynamic_dimension ;
-public:
+ typedef typename nested::static_dimension static_dimension ;
- typedef ViewDimension< ( rank <= 0 ? -1L : extent_0 )
- , ( rank <= 1 ? -1L : extent_1 )
- , ( rank <= 2 ? -1L : extent_2 )
- , ( rank <= 3 ? -1L : extent_3 )
- , ( rank <= 4 ? -1L : extent_4 )
- , ( rank <= 5 ? -1L : extent_5 )
- , ( rank <= 6 ? -1L : extent_6 )
- , ( rank <= 7 ? -1L : extent_7 )
- > dimension ;
-
- typedef base_type value_type ;
- typedef typename std::add_const< base_type >::type const_value_type ;
- typedef typename std::remove_const< base_type >::type non_const_value_type ;
-
- static_assert( unsigned(dimension::rank) == unsigned(rank) , "" );
- static_assert( unsigned(dimension::rank_dynamic) == unsigned(rank_dynamic) , "" );
+ typedef typename
+ ViewDimensionJoin< dynamic_dimension , static_dimension >::type
+ dimension ;
};
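// Illustrative sketch (not part of the original header): the recursive
// specializations above split a declaration such as 'double*[3]' into
// one dynamic extent (the '*') and one static extent (the '[3]'), with
// dynamic extents joined in front of static extents:
//
//   typedef ViewArrayAnalysis< double*[3] > analysis ;
//   static_assert( std::is_same< analysis::value_type , double >::value , "" );
//   static_assert( std::is_same< analysis::dimension , ViewDimension<0,3> >::value , "" );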
-template< class DataType , class ValueType , class ArrayLayout >
+
+template< class DataType , class ArrayLayout , class ValueType >
struct ViewDataAnalysis
{
private:
typedef ViewArrayAnalysis< DataType > array_analysis ;
// ValueType is an opportunity for partial specialization.
// It must match the array analysis when this default template is used.
static_assert( std::is_same< ValueType , typename array_analysis::non_const_value_type >::value , "" );
public:
typedef void specialize ; // No specialization
typedef typename array_analysis::dimension dimension ;
typedef typename array_analysis::value_type value_type ;
typedef typename array_analysis::const_value_type const_value_type ;
typedef typename array_analysis::non_const_value_type non_const_value_type ;
// Generate analogous multidimensional array specification type.
typedef typename ViewDataType< value_type , dimension >::type type ;
typedef typename ViewDataType< const_value_type , dimension >::type const_type ;
typedef typename ViewDataType< non_const_value_type , dimension >::type non_const_type ;
// Generate "flattened" multidimensional array specification type.
typedef type array_scalar_type ;
typedef const_type const_array_scalar_type ;
typedef non_const_type non_const_array_scalar_type ;
};
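// Illustrative sketch (not part of the original header): for
// DataType = double*[3] the default analysis yields value_type double
// and dimension ViewDimension<0,3>, and ViewDataType above regenerates
// type == double*[3], const_type == const double*[3], and
// non_const_type == double*[3].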
}}} // namespace Kokkos::Experimental::Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
template < class Dimension , class Layout , typename Enable = void >
struct ViewOffset {
using is_mapping_plugin = std::false_type ;
};
//----------------------------------------------------------------------------
// LayoutLeft AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding
template < class Dimension >
struct ViewOffset< Dimension , Kokkos::LayoutLeft
, typename std::enable_if<( 1 >= Dimension::rank
||
0 == Dimension::rank_dynamic
)>::type >
{
using is_mapping_plugin = std::true_type ;
using is_regular = std::true_type ;
typedef size_t size_type ;
typedef Dimension dimension_type ;
typedef Kokkos::LayoutLeft array_layout ;
dimension_type m_dim ;
//----------------------------------------
// rank 1
template< typename I0 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0 ) const { return i0 ; }
// rank 2
template < typename I0 , typename I1 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0 , I1 const & i1 ) const
{ return i0 + m_dim.N0 * i1 ; }
//rank 3
template < typename I0, typename I1, typename I2 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
{
return i0 + m_dim.N0 * ( i1 + m_dim.N1 * i2 );
}
//rank 4
template < typename I0, typename I1, typename I2, typename I3 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
{
return i0 + m_dim.N0 * (
i1 + m_dim.N1 * (
i2 + m_dim.N2 * i3 ));
}
//rank 5
template < typename I0, typename I1, typename I2, typename I3
, typename I4 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4 ) const
{
return i0 + m_dim.N0 * (
i1 + m_dim.N1 * (
i2 + m_dim.N2 * (
i3 + m_dim.N3 * i4 )));
}
//rank 6
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5 ) const
{
return i0 + m_dim.N0 * (
i1 + m_dim.N1 * (
i2 + m_dim.N2 * (
i3 + m_dim.N3 * (
i4 + m_dim.N4 * i5 ))));
}
//rank 7
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5, typename I6 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5, I6 const & i6 ) const
{
return i0 + m_dim.N0 * (
i1 + m_dim.N1 * (
i2 + m_dim.N2 * (
i3 + m_dim.N3 * (
i4 + m_dim.N4 * (
i5 + m_dim.N5 * i6 )))));
}
//rank 8
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5, typename I6, typename I7 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
{
return i0 + m_dim.N0 * (
i1 + m_dim.N1 * (
i2 + m_dim.N2 * (
i3 + m_dim.N3 * (
i4 + m_dim.N4 * (
i5 + m_dim.N5 * (
i6 + m_dim.N6 * i7 ))))));
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
/* Cardinality of the domain index space */
KOKKOS_INLINE_FUNCTION
constexpr size_type size() const
{ return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
/* Span of the range space */
KOKKOS_INLINE_FUNCTION
constexpr size_type span() const
{ return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return true ; }
/* Strides of dimensions */
KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N0 * m_dim.N1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 ; }
// The stride array entry s[ rank ] is set to the total span
template< typename iType >
KOKKOS_INLINE_FUNCTION
void stride( iType * const s ) const
{
s[0] = 1 ;
if ( 0 < dimension_type::rank ) { s[1] = m_dim.N0 ; }
if ( 1 < dimension_type::rank ) { s[2] = s[1] * m_dim.N1 ; }
if ( 2 < dimension_type::rank ) { s[3] = s[2] * m_dim.N2 ; }
if ( 3 < dimension_type::rank ) { s[4] = s[3] * m_dim.N3 ; }
if ( 4 < dimension_type::rank ) { s[5] = s[4] * m_dim.N4 ; }
if ( 5 < dimension_type::rank ) { s[6] = s[5] * m_dim.N5 ; }
if ( 6 < dimension_type::rank ) { s[7] = s[6] * m_dim.N6 ; }
if ( 7 < dimension_type::rank ) { s[8] = s[7] * m_dim.N7 ; }
}
//----------------------------------------
ViewOffset() = default ;
ViewOffset( const ViewOffset & ) = default ;
ViewOffset & operator = ( const ViewOffset & ) = default ;
template< unsigned TrivialScalarSize >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( std::integral_constant<unsigned,TrivialScalarSize> const &
, size_t aN0 , unsigned aN1 , unsigned aN2 , unsigned aN3
, unsigned aN4 , unsigned aN5 , unsigned aN6 , unsigned aN7 )
: m_dim( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7 )
{}
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
{
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
// Also requires equal static dimensions ...
}
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
: m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
{
static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
, "ViewOffset LayoutLeft and LayoutRight are only compatible when rank == 1" );
}
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
: m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
{
static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
, "ViewOffset LayoutLeft and LayoutStride are only compatible when rank == 1" );
if ( rhs.m_stride.S0 != 1 ) {
Kokkos::abort("Kokkos::Experimental::ViewOffset assignment of LayoutLeft from LayoutStride requires stride == 1" );
}
}
//----------------------------------------
// Subview construction
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
- constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs
- , const size_t n0
- , const size_t
- , const size_t
- , const size_t
- , const size_t
- , const size_t
- , const size_t
- , const size_t
- )
- : m_dim( n0, 0, 0, 0, 0, 0, 0, 0 )
+ constexpr ViewOffset(
+ const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ,
+ const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub )
+ : m_dim( sub.range_extent(0), 0, 0, 0, 0, 0, 0, 0 )
{
static_assert( ( 0 == dimension_type::rank ) ||
( 1 == dimension_type::rank && 1 == dimension_type::rank_dynamic && 1 <= DimRHS::rank )
, "ViewOffset subview construction requires compatible rank" );
}
};
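// Worked example (illustrative): for a rank-2 LayoutLeft extent of
// N0 == 3 and N1 == 4 the mapping above is column-major,
//   offset(i0,i1) = i0 + 3 * i1 ,
// so operator()(1,2) == 7, stride_0() == 1, stride_1() == 3, and the
// span equals the size, 3 * 4 == 12 (this specialization never pads).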
//----------------------------------------------------------------------------
// LayoutLeft AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding
template < class Dimension >
struct ViewOffset< Dimension , Kokkos::LayoutLeft
, typename std::enable_if<( 1 < Dimension::rank
&&
0 < Dimension::rank_dynamic
)>::type >
{
using is_mapping_plugin = std::true_type ;
using is_regular = std::true_type ;
typedef size_t size_type ;
typedef Dimension dimension_type ;
typedef Kokkos::LayoutLeft array_layout ;
dimension_type m_dim ;
size_type m_stride ;
//----------------------------------------
// rank 1
template< typename I0 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0 ) const { return i0 ; }
// rank 2
template < typename I0 , typename I1 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0 , I1 const & i1 ) const
{ return i0 + m_stride * i1 ; }
//rank 3
template < typename I0, typename I1, typename I2 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
{
return i0 + m_stride * ( i1 + m_dim.N1 * i2 );
}
//rank 4
template < typename I0, typename I1, typename I2, typename I3 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
{
return i0 + m_stride * (
i1 + m_dim.N1 * (
i2 + m_dim.N2 * i3 ));
}
//rank 5
template < typename I0, typename I1, typename I2, typename I3
, typename I4 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4 ) const
{
return i0 + m_stride * (
i1 + m_dim.N1 * (
i2 + m_dim.N2 * (
i3 + m_dim.N3 * i4 )));
}
//rank 6
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5 ) const
{
return i0 + m_stride * (
i1 + m_dim.N1 * (
i2 + m_dim.N2 * (
i3 + m_dim.N3 * (
i4 + m_dim.N4 * i5 ))));
}
//rank 7
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5, typename I6 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5, I6 const & i6 ) const
{
return i0 + m_stride * (
i1 + m_dim.N1 * (
i2 + m_dim.N2 * (
i3 + m_dim.N3 * (
i4 + m_dim.N4 * (
i5 + m_dim.N5 * i6 )))));
}
//rank 8
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5, typename I6, typename I7 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
{
return i0 + m_stride * (
i1 + m_dim.N1 * (
i2 + m_dim.N2 * (
i3 + m_dim.N3 * (
i4 + m_dim.N4 * (
i5 + m_dim.N5 * (
i6 + m_dim.N6 * i7 ))))));
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
/* Cardinality of the domain index space */
KOKKOS_INLINE_FUNCTION
constexpr size_type size() const
{ return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
/* Span of the range space */
KOKKOS_INLINE_FUNCTION
constexpr size_type span() const
{ return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_stride == m_dim.N0 ; }
/* Strides of dimensions */
KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_stride ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_stride * m_dim.N1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_stride * m_dim.N1 * m_dim.N2 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 ; }
// The stride array entry s[ rank ] is set to the total span
template< typename iType >
KOKKOS_INLINE_FUNCTION
void stride( iType * const s ) const
{
s[0] = 1 ;
if ( 0 < dimension_type::rank ) { s[1] = m_stride ; }
if ( 1 < dimension_type::rank ) { s[2] = s[1] * m_dim.N1 ; }
if ( 2 < dimension_type::rank ) { s[3] = s[2] * m_dim.N2 ; }
if ( 3 < dimension_type::rank ) { s[4] = s[3] * m_dim.N3 ; }
if ( 4 < dimension_type::rank ) { s[5] = s[4] * m_dim.N4 ; }
if ( 5 < dimension_type::rank ) { s[6] = s[5] * m_dim.N5 ; }
if ( 6 < dimension_type::rank ) { s[7] = s[6] * m_dim.N6 ; }
if ( 7 < dimension_type::rank ) { s[8] = s[7] * m_dim.N7 ; }
}
//----------------------------------------
private:
template< unsigned TrivialScalarSize >
struct Padding {
enum { div = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT / ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
enum { mod = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT % ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
// If memory alignment is a multiple of the trivial scalar size then attempt to align.
enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 };
enum { div_ok = div ? div : 1 }; // Avoid modulo-by-zero in the constexpr stride() below
KOKKOS_INLINE_FUNCTION
static constexpr size_t stride( size_t const N )
{
return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) )
? N + align - ( N % div_ok ) : N ;
}
};
public:
ViewOffset() = default ;
ViewOffset( const ViewOffset & ) = default ;
ViewOffset & operator = ( const ViewOffset & ) = default ;
/* Enable padding for trivial scalar types with non-zero trivial scalar size */
template< unsigned TrivialScalarSize >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size
, size_t aN0 , unsigned aN1 , unsigned aN2 , unsigned aN3
, unsigned aN4 , unsigned aN5 , unsigned aN6 , unsigned aN7 )
: m_dim( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7 )
, m_stride( Padding<TrivialScalarSize>::stride( aN0 ) )
{}
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
, m_stride( rhs.stride_1() )
{
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
// Also requires equal static dimensions ...
}
//----------------------------------------
// Subview construction
+ // This subview must have 2 == rank and 2 == rank_dynamic
+ // because it carries only stride #0.
+ // The source dimension #0 must be non-zero for a stride-one leading dimension.
+ // At most one subsequent dimension can be non-zero.
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
- constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs
- , const size_t aN0
- , const size_t aN1
- , const size_t aN2
- , const size_t aN3
- , const size_t aN4
- , const size_t aN5
- , const size_t aN6
- , const size_t aN7
- )
- : m_dim( aN0
- , ( 1 < DimRHS::rank && aN1 ? aN1 :
- ( 2 < DimRHS::rank && aN2 ? aN2 :
- ( 3 < DimRHS::rank && aN3 ? aN3 :
- ( 4 < DimRHS::rank && aN4 ? aN4 :
- ( 5 < DimRHS::rank && aN5 ? aN5 :
- ( 6 < DimRHS::rank && aN6 ? aN6 :
- ( 7 < DimRHS::rank && aN7 ? aN7 : 0 )))))))
+ constexpr ViewOffset
+ ( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs ,
+ const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub )
+ : m_dim( sub.range_extent(0)
+ , sub.range_extent(1)
, 0, 0, 0, 0, 0, 0 )
- , m_stride( ( 1 < DimRHS::rank && aN1 ? rhs.stride_1() :
- ( 2 < DimRHS::rank && aN2 ? rhs.stride_2() :
- ( 3 < DimRHS::rank && aN3 ? rhs.stride_3() :
- ( 4 < DimRHS::rank && aN4 ? rhs.stride_4() :
- ( 5 < DimRHS::rank && aN5 ? rhs.stride_5() :
- ( 6 < DimRHS::rank && aN6 ? rhs.stride_6() :
- ( 7 < DimRHS::rank && aN7 ? rhs.stride_7() : 0 ))))))) )
+ , m_stride( ( 1 == sub.range_index(1) ? rhs.stride_1() :
+ ( 2 == sub.range_index(1) ? rhs.stride_2() :
+ ( 3 == sub.range_index(1) ? rhs.stride_3() :
+ ( 4 == sub.range_index(1) ? rhs.stride_4() :
+ ( 5 == sub.range_index(1) ? rhs.stride_5() :
+ ( 6 == sub.range_index(1) ? rhs.stride_6() :
+ ( 7 == sub.range_index(1) ? rhs.stride_7() : 0 ))))))))
{
- // This subview must be 2 == rank and 2 == rank_dynamic
- // due to only having stride #0.
- // The source dimension #0 must be non-zero for stride-one leading dimension.
- // At most subsequent dimension can be non-zero.
-
static_assert( ( 2 == dimension_type::rank ) &&
( 2 == dimension_type::rank_dynamic ) &&
( 2 <= DimRHS::rank )
, "ViewOffset subview construction requires compatible rank" );
}
};
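// Worked example (illustrative, assuming MEMORY_ALIGNMENT == 64 bytes
// and an 8-byte scalar, so Padding<8>::align == 8): once the leading
// extent exceeds the alignment threshold, N0 == 1001 is padded to
//   1001 + 8 - ( 1001 % 8 ) == 1008 ,
// while N0 == 1000 is already a multiple of 8 and is left unchanged;
// m_stride then holds the padded leading dimension.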
//----------------------------------------------------------------------------
// LayoutRight AND ( 1 >= rank OR 0 == rank_dynamic ) : no padding / striding
template < class Dimension >
struct ViewOffset< Dimension , Kokkos::LayoutRight
, typename std::enable_if<( 1 >= Dimension::rank
||
0 == Dimension::rank_dynamic
)>::type >
{
using is_mapping_plugin = std::true_type ;
using is_regular = std::true_type ;
typedef size_t size_type ;
typedef Dimension dimension_type ;
typedef Kokkos::LayoutRight array_layout ;
dimension_type m_dim ;
//----------------------------------------
// rank 1
template< typename I0 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0 ) const { return i0 ; }
// rank 2
template < typename I0 , typename I1 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0 , I1 const & i1 ) const
{ return i1 + m_dim.N1 * i0 ; }
//rank 3
template < typename I0, typename I1, typename I2 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
{
return i2 + m_dim.N2 * ( i1 + m_dim.N1 * ( i0 ));
}
//rank 4
template < typename I0, typename I1, typename I2, typename I3 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
{
return i3 + m_dim.N3 * (
i2 + m_dim.N2 * (
i1 + m_dim.N1 * ( i0 )));
}
//rank 5
template < typename I0, typename I1, typename I2, typename I3
, typename I4 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4 ) const
{
return i4 + m_dim.N4 * (
i3 + m_dim.N3 * (
i2 + m_dim.N2 * (
i1 + m_dim.N1 * ( i0 ))));
}
//rank 6
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5 ) const
{
return i5 + m_dim.N5 * (
i4 + m_dim.N4 * (
i3 + m_dim.N3 * (
i2 + m_dim.N2 * (
i1 + m_dim.N1 * ( i0 )))));
}
//rank 7
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5, typename I6 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5, I6 const & i6 ) const
{
return i6 + m_dim.N6 * (
i5 + m_dim.N5 * (
i4 + m_dim.N4 * (
i3 + m_dim.N3 * (
i2 + m_dim.N2 * (
i1 + m_dim.N1 * ( i0 ))))));
}
//rank 8
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5, typename I6, typename I7 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
{
return i7 + m_dim.N7 * (
i6 + m_dim.N6 * (
i5 + m_dim.N5 * (
i4 + m_dim.N4 * (
i3 + m_dim.N3 * (
i2 + m_dim.N2 * (
i1 + m_dim.N1 * ( i0 )))))));
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
/* Cardinality of the domain index space */
KOKKOS_INLINE_FUNCTION
constexpr size_type size() const
{ return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
/* Span of the range space */
KOKKOS_INLINE_FUNCTION
constexpr size_type span() const
{ return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return true ; }
/* Strides of dimensions */
KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N7 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N7 * m_dim.N6 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 * m_dim.N1 ; }
// The stride array entry s[ rank ] is set to the total span
template< typename iType >
KOKKOS_INLINE_FUNCTION
void stride( iType * const s ) const
{
size_type n = 1 ;
if ( 7 < dimension_type::rank ) { s[7] = n ; n *= m_dim.N7 ; }
if ( 6 < dimension_type::rank ) { s[6] = n ; n *= m_dim.N6 ; }
if ( 5 < dimension_type::rank ) { s[5] = n ; n *= m_dim.N5 ; }
if ( 4 < dimension_type::rank ) { s[4] = n ; n *= m_dim.N4 ; }
if ( 3 < dimension_type::rank ) { s[3] = n ; n *= m_dim.N3 ; }
if ( 2 < dimension_type::rank ) { s[2] = n ; n *= m_dim.N2 ; }
if ( 1 < dimension_type::rank ) { s[1] = n ; n *= m_dim.N1 ; }
if ( 0 < dimension_type::rank ) { s[0] = n ; }
s[dimension_type::rank] = n * m_dim.N0 ;
}
//----------------------------------------
ViewOffset() = default ;
ViewOffset( const ViewOffset & ) = default ;
ViewOffset & operator = ( const ViewOffset & ) = default ;
template< unsigned TrivialScalarSize >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( std::integral_constant<unsigned,TrivialScalarSize> const &
, size_t aN0 , unsigned aN1 , unsigned aN2 , unsigned aN3
, unsigned aN4 , unsigned aN5 , unsigned aN6 , unsigned aN7 )
: m_dim( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7 )
{}
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs )
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
{
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
// Also requires equal static dimensions ...
}
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
: m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
{
static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
, "ViewOffset LayoutRight and LayoutLeft are only compatible when rank == 1" );
}
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutStride , void > & rhs )
: m_dim( rhs.m_dim.N0, 0, 0, 0, 0, 0, 0, 0 )
{
static_assert( DimRHS::rank == 1 && dimension_type::rank == 1 && dimension_type::rank_dynamic == 1
, "ViewOffset LayoutLeft and LayoutStride are only compatible when rank == 1" );
if ( rhs.m_stride.S0 != 1 ) {
Kokkos::abort("Kokkos::Experimental::ViewOffset assignment of LayoutRight from LayoutStride requires stride == 1" );
}
}
//----------------------------------------
// Subview construction
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
- constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs
- , const size_t n0
- , const size_t
- , const size_t
- , const size_t
- , const size_t
- , const size_t
- , const size_t
- , const size_t
- )
- : m_dim( n0, 0, 0, 0, 0, 0, 0, 0 )
+ constexpr ViewOffset
+ ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs
+ , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub
+ )
+ : m_dim( sub.range_extent(0) , 0, 0, 0, 0, 0, 0, 0 )
{
static_assert( ( 0 == dimension_type::rank ) ||
( 1 == dimension_type::rank && 1 == dimension_type::rank_dynamic && 1 <= DimRHS::rank )
, "ViewOffset subview construction requires compatible rank" );
}
};
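// Worked example (illustrative): for a rank-3 LayoutRight extent of
// N0 == 2, N1 == 3, N2 == 4 the mapping above is row-major,
//   offset(i0,i1,i2) = i2 + 4 * ( i1 + 3 * i0 ) ,
// so operator()(1,2,3) == 23, the last index is stride-one, and
// stride_0() == N1 * N2 == 12 (unused trailing extents are one).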
//----------------------------------------------------------------------------
// LayoutRight AND ( 1 < rank AND 0 < rank_dynamic ) : has padding / striding
template < class Dimension >
struct ViewOffset< Dimension , Kokkos::LayoutRight
, typename std::enable_if<( 1 < Dimension::rank
&&
0 < Dimension::rank_dynamic
)>::type >
{
using is_mapping_plugin = std::true_type ;
using is_regular = std::true_type ;
typedef size_t size_type ;
typedef Dimension dimension_type ;
typedef Kokkos::LayoutRight array_layout ;
dimension_type m_dim ;
size_type m_stride ;
//----------------------------------------
// rank 1
template< typename I0 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0 ) const { return i0 ; }
// rank 2
template < typename I0 , typename I1 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0 , I1 const & i1 ) const
{ return i1 + i0 * m_stride ; }
//rank 3
template < typename I0, typename I1, typename I2 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
{ return i2 + m_dim.N2 * ( i1 ) + i0 * m_stride ; }
//rank 4
template < typename I0, typename I1, typename I2, typename I3 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
{
return i3 + m_dim.N3 * (
i2 + m_dim.N2 * ( i1 )) +
i0 * m_stride ;
}
//rank 5
template < typename I0, typename I1, typename I2, typename I3
, typename I4 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4 ) const
{
return i4 + m_dim.N4 * (
i3 + m_dim.N3 * (
i2 + m_dim.N2 * ( i1 ))) +
i0 * m_stride ;
}
//rank 6
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5 ) const
{
return i5 + m_dim.N5 * (
i4 + m_dim.N4 * (
i3 + m_dim.N3 * (
i2 + m_dim.N2 * ( i1 )))) +
i0 * m_stride ;
}
//rank 7
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5, typename I6 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5, I6 const & i6 ) const
{
return i6 + m_dim.N6 * (
i5 + m_dim.N5 * (
i4 + m_dim.N4 * (
i3 + m_dim.N3 * (
i2 + m_dim.N2 * ( i1 ))))) +
i0 * m_stride ;
}
//rank 8
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5, typename I6, typename I7 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
{
return i7 + m_dim.N7 * (
i6 + m_dim.N6 * (
i5 + m_dim.N5 * (
i4 + m_dim.N4 * (
i3 + m_dim.N3 * (
i2 + m_dim.N2 * ( i1 )))))) +
i0 * m_stride ;
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
/* Cardinality of the domain index space */
KOKKOS_INLINE_FUNCTION
constexpr size_type size() const
{ return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
/* Span of the range space */
KOKKOS_INLINE_FUNCTION
constexpr size_type span() const
{ return m_dim.N0 * m_stride ; }
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
{ return m_stride == m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 * m_dim.N1 ; }
/* Strides of dimensions */
KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_dim.N7 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_dim.N7 * m_dim.N6 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 * m_dim.N2 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_stride ; }
// The stride array entry s[ rank ] is set to the total span
template< typename iType >
KOKKOS_INLINE_FUNCTION
void stride( iType * const s ) const
{
size_type n = 1 ;
if ( 7 < dimension_type::rank ) { s[7] = n ; n *= m_dim.N7 ; }
if ( 6 < dimension_type::rank ) { s[6] = n ; n *= m_dim.N6 ; }
if ( 5 < dimension_type::rank ) { s[5] = n ; n *= m_dim.N5 ; }
if ( 4 < dimension_type::rank ) { s[4] = n ; n *= m_dim.N4 ; }
if ( 3 < dimension_type::rank ) { s[3] = n ; n *= m_dim.N3 ; }
if ( 2 < dimension_type::rank ) { s[2] = n ; n *= m_dim.N2 ; }
if ( 1 < dimension_type::rank ) { s[1] = n ; }
if ( 0 < dimension_type::rank ) { s[0] = m_stride ; }
s[dimension_type::rank] = m_stride * m_dim.N0 ;
}
//----------------------------------------
private:
template< unsigned TrivialScalarSize >
struct Padding {
enum { div = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT / ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
enum { mod = TrivialScalarSize == 0 ? 0 : Kokkos::Impl::MEMORY_ALIGNMENT % ( TrivialScalarSize ? TrivialScalarSize : 1 ) };
// If memory alignment is a multiple of the trivial scalar size then attempt to align.
enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 };
enum { div_ok = div ? div : 1 }; // Avoid modulo-by-zero in the constexpr stride() below
KOKKOS_INLINE_FUNCTION
static constexpr size_t stride( size_t const N )
{
return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) )
? N + align - ( N % div_ok ) : N ;
}
};
public:
ViewOffset() = default ;
ViewOffset( const ViewOffset & ) = default ;
ViewOffset & operator = ( const ViewOffset & ) = default ;
/* Enable padding for trivial scalar types with non-zero trivial scalar size. */
template< unsigned TrivialScalarSize >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( std::integral_constant<unsigned,TrivialScalarSize> const & padding_type_size
, size_t aN0 , unsigned aN1 , unsigned aN2 , unsigned aN3
, unsigned aN4 , unsigned aN5 , unsigned aN6 , unsigned aN7 )
: m_dim( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7 )
, m_stride( Padding<TrivialScalarSize>::
stride( /* 2 <= rank */
m_dim.N1 * ( dimension_type::rank == 2 ? 1 :
m_dim.N2 * ( dimension_type::rank == 3 ? 1 :
m_dim.N3 * ( dimension_type::rank == 4 ? 1 :
m_dim.N4 * ( dimension_type::rank == 5 ? 1 :
m_dim.N5 * ( dimension_type::rank == 6 ? 1 :
m_dim.N6 * ( dimension_type::rank == 7 ? 1 : m_dim.N7 )))))) ))
{}
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutLeft , void > & rhs )
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
, m_stride( rhs.stride_0() )
{
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
// Also requires equal static dimensions ...
}
//----------------------------------------
// Subview construction
// Last dimension must be non-zero
template< class DimRHS >
KOKKOS_INLINE_FUNCTION
- constexpr ViewOffset( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs
- , const size_t aN0
- , const size_t aN1
- , const size_t aN2
- , const size_t aN3
- , const size_t aN4
- , const size_t aN5
- , const size_t aN6
- , const size_t aN7
- )
- : m_dim( // N0 == First non-zero dimension before the last dimension.
- ( 1 < DimRHS::rank && aN0 ? aN0 :
- ( 2 < DimRHS::rank && aN1 ? aN1 :
- ( 3 < DimRHS::rank && aN2 ? aN2 :
- ( 4 < DimRHS::rank && aN3 ? aN3 :
- ( 5 < DimRHS::rank && aN4 ? aN4 :
- ( 6 < DimRHS::rank && aN5 ? aN5 :
- ( 7 < DimRHS::rank && aN6 ? aN6 : 0 )))))))
- , // N1 == Last dimension.
- ( 2 == DimRHS::rank ? aN1 :
- ( 3 == DimRHS::rank ? aN2 :
- ( 4 == DimRHS::rank ? aN3 :
- ( 5 == DimRHS::rank ? aN4 :
- ( 6 == DimRHS::rank ? aN5 :
- ( 7 == DimRHS::rank ? aN6 : aN7 ))))))
+ constexpr ViewOffset
+ ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs
+ , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub
+ )
+ : m_dim( sub.range_extent(0)
+ , sub.range_extent(1)
, 0, 0, 0, 0, 0, 0 )
- , m_stride( ( 1 < DimRHS::rank && aN0 ? rhs.stride_0() :
- ( 2 < DimRHS::rank && aN1 ? rhs.stride_1() :
- ( 3 < DimRHS::rank && aN2 ? rhs.stride_2() :
- ( 4 < DimRHS::rank && aN3 ? rhs.stride_3() :
- ( 5 < DimRHS::rank && aN4 ? rhs.stride_4() :
- ( 6 < DimRHS::rank && aN5 ? rhs.stride_5() :
- ( 7 < DimRHS::rank && aN6 ? rhs.stride_6() : 0 ))))))) )
+ , m_stride( 0 == sub.range_index(0) ? rhs.stride_0() : (
+ 1 == sub.range_index(0) ? rhs.stride_1() : (
+ 2 == sub.range_index(0) ? rhs.stride_2() : (
+ 3 == sub.range_index(0) ? rhs.stride_3() : (
+ 4 == sub.range_index(0) ? rhs.stride_4() : (
+ 5 == sub.range_index(0) ? rhs.stride_5() : (
+ 6 == sub.range_index(0) ? rhs.stride_6() : 0 )))))))
{
// This subview must have 2 == rank and 2 == rank_dynamic
// because it carries only stride #0.
// The source dimension #0 must be non-zero for a stride-one leading dimension.
// At most one subsequent dimension can be non-zero.
static_assert( ( 2 == dimension_type::rank ) &&
( 2 == dimension_type::rank_dynamic ) &&
( 2 <= DimRHS::rank )
, "ViewOffset subview construction requires compatible rank" );
}
};
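// Illustrative note (not part of the original header): in the subview
// constructor above, sub.range_index(0) reports which source dimension
// became the leading destination dimension, so the retained stride is
// looked up from the matching rhs.stride_*() rather than recomputed;
// e.g. a rank-2 subview keeping source dimensions #1 and #3 of a
// rank-4 LayoutRight view inherits m_stride == rhs.stride_1().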
//----------------------------------------------------------------------------
/* Strided array layout only makes sense for 0 < rank */
template< unsigned Rank >
struct ViewStride ;
template<>
struct ViewStride<1> {
size_t S0 ;
enum { S1 = 0 , S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
ViewStride() = default ;
ViewStride( const ViewStride & ) = default ;
ViewStride & operator = ( const ViewStride & ) = default ;
KOKKOS_INLINE_FUNCTION
constexpr ViewStride( size_t aS0 , size_t , size_t , size_t
, size_t , size_t , size_t , size_t )
: S0( aS0 )
{}
};
template<>
struct ViewStride<2> {
size_t S0 , S1 ;
enum { S2 = 0 , S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
ViewStride() = default ;
ViewStride( const ViewStride & ) = default ;
ViewStride & operator = ( const ViewStride & ) = default ;
KOKKOS_INLINE_FUNCTION
constexpr ViewStride( size_t aS0 , size_t aS1 , size_t , size_t
, size_t , size_t , size_t , size_t )
: S0( aS0 ) , S1( aS1 )
{}
};
template<>
struct ViewStride<3> {
size_t S0 , S1 , S2 ;
enum { S3 = 0 , S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
ViewStride() = default ;
ViewStride( const ViewStride & ) = default ;
ViewStride & operator = ( const ViewStride & ) = default ;
KOKKOS_INLINE_FUNCTION
constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t
, size_t , size_t , size_t , size_t )
: S0( aS0 ) , S1( aS1 ) , S2( aS2 )
{}
};
template<>
struct ViewStride<4> {
size_t S0 , S1 , S2 , S3 ;
enum { S4 = 0 , S5 = 0 , S6 = 0 , S7 = 0 };
ViewStride() = default ;
ViewStride( const ViewStride & ) = default ;
ViewStride & operator = ( const ViewStride & ) = default ;
KOKKOS_INLINE_FUNCTION
constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3
, size_t , size_t , size_t , size_t )
: S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
{}
};
template<>
struct ViewStride<5> {
size_t S0 , S1 , S2 , S3 , S4 ;
enum { S5 = 0 , S6 = 0 , S7 = 0 };
ViewStride() = default ;
ViewStride( const ViewStride & ) = default ;
ViewStride & operator = ( const ViewStride & ) = default ;
KOKKOS_INLINE_FUNCTION
constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3
, size_t aS4 , size_t , size_t , size_t )
: S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
, S4( aS4 )
{}
};
template<>
struct ViewStride<6> {
size_t S0 , S1 , S2 , S3 , S4 , S5 ;
enum { S6 = 0 , S7 = 0 };
ViewStride() = default ;
ViewStride( const ViewStride & ) = default ;
ViewStride & operator = ( const ViewStride & ) = default ;
KOKKOS_INLINE_FUNCTION
constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3
, size_t aS4 , size_t aS5 , size_t , size_t )
: S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
, S4( aS4 ) , S5( aS5 )
{}
};
template<>
struct ViewStride<7> {
size_t S0 , S1 , S2 , S3 , S4 , S5 , S6 ;
enum { S7 = 0 };
ViewStride() = default ;
ViewStride( const ViewStride & ) = default ;
ViewStride & operator = ( const ViewStride & ) = default ;
KOKKOS_INLINE_FUNCTION
constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3
, size_t aS4 , size_t aS5 , size_t aS6 , size_t )
: S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
, S4( aS4 ) , S5( aS5 ) , S6( aS6 )
{}
};
template<>
struct ViewStride<8> {
size_t S0 , S1 , S2 , S3 , S4 , S5 , S6 , S7 ;
ViewStride() = default ;
ViewStride( const ViewStride & ) = default ;
ViewStride & operator = ( const ViewStride & ) = default ;
KOKKOS_INLINE_FUNCTION
constexpr ViewStride( size_t aS0 , size_t aS1 , size_t aS2 , size_t aS3
, size_t aS4 , size_t aS5 , size_t aS6 , size_t aS7 )
: S0( aS0 ) , S1( aS1 ) , S2( aS2 ) , S3( aS3 )
, S4( aS4 ) , S5( aS5 ) , S6( aS6 ) , S7( aS7 )
{}
};
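// Illustrative note (not part of the original header): each
// ViewStride<Rank> stores exactly Rank runtime strides and exposes the
// unused entries as zero-valued enums, so the rank-general LayoutStride
// ViewOffset below can reference S0..S7 uniformly without paying for
// unused members; e.g. ViewStride<2> holds S0 and S1 as data while
// S2..S7 compile away to the constant 0.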
template < class Dimension >
struct ViewOffset< Dimension , Kokkos::LayoutStride
, typename std::enable_if<( 0 < Dimension::rank )>::type >
{
private:
typedef ViewStride< Dimension::rank > stride_type ;
public:
using is_mapping_plugin = std::true_type ;
using is_regular = std::true_type ;
typedef size_t size_type ;
typedef Dimension dimension_type ;
typedef Kokkos::LayoutStride array_layout ;
dimension_type m_dim ;
stride_type m_stride ;
//----------------------------------------
// rank 1
template< typename I0 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0 ) const
{
return i0 * m_stride.S0 ;
}
// rank 2
template < typename I0 , typename I1 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0 , I1 const & i1 ) const
{
return i0 * m_stride.S0 +
i1 * m_stride.S1 ;
}
//rank 3
template < typename I0, typename I1, typename I2 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2 ) const
{
return i0 * m_stride.S0 +
i1 * m_stride.S1 +
i2 * m_stride.S2 ;
}
//rank 4
template < typename I0, typename I1, typename I2, typename I3 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3 ) const
{
return i0 * m_stride.S0 +
i1 * m_stride.S1 +
i2 * m_stride.S2 +
i3 * m_stride.S3 ;
}
//rank 5
template < typename I0, typename I1, typename I2, typename I3
, typename I4 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4 ) const
{
return i0 * m_stride.S0 +
i1 * m_stride.S1 +
i2 * m_stride.S2 +
i3 * m_stride.S3 +
i4 * m_stride.S4 ;
}
//rank 6
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5 ) const
{
return i0 * m_stride.S0 +
i1 * m_stride.S1 +
i2 * m_stride.S2 +
i3 * m_stride.S3 +
i4 * m_stride.S4 +
i5 * m_stride.S5 ;
}
//rank 7
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5, typename I6 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5, I6 const & i6 ) const
{
return i0 * m_stride.S0 +
i1 * m_stride.S1 +
i2 * m_stride.S2 +
i3 * m_stride.S3 +
i4 * m_stride.S4 +
i5 * m_stride.S5 +
i6 * m_stride.S6 ;
}
//rank 8
template < typename I0, typename I1, typename I2, typename I3
, typename I4, typename I5, typename I6, typename I7 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0, I1 const & i1, I2 const & i2, I3 const & i3
, I4 const & i4, I5 const & i5, I6 const & i6, I7 const & i7 ) const
{
return i0 * m_stride.S0 +
i1 * m_stride.S1 +
i2 * m_stride.S2 +
i3 * m_stride.S3 +
i4 * m_stride.S4 +
i5 * m_stride.S5 +
i6 * m_stride.S6 +
i7 * m_stride.S7 ;
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return m_dim.N2 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return m_dim.N3 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return m_dim.N4 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return m_dim.N5 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return m_dim.N6 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return m_dim.N7 ; }
/* Cardinality of the domain index space */
KOKKOS_INLINE_FUNCTION
constexpr size_type size() const
{ return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7 ; }
private:
KOKKOS_INLINE_FUNCTION
static constexpr size_type Max( size_type lhs , size_type rhs )
{ return lhs < rhs ? rhs : lhs ; }
public:
/* Span of the range space, largest stride * dimension */
KOKKOS_INLINE_FUNCTION
constexpr size_type span() const
{
return Max( m_dim.N0 * m_stride.S0 ,
Max( m_dim.N1 * m_stride.S1 ,
Max( m_dim.N2 * m_stride.S2 ,
Max( m_dim.N3 * m_stride.S3 ,
Max( m_dim.N4 * m_stride.S4 ,
Max( m_dim.N5 * m_stride.S5 ,
Max( m_dim.N6 * m_stride.S6 ,
m_dim.N7 * m_stride.S7 )))))));
}
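// Worked example (illustrative): dimensions {3,4} with strides {1,5}
// give span() == Max( 3 * 1 , 4 * 5 ) == 20 while size() == 12, so
// span_is_contiguous() below correctly reports false for this gapped
// strided layout.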
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return span() == size(); }
/* Strides of dimensions */
KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return m_stride.S0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return m_stride.S1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return m_stride.S2 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return m_stride.S3 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return m_stride.S4 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return m_stride.S5 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return m_stride.S6 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return m_stride.S7 ; }
// The stride array entry s[ rank ] is set to the total span
template< typename iType >
KOKKOS_INLINE_FUNCTION
void stride( iType * const s ) const
{
if ( 0 < dimension_type::rank ) { s[0] = m_stride.S0 ; }
if ( 1 < dimension_type::rank ) { s[1] = m_stride.S1 ; }
if ( 2 < dimension_type::rank ) { s[2] = m_stride.S2 ; }
if ( 3 < dimension_type::rank ) { s[3] = m_stride.S3 ; }
if ( 4 < dimension_type::rank ) { s[4] = m_stride.S4 ; }
if ( 5 < dimension_type::rank ) { s[5] = m_stride.S5 ; }
if ( 6 < dimension_type::rank ) { s[6] = m_stride.S6 ; }
if ( 7 < dimension_type::rank ) { s[7] = m_stride.S7 ; }
s[dimension_type::rank] = span();
}
//----------------------------------------
ViewOffset() = default ;
ViewOffset( const ViewOffset & ) = default ;
ViewOffset & operator = ( const ViewOffset & ) = default ;
KOKKOS_INLINE_FUNCTION
ViewOffset( const Kokkos::LayoutStride & rhs )
: m_dim( rhs.dimension[0] , rhs.dimension[1] , rhs.dimension[2] , rhs.dimension[3]
, rhs.dimension[4] , rhs.dimension[5] , rhs.dimension[6] , rhs.dimension[7] )
, m_stride( rhs.stride[0] , rhs.stride[1] , rhs.stride[2] , rhs.stride[3]
, rhs.stride[4] , rhs.stride[5] , rhs.stride[6] , rhs.stride[7] )
{}
template< class DimRHS , class LayoutRHS >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( const ViewOffset< DimRHS , LayoutRHS , void > & rhs )
: m_dim( rhs.m_dim.N0 , rhs.m_dim.N1 , rhs.m_dim.N2 , rhs.m_dim.N3
, rhs.m_dim.N4 , rhs.m_dim.N5 , rhs.m_dim.N6 , rhs.m_dim.N7 )
, m_stride( rhs.stride_0() , rhs.stride_1() , rhs.stride_2() , rhs.stride_3()
, rhs.stride_4() , rhs.stride_5() , rhs.stride_6() , rhs.stride_7() )
{
static_assert( int(DimRHS::rank) == int(dimension_type::rank) , "ViewOffset assignment requires equal rank" );
// Also requires equal static dimensions ...
}
//----------------------------------------
// Subview construction
private:
- KOKKOS_INLINE_FUNCTION
- static constexpr unsigned
- count_non_zero( const size_t aN0 = 0
- , const size_t aN1 = 0
- , const size_t aN2 = 0
- , const size_t aN3 = 0
- , const size_t aN4 = 0
- , const size_t aN5 = 0
- , const size_t aN6 = 0
- , const size_t aN7 = 0
- )
- {
- return ( aN0 ? 1 : 0 ) +
- ( aN1 ? 1 : 0 ) +
- ( aN2 ? 1 : 0 ) +
- ( aN3 ? 1 : 0 ) +
- ( aN4 ? 1 : 0 ) +
- ( aN5 ? 1 : 0 ) +
- ( aN6 ? 1 : 0 ) +
- ( aN7 ? 1 : 0 );
- }
-
- template< unsigned Rank , unsigned I >
- KOKKOS_INLINE_FUNCTION
- static constexpr size_t
- get_non_zero( const size_t aN0
- , const size_t aN1
- , const size_t aN2
- , const size_t aN3
- , const size_t aN4
- , const size_t aN5
- , const size_t aN6
- , const size_t aN7
- )
- {
- return ( 0 < Rank && I < 1 && aN0 ? aN0 :
- ( 1 < Rank && I < 2 && I == count_non_zero(aN0) && aN1 ? aN1 :
- ( 2 < Rank && I < 3 && I == count_non_zero(aN0,aN1) && aN2 ? aN2 :
- ( 3 < Rank && I < 4 && I == count_non_zero(aN0,aN1,aN2) && aN3 ? aN3 :
- ( 4 < Rank && I < 5 && I == count_non_zero(aN0,aN1,aN2,aN3) && aN4 ? aN4 :
- ( 5 < Rank && I < 6 && I == count_non_zero(aN0,aN1,aN2,aN3,aN4) && aN5 ? aN5 :
- ( 6 < Rank && I < 7 && I == count_non_zero(aN0,aN1,aN2,aN3,aN4,aN5) && aN6 ? aN6 :
- ( 7 < Rank && I < 8 && I == count_non_zero(aN0,aN1,aN2,aN3,aN4,aN5,aN6) && aN7 ? aN7 : 0 ))))))));
- }
-
- template< unsigned Rank , unsigned I , class DimRHS , class LayoutRHS >
- KOKKOS_INLINE_FUNCTION
- static constexpr size_t
- get_non_zero( const size_t aN0 , const size_t aN1 , const size_t aN2 , const size_t aN3
- , const size_t aN4 , const size_t aN5 , const size_t aN6 , const size_t aN7
- , const ViewOffset< DimRHS , LayoutRHS , void > & rhs )
+ template< class DimRHS , class LayoutRHS >
+ KOKKOS_INLINE_FUNCTION static
+ constexpr size_t stride
+ ( unsigned r , const ViewOffset< DimRHS , LayoutRHS , void > & rhs )
{
- return ( 0 < Rank && I < 1 && aN0 ? rhs.stride_0() :
- ( 1 < Rank && I < 2 && I == count_non_zero(aN0) && aN1 ? rhs.stride_1() :
- ( 2 < Rank && I < 3 && I == count_non_zero(aN0,aN1) && aN2 ? rhs.stride_2() :
- ( 3 < Rank && I < 4 && I == count_non_zero(aN0,aN1,aN2) && aN3 ? rhs.stride_3() :
- ( 4 < Rank && I < 5 && I == count_non_zero(aN0,aN1,aN2,aN3) && aN4 ? rhs.stride_4() :
- ( 5 < Rank && I < 6 && I == count_non_zero(aN0,aN1,aN2,aN3,aN4) && aN5 ? rhs.stride_5() :
- ( 6 < Rank && I < 7 && I == count_non_zero(aN0,aN1,aN2,aN3,aN4,aN5) && aN6 ? rhs.stride_6() :
- ( 7 < Rank && I < 8 && I == count_non_zero(aN0,aN1,aN2,aN3,aN4,aN5,aN6) && aN7 ? rhs.stride_7() : 0 ))))))));
+ return r > 7 ? 0 : (
+ r == 0 ? rhs.stride_0() : (
+ r == 1 ? rhs.stride_1() : (
+ r == 2 ? rhs.stride_2() : (
+ r == 3 ? rhs.stride_3() : (
+ r == 4 ? rhs.stride_4() : (
+ r == 5 ? rhs.stride_5() : (
+ r == 6 ? rhs.stride_6() : rhs.stride_7() )))))));
}
-
public:
template< class DimRHS , class LayoutRHS >
KOKKOS_INLINE_FUNCTION
- constexpr ViewOffset( const ViewOffset< DimRHS , LayoutRHS , void > & rhs
- , const size_t aN0
- , const size_t aN1
- , const size_t aN2
- , const size_t aN3
- , const size_t aN4
- , const size_t aN5
- , const size_t aN6
- , const size_t aN7
- )
- // Contract the non-zero dimensions
- : m_dim( ViewOffset::template get_non_zero<DimRHS::rank,0>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7 )
- , ViewOffset::template get_non_zero<DimRHS::rank,1>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7 )
- , ViewOffset::template get_non_zero<DimRHS::rank,2>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7 )
- , ViewOffset::template get_non_zero<DimRHS::rank,3>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7 )
- , ViewOffset::template get_non_zero<DimRHS::rank,4>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7 )
- , ViewOffset::template get_non_zero<DimRHS::rank,5>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7 )
- , ViewOffset::template get_non_zero<DimRHS::rank,6>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7 )
- , ViewOffset::template get_non_zero<DimRHS::rank,7>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7 )
+ constexpr ViewOffset
+ ( const ViewOffset< DimRHS , LayoutRHS , void > & rhs
+ , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub
+ )
+ // range_extent(r) returns 0 when dimension_type::rank <= r
+ : m_dim( sub.range_extent(0)
+ , sub.range_extent(1)
+ , sub.range_extent(2)
+ , sub.range_extent(3)
+ , sub.range_extent(4)
+ , sub.range_extent(5)
+ , sub.range_extent(6)
+ , sub.range_extent(7)
)
- , m_stride( ViewOffset::template get_non_zero<DimRHS::rank,0>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7, rhs )
- , ViewOffset::template get_non_zero<DimRHS::rank,1>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7, rhs )
- , ViewOffset::template get_non_zero<DimRHS::rank,2>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7, rhs )
- , ViewOffset::template get_non_zero<DimRHS::rank,3>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7, rhs )
- , ViewOffset::template get_non_zero<DimRHS::rank,4>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7, rhs )
- , ViewOffset::template get_non_zero<DimRHS::rank,5>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7, rhs )
- , ViewOffset::template get_non_zero<DimRHS::rank,6>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7, rhs )
- , ViewOffset::template get_non_zero<DimRHS::rank,7>( aN0, aN1, aN2, aN3, aN4, aN5, aN6, aN7, rhs )
+ // range_index(r) returns ~0u when dimension_type::rank <= r
+ , m_stride( stride( sub.range_index(0), rhs )
+ , stride( sub.range_index(1), rhs )
+ , stride( sub.range_index(2), rhs )
+ , stride( sub.range_index(3), rhs )
+ , stride( sub.range_index(4), rhs )
+ , stride( sub.range_index(5), rhs )
+ , stride( sub.range_index(6), rhs )
+ , stride( sub.range_index(7), rhs )
)
- {
- }
-
- //----------------------------------------
-};
-
-}}} // namespace Kokkos::Experimental::Impl
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
-struct ALL_t {
- KOKKOS_INLINE_FUNCTION
- constexpr const ALL_t & operator()() const { return *this ; }
-};
-
-template< class T >
-struct ViewOffsetRange {
-
- static_assert( std::is_integral<T>::value , "Non-range must be an integral type" );
-
- enum { is_range = false };
-
- KOKKOS_INLINE_FUNCTION static
- size_t dimension( size_t const , T const & ) { return 0 ; }
-
- KOKKOS_INLINE_FUNCTION static
- size_t begin( T const & i ) { return size_t(i) ; }
-};
-
-template<>
-struct ViewOffsetRange<void> {
- enum { is_range = false };
-};
-
-template<>
-struct ViewOffsetRange< Kokkos::Experimental::Impl::ALL_t > {
- enum { is_range = true };
-
- KOKKOS_INLINE_FUNCTION static
- size_t dimension( size_t const n , Experimental::Impl::ALL_t const & ) { return n ; }
-
- KOKKOS_INLINE_FUNCTION static
- size_t begin( Experimental::Impl::ALL_t const & ) { return 0 ; }
-};
-
-template< typename iType >
-struct ViewOffsetRange< std::pair<iType,iType> > {
-
- static_assert( std::is_integral<iType>::value , "Range bounds must be an integral type" );
-
- enum { is_range = true };
-
- KOKKOS_INLINE_FUNCTION static
- size_t dimension( size_t const n , std::pair<iType,iType> const & r )
- { return ( size_t(r.first) < size_t(r.second) && size_t(r.second) <= n ) ? size_t(r.second) - size_t(r.first) : 0 ; }
-
- KOKKOS_INLINE_FUNCTION static
- size_t begin( std::pair<iType,iType> const & r ) { return size_t(r.first) ; }
-};
-
-template< typename iType >
-struct ViewOffsetRange< Kokkos::pair<iType,iType> > {
-
- static_assert( std::is_integral<iType>::value , "Range bounds must be an integral type" );
-
- enum { is_range = true };
-
- KOKKOS_INLINE_FUNCTION static
- size_t dimension( size_t const n , Kokkos::pair<iType,iType> const & r )
- { return ( size_t(r.first) < size_t(r.second) && size_t(r.second) <= n ) ? size_t(r.second) - size_t(r.first) : 0 ; }
-
- KOKKOS_INLINE_FUNCTION static
- size_t begin( Kokkos::pair<iType,iType> const & r ) { return size_t(r.first) ; }
-};
-
-template< typename iType >
-struct ViewOffsetRange< std::initializer_list< iType > > {
-
- static_assert( std::is_integral<iType>::value , "Range bounds must be an integral type" );
-
- enum { is_range = true };
-
- KOKKOS_INLINE_FUNCTION static
- size_t dimension( size_t const n , std::initializer_list< iType > const & r )
- {
- return ( size_t(r.begin()[0]) < size_t(r.begin()[1]) && size_t(r.begin()[1]) <= n )
- ? size_t(r.begin()[1]) - size_t(r.begin()[0]) : 0 ;
- }
-
- KOKKOS_INLINE_FUNCTION static
- size_t begin( std::initializer_list< iType > const & r ) { return size_t(r.begin()[0]) ; }
+ {}
};
}}} // namespace Kokkos::Experimental::Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** \brief ViewDataHandle provides the type of the 'data handle' which the view
 * uses to access data with the [] operator. It also provides
 * an allocate function and a function to extract a raw pointer from the
 * data handle. ViewDataHandle also defines an enum ReferenceAble which
 * specifies whether references/pointers to elements can be taken and a
 * 'return_type' which is what the view operators will give back.
 * Specialization of this class allows three things depending
 * on ViewTraits and compiler options:
 * (i) Use a special allocator (e.g. huge pages/small pages and pinned memory)
 * (ii) Use a special data handle type (e.g. add a Cuda Texture Object)
 * (iii) Use special access intrinsics (e.g. texture fetch and non-caching loads)
 */
template< class Traits , class Enable = void >
struct ViewDataHandle {
typedef typename Traits::value_type value_type ;
typedef typename Traits::value_type * handle_type ;
typedef typename Traits::value_type & return_type ;
typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ;
KOKKOS_INLINE_FUNCTION
static handle_type assign( value_type * arg_data_ptr
, track_type const & /*arg_tracker*/ )
{
return handle_type( arg_data_ptr );
}
};
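// A minimal sketch of the default contract above, assuming a Traits whose
// value_type is double: handle_type is then double* and return_type double&,
// so element access is plain pointer indexing with no special intrinsics.
//
//   double * handle = ViewDataHandle< Traits >::assign( ptr , tracker );
//   double & elem   = handle[ i ];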
template< class Traits >
struct ViewDataHandle< Traits ,
typename std::enable_if<( std::is_same< typename Traits::non_const_value_type
, typename Traits::value_type >::value
&&
std::is_same< typename Traits::specialize , void >::value
&&
Traits::memory_traits::Atomic
)>::type >
{
typedef typename Traits::value_type value_type ;
typedef typename Kokkos::Impl::AtomicViewDataHandle< Traits > handle_type ;
typedef typename Kokkos::Impl::AtomicDataElement< Traits > return_type ;
typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ;
KOKKOS_INLINE_FUNCTION
static handle_type assign( value_type * arg_data_ptr
, track_type const & /*arg_tracker*/ )
{
return handle_type( arg_data_ptr );
}
};
}}} // namespace Kokkos::Experimental::Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
-template< class Traits
- , bool R0 = false
- , bool R1 = false
- , bool R2 = false
- , bool R3 = false
- , bool R4 = false
- , bool R5 = false
- , bool R6 = false
- , bool R7 = false
- , typename Enable = void >
-struct SubviewMapping ;
+//----------------------------------------------------------------------------
+
+template< class ValueType , class ExecSpace
+ , bool IsScalar = std::is_scalar< ValueType >::value >
+struct ViewValueFunctor ;
+
+/*
+ * The construction, assignment to default, and destruction
+ * are merged into a single functor.
+ * Primarily this works around an unresolved CUDA back-end bug
+ * that would lose the destructor's CUDA device function when
+ * it is called from the shared memory tracking destruction.
+ * Secondarily, it requires two fewer partial specializations.
+ */
+template< class ValueType , class ExecSpace >
+struct ViewValueFunctor< ValueType , ExecSpace , false >
+{
+ enum { CONSTRUCT = 0x01 , ASSIGN = 0x02 , DESTROY = 0x04 };
+
+ ValueType * const ptr ;
+ int const mode ;
+
+ KOKKOS_INLINE_FUNCTION
+ void operator()( size_t i ) const
+ {
+ if ( mode == CONSTRUCT ) { new (ptr+i) ValueType(); }
+ else if ( mode == ASSIGN ) { ptr[i] = ValueType(); }
+ else if ( mode == DESTROY ) { (ptr+i)->~ValueType(); }
+ }
+
+ ViewValueFunctor( const ExecSpace & arg_space
+ , ValueType * const arg_ptr
+ , size_t const arg_n
+ , int const arg_mode )
+ : ptr( arg_ptr )
+ , mode( arg_mode )
+ {
+ if ( ! arg_space.in_parallel() ) {
+ typedef Kokkos::RangePolicy< ExecSpace > PolicyType ;
+ const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType >
+ closure( *this , PolicyType( 0 , arg_n ) );
+ closure.execute();
+ arg_space.fence();
+ }
+ else {
+ for ( size_t i = 0 ; i < arg_n ; ++i ) operator()(i);
+ }
+ }
+};
+
+template< class ValueType , class ExecSpace >
+struct ViewValueFunctor< ValueType , ExecSpace , true >
+{
+ enum { CONSTRUCT = 0x01 , ASSIGN = 0x02 , DESTROY = 0x04 };
+
+ ValueType * const ptr ;
+ int const mode ;
+
+ KOKKOS_INLINE_FUNCTION
+ void operator()( size_t i ) const { ptr[i] = 0 ; }
+
+ ViewValueFunctor( const ExecSpace & arg_space
+ , ValueType * const arg_ptr
+ , size_t const arg_n
+ , int const arg_mode )
+ : ptr( arg_ptr )
+ , mode( arg_mode )
+ {
+ if ( mode == CONSTRUCT || mode == ASSIGN ) {
+ if ( ! arg_space.in_parallel() ) {
+ typedef Kokkos::RangePolicy< ExecSpace > PolicyType ;
+ const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType >
+ closure( *this , PolicyType( 0 , arg_n ) );
+ closure.execute();
+ arg_space.fence();
+ }
+ else {
+ for ( size_t i = 0 ; i < arg_n ; ++i ) operator()(i);
+ }
+ }
+ }
+};
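+// A minimal sketch of how a mapping drives this functor, assuming an
+// execution space instance 'space' and an allocation 'ptr' of 'n' doubles
+// (the scalar specialization above):
+//
+//   typedef ViewValueFunctor< double , Kokkos::DefaultExecutionSpace > F ;
+//   F( space , ptr , n , F::CONSTRUCT ); // zero-fills via a parallel loop
+//   F( space , ptr , n , F::DESTROY );   // no-op for scalar value types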
+//----------------------------------------------------------------------------
/** \brief View mapping for non-specialized data type and standard layout */
template< class Traits >
-class ViewMapping< Traits , void ,
+class ViewMapping< Traits ,
typename std::enable_if<(
std::is_same< typename Traits::specialize , void >::value
&&
ViewOffset< typename Traits::dimension
, typename Traits::array_layout
, void >::is_mapping_plugin::value
)>::type >
{
private:
- template< class , class , typename > friend class ViewMapping ;
- template< class , bool , bool , bool , bool , bool , bool , bool , bool , class > friend struct SubviewMapping ;
- template< class , class , class , class > friend class Kokkos::Experimental::View ;
+ template< class , class ... > friend class ViewMapping ;
+ template< class , class ... > friend class Kokkos::Experimental::View ;
typedef ViewOffset< typename Traits::dimension
, typename Traits::array_layout
, void
> offset_type ;
typedef typename ViewDataHandle< Traits >::handle_type handle_type ;
handle_type m_handle ;
offset_type m_offset ;
KOKKOS_INLINE_FUNCTION
ViewMapping( const handle_type & arg_handle , const offset_type & arg_offset )
: m_handle( arg_handle )
, m_offset( arg_offset )
{}
public:
//----------------------------------------
// Domain dimensions
enum { Rank = Traits::dimension::rank };
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_offset.dimension_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_offset.dimension_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_offset.dimension_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_offset.dimension_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_offset.dimension_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_offset.dimension_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_offset.dimension_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_offset.dimension_7(); }
// Is a regular layout with uniform striding for each index.
using is_regular = typename offset_type::is_regular ;
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_offset.stride_0(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_offset.stride_1(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_offset.stride_2(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_offset.stride_3(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_offset.stride_4(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_offset.stride_5(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_offset.stride_6(); }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_offset.stride_7(); }
+ template< typename iType >
+ KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_offset.stride(s); }
+
//----------------------------------------
// Range span
/** \brief Span of the mapped range */
KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_offset.span(); }
/** \brief Is the mapped range span contiguous */
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_offset.span_is_contiguous(); }
typedef typename ViewDataHandle< Traits >::return_type reference_type ;
+ typedef typename Traits::value_type * pointer_type ;
  /** \brief If data references are lvalue references then the pointer to memory can be queried */
- KOKKOS_INLINE_FUNCTION constexpr typename Traits::value_type * data() const
+ KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const
{
- typedef typename Traits::value_type * ptr_type ;
-
return std::is_lvalue_reference< reference_type >::value
- ? (ptr_type) m_handle
- : (ptr_type) 0 ;
+ ? (pointer_type) m_handle
+ : (pointer_type) 0 ;
}
//----------------------------------------
// The View class performs all rank and bounds checking before
// calling these element reference methods.
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference() const { return m_handle[0]; }
template< typename I0 >
KOKKOS_FORCEINLINE_FUNCTION
typename
std::enable_if< std::is_integral<I0>::value &&
! std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value
, reference_type >::type
reference( const I0 & i0 ) const { return m_handle[i0]; }
template< typename I0 >
KOKKOS_FORCEINLINE_FUNCTION
typename
std::enable_if< std::is_integral<I0>::value &&
std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value
, reference_type >::type
reference( const I0 & i0 ) const { return m_handle[ m_offset(i0) ]; }
template< typename I0 , typename I1 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 ) const
{ return m_handle[ m_offset(i0,i1) ]; }
template< typename I0 , typename I1 , typename I2 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 ) const
{ return m_handle[ m_offset(i0,i1,i2) ]; }
template< typename I0 , typename I1 , typename I2 , typename I3 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 ) const
{ return m_handle[ m_offset(i0,i1,i2,i3) ]; }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 ) const
{ return m_handle[ m_offset(i0,i1,i2,i3,i4) ]; }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 ) const
{ return m_handle[ m_offset(i0,i1,i2,i3,i4,i5) ]; }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6 ) const
{ return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6) ]; }
template< typename I0 , typename I1 , typename I2 , typename I3
, typename I4 , typename I5 , typename I6 , typename I7 >
KOKKOS_FORCEINLINE_FUNCTION
reference_type reference( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3
, const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 ) const
{ return m_handle[ m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; }
//----------------------------------------
private:
enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ };
enum { MemorySpanSize = sizeof(typename Traits::value_type) };
public:
/** \brief Span, in bytes, of the referenced memory */
KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const
{
return ( m_offset.span() * sizeof(typename Traits::value_type) + MemorySpanMask ) & ~size_t(MemorySpanMask);
}
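  // Worked example of the masking above: five elements of a 4-byte value_type
  // span 20 bytes, and ( 20 + 7 ) & ~size_t(7) == 24, so the reported span is
  // padded up to the next 8-byte boundary.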
/** \brief Span, in bytes, of the required memory */
template< bool AllowPadding >
KOKKOS_INLINE_FUNCTION
static constexpr size_t memory_span( const std::integral_constant<bool,AllowPadding> &
, const size_t N0 , const size_t N1 , const size_t N2 , const size_t N3
, const size_t N4 , const size_t N5 , const size_t N6 , const size_t N7 )
{
typedef std::integral_constant< unsigned , AllowPadding ? MemorySpanSize : 0 > padding ;
return ( offset_type( padding(), N0, N1, N2, N3, N4, N5, N6, N7 ).span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
}
/** \brief Span, in bytes, of the required memory */
template< bool AllowPadding >
KOKKOS_INLINE_FUNCTION
static constexpr size_t memory_span( const std::integral_constant<bool,AllowPadding> &
, const typename Traits::array_layout & layout )
{
return ( offset_type( layout ).span() * MemorySpanSize + MemorySpanMask ) & ~size_t(MemorySpanMask);
}
//----------------------------------------
KOKKOS_INLINE_FUNCTION ~ViewMapping() {}
KOKKOS_INLINE_FUNCTION ViewMapping() : m_handle(), m_offset() {}
KOKKOS_INLINE_FUNCTION ViewMapping( const ViewMapping & rhs )
: m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {}
KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( const ViewMapping & rhs )
{ m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; }
KOKKOS_INLINE_FUNCTION ViewMapping( ViewMapping && rhs )
: m_handle( rhs.m_handle ), m_offset( rhs.m_offset ) {}
KOKKOS_INLINE_FUNCTION ViewMapping & operator = ( ViewMapping && rhs )
{ m_handle = rhs.m_handle ; m_offset = rhs.m_offset ; return *this ; }
template< bool AllowPadding >
KOKKOS_INLINE_FUNCTION
- ViewMapping( void * ptr
+ ViewMapping( pointer_type ptr
, const std::integral_constant<bool,AllowPadding> &
, const size_t N0 , const size_t N1 , const size_t N2 , const size_t N3
, const size_t N4 , const size_t N5 , const size_t N6 , const size_t N7 )
- : m_handle( reinterpret_cast< handle_type >( ptr ) )
+ : m_handle( ptr )
, m_offset( std::integral_constant< unsigned , AllowPadding ? sizeof(typename Traits::value_type) : 0 >()
, N0, N1, N2, N3, N4, N5, N6, N7 )
{}
template< bool AllowPadding >
KOKKOS_INLINE_FUNCTION
- ViewMapping( void * ptr
+ ViewMapping( pointer_type ptr
, const std::integral_constant<bool,AllowPadding> &
, const typename Traits::array_layout & layout )
- : m_handle( reinterpret_cast< handle_type >( ptr ) )
+ : m_handle( ptr )
, m_offset( layout )
{}
//----------------------------------------
  // Used when the View is to construct or destroy its elements.
- struct FunctorTagConstructScalar {};
- struct FunctorTagConstructNonScalar {};
- struct FunctorTagDestructNonScalar {};
-
- KOKKOS_FORCEINLINE_FUNCTION
- void operator()( const FunctorTagConstructScalar & , const size_t i ) const
- { m_handle[i] = 0 ; }
-
- KOKKOS_FORCEINLINE_FUNCTION
- void operator()( const FunctorTagConstructNonScalar & , const size_t i ) const
- {
- typedef typename Traits::value_type value_type ;
- new( & m_handle[i] ) value_type();
- }
-
- KOKKOS_FORCEINLINE_FUNCTION
- void operator()( const FunctorTagDestructNonScalar & , const size_t i ) const
- {
- typedef typename Traits::value_type value_type ;
- ( & (m_handle[i]) )->~value_type();
- }
-
template< class ExecSpace >
- typename std::enable_if< Kokkos::Impl::is_execution_space<ExecSpace>::value &&
- std::is_scalar< typename Traits::value_type >::value >::type
- construct( const ExecSpace & space ) const
+ void construct( const ExecSpace & space ) const
{
- typedef Kokkos::RangePolicy< ExecSpace , FunctorTagConstructScalar , size_t > Policy ;
+ typedef typename Traits::value_type value_type ;
+ typedef ViewValueFunctor< value_type , ExecSpace > FunctorType ;
- (void) Kokkos::Impl::ParallelFor< ViewMapping , Policy >( *this , Policy( 0 , m_offset.span() ) );
- ExecSpace::fence();
+ (void) FunctorType( space , (value_type *) m_handle , m_offset.span() , FunctorType::CONSTRUCT );
}
template< class ExecSpace >
- typename std::enable_if< Kokkos::Impl::is_execution_space<ExecSpace>::value &&
- ! std::is_scalar< typename Traits::value_type >::value >::type
- construct( const ExecSpace & space ) const
+ void destroy( const ExecSpace & space ) const
{
- typedef Kokkos::RangePolicy< ExecSpace , FunctorTagConstructNonScalar , size_t > Policy ;
+ typedef typename Traits::value_type value_type ;
+ typedef ViewValueFunctor< value_type , ExecSpace > FunctorType ;
- (void) Kokkos::Impl::ParallelFor< ViewMapping , Policy >( *this , Policy( 0 , m_offset.span() ) );
- ExecSpace::fence();
- }
-
- template< class ExecSpace >
- typename std::enable_if< Kokkos::Impl::is_execution_space<ExecSpace>::value &&
- std::is_scalar< typename Traits::value_type >::value >::type
- destroy( const ExecSpace & ) const {}
-
- template< class ExecSpace >
- typename std::enable_if< Kokkos::Impl::is_execution_space<ExecSpace>::value &&
- ! std::is_scalar< typename Traits::value_type >::value >::type
- destroy( const ExecSpace & space ) const
- {
- typedef Kokkos::RangePolicy< ExecSpace , FunctorTagDestructNonScalar , size_t > Policy ;
-
- (void) Kokkos::Impl::ParallelFor< ViewMapping , Policy >( *this , Policy( 0 , m_offset.span() ) );
- ExecSpace::fence();
+ (void) FunctorType( space , (value_type *) m_handle , m_offset.span() , FunctorType::DESTROY );
}
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** \brief Assign compatible default mappings */
template< class DstTraits , class SrcTraits >
class ViewMapping< DstTraits , SrcTraits ,
typename std::enable_if<(
std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value
&&
std::is_same< typename DstTraits::specialize , void >::value
&&
(
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value ||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value ||
std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value
)
&&
std::is_same< typename SrcTraits::specialize , void >::value
&&
(
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value ||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value ||
std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value
)
)>::type >
{
+private:
+
+ enum { is_assignable_value_type =
+ std::is_same< typename DstTraits::value_type
+ , typename SrcTraits::value_type >::value ||
+ std::is_same< typename DstTraits::value_type
+ , typename SrcTraits::const_value_type >::value };
+
+ enum { is_assignable_dimension =
+ ViewDimensionAssignable< typename DstTraits::dimension
+ , typename SrcTraits::dimension >::value };
+
+ enum { is_assignable_layout =
+ std::is_same< typename DstTraits::array_layout
+ , typename SrcTraits::array_layout >::value ||
+ std::is_same< typename DstTraits::array_layout
+ , Kokkos::LayoutStride >::value ||
+ ( DstTraits::dimension::rank == 0 ) ||
+ ( DstTraits::dimension::rank == 1 &&
+ DstTraits::dimension::rank_dynamic == 1 )
+ };
+
public:
- enum { is_assignable = true };
+ enum { is_assignable = is_assignable_value_type &&
+ is_assignable_dimension &&
+ is_assignable_layout };
typedef Kokkos::Experimental::Impl::SharedAllocationTracker TrackType ;
- typedef ViewMapping< DstTraits , void , void > DstType ;
- typedef ViewMapping< SrcTraits , void , void > SrcType ;
+ typedef ViewMapping< DstTraits , void > DstType ;
+ typedef ViewMapping< SrcTraits , void > SrcType ;
KOKKOS_INLINE_FUNCTION
static void assign( DstType & dst , const SrcType & src , const TrackType & src_track )
{
- static_assert( std::is_same< typename DstTraits::value_type , typename SrcTraits::value_type >::value ||
- std::is_same< typename DstTraits::value_type , typename SrcTraits::const_value_type >::value
+ static_assert( is_assignable_value_type
, "View assignment must have same value type or const = non-const" );
- static_assert( ViewDimensionAssignable< typename DstTraits::dimension , typename SrcTraits::dimension >::value
+ static_assert( is_assignable_dimension
, "View assignment must have compatible dimensions" );
- static_assert( std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value ||
- std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value ||
- ( DstTraits::dimension::rank == 0 ) ||
- ( DstTraits::dimension::rank == 1 && DstTraits::dimension::rank_dynamic == 1 )
+ static_assert( is_assignable_layout
, "View assignment must have compatible layout or have rank <= 1" );
typedef typename DstType::offset_type dst_offset_type ;
dst.m_offset = dst_offset_type( src.m_offset );
dst.m_handle = Kokkos::Experimental::Impl::ViewDataHandle< DstTraits >::assign( src.m_handle , src_track );
}
};
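// A minimal sketch of what is_assignable admits, assuming matching value
// types and rank-2 dynamic dimensions (namespaces elided):
//
//   View< double** , LayoutLeft >   a( "A" , 10 , 10 );
//   View< double** , LayoutStride > s = a ; // ok: LayoutStride destination
//   // View< double** , LayoutRight > r = a ; // rejected: incompatible
//   // layouts at rank 2; only rank <= 1 views may cross Left/Right.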
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
-
-/** \brief View mapping for non-specialized data type and standard layout */
-template< class Traits , bool R0 , bool R1 , bool R2 , bool R3 , bool R4 , bool R5 , bool R6 , bool R7 >
-struct SubviewMapping< Traits, R0, R1, R2, R3, R4, R5, R6, R7 ,
- typename std::enable_if<(
- std::is_same< typename Traits::specialize , void >::value
- &&
- (
- std::is_same< typename Traits::array_layout , Kokkos::LayoutLeft >::value ||
- std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value ||
- std::is_same< typename Traits::array_layout , Kokkos::LayoutStride >::value
- )
- )>::type >
+// Subview mapping.
+// Deduce destination view type from source view traits and subview arguments
+
+template< class SrcTraits , class ... Args >
+struct ViewMapping
+ < typename std::enable_if<(
+ std::is_same< typename SrcTraits::specialize , void >::value
+ &&
+ (
+ std::is_same< typename SrcTraits::array_layout
+ , Kokkos::LayoutLeft >::value ||
+ std::is_same< typename SrcTraits::array_layout
+ , Kokkos::LayoutRight >::value ||
+ std::is_same< typename SrcTraits::array_layout
+ , Kokkos::LayoutStride >::value
+ )
+ )>::type
+ , SrcTraits
+ , Args ... >
{
private:
- // Subview's rank
+ static_assert( SrcTraits::rank == sizeof...(Args) ,
+ "Subview mapping requires one argument for each dimension of source View" );
+
+ enum
+ { RZ = false
+ , R0 = bool(is_integral_extent<0,Args...>::value)
+ , R1 = bool(is_integral_extent<1,Args...>::value)
+ , R2 = bool(is_integral_extent<2,Args...>::value)
+ , R3 = bool(is_integral_extent<3,Args...>::value)
+ , R4 = bool(is_integral_extent<4,Args...>::value)
+ , R5 = bool(is_integral_extent<5,Args...>::value)
+ , R6 = bool(is_integral_extent<6,Args...>::value)
+ , R7 = bool(is_integral_extent<7,Args...>::value)
+ };
+
enum { rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
// Whether right-most rank is a range.
- enum { R0_rev = 0 == Traits::rank ? false : (
- 1 == Traits::rank ? R0 : (
- 2 == Traits::rank ? R1 : (
- 3 == Traits::rank ? R2 : (
- 4 == Traits::rank ? R3 : (
- 5 == Traits::rank ? R4 : (
- 6 == Traits::rank ? R5 : (
- 7 == Traits::rank ? R6 : R7 ))))))) };
+ enum { R0_rev = ( 0 == SrcTraits::rank ? RZ : (
+ 1 == SrcTraits::rank ? R0 : (
+ 2 == SrcTraits::rank ? R1 : (
+ 3 == SrcTraits::rank ? R2 : (
+ 4 == SrcTraits::rank ? R3 : (
+ 5 == SrcTraits::rank ? R4 : (
+ 6 == SrcTraits::rank ? R5 : (
+ 7 == SrcTraits::rank ? R6 : R7 )))))))) };
// Subview's layout
typedef typename std::conditional<
( /* Same array layout IF */
( rank == 0 ) /* output rank zero */
||
      // OutputRank 1 or 2, InputLayout Left, Interval 0
      // because output index 0 has stride one and any second output index carries the stride.
- ( rank <= 2 && R0 && std::is_same< typename Traits::array_layout , Kokkos::LayoutLeft >::value )
+ ( rank <= 2 && R0 && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value )
||
      // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
      // because the right-most output index has stride one and any other output index carries a stride.
- ( rank <= 2 && R0_rev && std::is_same< typename Traits::array_layout , Kokkos::LayoutRight >::value )
- ), typename Traits::array_layout , Kokkos::LayoutStride
+ ( rank <= 2 && R0_rev && std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value )
+ ), typename SrcTraits::array_layout , Kokkos::LayoutStride
>::type array_layout ;
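  // A sketch of how these rules resolve, assuming a rank-3 LayoutLeft source
  // view 'a' (subview itself is defined elsewhere):
  //
  //   subview( a , ALL , ALL , 5 ) // R0 set, rank 2 -> stays LayoutLeft
  //   subview( a , 5 , ALL , ALL ) // R0 not set     -> becomes LayoutStride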
- typedef typename Traits::value_type value_type ;
+ typedef typename SrcTraits::value_type value_type ;
typedef typename std::conditional< rank == 0 , value_type ,
typename std::conditional< rank == 1 , value_type * ,
typename std::conditional< rank == 2 , value_type ** ,
typename std::conditional< rank == 3 , value_type *** ,
typename std::conditional< rank == 4 , value_type **** ,
typename std::conditional< rank == 5 , value_type ***** ,
typename std::conditional< rank == 6 , value_type ****** ,
typename std::conditional< rank == 7 , value_type ******* ,
value_type ********
>::type >::type >::type >::type >::type >::type >::type >::type
data_type ;
public:
- typedef
- Kokkos::Experimental::ViewTraits< data_type , array_layout
- , typename Traits::device_type
- , typename Traits::memory_traits > traits_type ;
-
- typedef Kokkos::Experimental::View< data_type
- , array_layout
- , typename Traits::device_type
- , typename Traits::memory_traits > type ;
+ typedef Kokkos::Experimental::ViewTraits
+ < data_type
+ , array_layout
+ , typename SrcTraits::device_type
+ , typename SrcTraits::memory_traits > traits_type ;
+
+ typedef Kokkos::Experimental::View
+ < data_type
+ , array_layout
+ , typename SrcTraits::device_type
+ , typename SrcTraits::memory_traits > type ;
+
+ template< class MemoryTraits >
+ struct apply {
+
+ static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "apply requires a Kokkos memory traits type" );
+
+ typedef Kokkos::Experimental::ViewTraits
+ < data_type
+ , array_layout
+ , typename SrcTraits::device_type
+ , MemoryTraits > traits_type ;
+
+ typedef Kokkos::Experimental::View
+ < data_type
+ , array_layout
+ , typename SrcTraits::device_type
+ , MemoryTraits > type ;
+ };
- template< class T0 , class T1 , class T2 , class T3
- , class T4 , class T5 , class T6 , class T7 >
+ // The presumed type is 'ViewMapping< traits_type , void >'
+ // However, a compatible ViewMapping is acceptable.
+ template< class DstTraits >
KOKKOS_INLINE_FUNCTION
- static void assign( ViewMapping< traits_type , void , void > & dst
- , ViewMapping< Traits , void , void > const & src
- , T0 const & arg0
- , T1 const & arg1
- , T2 const & arg2
- , T3 const & arg3
- , T4 const & arg4
- , T5 const & arg5
- , T6 const & arg6
- , T7 const & arg7
- )
+ static void assign( ViewMapping< DstTraits , void > & dst
+ , ViewMapping< SrcTraits , void > const & src
+ , Args ... args )
{
- typedef ViewMapping< traits_type , void , void > DstType ;
+ static_assert(
+ ViewMapping< DstTraits , traits_type , void >::is_assignable ,
+ "Subview destination type must be compatible with subview derived type" );
+
+ typedef ViewMapping< DstTraits , void > DstType ;
typedef typename DstType::offset_type dst_offset_type ;
typedef typename DstType::handle_type dst_handle_type ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T0> V0 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T1> V1 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T2> V2 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T3> V3 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T4> V4 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T5> V5 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T6> V6 ;
- typedef Kokkos::Experimental::Impl::ViewOffsetRange<T7> V7 ;
-
- dst.m_offset = dst_offset_type
- ( src.m_offset
- , V0::dimension( src.m_offset.dimension_0() , arg0 )
- , V1::dimension( src.m_offset.dimension_1() , arg1 )
- , V2::dimension( src.m_offset.dimension_2() , arg2 )
- , V3::dimension( src.m_offset.dimension_3() , arg3 )
- , V4::dimension( src.m_offset.dimension_4() , arg4 )
- , V5::dimension( src.m_offset.dimension_5() , arg5 )
- , V6::dimension( src.m_offset.dimension_6() , arg6 )
- , V7::dimension( src.m_offset.dimension_7() , arg7 )
- );
+ const SubviewExtents< SrcTraits::rank , rank >
+ extents( src.m_offset.m_dim , args... );
+ dst.m_offset = dst_offset_type( src.m_offset , extents );
dst.m_handle = dst_handle_type( src.m_handle +
- src.m_offset( V0::begin( arg0 )
- , V1::begin( arg1 )
- , V2::begin( arg2 )
- , V3::begin( arg3 )
- , V4::begin( arg4 )
- , V5::begin( arg5 )
- , V6::begin( arg6 )
- , V7::begin( arg7 )
+ src.m_offset( extents.domain_offset(0)
+ , extents.domain_offset(1)
+ , extents.domain_offset(2)
+ , extents.domain_offset(3)
+ , extents.domain_offset(4)
+ , extents.domain_offset(5)
+ , extents.domain_offset(6)
+ , extents.domain_offset(7)
) );
}
};
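// A minimal end-to-end sketch, assuming the usual public subview and ALL
// entry points that forward to this mapping:
//
//   Kokkos::Experimental::View< double** , Kokkos::LayoutLeft > a( "A" , 100 , 50 );
//   // Deduces a rank-1 LayoutLeft view aliasing column 7 of 'a':
//   auto col = Kokkos::Experimental::subview( a , Kokkos::ALL() , 7 );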
}}} // namespace Kokkos::Experimental::Impl
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
-template< class V
- , bool R0 = false , bool R1 = false , bool R2 = false , bool R3 = false
- , bool R4 = false , bool R5 = false , bool R6 = false , bool R7 = false >
-struct SubviewType ;
-
-template< class D , class A1, class A2, class A3
- , bool R0 , bool R1 , bool R2 , bool R3
- , bool R4 , bool R5 , bool R6 , bool R7 >
-struct SubviewType< Kokkos::Experimental::View< D , A1, A2, A3 > , R0 , R1 , R2 , R3 , R4 , R5 , R6 , R7 >
-{
-private:
- typedef Kokkos::Experimental::ViewTraits< D , A1 , A2 , A3 > traits ;
- typedef Kokkos::Experimental::Impl::SubviewMapping< traits , R0 , R1 , R2 , R3 , R4 , R5 , R6 , R7 > mapping ;
-public:
- typedef typename mapping::type type ;
-};
-
-}}} // namespace Kokkos::Experimental::Impl
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-
class Error_view_scalar_reference_to_non_scalar_view ;
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
-#if defined( KOKKOS_EXPRESSION_CHECK )
-
-#define KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( SPACE , MAP , RANK , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) \
- Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \
- Kokkos::Impl::ActiveExecutionMemorySpace , SPACE >::verify( MAP.data() ); \
- /* array bounds checking */
-
-#else
-
-#define KOKKOS_ASSERT_VIEW_MAPPING_ACCESS( SPACE , MAP , RANK , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) \
- Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \
- Kokkos::Impl::ActiveExecutionMemorySpace , SPACE >::verify( MAP.data() )
-
-#endif
-
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP */
diff --git a/lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp b/lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp
index a661a3534..32bd7bac9 100644
--- a/lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp
+++ b/lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp
@@ -1,219 +1,224 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_VIEWTILE_HPP
#define KOKKOS_EXPERIMENTAL_VIEWTILE_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
// View mapping for rank two tiled array
template< class L >
struct is_layout_tile : public std::false_type {};
template< unsigned N0 , unsigned N1 >
struct is_layout_tile< Kokkos::LayoutTileLeft<N0,N1,true> > : public std::true_type {};
template< class Dimension , class Layout >
struct ViewOffset< Dimension , Layout ,
typename std::enable_if<(
( Dimension::rank == 2 )
&&
is_layout_tile< Layout >::value
)>::type >
{
public:
- enum { SHIFT_0 = Kokkos::Impl::power_of_two<Layout::N0>::value };
- enum { SHIFT_1 = Kokkos::Impl::power_of_two<Layout::N1>::value };
+ enum { SHIFT_0 = Kokkos::Impl::integral_power_of_two(Layout::N0) };
+ enum { SHIFT_1 = Kokkos::Impl::integral_power_of_two(Layout::N1) };
enum { SHIFT_T = SHIFT_0 + SHIFT_1 };
enum { MASK_0 = Layout::N0 - 1 };
enum { MASK_1 = Layout::N1 - 1 };
// Is an irregular layout that does not have uniform striding for each index.
using is_mapping_plugin = std::true_type ;
using is_regular = std::false_type ;
typedef size_t size_type ;
typedef Dimension dimension_type ;
typedef Layout array_layout ;
dimension_type m_dim ;
size_type m_tile_N0 ;
//----------------------------------------
// Only instantiated for rank 2
template< typename I0 , typename I1 >
KOKKOS_INLINE_FUNCTION constexpr
size_type operator()( I0 const & i0 , I1 const & i1
, int = 0 , int = 0
, int = 0 , int = 0
, int = 0 , int = 0
) const
{
return /* ( ( Tile offset ) * Tile size ) */
( ( (i0>>SHIFT_0) + m_tile_N0 * (i1>>SHIFT_1) ) << SHIFT_T) +
/* ( Offset within tile ) */
( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) ) ;
}
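  // Worked example, assuming LayoutTileLeft<2,2,true> over a 4x4 array
  // (SHIFT_0 = SHIFT_1 = 1, SHIFT_T = 2, MASK_0 = MASK_1 = 1, m_tile_N0 = 2):
  // element (3,2) lies in tile (1,1), i.e. tile index 1 + 2*1 = 3, so the tile
  // base is 3 << 2 = 12; the offset within the tile is (3&1) + ((2&1)<<1) = 1,
  // giving flat index 13.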
//----------------------------------------
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { return m_dim.N0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { return m_dim.N1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_dim.N0 * m_dim.N1 ; }
// Strides are meaningless due to irregularity
KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_type span() const
{
// ( TileDim0 * ( TileDim1 ) ) * TileSize
return ( m_tile_N0 * ( ( m_dim.N1 + MASK_1 ) >> SHIFT_1 ) ) << SHIFT_T ;
}
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const
{
// Only if dimensions align with tile size
return ( m_dim.N0 & MASK_0 ) == 0 && ( m_dim.N1 & MASK_1 ) == 0 ;
}
//----------------------------------------
~ViewOffset() = default ;
ViewOffset() = default ;
ViewOffset( const ViewOffset & ) = default ;
ViewOffset & operator = ( const ViewOffset & ) = default ;
template< unsigned TrivialScalarSize >
KOKKOS_INLINE_FUNCTION
constexpr ViewOffset( std::integral_constant<unsigned,TrivialScalarSize> const &
, size_t aN0 , size_t aN1
, unsigned , unsigned , unsigned , unsigned , unsigned , unsigned )
: m_dim( aN0, aN1, 0, 0, 0, 0, 0, 0 )
, m_tile_N0( ( aN0 + MASK_0 ) >> SHIFT_0 /* number of tiles in first dimension */ )
{}
};
-} /* namespace Impl */
-} /* namespace Experimental */
-} /* namespace Kokkos */
-
-namespace Kokkos {
-namespace Experimental {
-
-// Using View with an invalid data type to construct the tiling subview.
-// View is a friend of View so we use this invalid data type partial specialization
-// to access implementation of both source and destination view for constructing
-// the tile subview.
-
-template< unsigned N0 , unsigned N1 >
-struct View< void , Kokkos::LayoutTileLeft<N0,N1,true> , void , void >
+template< typename T , unsigned N0 , unsigned N1 , class ... P
+ , typename iType0 , typename iType1
+ >
+struct ViewMapping
+ < void
+ , Kokkos::Experimental::ViewTraits<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...>
+ , Kokkos::LayoutTileLeft<N0,N1,true>
+ , iType0
+ , iType1 >
{
- typedef Kokkos::LayoutTileLeft<N0,N1,true> Layout ;
+ typedef Kokkos::LayoutTileLeft<N0,N1,true> src_layout ;
+ typedef Kokkos::Experimental::ViewTraits< T** , src_layout , P... > src_traits ;
+ typedef Kokkos::Experimental::ViewTraits< T[N0][N1] , LayoutLeft , P ... > traits ;
+ typedef Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P ... > type ;
- template< typename T , class A2 , class A3 >
KOKKOS_INLINE_FUNCTION static
- Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , A2 , A3 >
- tile_subview( const Kokkos::Experimental::View<T**,Layout,A2,A3> & src
- , const size_t i_tile0
- , const size_t i_tile1
- )
+ void assign( ViewMapping< traits , void > & dst
+ , const ViewMapping< src_traits , void > & src
+ , const src_layout &
+ , const size_t i_tile0
+ , const size_t i_tile1
+ )
{
- typedef Kokkos::Experimental::View<T**,Layout,A2,A3> SrcView ;
- typedef Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , A2 , A3 > DstView ;
-
- typedef typename SrcView::map_type::offset_type src_offset_type ;
- typedef typename DstView::map_type dst_map_type ;
- typedef typename DstView::map_type::handle_type dst_handle_type ;
- typedef typename DstView::map_type::offset_type dst_offset_type ;
-
- return DstView( src.m_track ,
- dst_map_type(
- dst_handle_type( src.m_map.m_handle +
- ( ( i_tile0 + src.m_map.m_offset.m_tile_N0 * i_tile1 ) << src_offset_type::SHIFT_T ) ) ,
- dst_offset_type() )
- );
+ typedef ViewMapping< traits , void > dst_map_type ;
+ typedef ViewMapping< src_traits , void > src_map_type ;
+ typedef typename dst_map_type::handle_type dst_handle_type ;
+ typedef typename dst_map_type::offset_type dst_offset_type ;
+ typedef typename src_map_type::offset_type src_offset_type ;
+
+ dst = dst_map_type(
+ dst_handle_type( src.m_handle +
+ ( ( i_tile0 + src.m_offset.m_tile_N0 * i_tile1 ) << src_offset_type::SHIFT_T ) ) ,
+ dst_offset_type() );
}
};
-template< typename T , unsigned N0 , unsigned N1 , class A2 , class A3 >
+} /* namespace Impl */
+} /* namespace Experimental */
+} /* namespace Kokkos */
+
+namespace Kokkos {
+namespace Experimental {
+
+template< typename T , unsigned N0 , unsigned N1 , class ... P >
KOKKOS_INLINE_FUNCTION
-Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , A2 , A3 >
-tile_subview( const Kokkos::Experimental::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,A2,A3> & src
+Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... >
+tile_subview( const Kokkos::Experimental::View<T**,Kokkos::LayoutTileLeft<N0,N1,true>,P...> & src
, const size_t i_tile0
, const size_t i_tile1
)
{
- return View< void , Kokkos::LayoutTileLeft<N0,N1,true> , void , void >::
- tile_subview( src , i_tile0 , i_tile1 );
+ // Force the specialized ViewMapping for extracting a tile
+ // by using the first subview argument as the layout.
+ typedef Kokkos::LayoutTileLeft<N0,N1,true> SrcLayout ;
+
+ return Kokkos::Experimental::View< T[N0][N1] , LayoutLeft , P... >
+ ( src , SrcLayout() , i_tile0 , i_tile1 );
}
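// A minimal usage sketch, assuming a tiled rank-2 view with power-of-two
// 8x8 tiles:
//
//   Kokkos::Experimental::View< double** ,
//     Kokkos::LayoutTileLeft<8,8,true> > a( "A" , 40 , 40 );
//   // An 8x8 LayoutLeft view aliasing the tile at tile coordinates (2,3):
//   auto tile = Kokkos::Experimental::tile_subview( a , 2 , 3 );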
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEWTILE_HPP */
diff --git a/lib/kokkos/core/src/impl/Kokkos_AllocationTracker.cpp b/lib/kokkos/core/src/impl/Kokkos_AllocationTracker.cpp
index 7fb33853d..efd2a096a 100644
--- a/lib/kokkos/core/src/impl/Kokkos_AllocationTracker.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_AllocationTracker.cpp
@@ -1,844 +1,848 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core_fwd.hpp>
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
#include <Kokkos_Atomic.hpp>
#include <impl/Kokkos_Singleton.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <impl/Kokkos_Error.hpp>
#include <string>
#include <vector>
#include <sstream>
#include <algorithm>
#include <utility>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <iomanip>
/* Enable clean up of memory leaks */
#define CLEAN_UP_MEMORY_LEAKS 0
namespace Kokkos { namespace Impl {
namespace {
//-----------------------------------------------------------------------------
// AllocationRecord
//-----------------------------------------------------------------------------
//
// Used to track details about an allocation and provide a ref count
// sizeof(AllocationRecord) == 128
struct AllocationRecord
{
enum {
OFFSET = sizeof(AllocatorBase*) // allocator
+ sizeof(void*) // alloc_ptr
+ sizeof(uint64_t) // alloc_size
+ sizeof(AllocatorAttributeBase*) // attribute
+ sizeof(uint32_t) // node_index
+ sizeof(uint32_t) // ref_count
, LABEL_LENGTH = 128 - OFFSET
};
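  // Size check on a typical LP64 build: OFFSET is 8+8+8+8+4+4 = 40 bytes,
  // so LABEL_LENGTH is 128 - 40 = 88 and the whole record fills exactly the
  // intended 128 bytes.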
AllocatorBase * const allocator;
void * const alloc_ptr;
const uint64_t alloc_size;
AllocatorAttributeBase * const attribute;
const int32_t node_index;
volatile uint32_t ref_count;
const char label[LABEL_LENGTH];
AllocationRecord( AllocatorBase * const arg_allocator
, void * arg_alloc_ptr
, uint64_t arg_alloc_size
, int32_t arg_node_index
, const std::string & arg_label
)
: allocator(arg_allocator)
, alloc_ptr(arg_alloc_ptr)
, alloc_size(arg_alloc_size)
, attribute(NULL)
, node_index(arg_node_index)
, ref_count(1)
, label() // zero fill
{
const size_t length = static_cast<size_t>(LABEL_LENGTH-1u) < arg_label.size() ? static_cast<size_t>(LABEL_LENGTH-1u) : arg_label.size();
strncpy( const_cast<char *>(label), arg_label.c_str(), length );
}
~AllocationRecord()
{
if (attribute) {
delete attribute;
}
}
uint32_t increment_ref_count()
{
uint32_t old_value = atomic_fetch_add( &ref_count, static_cast<uint32_t>(1) );
return old_value + 1u;
}
uint32_t decrement_ref_count()
{
uint32_t old_value = atomic_fetch_sub( &ref_count, static_cast<uint32_t>(1) );
return old_value - 1u;
}
void print( std::ostream & oss ) const
{
oss << "{ " << allocator->name()
<< " } : \"" << label
<< "\" ref_count(" << ref_count
<< ") memory[ " << alloc_ptr
<< " + " << alloc_size
<< " ]" ;
}
bool set_attribute( AllocatorAttributeBase * attr )
{
bool result = false;
if (attribute == NULL) {
result = NULL == atomic_compare_exchange( const_cast<AllocatorAttributeBase **>(&attribute)
, reinterpret_cast<AllocatorAttributeBase *>(NULL)
, attr );
}
return result;
}
// disallow copy and assignment
AllocationRecord( const AllocationRecord & );
AllocationRecord & operator=(const AllocationRecord &);
};
template <int NumBlocks>
struct Bitset
{
enum { blocks = NumBlocks };
enum { size = blocks * 64 };
enum { block_mask = 63u };
enum { block_shift = 6 };
// used to find free bits in a bitset
static int count_trailing_zeros(uint64_t x)
{
#if defined( KOKKOS_COMPILER_GNU ) || defined( KOKKOS_COMPILER_CLANG ) || defined( KOKKOS_COMPILER_APPLECC )
return x ? __builtin_ctzll(x) : 64;
#elif defined( KOKKOS_COMPILER_INTEL )
enum { shift = 32 };
enum { mask = (static_cast<uint64_t>(1) << shift) - 1u };
return (x & mask) ? _bit_scan_forward(static_cast<int>(x & mask)) :
(x >> shift) ? shift + _bit_scan_forward(static_cast<int>(x >> shift)) :
64 ;
#elif defined( KOKKOS_COMPILER_IBM )
return x ? __cnttz8(x) : 64;
#else
int i = 0;
for (; ((x & (static_cast<uint64_t>(1) << i)) == 0u) && i < 64; ++i ) {}
return i;
#endif
}
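  // Worked example: find_first_unset() below scans the complement of each
  // block, so for a block with bits 0..2 already set (block == 0x7) it
  // computes count_trailing_zeros( ~0x7 ) == 3, the index of the first
  // free bit.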
Bitset()
: m_bits()
{
for (int i=0; i < blocks; ++i) {
m_bits[i] = 0u;
}
}
bool set( int i )
{
const uint64_t bit = static_cast<uint64_t>(1) << ( i & block_mask );
return !( atomic_fetch_or( m_bits + (i >> block_shift), bit ) & bit );
}
bool reset( int i )
{
const uint64_t bit = static_cast<uint64_t>(1) << ( i & block_mask );
return atomic_fetch_and( m_bits + (i >> block_shift), ~bit ) & bit;
}
bool test( int i )
{
const uint64_t block = m_bits[ i >> block_shift ];
const uint64_t bit = static_cast<uint64_t>(1) << ( i & block_mask );
return block & bit;
}
int find_first_unset() const
{
for (int i=0; i < blocks; ++i) {
const uint64_t block = m_bits[i];
int b = count_trailing_zeros( ~block );
if ( b < 64 ) {
return (i << block_shift) + b;
}
}
return size;
}
volatile uint64_t m_bits[blocks];
};
//-----------------------------------------------------------------------------
// AllocationRecordPool -- singleton class
//
// global_alloc_rec_pool is the ONLY instance of this class
//
//-----------------------------------------------------------------------------
// Record AllocationRecords in a lock-free circular list.
// Each node in the list has a buffer with space for 959 ((15*64)-1) records
// managed by a bitset. Atomics are used to set and reset bits in the bit set.
// The head of the list is atomically updated to the last node found with
// unused space.
//
// Cost to create an allocation record: amortized O(1), worst case O(num nodes)
// Cost to destroy an allocation record: O(1)
//
// Singleton allocations are pushed onto a lock-free stack that is destroyed
// after the circular list of allocation records.
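// Worked capacity math for the comment above: each node carries
// BITSET_BLOCKS = 15 blocks of 64 bits = 960 bits. Bit 0 is reserved to
// mark the node itself and maps to no record, leaving (15*64)-1 = 959
// AllocationRecord slots per node.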
struct AllocationRecordPool
{
enum { BITSET_BLOCKS = 15 };
typedef Bitset<BITSET_BLOCKS> bitset_type;
enum { BUFFER_SIZE = (bitset_type::size - 1) * sizeof(AllocationRecord) };
struct AllocationNode
{
AllocationNode()
: next()
, bitset()
, buffer()
{
// set the first bit to used
bitset.set(0);
}
void * get_buffer( int32_t node_index )
{
return buffer + (node_index-1) * sizeof(AllocationRecord);
}
// return 0 if no space is available in the node
int32_t get_node_index()
{
int32_t node_index = 0;
do {
node_index = bitset.find_first_unset();
// successfully claimed a bit
if ( node_index != bitset.size && bitset.set(node_index) )
{
return node_index;
}
} while ( node_index != bitset.size );
return 0;
}
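// Note on the loop above: find_first_unset() and set() are two separate
// atomic steps, so another thread may claim the same bit in between; a
// false return from set() signals the lost race. Retrying until a bit is
// claimed or the bitset reports full keeps slot handout lock-free.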
void clear_node_index( int32_t node_index )
{
bitset.reset(node_index);
}
AllocationNode * next;
bitset_type bitset;
char buffer[BUFFER_SIZE];
};
struct SingletonNode
{
void * buffer;
SingletonNode * next;
Impl::singleton_destroy_function_type destroy;
SingletonNode( size_t size, Impl::singleton_create_function_type create_func, Impl::singleton_destroy_function_type destroy_func )
: buffer(NULL)
, next(NULL)
, destroy(destroy_func)
{
if (size) {
buffer = malloc(size);
create_func(buffer);
}
}
~SingletonNode()
{
if (buffer) {
try {
destroy(buffer);
} catch(...) {}
free(buffer);
}
}
};
AllocationRecordPool()
: head( new AllocationNode() )
, singleton_head(NULL)
{
// setup ring
head->next = head;
}
~AllocationRecordPool()
{
// delete allocation records
{
AllocationNode * start = head;
AllocationNode * curr = start;
std::vector< std::string > string_vec;
do {
AllocationNode * next = curr->next;
#if defined( KOKKOS_DEBUG_PRINT_ALLOCATION_BITSET )
// print node bitset
for (int i=0; i < bitset_type::blocks; ++i ) {
std::cout << std::hex << std::showbase << curr->bitset.m_bits[i] << " ";
}
std::cout << std::endl;
#endif
// bit zero does not map to an AllocationRecord
for ( int32_t i=1; i < bitset_type::size; ++i )
{
if (curr->bitset.test(i)) {
AllocationRecord * alloc_rec = reinterpret_cast<AllocationRecord *>( curr->get_buffer(i) );
std::ostringstream oss;
alloc_rec->print( oss );
string_vec.push_back( oss.str() );
#if CLEAN_UP_MEMORY_LEAKS
/* Cleaning up memory leaks prevents memory error detection tools
* from reporting the original source of allocation, which can
* impede debugging with such tools.
*/
try {
destroy(alloc_rec);
}
catch(...) {}
#endif
}
}
curr->next = NULL;
delete curr;
curr = next;
} while ( curr != start );
if ( !string_vec.empty() ) {
std::sort( string_vec.begin(), string_vec.end() );
std::ostringstream oss;
oss << "Error: Allocation pool destroyed with the following memory leak(s):\n";
for (size_t i=0; i< string_vec.size(); ++i)
{
oss << " " << string_vec[i] << std::endl;
}
std::cerr << oss.str() << std::endl;
}
}
// delete singletons
{
SingletonNode * curr = singleton_head;
while (curr) {
SingletonNode * next = curr->next;
delete curr;
curr = next;
}
}
}
AllocationRecord * create( AllocatorBase * arg_allocator
, void * arg_alloc_ptr
, size_t arg_alloc_size
, const std::string & arg_label
)
{
AllocationNode * start = volatile_load(&head);
AllocationNode * curr = start;
int32_t node_index = curr->get_node_index();
if (node_index == 0) {
curr = volatile_load(&curr->next);
}
while (node_index == 0 && curr != start)
{
node_index = curr->get_node_index();
if (node_index == 0) {
curr = volatile_load(&curr->next);
}
}
// Need to allocate and insert a new node
if (node_index == 0 && curr == start)
{
AllocationNode * new_node = new AllocationNode();
node_index = new_node->get_node_index();
AllocationNode * next = NULL;
do {
next = volatile_load(&curr->next);
new_node->next = next;
memory_fence();
} while ( next != atomic_compare_exchange( &(curr->next), next, new_node ) );
curr = new_node;
}
void * buffer = curr->get_buffer(node_index);
// try to set head to curr
if ( start != curr )
{
atomic_compare_exchange( & head, start, curr );
}
return new (buffer) AllocationRecord( arg_allocator
, arg_alloc_ptr
, arg_alloc_size
, node_index
, arg_label
);
}
void destroy( AllocationRecord * alloc_rec )
{
if (alloc_rec) {
const int32_t node_index = alloc_rec->node_index;
AllocationNode * node = get_node( alloc_rec );
// deallocate memory
alloc_rec->allocator->deallocate( alloc_rec->alloc_ptr, alloc_rec->alloc_size );
// call destructor
alloc_rec->~AllocationRecord();
// wait for writes to complete
memory_fence();
// clear node index
node->clear_node_index( node_index );
}
}
void * create_singleton( size_t size, Impl::singleton_create_function_type create_func, Impl::singleton_destroy_function_type destroy_func )
{
SingletonNode * node = new SingletonNode( size, create_func, destroy_func );
SingletonNode * next;
// insert new node at the head of the list
do {
next = volatile_load(&singleton_head);
node->next = next;
} while ( next != atomic_compare_exchange( &singleton_head, next, node ) );
return node->buffer;
}
void print_memory( std::ostream & out ) const
{
AllocationNode * start = head;
AllocationNode * curr = start;
std::vector< std::string > string_vec;
do {
AllocationNode * next = curr->next;
// bit zero does not map to an AllocationRecord
for ( int32_t i=1; i < bitset_type::size; ++i )
{
if (curr->bitset.test(i)) {
AllocationRecord * alloc_rec = reinterpret_cast<AllocationRecord *>( curr->get_buffer(i) );
std::ostringstream oss;
alloc_rec->print( oss );
string_vec.push_back( oss.str() );
}
}
curr = next;
} while ( curr != start );
if ( !string_vec.empty() ) {
std::sort( string_vec.begin(), string_vec.end() );
std::ostringstream oss;
oss << "Tracked Memory:" << std::endl;
for (size_t i=0; i< string_vec.size(); ++i)
{
oss << " " << string_vec[i] << std::endl;
}
out << oss.str() << std::endl;
}
else {
out << "No Tracked Memory" << std::endl;
}
}
// find an AllocationRecord such that
// alloc_ptr <= ptr < alloc_ptr + alloc_size
// otherwise return NULL
AllocationRecord * find( void const * ptr, AllocatorBase const * allocator ) const
{
AllocationNode * start = head;
AllocationNode * curr = start;
char const * const char_ptr = reinterpret_cast<const char *>(ptr);
do {
AllocationNode * next = curr->next;
// bit zero does not map to an AllocationRecord
for ( int32_t i=1; i < bitset_type::size; ++i )
{
if (curr->bitset.test(i)) {
AllocationRecord * alloc_rec = reinterpret_cast<AllocationRecord *>( curr->get_buffer(i) );
char const * const alloc_ptr = reinterpret_cast<char const *>(alloc_rec->alloc_ptr);
if ( (allocator == alloc_rec->allocator)
&& (alloc_ptr <= char_ptr)
&& (char_ptr < (alloc_ptr + alloc_rec->alloc_size)) )
{
return alloc_rec;
}
}
}
curr = next;
} while ( curr != start );
return NULL;
}
private:
AllocationNode * get_node( AllocationRecord * alloc_rec )
{
return reinterpret_cast<AllocationNode *>( alloc_rec - alloc_rec->node_index);
}
AllocationNode * head;
SingletonNode * singleton_head;
};
// create the global pool for allocation records
AllocationRecordPool global_alloc_rec_pool;
// convert a uintptr_t to an AllocationRecord pointer
inline
AllocationRecord * to_alloc_rec( uintptr_t alloc_rec )
{
return reinterpret_cast<AllocationRecord *>( alloc_rec & ~static_cast<uintptr_t>(1) );
}
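// Illustrative note: AllocationTracker stores an AllocationRecord pointer
// whose least significant bit doubles as the reference-counting flag (see
// REF_COUNT_BIT in Kokkos_AllocationTracker.hpp). Records are at least
// pointer-aligned, so the low bit of a valid address is always zero:
//   uintptr_t tagged = reinterpret_cast<uintptr_t>(rec) | 1u; // counted
//   AllocationRecord * rec2 = to_alloc_rec(tagged);           // recovers rec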
} // unnamed namespace
//-----------------------------------------------------------------------------
// Allocation Tracker methods
//-----------------------------------------------------------------------------
// Create a reference counted AllocationTracker
void AllocationTracker::initalize( AllocatorBase * arg_allocator
, void * arg_alloc_ptr
, size_t arg_alloc_size
, const std::string & arg_label
)
{
if ( arg_allocator && arg_alloc_ptr && arg_alloc_size) {
// create record
AllocationRecord * alloc_rec = global_alloc_rec_pool.create( arg_allocator
, arg_alloc_ptr
, arg_alloc_size
, arg_label
);
m_alloc_rec = reinterpret_cast<uintptr_t>(alloc_rec) | REF_COUNT_BIT;
}
}
void AllocationTracker::reallocate( size_t size ) const
{
AllocationRecord * rec = to_alloc_rec( m_alloc_rec );
void * the_alloc_ptr = rec->allocator->reallocate( rec->alloc_ptr, rec->alloc_size, size );
if ( NULL != the_alloc_ptr )
{
*const_cast<void **>(&rec->alloc_ptr) = the_alloc_ptr;
*const_cast<uint64_t *>(&rec->alloc_size) = size;
}
else {
Impl::throw_runtime_exception( "Error: unable to reallocate allocation tracker");
}
}
void AllocationTracker::increment_ref_count() const
{
to_alloc_rec( m_alloc_rec )->increment_ref_count();
}
void AllocationTracker::decrement_ref_count() const
{
AllocationRecord * alloc_rec = to_alloc_rec( m_alloc_rec );
uint32_t the_ref_count = alloc_rec->decrement_ref_count();
if (the_ref_count == 0u) {
try {
global_alloc_rec_pool.destroy( alloc_rec );
}
catch(...) {}
}
}
namespace {
struct NullAllocator { static const char * name() { return "Null Allocator"; } };
}
AllocatorBase * AllocationTracker::allocator() const
{
if (m_alloc_rec & REF_COUNT_MASK) {
return to_alloc_rec(m_alloc_rec)->allocator;
}
return Allocator<NullAllocator>::singleton();
}
void * AllocationTracker::alloc_ptr() const
{
if (m_alloc_rec & REF_COUNT_MASK) {
return to_alloc_rec(m_alloc_rec)->alloc_ptr;
}
return NULL;
}
size_t AllocationTracker::alloc_size() const
{
if (m_alloc_rec & REF_COUNT_MASK) {
return to_alloc_rec(m_alloc_rec)->alloc_size;
}
return 0u;
}
size_t AllocationTracker::ref_count() const
{
if (m_alloc_rec & REF_COUNT_MASK) {
return to_alloc_rec(m_alloc_rec)->ref_count;
}
return 0u;
}
char const * AllocationTracker::label() const
{
if (m_alloc_rec & REF_COUNT_MASK) {
return to_alloc_rec(m_alloc_rec)->label;
}
return "[Empty Allocation Tracker]";
}
void AllocationTracker::print( std::ostream & oss) const
{
if (m_alloc_rec & REF_COUNT_MASK) {
to_alloc_rec(m_alloc_rec)->print(oss);
}
else {
oss << label();
}
}
bool AllocationTracker::set_attribute( AllocatorAttributeBase * attr ) const
{
bool result = false;
if (m_alloc_rec & REF_COUNT_MASK) {
result = to_alloc_rec(m_alloc_rec)->set_attribute(attr);
}
return result;
}
AllocatorAttributeBase * AllocationTracker::attribute() const
{
if (m_alloc_rec & REF_COUNT_MASK) {
return to_alloc_rec(m_alloc_rec)->attribute;
}
return NULL;
}
void AllocationTracker::print_tracked_memory( std::ostream & out )
{
global_alloc_rec_pool.print_memory( out );
}
AllocationTracker AllocationTracker::find( void const * ptr, AllocatorBase const * arg_allocator )
{
AllocationRecord * alloc_rec = global_alloc_rec_pool.find(ptr, arg_allocator);
AllocationTracker tracker;
if ( alloc_rec != NULL )
{
if ( tracking_enabled() ) {
alloc_rec->increment_ref_count();
tracker.m_alloc_rec = reinterpret_cast<uintptr_t>(alloc_rec) | REF_COUNT_BIT;
}
else {
tracker.m_alloc_rec = reinterpret_cast<uintptr_t>(alloc_rec);
}
}
return tracker ;
}
//-----------------------------------------------------------------------------
// static AllocationTracker
//-----------------------------------------------------------------------------
#if defined( KOKKOS_USE_DECENTRALIZED_HOST )
namespace {
// TODO : Detect compiler support for thread local variables
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
bool g_thread_local_tracking_enabled = true;
#pragma omp threadprivate(g_thread_local_tracking_enabled)
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
__thread bool g_thread_local_tracking_enabled = true;
#elif defined( KOKKOS_HAVE_OPENMP )
bool g_thread_local_tracking_enabled = true;
#pragma omp threadprivate(g_thread_local_tracking_enabled)
#elif defined( KOKKOS_HAVE_PTHREAD )
__thread bool g_thread_local_tracking_enabled = true;
#elif defined( KOKKOS_HAVE_SERIAL )
bool g_thread_local_tracking_enabled = true;
#endif
} // unnamed namespace
void AllocationTracker::disable_tracking()
{
g_thread_local_tracking_enabled = false;
}
void AllocationTracker::enable_tracking()
{
g_thread_local_tracking_enabled = true;
}
bool AllocationTracker::tracking_enabled()
{
return g_thread_local_tracking_enabled;
}
#else
namespace {
enum TrackingEnum { TRACKING_ENABLED, TRACKING_DISABLED };
volatile TrackingEnum g_tracking_enabled = TRACKING_ENABLED;
}
void AllocationTracker::disable_tracking()
{
if ( TRACKING_ENABLED != atomic_compare_exchange( &g_tracking_enabled, TRACKING_ENABLED, TRACKING_DISABLED ) ) {
Impl::throw_runtime_exception("Error: Tracking already disabled");
}
}
void AllocationTracker::enable_tracking()
{
if ( TRACKING_DISABLED != atomic_compare_exchange( &g_tracking_enabled, TRACKING_DISABLED, TRACKING_ENABLED ) ) {
Impl::throw_runtime_exception("Error: Tracking already enabled");
}
}
bool AllocationTracker::tracking_enabled()
{
return g_tracking_enabled == TRACKING_ENABLED;
}
#endif
//-----------------------------------------------------------------------------
// create singleton free function
//-----------------------------------------------------------------------------
void * create_singleton( size_t size
, Impl::singleton_create_function_type create_func
, Impl::singleton_destroy_function_type destroy_func )
{
return global_alloc_rec_pool.create_singleton( size, create_func, destroy_func );
}
}} // namespace Kokkos::Impl
#endif /* #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) */
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_AllocationTracker.hpp b/lib/kokkos/core/src/impl/Kokkos_AllocationTracker.hpp
index 331c4e8fa..8912d73ba 100644
--- a/lib/kokkos/core/src/impl/Kokkos_AllocationTracker.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_AllocationTracker.hpp
@@ -1,586 +1,574 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ALLOCATION_TRACKER_HPP
#define KOKKOS_ALLOCATION_TRACKER_HPP
#include <Kokkos_Macros.hpp>
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Error.hpp>
#include <stdint.h>
#include <cstdlib>
#include <string>
#include <iosfwd>
namespace Kokkos { namespace Impl {
//-----------------------------------------------------------------------------
// Create Singleton objects
//-----------------------------------------------------------------------------
typedef void * (*singleton_create_function_type)(void * buffer);
typedef void (*singleton_destroy_function_type)(void *);
void * create_singleton( size_t size
, singleton_create_function_type create_func
, singleton_destroy_function_type destroy_func
);
/// class Singleton
///
/// Default construct a singleton type. This method is used to circumvent
/// order of construction issues. Singleton objects are destroyed after all
/// other allocations in the reverse order of their creation.
template <typename Type>
class Singleton
{
public:
/// Get a pointer to the Singleton. Default construct the singleton if it does not already exist
static Type * get()
{
static Type * singleton = NULL;
if (singleton == NULL) {
Impl::singleton_create_function_type create_func = &create;
Impl::singleton_destroy_function_type destroy_func = &destroy;
singleton = reinterpret_cast<Type*>( Impl::create_singleton( sizeof(Type), create_func, destroy_func ) );
}
return singleton;
}
private:
/// Call the Type destructor
static void destroy(void * ptr)
{
reinterpret_cast<Type*>(ptr)->~Type();
}
/// placement new the Type in buffer
static void * create(void * buffer)
{
return new (buffer) Type();
}
};
//-----------------------------------------------------------------------------
// AllocatorBase
//-----------------------------------------------------------------------------
/// class AllocatorBase
///
/// Abstract base class for all Allocators.
/// Allocators should be singleton objects; use Singleton<Allocator>::get() to
/// create them and avoid order-of-destruction issues.
class AllocatorBase
{
public:
/// name of the allocator
/// used to report memory leaks
virtual const char * name() const = 0;
/// Allocate a buffer of size number of bytes
virtual void* allocate(size_t size) const = 0;
/// Deallocate a buffer with size number of bytes
/// The pointer must have been allocated with a call to corresponding allocate
virtual void deallocate(void * ptr, size_t size) const = 0;
/// Changes the size of the memory block pointed to by ptr.
/// Ptr must have been allocated with the corresponding allocate call
/// The function may move the memory block to a new location
/// (whose address is returned by the function).
///
/// The content of the memory block is preserved up to the lesser of the new and
/// old sizes, even if the block is moved to a new location. If the new size is larger,
/// the value of the newly allocated portion is indeterminate.
///
/// If ptr is a null pointer, the function behaves like allocate, returning a
/// pointer to the beginning of a new block of size bytes.
virtual void * reallocate(void * old_ptr, size_t old_size, size_t new_size) const = 0;
/// can a texture object be bound to the allocated memory
virtual bool support_texture_binding() const = 0;
/// virtual destructor
virtual ~AllocatorBase() {}
};
/// class AllocatorAttributeBase
class AllocatorAttributeBase
{
public:
virtual ~AllocatorAttributeBase() {}
};
//-----------------------------------------------------------------------------
// Allocator< StaticAllocator > : public AllocatorBase
//-----------------------------------------------------------------------------
// HasStaticName
template<typename T>
class HasStaticName
{
typedef const char * (*static_method)();
template<typename U, static_method> struct SFINAE {};
template<typename U> static char Test(SFINAE<U, &U::name>*);
template<typename U> static int Test(...);
public:
enum { value = sizeof(Test<T>(0)) == sizeof(char) };
};
template <typename T>
inline
typename enable_if<HasStaticName<T>::value, const char *>::type
allocator_name()
{
return T::name();
}
template <typename T>
inline
typename enable_if<!HasStaticName<T>::value, const char *>::type
allocator_name()
{
return "Unnamed Allocator";
}
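// How the detection above plays out (illustrative types, not in the source):
//   struct Named   { static const char * name() { return "Named"; } };
//   struct Unnamed {};
// allocator_name<Named>() selects the first overload and returns "Named";
// allocator_name<Unnamed>() falls back to the second and returns
// "Unnamed Allocator". SFINAE on &U::name decides at compile time, and the
// same pattern repeats for allocate/deallocate/reallocate below.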
// HasStaticAllocate
template<typename T>
class HasStaticAllocate
{
typedef void * (*static_method)(size_t);
template<typename U, static_method> struct SFINAE {};
template<typename U> static char Test(SFINAE<U, &U::allocate>*);
template<typename U> static int Test(...);
public:
enum { value = sizeof(Test<T>(0)) == sizeof(char) };
};
template <typename T>
inline
typename enable_if<HasStaticAllocate<T>::value, void *>::type
allocator_allocate(size_t size)
{
return T::allocate(size);
}
template <typename T>
inline
typename enable_if<!HasStaticAllocate<T>::value, void *>::type
allocator_allocate(size_t)
{
throw_runtime_exception( std::string("Error: ")
+ std::string(allocator_name<T>())
+ std::string(" cannot allocate memory!") );
return NULL;
}
// HasStaticDeallocate
template<typename T>
class HasStaticDeallocate
{
typedef void (*static_method)(void *, size_t);
template<typename U, static_method> struct SFINAE {};
template<typename U> static char Test(SFINAE<U, &U::deallocate>*);
template<typename U> static int Test(...);
public:
enum { value = sizeof(Test<T>(0)) == sizeof(char) };
};
template <typename T>
inline
typename enable_if<HasStaticDeallocate<T>::value, void>::type
allocator_deallocate(void * ptr, size_t size)
{
T::deallocate(ptr,size);
}
template <typename T>
inline
typename enable_if<!HasStaticDeallocate<T>::value, void>::type
allocator_deallocate(void *, size_t)
{
throw_runtime_exception( std::string("Error: ")
+ std::string(allocator_name<T>())
+ std::string(" cannot deallocate memory!") );
}
// HasStaticReallocate
template<typename T>
class HasStaticReallocate
{
typedef void * (*static_method)(void *, size_t, size_t);
template<typename U, static_method> struct SFINAE {};
template<typename U> static char Test(SFINAE<U, &U::reallocate>*);
template<typename U> static int Test(...);
public:
enum { value = sizeof(Test<T>(0)) == sizeof(char) };
};
template <typename T>
inline
typename enable_if<HasStaticReallocate<T>::value, void *>::type
allocator_reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
return T::reallocate(old_ptr, old_size, new_size);
}
template <typename T>
inline
typename enable_if<!HasStaticReallocate<T>::value, void *>::type
allocator_reallocate(void *, size_t, size_t)
{
throw_runtime_exception( std::string("Error: ")
+ std::string(allocator_name<T>())
+ std::string(" cannot reallocate memory!") );
return NULL;
}
// HasStaticSupportTextureBinding
template<typename T>
class HasStaticSupportTextureBinding
{
typedef bool (*static_method)();
template<typename U, static_method> struct SFINAE {};
template<typename U> static char Test(SFINAE<U, &U::support_texture_binding>*);
template<typename U> static int Test(...);
public:
enum { value = sizeof(Test<T>(0)) == sizeof(char) };
};
template <typename T>
inline
typename enable_if<HasStaticSupportTextureBinding<T>::value, bool>::type
allocator_support_texture_binding()
{
return T::support_texture_binding();
}
template <typename T>
inline
typename enable_if<!HasStaticSupportTextureBinding<T>::value, bool>::type
allocator_support_texture_binding()
{
return false;
}
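// Minimal StaticAllocator sketch (illustrative, not part of the original
// header) with the full static interface the detection machinery above
// looks for; any method may be omitted, in which case the corresponding
// allocator_* fallback throws or returns a default:
//
//   struct ExampleAllocator {
//     static const char * name() { return "Example Allocator"; }
//     static void * allocate(size_t size) { return std::malloc(size); }
//     static void deallocate(void * ptr, size_t /*size*/) { std::free(ptr); }
//     static void * reallocate(void * p, size_t /*old_size*/, size_t n)
//     { return std::realloc(p, n); }
//     static bool support_texture_binding() { return false; }
//   };
//
// Allocator<ExampleAllocator>::singleton() (defined just below) then yields
// an AllocatorBase* that forwards each virtual call to these statics.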
template <typename T>
class Allocator : public AllocatorBase
{
public:
virtual const char * name() const
{
return allocator_name<T>();
}
virtual void* allocate(size_t size) const
{
return allocator_allocate<T>(size);
}
virtual void deallocate(void * ptr, size_t size) const
{
allocator_deallocate<T>(ptr,size);
}
virtual void * reallocate(void * old_ptr, size_t old_size, size_t new_size) const
{
return allocator_reallocate<T>(old_ptr, old_size, new_size);
}
virtual bool support_texture_binding() const
{
return allocator_support_texture_binding<T>();
}
static AllocatorBase * singleton()
{
return Singleton< Allocator<T> >::get();
}
};
//-----------------------------------------------------------------------------
// AllocationTracker
//-----------------------------------------------------------------------------
// forward declaration for friend classes
-struct CopyWithoutTracking;
struct MallocHelper;
/// class AllocationTracker
/// Will call deallocate from the AllocatorBase when the reference count reaches 0.
/// Reference counting is disabled when the host is in parallel.
class AllocationTracker
{
// use the least significant bit of the AllocationRecord pointer to indicate if the
// AllocationTracker should reference count
enum {
REF_COUNT_BIT = static_cast<uintptr_t>(1)
, REF_COUNT_MASK = ~static_cast<uintptr_t>(1)
};
public:
/// Find an AllocationTracker such that
/// alloc_ptr <= ptr < alloc_ptr + alloc_size
/// O(n) where n is the number of tracked allocations.
template <typename StaticAllocator>
static AllocationTracker find( void const * ptr )
{
return find( ptr, Allocator<StaticAllocator>::singleton() );
}
/// Pretty print all the currently tracked memory
static void print_tracked_memory( std::ostream & out );
/// Default constructor
KOKKOS_INLINE_FUNCTION
AllocationTracker()
: m_alloc_rec(0)
{}
/// Create an AllocationTracker
///
/// Start reference counting the alloc_ptr.
/// When the reference count reaches 0 the allocator's deallocate method
/// will be called with the given size. The alloc_ptr should have been
/// allocated with the allocator's allocate method.
///
/// If arg_allocator == NULL OR arg_alloc_ptr == NULL OR size == 0,
/// do nothing.
template <typename StaticAllocator>
AllocationTracker( StaticAllocator const &
, void * arg_alloc_ptr
, size_t arg_alloc_size
, const std::string & arg_label = std::string("") )
: m_alloc_rec(0)
{
AllocatorBase * arg_allocator = Allocator<StaticAllocator>::singleton();
initalize( arg_allocator, arg_alloc_ptr, arg_alloc_size, arg_label);
}
/// Create an AllocationTracker
///
/// Start reference counting the alloc_ptr.
/// When the reference count reaches 0 the allocator's deallocate method
/// will be called with the given size. The alloc_ptr should have been
/// allocated with the allocator's allocate method.
///
/// If arg_allocator == NULL OR arg_alloc_ptr == NULL OR size == 0,
/// do nothing.
template <typename StaticAllocator>
AllocationTracker( StaticAllocator const &
, size_t arg_alloc_size
, const std::string & arg_label = std::string("")
)
: m_alloc_rec(0)
{
AllocatorBase * arg_allocator = Allocator<StaticAllocator>::singleton();
void * arg_alloc_ptr = arg_allocator->allocate( arg_alloc_size );
initalize( arg_allocator, arg_alloc_ptr, arg_alloc_size, arg_label);
}
/// Copy an AllocationTracker
KOKKOS_INLINE_FUNCTION
AllocationTracker( const AllocationTracker & rhs )
: m_alloc_rec( rhs.m_alloc_rec)
{
#if !defined( __CUDA_ARCH__ )
if ( rhs.ref_counting() && tracking_enabled() ) {
increment_ref_count();
}
else {
m_alloc_rec = m_alloc_rec & REF_COUNT_MASK;
}
#else
m_alloc_rec = m_alloc_rec & REF_COUNT_MASK;
#endif
}
/// Assign an AllocationTracker.
/// Decrement the reference count of the current tracker if necessary.
KOKKOS_INLINE_FUNCTION
AllocationTracker & operator=( const AllocationTracker & rhs )
{
if (this != &rhs) {
#if !defined( __CUDA_ARCH__ )
if ( ref_counting() ) {
decrement_ref_count();
}
m_alloc_rec = rhs.m_alloc_rec;
if ( rhs.ref_counting() && tracking_enabled() ) {
increment_ref_count();
}
else {
m_alloc_rec = m_alloc_rec & REF_COUNT_MASK;
}
#else
m_alloc_rec = rhs.m_alloc_rec & REF_COUNT_MASK;
#endif
}
return * this;
}
/// Destructor
/// Decrement the reference count if necessary
KOKKOS_INLINE_FUNCTION
~AllocationTracker()
{
#if !defined( __CUDA_ARCH__ )
if ( ref_counting() ) {
decrement_ref_count();
}
#endif
}
/// Is the tracker valid?
KOKKOS_INLINE_FUNCTION
bool is_valid() const
{
return (m_alloc_rec & REF_COUNT_MASK);
}
/// clear the tracker
KOKKOS_INLINE_FUNCTION
void clear()
{
#if !defined( __CUDA_ARCH__ )
if ( ref_counting() ) {
decrement_ref_count();
}
#endif
m_alloc_rec = 0;
}
/// is this tracker currently counting allocations?
KOKKOS_INLINE_FUNCTION
bool ref_counting() const
{
return (m_alloc_rec & REF_COUNT_BIT);
}
AllocatorBase * allocator() const;
/// pointer to the allocated memory
void * alloc_ptr() const;
/// size in bytes of the allocated memory
size_t alloc_size() const;
/// the current reference count
size_t ref_count() const;
/// the label given to the allocation
char const * label() const;
/// pretty print all the tracker's information to the std::ostream
void print( std::ostream & oss) const;
/// set an attribute ptr on the allocation record
/// the arg_attribute pointer will be deleted when the record is destroyed
/// the attribute ptr can only be set once
bool set_attribute( AllocatorAttributeBase * arg_attribute) const;
/// get the attribute ptr from the allocation record
AllocatorAttributeBase * attribute() const;
/// reallocate the memory tracked by this allocation
/// NOT thread-safe
void reallocate( size_t size ) const;
+ static void disable_tracking();
+ static void enable_tracking();
+ static bool tracking_enabled();
+
private:
static AllocationTracker find( void const * ptr, AllocatorBase const * arg_allocator );
void initalize( AllocatorBase * arg_allocator
, void * arg_alloc_ptr
, size_t arg_alloc_size
, std::string const & label );
void increment_ref_count() const;
void decrement_ref_count() const;
- static void disable_tracking();
- static void enable_tracking();
- static bool tracking_enabled();
-
- friend struct Impl::CopyWithoutTracking;
friend struct Impl::MallocHelper;
uintptr_t m_alloc_rec;
};
-
-
-/// Make a copy of the functor with reference counting disabled
-struct CopyWithoutTracking
-{
- template <typename Functor>
- static Functor apply( const Functor & f )
- {
- AllocationTracker::disable_tracking();
- Functor func(f);
- AllocationTracker::enable_tracking();
- return func;
- }
-};
-
}} // namespace Kokkos::Impl
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
#endif //KOKKOS_ALLOCATION_TRACKER_HPP
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp
index f95ed67da..b1d47e19f 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp
@@ -1,462 +1,466 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ATOMIC_VIEW_HPP
#define KOKKOS_ATOMIC_VIEW_HPP
#include <Kokkos_Macros.hpp>
#include <Kokkos_Atomic.hpp>
namespace Kokkos { namespace Impl {
class AllocationTracker;
// The following tag is used to prevent an implicit constructor call when
// trying to assign a literal 0 (e.g. `x = 0;`).
struct AtomicViewConstTag {};
template<class ViewTraits>
class AtomicDataElement {
public:
typedef typename ViewTraits::value_type value_type;
typedef typename ViewTraits::const_value_type const_value_type;
typedef typename ViewTraits::non_const_value_type non_const_value_type;
volatile value_type* const ptr;
KOKKOS_INLINE_FUNCTION
AtomicDataElement(value_type* ptr_, AtomicViewConstTag ):ptr(ptr_){}
KOKKOS_INLINE_FUNCTION
const_value_type operator = (const_value_type& val) const {
*ptr = val;
return val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator = (volatile const_value_type& val) const {
*ptr = val;
return val;
}
KOKKOS_INLINE_FUNCTION
void inc() const {
Kokkos::atomic_increment(ptr);
}
KOKKOS_INLINE_FUNCTION
void dec() const {
Kokkos::atomic_decrement(ptr);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator ++ () const {
const_value_type tmp = Kokkos::atomic_fetch_add(ptr,1);
return tmp+1;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator -- () const {
const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-1);
return tmp-1;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator ++ (int) const {
return Kokkos::atomic_fetch_add(ptr,1);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator -- (int) const {
return Kokkos::atomic_fetch_add(ptr,-1);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator += (const_value_type& val) const {
const_value_type tmp = Kokkos::atomic_fetch_add(ptr,val);
return tmp+val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator += (volatile const_value_type& val) const {
const_value_type tmp = Kokkos::atomic_fetch_add(ptr,val);
return tmp+val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator -= (const_value_type& val) const {
const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-val);
return tmp-val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator -= (volatile const_value_type& val) const {
const_value_type tmp = Kokkos::atomic_fetch_add(ptr,-val);
return tmp-val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator *= (const_value_type& val) const {
return Kokkos::atomic_mul_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator *= (volatile const_value_type& val) const {
return Kokkos::atomic_mul_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator /= (const_value_type& val) const {
return Kokkos::atomic_div_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator /= (volatile const_value_type& val) const {
return Kokkos::atomic_div_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator %= (const_value_type& val) const {
return Kokkos::atomic_mod_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator %= (volatile const_value_type& val) const {
return Kokkos::atomic_mod_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator &= (const_value_type& val) const {
return Kokkos::atomic_and_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator &= (volatile const_value_type& val) const {
return Kokkos::atomic_and_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator ^= (const_value_type& val) const {
return Kokkos::atomic_xor_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator ^= (volatile const_value_type& val) const {
return Kokkos::atomic_xor_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator |= (const_value_type& val) const {
return Kokkos::atomic_or_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator |= (volatile const_value_type& val) const {
return Kokkos::atomic_or_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator <<= (const_value_type& val) const {
return Kokkos::atomic_lshift_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator <<= (volatile const_value_type& val) const {
return Kokkos::atomic_lshift_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator >>= (const_value_type& val) const {
return Kokkos::atomic_rshift_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator >>= (volatile const_value_type& val) const {
return Kokkos::atomic_rshift_fetch(ptr,val);
}
KOKKOS_INLINE_FUNCTION
const_value_type operator + (const_value_type& val) const {
return *ptr+val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator + (volatile const_value_type& val) const {
return *ptr+val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator - (const_value_type& val) const {
return *ptr-val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator - (volatile const_value_type& val) const {
return *ptr-val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator * (const_value_type& val) const {
return *ptr*val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator * (volatile const_value_type& val) const {
return *ptr*val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator / (const_value_type& val) const {
return *ptr/val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator / (volatile const_value_type& val) const {
return *ptr/val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator % (const_value_type& val) const {
return *ptr%val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator % (volatile const_value_type& val) const {
return *ptr%val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator ! () const {
return !*ptr;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator && (const_value_type& val) const {
return *ptr&&val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator && (volatile const_value_type& val) const {
return *ptr&&val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator || (const_value_type& val) const {
return *ptr||val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator || (volatile const_value_type& val) const {
return *ptr||val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator & (const_value_type& val) const {
return *ptr&val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator & (volatile const_value_type& val) const {
return *ptr&val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator | (const_value_type& val) const {
return *ptr|val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator | (volatile const_value_type& val) const {
return *ptr|val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator ^ (const_value_type& val) const {
return *ptr^val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator ^ (volatile const_value_type& val) const {
return *ptr^val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator ~ () const {
return ~*ptr;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator << (const unsigned int& val) const {
return *ptr<<val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator << (volatile const unsigned int& val) const {
return *ptr<<val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator >> (const unsigned int& val) const {
return *ptr>>val;
}
KOKKOS_INLINE_FUNCTION
const_value_type operator >> (volatile const unsigned int& val) const {
return *ptr>>val;
}
KOKKOS_INLINE_FUNCTION
bool operator == (const_value_type& val) const {
return *ptr == val;
}
KOKKOS_INLINE_FUNCTION
bool operator == (volatile const_value_type& val) const {
return *ptr == val;
}
KOKKOS_INLINE_FUNCTION
bool operator != (const_value_type& val) const {
return *ptr != val;
}
KOKKOS_INLINE_FUNCTION
bool operator != (volatile const_value_type& val) const {
return *ptr != val;
}
KOKKOS_INLINE_FUNCTION
bool operator >= (const_value_type& val) const {
return *ptr >= val;
}
KOKKOS_INLINE_FUNCTION
bool operator >= (volatile const_value_type& val) const {
return *ptr >= val;
}
KOKKOS_INLINE_FUNCTION
bool operator <= (const_value_type& val) const {
return *ptr <= val;
}
KOKKOS_INLINE_FUNCTION
bool operator <= (volatile const_value_type& val) const {
return *ptr <= val;
}
KOKKOS_INLINE_FUNCTION
bool operator < (const_value_type& val) const {
return *ptr < val;
}
KOKKOS_INLINE_FUNCTION
bool operator < (volatile const_value_type& val) const {
return *ptr < val;
}
KOKKOS_INLINE_FUNCTION
bool operator > (const_value_type& val) const {
return *ptr > val;
}
KOKKOS_INLINE_FUNCTION
bool operator > (volatile const_value_type& val) const {
return *ptr > val;
}
KOKKOS_INLINE_FUNCTION
operator const_value_type () const {
//return Kokkos::atomic_load(ptr);
return *ptr;
}
KOKKOS_INLINE_FUNCTION
operator volatile non_const_value_type () volatile const {
//return Kokkos::atomic_load(ptr);
return *ptr;
}
};
template<class ViewTraits>
class AtomicViewDataHandle {
public:
typename ViewTraits::value_type* ptr;
KOKKOS_INLINE_FUNCTION
AtomicViewDataHandle()
: ptr(NULL)
{}
KOKKOS_INLINE_FUNCTION
AtomicViewDataHandle(typename ViewTraits::value_type* ptr_)
:ptr(ptr_)
{}
template<class iType>
KOKKOS_INLINE_FUNCTION
AtomicDataElement<ViewTraits> operator[] (const iType& i) const {
return AtomicDataElement<ViewTraits>(ptr+i,AtomicViewConstTag());
}
KOKKOS_INLINE_FUNCTION
operator typename ViewTraits::value_type * () const { return ptr ; }
};
template<unsigned Size>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars;
template<>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<4> {
typedef int type;
};
template<>
struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<8> {
typedef int64_t type;
};
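// Illustrative note: the primary template above is declared but never
// defined, so instantiating it for an unsupported size fails to compile:
//   Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<2>::type t;
// is an error, while <4> and <8> yield int and int64_t. The long struct
// name makes the resulting diagnostic self-explanatory.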
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
// Must be non-const, atomic access trait, and 32 or 64 bit type for true atomics.
template<class ViewTraits>
class ViewDataHandle<
ViewTraits ,
typename enable_if<
( ! is_same<typename ViewTraits::const_value_type,typename ViewTraits::value_type>::value) &&
( ViewTraits::memory_traits::Atomic )
>::type >
{
private:
// typedef typename if_c<(sizeof(typename ViewTraits::const_value_type)==4) ||
// (sizeof(typename ViewTraits::const_value_type)==8),
// int, Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars >::type
// atomic_view_possible;
typedef typename Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<sizeof(typename ViewTraits::const_value_type)>::type enable_atomic_type;
typedef ViewDataHandle self_type;
public:
enum { ReturnTypeIsReference = false };
typedef Impl::AtomicViewDataHandle<ViewTraits> handle_type;
typedef Impl::AtomicDataElement<ViewTraits> return_type;
KOKKOS_INLINE_FUNCTION
static handle_type create_handle( typename ViewTraits::value_type * arg_data_ptr, AllocationTracker const & /*arg_tracker*/ )
{
return handle_type(arg_data_ptr);
}
};
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
}} // namespace Kokkos::Impl
#endif
diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp
index 62581569f..14066e8be 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp
@@ -1,211 +1,232 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ATOMIC_WINDOWS_HPP
#define KOKKOS_ATOMIC_WINDOWS_HPP
#ifdef _WIN32
#define NOMINMAX
+#include <winsock2.h>
#include <Windows.h>
namespace Kokkos {
namespace Impl {
_declspec(align(16))
struct cas128_t
{
LONGLONG lower;
LONGLONG upper;
KOKKOS_INLINE_FUNCTION
bool operator != (const cas128_t& a) const {
return (lower != a.lower) || upper != a.upper;
}
};
}
-#ifdef KOKKOS_HAVE_CXX11
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange(volatile T * const dest, const T & compare,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(LONG), const T & >::type val)
{
union U {
LONG i;
T t;
KOKKOS_INLINE_FUNCTION U() {};
} tmp;
tmp.i = _InterlockedCompareExchange((LONG*)dest, *((LONG*)&val), *((LONG*)&compare));
return tmp.t;
}
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange(volatile T * const dest, const T & compare,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(LONGLONG), const T & >::type val)
{
union U {
LONGLONG i;
T t;
KOKKOS_INLINE_FUNCTION U() {};
} tmp;
tmp.i = _InterlockedCompareExchange64((LONGLONG*)dest, *((LONGLONG*)&val), *((LONGLONG*)&compare));
return tmp.t;
}
template < typename T >
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange(volatile T * const dest, const T & compare,
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val)
{
union U {
Impl::cas128_t i;
T t;
KOKKOS_INLINE_FUNCTION U() {};
} tmp, newval;
newval.t = val;
- tmp.i = _InterlockedCompareExchange128((LONGLONG*)dest, newval.i.upper, newval.i.lower, *((LONGLONG*)&compare));
+ _InterlockedCompareExchange128((LONGLONG*)dest, newval.i.upper, newval.i.lower, ((LONGLONG*)&compare));
+ tmp.t = compare; // ComparandResult now holds the destination's previous value
return tmp.t;
}
+ template < typename T >
+ KOKKOS_INLINE_FUNCTION
+ T atomic_compare_exchange_strong(volatile T * const dest, const T & compare, const T & val)
+ {
+ return atomic_compare_exchange(dest,compare,val);
+ }
+
template< typename T >
T atomic_fetch_or(volatile T * const dest, const T val) {
T oldval = *dest;
T assume;
do {
assume = oldval;
T newval = val | oldval;
oldval = atomic_compare_exchange(dest, assume, newval);
} while (assume != oldval);
return oldval;
}
template< typename T >
T atomic_fetch_and(volatile T * const dest, const T val) {
T oldval = *dest;
T assume;
do {
assume = oldval;
T newval = val & oldval;
oldval = atomic_compare_exchange(dest, assume, newval);
} while (assume != oldval);
return oldval;
}
template< typename T >
T atomic_fetch_add(volatile T * const dest, const T val) {
T oldval = *dest;
T assume;
do {
assume = oldval;
T newval = val + oldval;
oldval = atomic_compare_exchange(dest, assume, newval);
} while (assume != oldval);
return oldval;
}
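// Note on the pattern above and in the operations that follow: Windows
// exposes only a handful of Interlocked* primitives, so each
// read-modify-write is emulated with a compare-and-swap loop: read the
// current value, compute the new one, and retry whenever another thread
// changed *dest in between.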
template< typename T >
- T atomic_fetch_exchange(volatile T * const dest, const T val) {
+ T atomic_fetch_sub(volatile T * const dest, const T val) {
+ T oldval = *dest;
+ T assume;
+ do {
+ assume = oldval;
+ T newval = oldval - val;
+ oldval = atomic_compare_exchange(dest, assume, newval);
+ } while (assume != oldval);
+
+ return oldval;
+ }
+
+ template< typename T >
+ T atomic_exchange(volatile T * const dest, const T val) {
T oldval = *dest;
T assume;
do {
assume = oldval;
oldval = atomic_compare_exchange(dest, assume, val);
} while (assume != oldval);
return oldval;
}
template< typename T >
void atomic_or(volatile T * const dest, const T val) {
atomic_fetch_or(dest, val);
}
template< typename T >
void atomic_and(volatile T * const dest, const T val) {
atomic_fetch_and(dest, val);
}
template< typename T >
void atomic_add(volatile T * const dest, const T val) {
atomic_fetch_add(dest, val);
}
template< typename T >
- void atomic_exchange(volatile T * const dest, const T val) {
- atomic_fetch_exchange(dest, val);
+ void atomic_sub(volatile T * const dest, const T val) {
+ atomic_fetch_sub(dest, val);
}
template< typename T >
void atomic_assign(volatile T * const dest, const T val) {
atomic_exchange(dest, val);
}
template< typename T >
void atomic_increment(volatile T * const dest) {
T oldval = *dest;
T assume;
do {
assume = oldval;
T newval = assume + static_cast<T>(1);
oldval = atomic_compare_exchange(dest, assume, newval);
} while (assume != oldval);
}
template< typename T >
void atomic_decrement(volatile T * const dest) {
T oldval = *dest;
T assume;
do {
assume = oldval;
T newval = assume - static_cast<T>(1);
oldval = atomic_compare_exchange(dest, assume, newval);
} while (assume != oldval);
}
}
#endif
#endif
-#endif
\ No newline at end of file
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_BasicAllocators.cpp b/lib/kokkos/core/src/impl/Kokkos_BasicAllocators.cpp
index 08085dca3..6562ea700 100644
--- a/lib/kokkos/core/src/impl/Kokkos_BasicAllocators.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_BasicAllocators.cpp
@@ -1,281 +1,287 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_HostSpace.hpp>
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
#include <impl/Kokkos_BasicAllocators.hpp>
#include <impl/Kokkos_Error.hpp>
#include <stdint.h> // uintptr_t
#include <cstdlib> // for malloc, realloc, and free
#include <cstring> // for memcpy
+
+#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
#include <sys/mman.h> // for mmap, munmap, MAP_ANON, etc
#include <unistd.h> // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES
+#endif
#include <sstream>
namespace Kokkos { namespace Impl {
/*--------------------------------------------------------------------------*/
void* MallocAllocator::allocate( size_t size )
{
void * ptr = NULL;
if (size) {
ptr = malloc(size);
if (!ptr)
{
std::ostringstream msg ;
msg << name() << ": allocate(" << size << ") FAILED";
throw_runtime_exception( msg.str() );
}
}
return ptr;
}
void MallocAllocator::deallocate( void * ptr, size_t /*size*/ )
{
if (ptr) {
free(ptr);
}
}
void * MallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size)
{
void * ptr = realloc(old_ptr, new_size);
if (new_size > 0u && ptr == NULL) {
throw_runtime_exception("Error: Malloc Allocator could not reallocate memory");
}
return ptr;
}
/*--------------------------------------------------------------------------*/
namespace {
void * raw_aligned_allocate( size_t size, size_t alignment )
{
void * ptr = NULL;
if ( size ) {
#if defined( __INTEL_COMPILER ) && !defined ( KOKKOS_HAVE_CUDA )
ptr = _mm_malloc( size , alignment );
-#elif ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
- ( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 )
+#elif defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
posix_memalign( & ptr, alignment , size );
#else
// Over-allocate and round up to guarantee proper alignment.
size_t size_padded = size + alignment + sizeof(void *);
void * alloc_ptr = malloc( size_padded );
if (alloc_ptr) {
uintptr_t address = reinterpret_cast<uintptr_t>(alloc_ptr);
// offset enough to record the alloc_ptr
address += sizeof(void *);
uintptr_t rem = address % alignment;
uintptr_t offset = rem ? (alignment - rem) : 0u;
address += offset;
ptr = reinterpret_cast<void *>(address);
// record the alloc'd pointer
address -= sizeof(void *);
*reinterpret_cast<void **>(address) = alloc_ptr;
}
#endif
}
return ptr;
}
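// Worked example of the fallback path above (illustrative numbers): with
// alignment = 64 and malloc returning alloc_ptr = 0x1008,
//   address = 0x1008 + sizeof(void*) = 0x1010
//   rem     = 0x1010 % 64            = 0x10
//   offset  = 64 - 0x10              = 0x30
//   ptr     = 0x1040 (64-byte aligned)
// and alloc_ptr is stashed in the pointer-sized slot just below ptr so
// raw_aligned_deallocate() can recover it and pass it to free().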
void raw_aligned_deallocate( void * ptr, size_t /*size*/ )
{
if ( ptr ) {
#if defined( __INTEL_COMPILER ) && !defined ( KOKKOS_HAVE_CUDA )
_mm_free( ptr );
-#elif ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
- ( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 )
+#elif defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
free( ptr );
#else
// get the alloc'd pointer
void * alloc_ptr = *(reinterpret_cast<void **>(ptr) -1);
free( alloc_ptr );
#endif
}
}
}
void* AlignedAllocator::allocate( size_t size )
{
void * ptr = 0 ;
if ( size ) {
ptr = raw_aligned_allocate(size, MEMORY_ALIGNMENT);
if (!ptr)
{
std::ostringstream msg ;
msg << name() << ": allocate(" << size << ") FAILED";
throw_runtime_exception( msg.str() );
}
}
return ptr;
}
void AlignedAllocator::deallocate( void * ptr, size_t size )
{
raw_aligned_deallocate( ptr, size);
}
void * AlignedAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
void * ptr = old_ptr;
if (old_size < new_size) {
ptr = allocate( new_size );
memcpy(ptr, old_ptr, old_size );
deallocate( old_ptr, old_size );
}
return ptr;
}
/*--------------------------------------------------------------------------*/
// mmap flags for private anonymous memory allocation
#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE )
#define MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
#elif defined( MAP_ANON) && defined( MAP_PRIVATE )
#define MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
#else
#define NO_MMAP
#endif
// huge page tables
#if !defined( NO_MMAP )
#if defined( MAP_HUGETLB )
#define MMAP_FLAGS_HUGE (MMAP_FLAGS | MAP_HUGETLB )
#elif defined( MMAP_FLAGS )
#define MMAP_FLAGS_HUGE MMAP_FLAGS
#endif
// threshold to use huge pages: 1u << 27 = 128 MB
#define MMAP_USE_HUGE_PAGES (1u << 27)
#endif
// read write access to private memory
#if !defined( NO_MMAP )
#define MMAP_PROTECTION (PROT_READ | PROT_WRITE)
#endif
void* PageAlignedAllocator::allocate( size_t size )
{
void *ptr = NULL;
if (size) {
#if !defined NO_MMAP
if ( size < MMAP_USE_HUGE_PAGES ) {
ptr = mmap( NULL, size, MMAP_PROTECTION, MMAP_FLAGS, -1 /*file descriptor*/, 0 /*offset*/);
} else {
ptr = mmap( NULL, size, MMAP_PROTECTION, MMAP_FLAGS_HUGE, -1 /*file descriptor*/, 0 /*offset*/);
}
if (ptr == MAP_FAILED) {
ptr = NULL;
}
#else
static const size_t page_size = 4096; // TODO: read in from sysconf( _SC_PAGE_SIZE )
ptr = raw_aligned_allocate( size, page_size);
#endif
if (!ptr)
{
std::ostringstream msg ;
msg << name() << ": allocate(" << size << ") FAILED";
throw_runtime_exception( msg.str() );
}
}
return ptr;
}
void PageAlignedAllocator::deallocate( void * ptr, size_t size )
{
#if !defined( NO_MMAP )
munmap(ptr, size);
#else
raw_aligned_deallocate(ptr, size);
#endif
}
void * PageAlignedAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
void * ptr = NULL;
#if defined( NO_MMAP ) || defined( __APPLE__ ) || defined( __CYGWIN__ )
if (old_size != new_size) {
ptr = allocate( new_size );
memcpy(ptr, old_ptr, (old_size < new_size ? old_size : new_size) );
deallocate( old_ptr, old_size );
}
else {
ptr = old_ptr;
}
#else
ptr = mremap( old_ptr, old_size, new_size, MREMAP_MAYMOVE );
if (ptr == MAP_FAILED) {
throw_runtime_exception("Error: Page Aligned Allocator could not reallocate memory");
}
#endif
return ptr;
}
}} // namespace Kokkos::Impl
+
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_BasicAllocators.hpp b/lib/kokkos/core/src/impl/Kokkos_BasicAllocators.hpp
index 76377c5f1..43a150fb4 100644
--- a/lib/kokkos/core/src/impl/Kokkos_BasicAllocators.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_BasicAllocators.hpp
@@ -1,118 +1,121 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BASIC_ALLOCATORS_HPP
#define KOKKOS_BASIC_ALLOCATORS_HPP
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
namespace Kokkos { namespace Impl {
/// class UnmanagedAllocator
/// does nothing when deallocate(ptr,size) is called
class UnmanagedAllocator
{
public:
static const char * name() { return "Unmanaged Allocator"; }
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
};
/// class MallocAllocator
class MallocAllocator
{
public:
static const char * name()
{
return "Malloc Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t size);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
};
/// class AlignedAllocator
/// memory aligned to Kokkos::Impl::MEMORY_ALIGNMENT
class AlignedAllocator
{
public:
static const char * name()
{
return "Aligned Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t size);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
};
/// class PageAlignedAllocator
/// memory aligned to PAGE_SIZE
class PageAlignedAllocator
{
public:
static const char * name()
{
return "Page Aligned Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t size);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
};
}} // namespace Kokkos::Impl
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
#endif //KOKKOS_BASIC_ALLOCATORS_HPP
diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp
index 1c3c83cfe..bb0ce3f83 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp
@@ -1,447 +1,454 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
#include <cctype>
#include <cstring>
#include <iostream>
#include <cstdlib>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
namespace {
bool is_unsigned_int(const char* str)
{
const size_t len = strlen (str);
for (size_t i = 0; i < len; ++i) {
if (! isdigit (str[i])) {
return false;
}
}
return true;
}
void initialize_internal(const InitArguments& args)
{
+// This is an experimental setting.
+// For KNL in Flat mode this variable should be set so that
+// memkind allocates high-bandwidth memory correctly.
+#ifdef KOKKOS_HAVE_HBWSPACE
+setenv("MEMKIND_HBW_NODES", "1", 0);
+#endif
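+// Note on the call above: the final argument 0 to setenv means an existing
+// user-provided MEMKIND_HBW_NODES value is left untouched.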
+
// Protect declarations, to prevent "unused variable" warnings.
#if defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
const int num_threads = args.num_threads;
const int use_numa = args.num_numa;
#endif // defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_HAVE_CUDA )
const int use_gpu = args.device_id;
#endif // defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_HAVE_OPENMP )
if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
if(num_threads>0) {
if(use_numa>0) {
Kokkos::OpenMP::initialize(num_threads,use_numa);
}
else {
Kokkos::OpenMP::initialize(num_threads);
}
} else {
Kokkos::OpenMP::initialize();
}
//std::cout << "Kokkos::initialize() fyi: OpenMP enabled and initialized" << std::endl ;
}
else {
//std::cout << "Kokkos::initialize() fyi: OpenMP enabled but not initialized" << std::endl ;
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
if(num_threads>0) {
if(use_numa>0) {
Kokkos::Threads::initialize(num_threads,use_numa);
}
else {
Kokkos::Threads::initialize(num_threads);
}
} else {
Kokkos::Threads::initialize();
}
//std::cout << "Kokkos::initialize() fyi: Pthread enabled and initialized" << std::endl ;
}
else {
//std::cout << "Kokkos::initialize() fyi: Pthread enabled but not initialized" << std::endl ;
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
// Prevent "unused variable" warning for 'args' input struct. If
// Serial::initialize() ever needs to take arguments from the input
// struct, you may remove this line of code.
(void) args;
if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Serial::initialize();
}
#endif
#if defined( KOKKOS_HAVE_CUDA )
if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
if (use_gpu > -1) {
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
}
else {
Kokkos::Cuda::initialize();
}
//std::cout << "Kokkos::initialize() fyi: Cuda enabled and initialized" << std::endl ;
}
#endif
#ifdef KOKKOSP_ENABLE_PROFILING
Kokkos::Experimental::initialize();
#endif
}
void finalize_internal( const bool all_spaces = false )
{
#if defined( KOKKOS_HAVE_CUDA )
if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
if(Kokkos::Cuda::is_initialized())
Kokkos::Cuda::finalize();
}
#endif
#if defined( KOKKOS_HAVE_OPENMP )
if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
if(Kokkos::OpenMP::is_initialized())
Kokkos::OpenMP::finalize();
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
if(Kokkos::Threads::is_initialized())
Kokkos::Threads::finalize();
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
if(Kokkos::Serial::is_initialized())
Kokkos::Serial::finalize();
}
#endif
#ifdef KOKKOSP_ENABLE_PROFILING
Kokkos::Experimental::finalize();
#endif
}
void fence_internal()
{
#if defined( KOKKOS_HAVE_CUDA )
if( Impl::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
Kokkos::Cuda::fence();
}
#endif
#if defined( KOKKOS_HAVE_OPENMP )
if( Impl::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::OpenMP::fence();
}
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
if( Impl::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Threads::fence();
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
if( Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
Impl::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Serial::fence();
}
#endif
}
} // namespace
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
void initialize(int& narg, char* arg[])
{
int num_threads = -1;
int numa = -1;
int device = -1;
int kokkos_threads_found = 0;
int kokkos_numa_found = 0;
int kokkos_device_found = 0;
int kokkos_ndevices_found = 0;
int iarg = 0;
while (iarg < narg) {
if ((strncmp(arg[iarg],"--kokkos-threads",16) == 0) || (strncmp(arg[iarg],"--threads",9) == 0)) {
//Find the number of threads (expecting --threads=XX)
if (!((strncmp(arg[iarg],"--kokkos-threads=",17) == 0) || (strncmp(arg[iarg],"--threads=",10) == 0)))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--threads/--kokkos-threads'. Raised by Kokkos::initialize(int narg, char* argc[]).");
char* number = strchr(arg[iarg],'=')+1;
if(!Impl::is_unsigned_int(number) || (strlen(number)==0))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--threads/--kokkos-threads'. Raised by Kokkos::initialize(int narg, char* argc[]).");
if((strncmp(arg[iarg],"--kokkos-threads",16) == 0) || !kokkos_threads_found)
num_threads = atoi(number);
//Remove the --kokkos-threads argument from the list but leave --threads
if(strncmp(arg[iarg],"--kokkos-threads",16) == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
kokkos_threads_found=1;
narg--;
} else {
iarg++;
}
} else if ((strncmp(arg[iarg],"--kokkos-numa",13) == 0) || (strncmp(arg[iarg],"--numa",6) == 0)) {
//Find the number of NUMA regions (expecting --numa=XX)
if (!((strncmp(arg[iarg],"--kokkos-numa=",14) == 0) || (strncmp(arg[iarg],"--numa=",7) == 0)))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--numa/--kokkos-numa'. Raised by Kokkos::initialize(int narg, char* argc[]).");
char* number = strchr(arg[iarg],'=')+1;
if(!Impl::is_unsigned_int(number) || (strlen(number)==0))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--numa/--kokkos-numa'. Raised by Kokkos::initialize(int narg, char* argc[]).");
if((strncmp(arg[iarg],"--kokkos-numa",13) == 0) || !kokkos_numa_found)
numa = atoi(number);
//Remove the --kokkos-numa argument from the list but leave --numa
if(strncmp(arg[iarg],"--kokkos-numa",13) == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
kokkos_numa_found=1;
narg--;
} else {
iarg++;
}
} else if ((strncmp(arg[iarg],"--kokkos-device",15) == 0) || (strncmp(arg[iarg],"--device",8) == 0)) {
//Find the device id (expecting --device=XX)
if (!((strncmp(arg[iarg],"--kokkos-device=",16) == 0) || (strncmp(arg[iarg],"--device=",9) == 0)))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--device/--kokkos-device'. Raised by Kokkos::initialize(int narg, char* argc[]).");
char* number = strchr(arg[iarg],'=')+1;
if(!Impl::is_unsigned_int(number) || (strlen(number)==0))
Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--device/--kokkos-device'. Raised by Kokkos::initialize(int narg, char* argc[]).");
if((strncmp(arg[iarg],"--kokkos-device",15) == 0) || !kokkos_device_found)
device = atoi(number);
//Remove the --kokkos-device argument from the list but leave --device
if(strncmp(arg[iarg],"--kokkos-device",15) == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
kokkos_device_found=1;
narg--;
} else {
iarg++;
}
} else if ((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || (strncmp(arg[iarg],"--ndevices",10) == 0)) {
//Find the number of devices (expecting --ndevices=XX[,XX])
if (!((strncmp(arg[iarg],"--kokkos-ndevices=",18) == 0) || (strncmp(arg[iarg],"--ndevices=",11) == 0)))
Impl::throw_runtime_exception("Error: expecting an '=INT[,INT]' after command line argument '--ndevices/--kokkos-ndevices'. Raised by Kokkos::initialize(int narg, char* argc[]).");
int ndevices=-1;
int skip_device = 9999;
char* num1 = strchr(arg[iarg],'=')+1;
char* num2 = strpbrk(num1,",");
int num1_len = num2==NULL?strlen(num1):num2-num1;
char* num1_only = new char[num1_len+1];
strncpy(num1_only,num1,num1_len);
num1_only[num1_len]=0;
if(!Impl::is_unsigned_int(num1_only) || (strlen(num1_only)==0)) {
Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--kokkos-ndevices'. Raised by Kokkos::initialize(int narg, char* argc[]).");
}
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found)
ndevices = atoi(num1_only);
if( num2 != NULL ) {
if(( !Impl::is_unsigned_int(num2+1) ) || (strlen(num2)==1) )
Impl::throw_runtime_exception("Error: expecting an integer number after command line argument '--kokkos-ndevices=XX,'. Raised by Kokkos::initialize(int narg, char* argc[]).");
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found)
skip_device = atoi(num2+1);
}
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found) {
char *str;
if ((str = getenv("SLURM_LOCALID"))) {
int local_rank = atoi(str);
device = local_rank % ndevices;
if (device >= skip_device) device++;
}
if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
int local_rank = atoi(str);
device = local_rank % ndevices;
if (device >= skip_device) device++;
}
if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
int local_rank = atoi(str);
device = local_rank % ndevices;
if (device >= skip_device) device++;
}
if(device==-1) {
device = 0;
if (device >= skip_device) device++;
}
}
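// Worked example of the mapping above (a sketch, assuming only
// SLURM_LOCALID is set): with --kokkos-ndevices=2,0 and SLURM_LOCALID == 3,
// ndevices == 2 and skip_device == 0, so device = 3 % 2 = 1; because
// 1 >= skip_device the id is bumped to 2, leaving device 0 (e.g. the
// display GPU) free while ranks rotate over the remaining devices.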
//Remove the --kokkos-ndevices argument from the list but leave --ndevices
if(strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
kokkos_ndevices_found=1;
narg--;
} else {
iarg++;
}
} else if ((strcmp(arg[iarg],"--kokkos-help") == 0) || (strcmp(arg[iarg],"--help") == 0)) {
std::cout << std::endl;
std::cout << "--------------------------------------------------------------------------------" << std::endl;
std::cout << "-------------Kokkos command line arguments--------------------------------------" << std::endl;
std::cout << "--------------------------------------------------------------------------------" << std::endl;
std::cout << "The following arguments exist also without prefix 'kokkos' (e.g. --help)." << std::endl;
std::cout << "The prefixed arguments will be removed from the list by Kokkos::initialize()," << std::endl;
std::cout << "the non-prefixed ones are not removed. Prefixed versions take precedence over " << std::endl;
std::cout << "non prefixed ones, and the last occurence of an argument overwrites prior" << std::endl;
std::cout << "settings." << std::endl;
std::cout << std::endl;
std::cout << "--kokkos-help : print this message" << std::endl;
std::cout << "--kokkos-threads=INT : specify total number of threads or" << std::endl;
std::cout << " number of threads per NUMA region if " << std::endl;
std::cout << " used in conjunction with '--numa' option. " << std::endl;
std::cout << "--kokkos-numa=INT : specify number of NUMA regions used by process." << std::endl;
std::cout << "--kokkos-device=INT : specify device id to be used by Kokkos. " << std::endl;
std::cout << "--kokkos-ndevices=INT[,INT] : used when running MPI jobs. Specify number of" << std::endl;
std::cout << " devices per node to be used. Process to device" << std::endl;
std::cout << " mapping happens by obtaining the local MPI rank" << std::endl;
std::cout << " and assigning devices round-robin. The optional" << std::endl;
std::cout << " second argument allows for an existing device" << std::endl;
std::cout << " to be ignored. This is most useful on workstations" << std::endl;
std::cout << " with multiple GPUs of which one is used to drive" << std::endl;
std::cout << " screen output." << std::endl;
std::cout << std::endl;
std::cout << "--------------------------------------------------------------------------------" << std::endl;
std::cout << std::endl;
//Remove the --kokkos-help argument from the list but leave --help
if(strcmp(arg[iarg],"--kokkos-help") == 0) {
for(int k=iarg;k<narg-1;k++) {
arg[k] = arg[k+1];
}
narg--;
} else {
iarg++;
}
} else
iarg++;
}
InitArguments arguments;
arguments.num_threads = num_threads;
arguments.num_numa = numa;
arguments.device_id = device;
Impl::initialize_internal(arguments);
}
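// Example invocation for the parser above (hypothetical values):
//   ./app --kokkos-threads=8 --kokkos-numa=2 --kokkos-ndevices=2,0
// yields num_threads == 8, numa == 2, and a device id derived from the
// local MPI rank; the --kokkos-* arguments are removed from argv before
// the application sees them.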
void initialize(const InitArguments& arguments) {
Impl::initialize_internal(arguments);
}
void finalize()
{
Impl::finalize_internal();
}
void finalize_all()
{
enum { all_spaces = true };
Impl::finalize_internal( all_spaces );
}
void fence()
{
Impl::fence_internal();
}
} // namespace Kokkos
diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.cpp b/lib/kokkos/core/src/impl/Kokkos_Error.cpp
index 97cfbfae7..36224990d 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Error.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Error.cpp
@@ -1,193 +1,193 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ostream>
#include <sstream>
#include <iomanip>
#include <stdexcept>
#include <impl/Kokkos_Error.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
void host_abort( const char * const message )
{
fwrite(message,1,strlen(message),stderr);
fflush(stderr);
- abort();
+ ::abort();
}
void throw_runtime_exception( const std::string & msg )
{
std::ostringstream o ;
o << msg ;
traceback_callstack( o );
throw std::runtime_error( o.str() );
}
std::string human_memory_size(size_t arg_bytes)
{
double bytes = arg_bytes;
const double K = 1024;
const double M = K*1024;
const double G = M*1024;
std::ostringstream out;
if (bytes < K) {
out << std::setprecision(4) << bytes << " B";
} else if (bytes < M) {
bytes /= K;
out << std::setprecision(4) << bytes << " K";
} else if (bytes < G) {
bytes /= M;
out << std::setprecision(4) << bytes << " M";
} else {
bytes /= G;
out << std::setprecision(4) << bytes << " G";
}
return out.str();
}
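// Examples of the formatting above (a sketch): human_memory_size(512)
// returns "512 B", human_memory_size(1536) returns "1.5 K", and
// human_memory_size(size_t(3) << 30) returns "3 G".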
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( __GNUC__ ) && defined( ENABLE_TRACEBACK )
/* This is only known to work with GNU C++
* Must be compiled with '-rdynamic'
* Must be linked with '-ldl'
*/
/* Print call stack into an error stream,
 * so one knows in which function the error occurred.
*
* Code copied from:
* http://stupefydeveloper.blogspot.com/2008/10/cc-call-stack.html
*
* License on this site:
* This blog is licensed under a
* Creative Commons Attribution-Share Alike 3.0 Unported License.
*
* http://creativecommons.org/licenses/by-sa/3.0/
*
* Modified to output to std::ostream.
*/
#include <signal.h>
#include <execinfo.h>
#include <cxxabi.h>
#include <dlfcn.h>
#include <stdlib.h>
namespace Kokkos {
namespace Impl {
void traceback_callstack( std::ostream & msg )
{
using namespace abi;
enum { MAX_DEPTH = 32 };
void *trace[MAX_DEPTH];
Dl_info dlinfo;
int status;
int trace_size = backtrace(trace, MAX_DEPTH);
msg << std::endl << "Call stack {" << std::endl ;
for (int i=1; i<trace_size; ++i)
{
if(!dladdr(trace[i], &dlinfo))
continue;
const char * symname = dlinfo.dli_sname;
char * demangled = __cxa_demangle(symname, NULL, 0, &status);
if ( status == 0 && demangled ) {
symname = demangled;
}
if ( symname && *symname != 0 ) {
msg << " object: " << dlinfo.dli_fname
<< " function: " << symname
<< std::endl ;
}
if ( demangled ) {
free(demangled);
}
}
msg << "}" ;
}
}
}
#else
namespace Kokkos {
namespace Impl {
void traceback_callstack( std::ostream & msg )
{
msg << std::endl << "Traceback functionality not available" << std::endl ;
}
}
}
#endif
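// Build sketch for the traceback path above (assumed GNU toolchain, per
// the comments in this file): compile with -rdynamic and define
// ENABLE_TRACEBACK, then link against libdl, e.g.
//   g++ -DENABLE_TRACEBACK -rdynamic app.cpp -ldl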
diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.hpp b/lib/kokkos/core/src/impl/Kokkos_Error.hpp
index 33e203c94..5f88d6620 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Error.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Error.hpp
@@ -1,78 +1,82 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_ERROR_HPP
#define KOKKOS_IMPL_ERROR_HPP
#include <string>
#include <iosfwd>
+#include <KokkosCore_config.h>
+#ifdef KOKKOS_HAVE_CUDA
+#include <Cuda/Kokkos_Cuda_abort.hpp>
+#endif
namespace Kokkos {
namespace Impl {
void host_abort( const char * const );
void throw_runtime_exception( const std::string & );
void traceback_callstack( std::ostream & );
std::string human_memory_size(size_t arg_bytes);
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
namespace Kokkos {
inline
void abort( const char * const message ) { Kokkos::Impl::host_abort(message); }
}
#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_IMPL_ERROR_HPP */
diff --git a/lib/kokkos/core/unit_test/TestViewOfClass.hpp b/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp
similarity index 59%
copy from lib/kokkos/core/unit_test/TestViewOfClass.hpp
copy to lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp
index 09abacd80..4eb80d03f 100644
--- a/lib/kokkos/core/unit_test/TestViewOfClass.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp
@@ -1,126 +1,108 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
-#include <gtest/gtest.h>
+#include <Kokkos_HostSpace.hpp>
-#include <Kokkos_Core.hpp>
-#include <stdexcept>
-#include <sstream>
-#include <iostream>
-
-/*--------------------------------------------------------------------------*/
+#include <impl/Kokkos_HBWAllocators.hpp>
+#include <impl/Kokkos_Error.hpp>
-namespace Test {
-namespace {
-volatile int nested_view_count ;
-}
-
-template< class Space >
-class NestedView {
-private:
- Kokkos::View<int*,Space> member ;
+#include <stdint.h> // uintptr_t
+#include <cstdlib> // for malloc, realloc, and free
+#include <cstring> // for memcpy
-public:
-
- KOKKOS_INLINE_FUNCTION
- NestedView()
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
- : member("member",2)
- { Kokkos::atomic_increment( & nested_view_count ); }
-#else
- : member(){}
+#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
+#include <sys/mman.h> // for mmap, munmap, MAP_ANON, etc
+#include <unistd.h> // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES
#endif
- ~NestedView()
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
- { Kokkos::atomic_decrement( & nested_view_count ); }
-#else
- {}
-#endif
+#include <sstream>
+#include <iostream>
-};
+#ifdef KOKKOS_HAVE_HBWSPACE
+#include <memkind.h>
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
+/*--------------------------------------------------------------------------*/
-template< class Space >
-void view_nested_view()
+void* HBWMallocAllocator::allocate( size_t size )
{
- ASSERT_EQ( 0 , nested_view_count );
- {
- Kokkos::View< NestedView<Space> * , Space > a("a_nested_view",2);
- ASSERT_EQ( 2 , nested_view_count );
- Kokkos::View< NestedView<Space> * , Space > b("b_nested_view",2);
- ASSERT_EQ( 4 , nested_view_count );
+ std::cout << "Allocate HBW: " << 1.0e-6*size << "MB" << std::endl;
+ void * ptr = NULL;
+ if (size) {
+ ptr = memkind_malloc(MEMKIND_TYPE,size);
+
+ if (!ptr)
+ {
+ std::ostringstream msg ;
+ msg << name() << ": allocate(" << size << ") FAILED";
+ Kokkos::Impl::throw_runtime_exception( msg.str() );
+ }
}
- // ASSERT_EQ( 0 , nested_view_count );
+ return ptr;
}
+void HBWMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
+{
+ if (ptr) {
+ memkind_free(MEMKIND_TYPE,ptr);
+ }
}
-namespace Kokkos {
-namespace Impl {
-
-template< class ExecSpace , class S >
-struct ViewDefaultConstruct< ExecSpace , Test::NestedView<S> , true >
+void * HBWMallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size)
{
- typedef Test::NestedView<S> type ;
- type * const m_ptr ;
+ void * ptr = memkind_realloc(MEMKIND_TYPE, old_ptr, new_size);
- KOKKOS_FORCEINLINE_FUNCTION
- void operator()( const typename ExecSpace::size_type& i ) const
- { new(m_ptr+i) type(); }
-
- ViewDefaultConstruct( type * pointer , size_t capacity )
- : m_ptr( pointer )
- {
- Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
- parallel_for( range , *this );
- ExecSpace::fence();
- }
-};
+ if (new_size > 0u && ptr == NULL) {
+ Kokkos::Impl::throw_runtime_exception("Error: Malloc Allocator could not reallocate memory");
+ }
+ return ptr;
+}
} // namespace Impl
+} // namespace Experimental
} // namespace Kokkos
-
-/*--------------------------------------------------------------------------*/
-
+#endif
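+// Usage sketch for the memkind-backed allocator above (hypothetical sizes;
+// requires KOKKOS_HAVE_HBWSPACE and the memkind library):
+//   using Kokkos::Experimental::Impl::HBWMallocAllocator;
+//   void * p = HBWMallocAllocator::allocate(1u << 20);
+//   p = HBWMallocAllocator::reallocate(p, 1u << 20, 1u << 21);
+//   HBWMallocAllocator::deallocate(p, 1u << 21);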
diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.hpp b/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp
similarity index 69%
copy from lib/kokkos/core/src/impl/Kokkos_Error.hpp
copy to lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp
index 33e203c94..be0134460 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Error.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp
@@ -1,78 +1,75 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
-#ifndef KOKKOS_IMPL_ERROR_HPP
-#define KOKKOS_IMPL_ERROR_HPP
+#ifndef KOKKOS_HBW_ALLOCATORS_HPP
+#define KOKKOS_HBW_ALLOCATORS_HPP
-#include <string>
-#include <iosfwd>
+#ifdef KOKKOS_HAVE_HBWSPACE
namespace Kokkos {
+namespace Experimental {
namespace Impl {
-void host_abort( const char * const );
+/// class MallocAllocator
+class HBWMallocAllocator
+{
+public:
+ static const char * name()
+ {
+ return "HBW Malloc Allocator";
+ }
-void throw_runtime_exception( const std::string & );
+ static void* allocate(size_t size);
-void traceback_callstack( std::ostream & );
+ static void deallocate(void * ptr, size_t size);
-std::string human_memory_size(size_t arg_bytes);
+ static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
+};
}
}
+} // namespace Kokkos
+#endif //KOKKOS_HAVE_HBWSPACE
+#endif //KOKKOS_HBW_ALLOCATORS_HPP
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
-namespace Kokkos {
-inline
-void abort( const char * const message ) { Kokkos::Impl::host_abort(message); }
-}
-#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#endif /* #ifndef KOKKOS_IMPL_ERROR_HPP */
diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
similarity index 50%
copy from lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp
copy to lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
index 69b8ce86d..68e424e85 100644
--- a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
@@ -1,441 +1,397 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
-/*--------------------------------------------------------------------------*/
-
-#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_HAVE_CUDA )
-
-// Intel specialized allocator does not interoperate with CUDA memory allocation
-
-#define KOKKOS_INTEL_MM_ALLOC_AVAILABLE
-
-#endif
-
-/*--------------------------------------------------------------------------*/
-
-#if ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
- ( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 )
-
-#define KOKKOS_POSIX_MEMALIGN_AVAILABLE
-
-#include <unistd.h>
-#include <sys/mman.h>
-
-/* mmap flags for private anonymous memory allocation */
-
-#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE )
- #define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
-#elif defined( MAP_ANON ) && defined( MAP_PRIVATE )
- #define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
-#endif
-
-// mmap flags for huge page tables
-#if defined( KOKKOS_POSIX_MMAP_FLAGS )
- #if defined( MAP_HUGETLB )
- #define KOKKOS_POSIX_MMAP_FLAGS_HUGE (KOKKOS_POSIX_MMAP_FLAGS | MAP_HUGETLB )
- #else
- #define KOKKOS_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS
- #endif
-#endif
-
-#endif
-
-/*--------------------------------------------------------------------------*/
#include <stddef.h>
#include <stdlib.h>
#include <stdint.h>
#include <memory.h>
#include <iostream>
#include <sstream>
#include <cstring>
+#include <algorithm>
-#include <Kokkos_HostSpace.hpp>
+#include <Kokkos_HBWSpace.hpp>
#include <impl/Kokkos_BasicAllocators.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Kokkos_Atomic.hpp>
+#ifdef KOKKOS_HAVE_HBWSPACE
+#include <memkind.h>
+#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
+#ifdef KOKKOS_HAVE_HBWSPACE
+#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
namespace Kokkos {
+namespace Experimental {
namespace {
static const int QUERY_SPACE_IN_PARALLEL_MAX = 16 ;
typedef int (* QuerySpaceInParallelPtr )();
QuerySpaceInParallelPtr s_in_parallel_query[ QUERY_SPACE_IN_PARALLEL_MAX ] ;
int s_in_parallel_query_count = 0 ;
} // namespace <empty>
-void HostSpace::register_in_parallel( int (*device_in_parallel)() )
+void HBWSpace::register_in_parallel( int (*device_in_parallel)() )
{
if ( 0 == device_in_parallel ) {
- Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel ERROR : given NULL" ) );
+ Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel ERROR : given NULL" ) );
}
int i = -1 ;
if ( ! (device_in_parallel)() ) {
for ( i = 0 ; i < s_in_parallel_query_count && ! (*(s_in_parallel_query[i]))() ; ++i );
}
if ( i < s_in_parallel_query_count ) {
- Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : called in_parallel" ) );
+ Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel_query ERROR : called in_parallel" ) );
}
if ( QUERY_SPACE_IN_PARALLEL_MAX <= i ) {
- Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : exceeded maximum" ) );
+ Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel_query ERROR : exceeded maximum" ) );
}
for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i );
if ( i == s_in_parallel_query_count ) {
s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ;
}
}
-int HostSpace::in_parallel()
+int HBWSpace::in_parallel()
{
const int n = s_in_parallel_query_count ;
int i = 0 ;
while ( i < n && ! (*(s_in_parallel_query[i]))() ) { ++i ; }
return i < n ;
}
+} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
namespace Kokkos {
+namespace Experimental {
-Impl::AllocationTracker HostSpace::allocate_and_track( const std::string & label, const size_t size )
+Kokkos::Impl::AllocationTracker HBWSpace::allocate_and_track( const std::string & label, const size_t size )
{
- return Impl::AllocationTracker( allocator(), size, label );
+ return Kokkos::Impl::AllocationTracker( allocator(), size, label );
}
+} // namespace Experimental
} // namespace Kokkos
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
/*--------------------------------------------------------------------------*/
namespace Kokkos {
+namespace Experimental {
/* Default allocation mechanism */
-HostSpace::HostSpace()
+HBWSpace::HBWSpace()
: m_alloc_mech(
-#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
- HostSpace::INTEL_MM_ALLOC
-#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
- HostSpace::POSIX_MMAP
-#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
- HostSpace::POSIX_MEMALIGN
-#else
- HostSpace::STD_MALLOC
-#endif
+ HBWSpace::STD_MALLOC
)
-{}
+{
+printf("Init\n");
+setenv("MEMKIND_HBW_NODES", "1", 0);
+}
/* Default allocation mechanism */
-HostSpace::HostSpace( const HostSpace::AllocationMechanism & arg_alloc_mech )
- : m_alloc_mech( HostSpace::STD_MALLOC )
+HBWSpace::HBWSpace( const HBWSpace::AllocationMechanism & arg_alloc_mech )
+ : m_alloc_mech( HBWSpace::STD_MALLOC )
{
+printf("Init2\n");
+setenv("MEMKIND_HBW_NODES", "1", 0);
if ( arg_alloc_mech == STD_MALLOC ) {
- m_alloc_mech = HostSpace::STD_MALLOC ;
- }
-#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
- else if ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) {
- m_alloc_mech = HostSpace::INTEL_MM_ALLOC ;
- }
-#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
- else if ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) {
- m_alloc_mech = HostSpace::POSIX_MEMALIGN ;
- }
-#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
- else if ( arg_alloc_mech == HostSpace::POSIX_MMAP ) {
- m_alloc_mech = HostSpace::POSIX_MMAP ;
- }
-#endif
- else {
- const char * const mech =
- ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) ? "INTEL_MM_ALLOC" : (
- ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) ? "POSIX_MEMALIGN" : (
- ( arg_alloc_mech == HostSpace::POSIX_MMAP ) ? "POSIX_MMAP" : "" ));
-
- std::string msg ;
- msg.append("Kokkos::HostSpace ");
- msg.append(mech);
- msg.append(" is not available" );
- Kokkos::Impl::throw_runtime_exception( msg );
+ m_alloc_mech = HBWSpace::STD_MALLOC ;
}
}
-void * HostSpace::allocate( const size_t arg_alloc_size ) const
+void * HBWSpace::allocate( const size_t arg_alloc_size ) const
{
static_assert( sizeof(void*) == sizeof(uintptr_t)
, "Error sizeof(void*) != sizeof(uintptr_t)" );
static_assert( Kokkos::Impl::power_of_two< Kokkos::Impl::MEMORY_ALIGNMENT >::value
, "Memory alignment must be power of two" );
- constexpr size_t alignment = Kokkos::Impl::MEMORY_ALIGNMENT ;
- constexpr size_t alignment_mask = alignment - 1 ;
+ constexpr uintptr_t alignment = Kokkos::Impl::MEMORY_ALIGNMENT ;
+ constexpr uintptr_t alignment_mask = alignment - 1 ;
- void * ptr = NULL;
+ void * ptr = 0 ;
if ( arg_alloc_size ) {
if ( m_alloc_mech == STD_MALLOC ) {
// Over-allocate to and round up to guarantee proper alignment.
size_t size_padded = arg_alloc_size + sizeof(void*) + alignment ;
- void * alloc_ptr = malloc( size_padded );
+ void * alloc_ptr = memkind_malloc(MEMKIND_TYPE, size_padded );
if (alloc_ptr) {
uintptr_t address = reinterpret_cast<uintptr_t>(alloc_ptr);
// offset enough to record the alloc_ptr
address += sizeof(void *);
uintptr_t rem = address % alignment;
uintptr_t offset = rem ? (alignment - rem) : 0u;
address += offset;
ptr = reinterpret_cast<void *>(address);
// record the alloc'd pointer
address -= sizeof(void *);
*reinterpret_cast<void **>(address) = alloc_ptr;
}
}
+ }
-#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
- else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
- ptr = _mm_malloc( arg_alloc_size , alignment );
+ if ( ( ptr == 0 ) || ( reinterpret_cast<uintptr_t>(ptr) == ~uintptr_t(0) )
+ || ( reinterpret_cast<uintptr_t>(ptr) & alignment_mask ) ) {
+ std::ostringstream msg ;
+ msg << "Kokkos::Experimental::HBWSpace::allocate[ " ;
+ switch( m_alloc_mech ) {
+ case STD_MALLOC: msg << "STD_MALLOC" ; break ;
}
-#endif
+ msg << " ]( " << arg_alloc_size << " ) FAILED" ;
+ if ( ptr == NULL ) { msg << " NULL" ; }
+ else { msg << " NOT ALIGNED " << ptr ; }
-#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
- else if ( m_alloc_mech == POSIX_MEMALIGN ) {
- posix_memalign( & ptr, alignment , arg_alloc_size );
- }
-#endif
+ std::cerr << msg.str() << std::endl ;
+ std::cerr.flush();
-#if defined( KOKKOS_POSIX_MMAP_FLAGS )
- else if ( m_alloc_mech == POSIX_MMAP ) {
- constexpr size_t use_huge_pages = (1u << 27);
- constexpr int prot = PROT_READ | PROT_WRITE ;
- const int flags = arg_alloc_size < use_huge_pages
- ? KOKKOS_POSIX_MMAP_FLAGS
- : KOKKOS_POSIX_MMAP_FLAGS_HUGE ;
-
- // read write access to private memory
-
- ptr = mmap( NULL /* address hint, if NULL OS kernel chooses address */
- , arg_alloc_size /* size in bytes */
- , prot /* memory protection */
- , flags /* visibility of updates */
- , -1 /* file descriptor */
- , 0 /* offset */
- );
-
-/* Associated reallocation:
- ptr = mremap( old_ptr , old_size , new_size , MREMAP_MAYMOVE );
-*/
- }
-#endif
- }
-
- if ( reinterpret_cast<uintptr_t>(ptr) & alignment_mask ) {
- Kokkos::Impl::throw_runtime_exception( "Kokkos::HostSpace aligned allocation failed" );
+ Kokkos::Impl::throw_runtime_exception( msg.str() );
}
return ptr;
}
-void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const
+void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const
{
if ( arg_alloc_ptr ) {
if ( m_alloc_mech == STD_MALLOC ) {
void * alloc_ptr = *(reinterpret_cast<void **>(arg_alloc_ptr) -1);
- free( alloc_ptr );
+ memkind_free(MEMKIND_TYPE, alloc_ptr );
}
-#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
- else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
- _mm_free( arg_alloc_ptr );
- }
-#endif
-
-#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
- else if ( m_alloc_mech == POSIX_MEMALIGN ) {
- free( arg_alloc_ptr );
- }
-#endif
-
-#if defined( KOKKOS_POSIX_MMAP_FLAGS )
- else if ( m_alloc_mech == POSIX_MMAP ) {
- munmap( arg_alloc_ptr , arg_alloc_size );
- }
-#endif
-
}
}
+} // namespace Experimental
} // namespace Kokkos
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
namespace Kokkos {
namespace Experimental {
namespace Impl {
SharedAllocationRecord< void , void >
-SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record ;
+SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record ;
void
-SharedAllocationRecord< Kokkos::HostSpace , void >::
+SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
delete static_cast<SharedAllocationRecord*>(arg_rec);
}
-SharedAllocationRecord< Kokkos::HostSpace , void >::
+SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
~SharedAllocationRecord()
{
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
, SharedAllocationRecord< void , void >::m_alloc_size
);
}
-SharedAllocationRecord< Kokkos::HostSpace , void >::
-SharedAllocationRecord( const Kokkos::HostSpace & arg_space
+SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
+SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const SharedAllocationRecord< void , void >::function_type arg_dealloc
)
// Pass through allocated [ SharedAllocationHeader , user_memory ]
// Pass through deallocation function
: SharedAllocationRecord< void , void >
- ( & SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record
+ ( & SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::s_root_record
, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
, sizeof(SharedAllocationHeader) + arg_alloc_size
, arg_dealloc
)
, m_space( arg_space )
{
// Fill in the Header information
RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this );
strncpy( RecordBase::m_alloc_ptr->m_label
, arg_label.c_str()
, SharedAllocationHeader::maximum_label_length
);
}
-SharedAllocationRecord< Kokkos::HostSpace , void > *
-SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr )
+//----------------------------------------------------------------------------
+
+void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
+allocate_tracked( const Kokkos::Experimental::HBWSpace & arg_space
+ , const std::string & arg_alloc_label
+ , const size_t arg_alloc_size )
+{
+ if ( ! arg_alloc_size ) return (void *) 0 ;
+
+ SharedAllocationRecord * const r =
+ allocate( arg_space , arg_alloc_label , arg_alloc_size );
+
+ RecordBase::increment( r );
+
+ return r->data();
+}
+
+void SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
+deallocate_tracked( void * const arg_alloc_ptr )
+{
+ if ( arg_alloc_ptr != 0 ) {
+ SharedAllocationRecord * const r = get_record( arg_alloc_ptr );
+
+ RecordBase::decrement( r );
+ }
+}
+
+void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
+reallocate_tracked( void * const arg_alloc_ptr
+ , const size_t arg_alloc_size )
+{
+ SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr );
+ SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size );
+
+ Kokkos::Impl::DeepCopy<HBWSpace,HBWSpace>( r_new->data() , r_old->data()
+ , std::min( r_old->size() , r_new->size() ) );
+
+ RecordBase::increment( r_new );
+ RecordBase::decrement( r_old );
+
+ return r_new->data();
+}
+
+SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > *
+SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record( void * alloc_ptr )
{
typedef SharedAllocationHeader Header ;
- typedef SharedAllocationRecord< Kokkos::HostSpace , void > RecordHost ;
+ typedef SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > RecordHost ;
- SharedAllocationHeader const * const head = Header::get_header( alloc_ptr );
- RecordHost * const record = static_cast< RecordHost * >( head->m_record );
+ SharedAllocationHeader const * const head = alloc_ptr ? Header::get_header( alloc_ptr ) : (SharedAllocationHeader *)0 ;
+ RecordHost * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ;
- if ( record->m_alloc_ptr != head ) {
- Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void >::get_record ERROR" ) );
+ if ( ! alloc_ptr || record->m_alloc_ptr != head ) {
+ Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::get_record ERROR" ) );
}
return record ;
}
// Iterate records to print orphaned memory ...
-void SharedAllocationRecord< Kokkos::HostSpace , void >::
-print_records( std::ostream & s , const Kokkos::HostSpace & space , bool detail )
+void SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >::
+print_records( std::ostream & s , const Kokkos::Experimental::HBWSpace & space , bool detail )
{
- SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HostSpace" , & s_root_record , detail );
+ SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HBWSpace" , & s_root_record , detail );
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
+namespace Experimental {
namespace {
- const unsigned HOST_SPACE_ATOMIC_MASK = 0xFFFF;
- const unsigned HOST_SPACE_ATOMIC_XOR_MASK = 0x5A39;
- static int HOST_SPACE_ATOMIC_LOCKS[HOST_SPACE_ATOMIC_MASK+1];
+ const unsigned HBW_SPACE_ATOMIC_MASK = 0xFFFF;
+ const unsigned HBW_SPACE_ATOMIC_XOR_MASK = 0x5A39;
+ static int HBW_SPACE_ATOMIC_LOCKS[HBW_SPACE_ATOMIC_MASK+1];
}
namespace Impl {
-void init_lock_array_host_space() {
+void init_lock_array_hbw_space() {
static int is_initialized = 0;
if(! is_initialized)
- for(int i = 0; i < static_cast<int> (HOST_SPACE_ATOMIC_MASK+1); i++)
- HOST_SPACE_ATOMIC_LOCKS[i] = 0;
+ for(int i = 0; i < static_cast<int> (HBW_SPACE_ATOMIC_MASK+1); i++)
+ HBW_SPACE_ATOMIC_LOCKS[i] = 0;
}
-bool lock_address_host_space(void* ptr) {
- return 0 == atomic_compare_exchange( &HOST_SPACE_ATOMIC_LOCKS[
- (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] ,
+bool lock_address_hbw_space(void* ptr) {
+ return 0 == atomic_compare_exchange( &HBW_SPACE_ATOMIC_LOCKS[
+ (( size_t(ptr) >> 2 ) & HBW_SPACE_ATOMIC_MASK) ^ HBW_SPACE_ATOMIC_XOR_MASK] ,
0 , 1);
}
-void unlock_address_host_space(void* ptr) {
- atomic_exchange( &HOST_SPACE_ATOMIC_LOCKS[
- (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] ,
+void unlock_address_hbw_space(void* ptr) {
+ atomic_exchange( &HBW_SPACE_ATOMIC_LOCKS[
+ (( size_t(ptr) >> 2 ) & HBW_SPACE_ATOMIC_MASK) ^ HBW_SPACE_ATOMIC_XOR_MASK] ,
0);
}
}
}
+}
+#endif
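
The hunk above clones HostSpace's pointer-hashed lock table for HBWSpace: an address is hashed into a fixed array of lock words by shifting, masking, and XOR-scrambling, and acquisition is a compare-and-swap from 0 to 1. A minimal standalone sketch of the same scheme follows, using std::atomic in place of the Kokkos atomic wrappers; the names sketch, lock_table, slot_of, try_lock_address, and unlock_address are illustrative, not Kokkos API.

#include <atomic>
#include <cstdint>

namespace sketch {

constexpr unsigned MASK     = 0xFFFF;  // 65536 lock words, power-of-two table
constexpr unsigned XOR_MASK = 0x5A39;  // scrambles the index to spread contention

static std::atomic<int> lock_table[MASK + 1];

inline unsigned slot_of(void* ptr) {
  // Drop the low bits that are constant for word-aligned data,
  // keep 16 bits of the address, then XOR-scramble within the table.
  return ((reinterpret_cast<std::uintptr_t>(ptr) >> 2) & MASK) ^ XOR_MASK;
}

inline bool try_lock_address(void* ptr) {
  int expected = 0;  // succeeds only if the slot was unlocked (0)
  return lock_table[slot_of(ptr)].compare_exchange_strong(expected, 1);
}

inline void unlock_address(void* ptr) {
  lock_table[slot_of(ptr)].store(0);
}

} // namespace sketch

Since distinct addresses can hash to the same slot, a lock really guards a bucket of addresses rather than one object; that is acceptable here because the table only serializes atomic operations the hardware cannot perform directly.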
diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp
index 69b8ce86d..851a39a3e 100644
--- a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp
@@ -1,441 +1,553 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
-
+#include <algorithm>
#include <Kokkos_Macros.hpp>
/*--------------------------------------------------------------------------*/
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_HAVE_CUDA )
// Intel specialized allocator does not interoperate with CUDA memory allocation
#define KOKKOS_INTEL_MM_ALLOC_AVAILABLE
#endif
/*--------------------------------------------------------------------------*/
-#if ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
- ( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 )
-
-#define KOKKOS_POSIX_MEMALIGN_AVAILABLE
+#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
#include <unistd.h>
#include <sys/mman.h>
/* mmap flags for private anonymous memory allocation */
#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE )
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
#elif defined( MAP_ANON ) && defined( MAP_PRIVATE )
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
#endif
// mmap flags for huge page tables
+// The CUDA driver does not interoperate with MAP_HUGETLB
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
- #if defined( MAP_HUGETLB )
+ #if defined( MAP_HUGETLB ) && ! defined( KOKKOS_HAVE_CUDA )
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE (KOKKOS_POSIX_MMAP_FLAGS | MAP_HUGETLB )
#else
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS
#endif
#endif
#endif
/*--------------------------------------------------------------------------*/
#include <stddef.h>
#include <stdlib.h>
#include <stdint.h>
#include <memory.h>
#include <iostream>
#include <sstream>
#include <cstring>
#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_BasicAllocators.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Kokkos_Atomic.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace {
static const int QUERY_SPACE_IN_PARALLEL_MAX = 16 ;
typedef int (* QuerySpaceInParallelPtr )();
QuerySpaceInParallelPtr s_in_parallel_query[ QUERY_SPACE_IN_PARALLEL_MAX ] ;
int s_in_parallel_query_count = 0 ;
} // namespace <empty>
void HostSpace::register_in_parallel( int (*device_in_parallel)() )
{
if ( 0 == device_in_parallel ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel ERROR : given NULL" ) );
}
int i = -1 ;
if ( ! (device_in_parallel)() ) {
for ( i = 0 ; i < s_in_parallel_query_count && ! (*(s_in_parallel_query[i]))() ; ++i );
}
if ( i < s_in_parallel_query_count ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : called in_parallel" ) );
}
if ( QUERY_SPACE_IN_PARALLEL_MAX <= i ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : exceeded maximum" ) );
}
for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i );
if ( i == s_in_parallel_query_count ) {
s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ;
}
}
int HostSpace::in_parallel()
{
const int n = s_in_parallel_query_count ;
int i = 0 ;
while ( i < n && ! (*(s_in_parallel_query[i]))() ) { ++i ; }
return i < n ;
}
} // namespace Kokkos
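
register_in_parallel and in_parallel above implement a small fixed-capacity registry of query callbacks: registration rejects nulls, refuses duplicates, and the query reports whether any registered predicate fires. A hedged sketch of that registry idiom; registry, register_query, and any_true are illustrative names.

// Fixed-capacity registry of predicate callbacks, as in register_in_parallel.
typedef int (*query_fn)();

constexpr int MAX_QUERIES = 16;
static query_fn registry[MAX_QUERIES];
static int registry_count = 0;

bool register_query(query_fn f) {
  if (!f || registry_count >= MAX_QUERIES) return false; // reject null / overflow
  for (int i = 0; i < registry_count; ++i)
    if (registry[i] == f) return true;                   // already registered
  registry[registry_count++] = f;
  return true;
}

int any_true() { // mirrors HostSpace::in_parallel()
  for (int i = 0; i < registry_count; ++i)
    if (registry[i]()) return 1;
  return 0;
}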
/*--------------------------------------------------------------------------*/
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
namespace Kokkos {
Impl::AllocationTracker HostSpace::allocate_and_track( const std::string & label, const size_t size )
{
return Impl::AllocationTracker( allocator(), size, label );
}
} // namespace Kokkos
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/* Default allocation mechanism */
HostSpace::HostSpace()
: m_alloc_mech(
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
HostSpace::INTEL_MM_ALLOC
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
HostSpace::POSIX_MMAP
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
HostSpace::POSIX_MEMALIGN
#else
HostSpace::STD_MALLOC
#endif
)
{}
/* Default allocation mechanism */
HostSpace::HostSpace( const HostSpace::AllocationMechanism & arg_alloc_mech )
: m_alloc_mech( HostSpace::STD_MALLOC )
{
if ( arg_alloc_mech == STD_MALLOC ) {
m_alloc_mech = HostSpace::STD_MALLOC ;
}
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
else if ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) {
m_alloc_mech = HostSpace::INTEL_MM_ALLOC ;
}
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
else if ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) {
m_alloc_mech = HostSpace::POSIX_MEMALIGN ;
}
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
else if ( arg_alloc_mech == HostSpace::POSIX_MMAP ) {
m_alloc_mech = HostSpace::POSIX_MMAP ;
}
#endif
else {
const char * const mech =
( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) ? "INTEL_MM_ALLOC" : (
( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) ? "POSIX_MEMALIGN" : (
( arg_alloc_mech == HostSpace::POSIX_MMAP ) ? "POSIX_MMAP" : "" ));
std::string msg ;
msg.append("Kokkos::HostSpace ");
msg.append(mech);
msg.append(" is not available" );
Kokkos::Impl::throw_runtime_exception( msg );
}
}
void * HostSpace::allocate( const size_t arg_alloc_size ) const
{
static_assert( sizeof(void*) == sizeof(uintptr_t)
, "Error sizeof(void*) != sizeof(uintptr_t)" );
- static_assert( Kokkos::Impl::power_of_two< Kokkos::Impl::MEMORY_ALIGNMENT >::value
+ static_assert( Kokkos::Impl::is_integral_power_of_two( Kokkos::Impl::MEMORY_ALIGNMENT )
, "Memory alignment must be power of two" );
- constexpr size_t alignment = Kokkos::Impl::MEMORY_ALIGNMENT ;
- constexpr size_t alignment_mask = alignment - 1 ;
+ constexpr uintptr_t alignment = Kokkos::Impl::MEMORY_ALIGNMENT ;
+ constexpr uintptr_t alignment_mask = alignment - 1 ;
- void * ptr = NULL;
+ void * ptr = 0 ;
if ( arg_alloc_size ) {
if ( m_alloc_mech == STD_MALLOC ) {
// Over-allocate and round up to guarantee proper alignment.
size_t size_padded = arg_alloc_size + sizeof(void*) + alignment ;
void * alloc_ptr = malloc( size_padded );
if (alloc_ptr) {
uintptr_t address = reinterpret_cast<uintptr_t>(alloc_ptr);
// offset enough to record the alloc_ptr
address += sizeof(void *);
uintptr_t rem = address % alignment;
uintptr_t offset = rem ? (alignment - rem) : 0u;
address += offset;
ptr = reinterpret_cast<void *>(address);
// record the alloc'd pointer
address -= sizeof(void *);
*reinterpret_cast<void **>(address) = alloc_ptr;
}
}
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
ptr = _mm_malloc( arg_alloc_size , alignment );
}
#endif
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
posix_memalign( & ptr, alignment , arg_alloc_size );
}
#endif
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
else if ( m_alloc_mech == POSIX_MMAP ) {
constexpr size_t use_huge_pages = (1u << 27);
constexpr int prot = PROT_READ | PROT_WRITE ;
- const int flags = arg_alloc_size < use_huge_pages
- ? KOKKOS_POSIX_MMAP_FLAGS
- : KOKKOS_POSIX_MMAP_FLAGS_HUGE ;
+ const int flags = arg_alloc_size < use_huge_pages
+ ? KOKKOS_POSIX_MMAP_FLAGS
+ : KOKKOS_POSIX_MMAP_FLAGS_HUGE ;
// read write access to private memory
ptr = mmap( NULL /* address hint, if NULL OS kernel chooses address */
, arg_alloc_size /* size in bytes */
, prot /* memory protection */
, flags /* visibility of updates */
- , -1 /* file descriptor */
- , 0 /* offset */
+ , -1 /* file descriptor */
+ , 0 /* offset */
);
/* Associated reallocation:
ptr = mremap( old_ptr , old_size , new_size , MREMAP_MAYMOVE );
*/
}
#endif
}
- if ( reinterpret_cast<uintptr_t>(ptr) & alignment_mask ) {
- Kokkos::Impl::throw_runtime_exception( "Kokkos::HostSpace aligned allocation failed" );
+ if ( ( ptr == 0 ) || ( reinterpret_cast<uintptr_t>(ptr) == ~uintptr_t(0) )
+ || ( reinterpret_cast<uintptr_t>(ptr) & alignment_mask ) ) {
+ std::ostringstream msg ;
+ msg << "Kokkos::HostSpace::allocate[ " ;
+ switch( m_alloc_mech ) {
+ case STD_MALLOC: msg << "STD_MALLOC" ; break ;
+ case POSIX_MEMALIGN: msg << "POSIX_MEMALIGN" ; break ;
+ case POSIX_MMAP: msg << "POSIX_MMAP" ; break ;
+ case INTEL_MM_ALLOC: msg << "INTEL_MM_ALLOC" ; break ;
+ }
+ msg << " ]( " << arg_alloc_size << " ) FAILED" ;
+ if ( ptr == NULL ) { msg << " NULL" ; }
+ else { msg << " NOT ALIGNED " << ptr ; }
+
+ std::cerr << msg.str() << std::endl ;
+ std::cerr.flush();
+
+ Kokkos::Impl::throw_runtime_exception( msg.str() );
}
return ptr;
}
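
In the STD_MALLOC branch above, aligned allocation is built from plain malloc by over-allocating, rounding the address up, and stashing the raw pointer in the word just below the aligned block so the matching deallocation can recover it. A self-contained sketch of that pattern, assuming a power-of-two alignment; aligned_malloc and aligned_free are illustrative names.

#include <cstdint>
#include <cstdlib>

// Returns a pointer aligned to 'alignment' (a power of two); the raw
// malloc pointer is stored immediately below it for the matching free.
void* aligned_malloc(std::size_t size, std::size_t alignment) {
  void* raw = std::malloc(size + sizeof(void*) + alignment);
  if (!raw) return nullptr;
  std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(raw) + sizeof(void*);
  std::uintptr_t rem = addr % alignment;
  if (rem) addr += alignment - rem;
  reinterpret_cast<void**>(addr)[-1] = raw; // record the raw pointer just below
  return reinterpret_cast<void*>(addr);
}

void aligned_free(void* ptr) {
  if (ptr) std::free(reinterpret_cast<void**>(ptr)[-1]);
}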
void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const
{
if ( arg_alloc_ptr ) {
if ( m_alloc_mech == STD_MALLOC ) {
void * alloc_ptr = *(reinterpret_cast<void **>(arg_alloc_ptr) -1);
free( alloc_ptr );
}
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
_mm_free( arg_alloc_ptr );
}
#endif
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
free( arg_alloc_ptr );
}
#endif
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
else if ( m_alloc_mech == POSIX_MMAP ) {
munmap( arg_alloc_ptr , arg_alloc_size );
}
#endif
}
}
} // namespace Kokkos
+//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
namespace Kokkos {
namespace Experimental {
namespace Impl {
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record ;
void
SharedAllocationRecord< Kokkos::HostSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
delete static_cast<SharedAllocationRecord*>(arg_rec);
}
SharedAllocationRecord< Kokkos::HostSpace , void >::
~SharedAllocationRecord()
{
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
, SharedAllocationRecord< void , void >::m_alloc_size
);
}
SharedAllocationRecord< Kokkos::HostSpace , void >::
SharedAllocationRecord( const Kokkos::HostSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const SharedAllocationRecord< void , void >::function_type arg_dealloc
)
// Pass through allocated [ SharedAllocationHeader , user_memory ]
// Pass through deallocation function
: SharedAllocationRecord< void , void >
( & SharedAllocationRecord< Kokkos::HostSpace , void >::s_root_record
, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
, sizeof(SharedAllocationHeader) + arg_alloc_size
, arg_dealloc
)
, m_space( arg_space )
{
// Fill in the Header information
RecordBase::m_alloc_ptr->m_record = static_cast< SharedAllocationRecord< void , void > * >( this );
strncpy( RecordBase::m_alloc_ptr->m_label
, arg_label.c_str()
, SharedAllocationHeader::maximum_label_length
);
}
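
The constructor above carves one allocation into [ SharedAllocationHeader | user data ] and writes a back-pointer from the header to the record, which is what get_record later follows to validate a user pointer. A compact sketch of that header-prefix layout; Header, Record, make, and record_of are illustrative names.

#include <cstdlib>
#include <cstring>

struct Record;                 // forward declaration
struct Header {
  Record* record;              // back-pointer used by get_record-style lookup
  char    label[64];
};

struct Record {
  Header* header;              // points at the start of the block
  void*   payload() { return header + 1; }  // user data follows the header

  static Record* make(std::size_t user_size, const char* label) {
    Record* r = new Record;
    r->header = static_cast<Header*>(std::malloc(sizeof(Header) + user_size));
    r->header->record = r;     // the header knows its record
    std::strncpy(r->header->label, label, sizeof(r->header->label) - 1);
    r->header->label[sizeof(r->header->label) - 1] = '\0';
    return r;
  }
};

// Recover the record from a user pointer: step back one Header.
inline Record* record_of(void* user_ptr) {
  Header* h = reinterpret_cast<Header*>(user_ptr) - 1;
  return h->record;
}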
+//----------------------------------------------------------------------------
+
+void * SharedAllocationRecord< Kokkos::HostSpace , void >::
+allocate_tracked( const Kokkos::HostSpace & arg_space
+ , const std::string & arg_alloc_label
+ , const size_t arg_alloc_size )
+{
+ if ( ! arg_alloc_size ) return (void *) 0 ;
+
+ SharedAllocationRecord * const r =
+ allocate( arg_space , arg_alloc_label , arg_alloc_size );
+
+ RecordBase::increment( r );
+
+ return r->data();
+}
+
+void SharedAllocationRecord< Kokkos::HostSpace , void >::
+deallocate_tracked( void * const arg_alloc_ptr )
+{
+ if ( arg_alloc_ptr != 0 ) {
+ SharedAllocationRecord * const r = get_record( arg_alloc_ptr );
+
+ RecordBase::decrement( r );
+ }
+}
+
+void * SharedAllocationRecord< Kokkos::HostSpace , void >::
+reallocate_tracked( void * const arg_alloc_ptr
+ , const size_t arg_alloc_size )
+{
+ SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr );
+ SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size );
+
+ Kokkos::Impl::DeepCopy<HostSpace,HostSpace>( r_new->data() , r_old->data()
+ , std::min( r_old->size() , r_new->size() ) );
+
+ RecordBase::increment( r_new );
+ RecordBase::decrement( r_old );
+
+ return r_new->data();
+}
+
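
The three *_tracked functions above expose a malloc-style interface over reference-counted records: allocation creates a record and takes a reference, deallocation drops one, and reallocation allocates a new record, copies the overlapping bytes, and swaps the references. A hedged sketch of the reallocate step with plain new/delete standing in for the record machinery; Block and realloc_tracked are illustrative names.

#include <algorithm>
#include <cstddef>
#include <cstring>

struct Block {               // stand-in for a SharedAllocationRecord
  char*       data;
  std::size_t size;
};

// Allocate-new / copy-the-overlap / release-old, as in reallocate_tracked.
Block* realloc_tracked(Block* old_blk, std::size_t new_size) {
  Block* new_blk = new Block{ new char[new_size], new_size };
  std::memcpy(new_blk->data, old_blk->data,
              std::min(old_blk->size, new_blk->size)); // only the overlapping bytes
  delete[] old_blk->data;    // plays the role of RecordBase::decrement(r_old)
  delete old_blk;
  return new_blk;            // the caller's reference, as after increment(r_new)
}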
SharedAllocationRecord< Kokkos::HostSpace , void > *
SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr )
{
typedef SharedAllocationHeader Header ;
typedef SharedAllocationRecord< Kokkos::HostSpace , void > RecordHost ;
- SharedAllocationHeader const * const head = Header::get_header( alloc_ptr );
- RecordHost * const record = static_cast< RecordHost * >( head->m_record );
+ SharedAllocationHeader const * const head = alloc_ptr ? Header::get_header( alloc_ptr ) : (SharedAllocationHeader *)0 ;
+ RecordHost * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ;
- if ( record->m_alloc_ptr != head ) {
+ if ( ! alloc_ptr || record->m_alloc_ptr != head ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void >::get_record ERROR" ) );
}
return record ;
}
// Iterate records to print orphaned memory ...
void SharedAllocationRecord< Kokkos::HostSpace , void >::
print_records( std::ostream & s , const Kokkos::HostSpace & space , bool detail )
{
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HostSpace" , & s_root_record , detail );
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
+namespace Kokkos {
+namespace Experimental {
+namespace Impl {
+
+template< class >
+struct ViewOperatorBoundsErrorAbort ;
+
+template<>
+struct ViewOperatorBoundsErrorAbort< Kokkos::HostSpace > {
+ static void apply( const size_t rank
+ , const size_t n0 , const size_t n1
+ , const size_t n2 , const size_t n3
+ , const size_t n4 , const size_t n5
+ , const size_t n6 , const size_t n7
+ , const size_t i0 , const size_t i1
+ , const size_t i2 , const size_t i3
+ , const size_t i4 , const size_t i5
+ , const size_t i6 , const size_t i7 );
+};
+
+void ViewOperatorBoundsErrorAbort< Kokkos::HostSpace >::
+apply( const size_t rank
+ , const size_t n0 , const size_t n1
+ , const size_t n2 , const size_t n3
+ , const size_t n4 , const size_t n5
+ , const size_t n6 , const size_t n7
+ , const size_t i0 , const size_t i1
+ , const size_t i2 , const size_t i3
+ , const size_t i4 , const size_t i5
+ , const size_t i6 , const size_t i7 )
+{
+ char buffer[512];
+
+ snprintf( buffer , sizeof(buffer)
+ , "View operator bounds error : rank(%lu) dim(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu) index(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu)"
+ , rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7
+ , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
+
+ Kokkos::Impl::throw_runtime_exception( buffer );
+}
+
+} // namespace Impl
+} // namespace Experimental
+} // namespace Kokkos
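
ViewOperatorBoundsErrorAbort above formats the failing rank, extents, and indices into a stack buffer with snprintf before throwing, keeping the formatting step off the heap. A small sketch of the same stack-buffer-then-throw pattern; fail_bounds is an illustrative name.

#include <cstddef>
#include <cstdio>
#include <stdexcept>

// Format diagnostics into a stack buffer, then throw; the buffer itself
// requires no allocation, only the exception object does.
[[noreturn]] void fail_bounds(std::size_t i, std::size_t n) {
  char buffer[128];
  std::snprintf(buffer, sizeof(buffer),
                "bounds error: index %zu >= extent %zu", i, n);
  throw std::runtime_error(buffer);
}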
+
+/*--------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------------*/
+
namespace Kokkos {
namespace {
const unsigned HOST_SPACE_ATOMIC_MASK = 0xFFFF;
const unsigned HOST_SPACE_ATOMIC_XOR_MASK = 0x5A39;
static int HOST_SPACE_ATOMIC_LOCKS[HOST_SPACE_ATOMIC_MASK+1];
}
namespace Impl {
void init_lock_array_host_space() {
static int is_initialized = 0;
if(! is_initialized)
for(int i = 0; i < static_cast<int> (HOST_SPACE_ATOMIC_MASK+1); i++)
HOST_SPACE_ATOMIC_LOCKS[i] = 0;
}
bool lock_address_host_space(void* ptr) {
return 0 == atomic_compare_exchange( &HOST_SPACE_ATOMIC_LOCKS[
(( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] ,
0 , 1);
}
void unlock_address_host_space(void* ptr) {
atomic_exchange( &HOST_SPACE_ATOMIC_LOCKS[
(( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] ,
0);
}
}
}
diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp
index a88be37dd..50e45166b 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp
@@ -1,148 +1,160 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <impl/Kokkos_Profiling_Interface.hpp>
#ifdef KOKKOSP_ENABLE_PROFILING
#include <string.h>
namespace Kokkos {
namespace Experimental {
bool profileLibraryLoaded() {
return (NULL != initProfileLibrary);
}
void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
if(NULL != beginForCallee) {
Kokkos::fence();
(*beginForCallee)(kernelPrefix.c_str(), devID, kernelID);
}
};
void endParallelFor(const uint64_t kernelID) {
if(NULL != endForCallee) {
Kokkos::fence();
(*endForCallee)(kernelID);
}
};
void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
if(NULL != beginScanCallee) {
Kokkos::fence();
(*beginScanCallee)(kernelPrefix.c_str(), devID, kernelID);
}
};
void endParallelScan(const uint64_t kernelID) {
if(NULL != endScanCallee) {
Kokkos::fence();
(*endScanCallee)(kernelID);
}
};
void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
if(NULL != beginReduceCallee) {
Kokkos::fence();
(*beginReduceCallee)(kernelPrefix.c_str(), devID, kernelID);
}
};
void endParallelReduce(const uint64_t kernelID) {
if(NULL != endReduceCallee) {
Kokkos::fence();
(*endReduceCallee)(kernelID);
}
};
void initialize() {
void* firstProfileLibrary;
char* envProfileLibrary = getenv("KOKKOS_PROFILE_LIBRARY");
// If we do not find a profiling library in the environment then exit
// early.
if( NULL == envProfileLibrary ) {
return ;
}
char* profileLibraryName = strtok(envProfileLibrary, ";");
if( (NULL != profileLibraryName) && (strcmp(profileLibraryName, "") != 0) ) {
firstProfileLibrary = dlopen(profileLibraryName, RTLD_NOW | RTLD_GLOBAL);
if(NULL == firstProfileLibrary) {
std::cerr << "Error: Unable to load KokkosP library: " <<
profileLibraryName << std::endl;
} else {
std::cout << "KokkosP: Library Loaded: " << profileLibraryName << std::endl;
beginForCallee = (beginFunction) dlsym(firstProfileLibrary, "kokkosp_begin_parallel_for");
beginScanCallee = (beginFunction) dlsym(firstProfileLibrary, "kokkosp_begin_parallel_scan");
beginReduceCallee = (beginFunction) dlsym(firstProfileLibrary, "kokkosp_begin_parallel_reduce");
endScanCallee = (endFunction) dlsym(firstProfileLibrary, "kokkosp_end_parallel_scan");
endForCallee = (endFunction) dlsym(firstProfileLibrary, "kokkosp_end_parallel_for");
endReduceCallee = (endFunction) dlsym(firstProfileLibrary, "kokkosp_end_parallel_reduce");
initProfileLibrary = (initFunction) dlsym(firstProfileLibrary, "kokkosp_init_library");
finalizeProfileLibrary = (finalizeFunction) dlsym(firstProfileLibrary, "kokkosp_finalize_library");
}
}
if(NULL != initProfileLibrary) {
(*initProfileLibrary)(0,
(uint64_t) KOKKOSP_INTERFACE_VERSION,
(uint32_t) 0,
NULL);
}
};
void finalize() {
- if(NULL != finalizeProfileLibrary) {
- (*finalizeProfileLibrary)();
- }
+ if(NULL != finalizeProfileLibrary) {
+ (*finalizeProfileLibrary)();
+
+ // Set all profile hooks to NULL to prevent
+ // any additional calls. Once we are told to
+ // finalize, we mean it
+ beginForCallee = NULL;
+ beginScanCallee = NULL;
+ beginReduceCallee = NULL;
+ endScanCallee = NULL;
+ endForCallee = NULL;
+ endReduceCallee = NULL;
+ initProfileLibrary = NULL;
+ finalizeProfileLibrary = NULL;
+ }
};
}
}
#endif
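
The profiling hunk above resolves hooks at runtime with dlopen and dlsym from a library named in KOKKOS_PROFILE_LIBRARY, and finalize() now nulls every hook so late calls cannot reach a finalized tool. A minimal sketch of that environment-driven plugin pattern; MY_TOOL_LIB and my_tool_init are illustrative names, not the KokkosP interface.

#include <dlfcn.h>
#include <cstdlib>
#include <iostream>

typedef void (*init_hook_t)(int);
static init_hook_t init_hook = nullptr;

void load_hooks() {
  const char* libname = std::getenv("MY_TOOL_LIB");
  if (!libname) return;                       // no tool requested: stay inert
  void* handle = dlopen(libname, RTLD_NOW | RTLD_GLOBAL);
  if (!handle) { std::cerr << "dlopen failed: " << dlerror() << "\n"; return; }
  // dlsym returns null if the symbol is absent; each hook is optional.
  init_hook = reinterpret_cast<init_hook_t>(dlsym(handle, "my_tool_init"));
}

void finalize_hooks() {
  // Null the pointer so a late call cannot reach an unloaded tool.
  init_hook = nullptr;
}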
diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
index 688f97f42..5f3e65b32 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
@@ -1,336 +1,336 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#include <impl/Kokkos_Serial_TaskPolicy.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
#include <stdlib.h>
#include <stdexcept>
#include <iostream>
#include <sstream>
#include <string>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
TaskPolicy< Kokkos::Serial >::member_type &
TaskPolicy< Kokkos::Serial >::member_single()
{
static member_type s(0,1,0);
return s ;
}
} // namespace Experimental
} // namespace Kokkos
namespace Kokkos {
namespace Experimental {
namespace Impl {
typedef TaskMember< Kokkos::Serial , void , void > Task ;
//----------------------------------------------------------------------------
namespace {
inline
unsigned padded_sizeof_derived( unsigned sizeof_derived )
{
return sizeof_derived +
( sizeof_derived % sizeof(Task*) ? sizeof(Task*) - sizeof_derived % sizeof(Task*) : 0 );
}
} // namespace
void Task::deallocate( void * ptr )
{
free( ptr );
}
void * Task::allocate( const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity )
{
return malloc( padded_sizeof_derived( arg_sizeof_derived ) + arg_dependence_capacity * sizeof(Task*) );
}
Task::~TaskMember()
{
}
Task::TaskMember( const Task::function_verify_type arg_verify
, const Task::function_dealloc_type arg_dealloc
, const Task::function_apply_type arg_apply
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
)
: m_dealloc( arg_dealloc )
, m_verify( arg_verify )
, m_apply( arg_apply )
, m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) )
, m_wait( 0 )
, m_next( 0 )
, m_dep_capacity( arg_dependence_capacity )
, m_dep_size( 0 )
, m_ref_count( 0 )
, m_state( TASK_STATE_CONSTRUCTING )
{
for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ;
}
Task::TaskMember( const Task::function_dealloc_type arg_dealloc
, const Task::function_apply_type arg_apply
, const unsigned arg_sizeof_derived
, const unsigned arg_dependence_capacity
)
: m_dealloc( arg_dealloc )
, m_verify( & Task::verify_type<void> )
, m_apply( arg_apply )
, m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) )
, m_wait( 0 )
, m_next( 0 )
, m_dep_capacity( arg_dependence_capacity )
, m_dep_size( 0 )
, m_ref_count( 0 )
, m_state( TASK_STATE_CONSTRUCTING )
{
for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ;
}
//----------------------------------------------------------------------------
void Task::throw_error_add_dependence() const
{
std::cerr << "TaskMember< Serial >::add_dependence ERROR"
<< " state(" << m_state << ")"
<< " dep_size(" << m_dep_size << ")"
<< std::endl ;
throw std::runtime_error("TaskMember< Serial >::add_dependence ERROR");
}
void Task::throw_error_verify_type()
{
throw std::runtime_error("TaskMember< Serial >::verify_type ERROR");
}
//----------------------------------------------------------------------------
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw )
{
static const char msg_error_header[] = "Kokkos::Experimental::Impl::TaskManager<Kokkos::Serial>::assign ERROR" ;
static const char msg_error_count[] = ": negative reference count" ;
static const char msg_error_complete[] = ": destroy task that is not complete" ;
static const char msg_error_dependences[] = ": destroy task that has dependences" ;
static const char msg_error_exception[] = ": caught internal exception" ;
const char * msg_error = 0 ;
try {
if ( *lhs ) {
const int count = --((**lhs).m_ref_count);
if ( 0 == count ) {
// Reference count at zero, delete it
// Should only be deallocating a completed task
if ( (**lhs).m_state == Kokkos::Experimental::TASK_STATE_COMPLETE ) {
// A completed task should not have dependences...
for ( int i = 0 ; i < (**lhs).m_dep_size && 0 == msg_error ; ++i ) {
if ( (**lhs).m_dep[i] ) msg_error = msg_error_dependences ;
}
}
else {
msg_error = msg_error_complete ;
}
if ( 0 == msg_error ) {
// Get deletion function and apply it
const Task::function_dealloc_type d = (**lhs).m_dealloc ;
(*d)( *lhs );
}
}
else if ( count <= 0 ) {
msg_error = msg_error_count ;
}
}
if ( 0 == msg_error && rhs ) { ++( rhs->m_ref_count ); }
*lhs = rhs ;
}
catch( ... ) {
if ( 0 == msg_error ) msg_error = msg_error_exception ;
}
if ( 0 != msg_error ) {
if ( no_throw ) {
std::cerr << msg_error_header << msg_error << std::endl ;
std::cerr.flush();
}
else {
std::string msg(msg_error_header);
msg.append(msg_error);
throw std::runtime_error( msg );
}
}
}
#endif
namespace {
Task * s_ready = 0 ;
-Task * s_denied = reinterpret_cast<Task*>( ~((unsigned long)0) );
+Task * s_denied = reinterpret_cast<Task*>( ~((uintptr_t)0) );
}
void Task::schedule()
{
// Execute ready tasks in case the task being scheduled
// is dependent upon a waiting and ready task.
Task::execute_ready_tasks();
// spawning : Constructing -> Waiting
// respawning : Executing -> Waiting
// updating : Waiting -> Waiting
// Must not be in a dependence linked list: 0 == t->m_next
const bool ok_state = TASK_STATE_COMPLETE != m_state ;
const bool ok_list = 0 == m_next ;
if ( ok_state && ok_list ) {
// Will be waiting for execution upon return from this function
m_state = Kokkos::Experimental::TASK_STATE_WAITING ;
// Insert this task into another dependence that is not complete
int i = 0 ;
for ( ; i < m_dep_size ; ++i ) {
Task * const y = m_dep[i] ;
if ( y && s_denied != ( m_next = y->m_wait ) ) {
y->m_wait = this ; // CAS( & y->m_wait , m_next , this );
break ;
}
}
if ( i == m_dep_size ) {
// All dependences are complete, insert into the ready list
m_next = s_ready ;
s_ready = this ; // CAS( & s_ready , m_next = s_ready , this );
}
}
else {
throw std::runtime_error(std::string("Kokkos::Experimental::Impl::Task spawn or respawn state error"));
}
}
void Task::execute_ready_tasks()
{
while ( s_ready ) {
// Remove this task from the ready list
// Task * task ;
// while ( ! CAS( & s_ready , task = s_ready , s_ready->m_next ) );
Task * const task = s_ready ;
s_ready = task->m_next ;
task->m_next = 0 ;
// precondition: task->m_state = TASK_STATE_WAITING
// precondition: task->m_dep[i]->m_state == TASK_STATE_COMPLETE for all i
// precondition: does not exist T such that T->m_wait = task
// precondition: does not exist T such that T->m_next = task
task->m_state = Kokkos::Experimental::TASK_STATE_EXECUTING ;
(*task->m_apply)( task );
if ( task->m_state == Kokkos::Experimental::TASK_STATE_EXECUTING ) {
// task did not respawn itself
task->m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ;
// release dependences:
for ( int i = 0 ; i < task->m_dep_size ; ++i ) {
assign( task->m_dep + i , 0 );
}
// Stop other tasks from adding themselves to 'task->m_wait'.
Task * x ;
// CAS( & task->m_wait , x = task->m_wait , s_denied );
x = task->m_wait ; task->m_wait = s_denied ;
// update tasks waiting on this task
while ( x ) {
Task * const next = x->m_next ;
x->m_next = 0 ;
x->schedule(); // could happen concurrently
x = next ;
}
}
}
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
#endif // defined( KOKKOS_HAVE_SERIAL )
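
Task::schedule and execute_ready_tasks above thread tasks through intrusive singly-linked lists, and a completed task swaps the sentinel s_denied into its wait list so nothing can enqueue behind a finished dependence. A compact single-threaded sketch of that sentinel-closed list; Node, DENIED, try_enqueue, and close_and_take_waiters are illustrative names.

#include <cstdint>

struct Node { Node* next = nullptr; Node* waiters = nullptr; };

// Sentinel meaning "this list is closed; nobody may enqueue anymore".
static Node* const DENIED = reinterpret_cast<Node*>(~std::uintptr_t(0));

// Returns false if 'owner' already completed (list closed by DENIED).
bool try_enqueue(Node* owner, Node* waiter) {
  if (owner->waiters == DENIED) return false; // completed: caller proceeds instead
  waiter->next   = owner->waiters;            // push-front onto the wait list
  owner->waiters = waiter;
  return true;
}

// On completion: close the list and hand back the chain of waiters.
Node* close_and_take_waiters(Node* owner) {
  Node* w = owner->waiters;
  owner->waiters = DENIED;
  return w;
}

The serial version above elides synchronization; the commented-out CAS lines in the hunk show where the concurrent variant would swap these plain loads and stores for atomic compare-and-swap.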
diff --git a/lib/kokkos/core/src/impl/Kokkos_Shape.hpp b/lib/kokkos/core/src/impl/Kokkos_Shape.hpp
index dba730127..9749e0a1f 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Shape.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Shape.hpp
@@ -1,917 +1,917 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SHAPE_HPP
#define KOKKOS_SHAPE_HPP
#include <typeinfo>
#include <utility>
#include <Kokkos_Core_fwd.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_StaticAssert.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief The shape of a Kokkos array with dynamic and static dimensions.
* Dynamic dimensions are member values and static dimensions are
* 'static const' values.
*
* The upper bound on the array rank is eight.
*/
template< unsigned ScalarSize ,
unsigned Rank ,
unsigned s0 = 1 ,
unsigned s1 = 1 ,
unsigned s2 = 1 ,
unsigned s3 = 1 ,
unsigned s4 = 1 ,
unsigned s5 = 1 ,
unsigned s6 = 1 ,
unsigned s7 = 1 >
struct Shape ;
//----------------------------------------------------------------------------
/** \brief Shape equality if the value type, layout, and dimensions
* are equal.
*/
template< unsigned xSize , unsigned xRank ,
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
unsigned ySize , unsigned yRank ,
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
KOKKOS_INLINE_FUNCTION
bool operator == ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
enum { same_size = xSize == ySize };
enum { same_rank = xRank == yRank };
return same_size && same_rank &&
size_t( x.N0 ) == size_t( y.N0 ) &&
unsigned( x.N1 ) == unsigned( y.N1 ) &&
unsigned( x.N2 ) == unsigned( y.N2 ) &&
unsigned( x.N3 ) == unsigned( y.N3 ) &&
unsigned( x.N4 ) == unsigned( y.N4 ) &&
unsigned( x.N5 ) == unsigned( y.N5 ) &&
unsigned( x.N6 ) == unsigned( y.N6 ) &&
unsigned( x.N7 ) == unsigned( y.N7 ) ;
}
template< unsigned xSize , unsigned xRank ,
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
unsigned ySize ,unsigned yRank ,
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
KOKKOS_INLINE_FUNCTION
bool operator != ( const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{ return ! operator == ( x , y ); }
//----------------------------------------------------------------------------
void assert_counts_are_equal_throw(
const size_t x_count ,
const size_t y_count );
inline
void assert_counts_are_equal(
const size_t x_count ,
const size_t y_count )
{
if ( x_count != y_count ) {
assert_counts_are_equal_throw( x_count , y_count );
}
}
void assert_shapes_are_equal_throw(
const unsigned x_scalar_size ,
const unsigned x_rank ,
const size_t x_N0 , const unsigned x_N1 ,
const unsigned x_N2 , const unsigned x_N3 ,
const unsigned x_N4 , const unsigned x_N5 ,
const unsigned x_N6 , const unsigned x_N7 ,
const unsigned y_scalar_size ,
const unsigned y_rank ,
const size_t y_N0 , const unsigned y_N1 ,
const unsigned y_N2 , const unsigned y_N3 ,
const unsigned y_N4 , const unsigned y_N5 ,
const unsigned y_N6 , const unsigned y_N7 );
template< unsigned xSize , unsigned xRank ,
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
unsigned ySize , unsigned yRank ,
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
inline
void assert_shapes_are_equal(
const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
if ( x != y ) {
assert_shapes_are_equal_throw(
x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
}
}
template< unsigned xSize , unsigned xRank ,
unsigned xN0 , unsigned xN1 , unsigned xN2 , unsigned xN3 ,
unsigned xN4 , unsigned xN5 , unsigned xN6 , unsigned xN7 ,
unsigned ySize , unsigned yRank ,
unsigned yN0 , unsigned yN1 , unsigned yN2 , unsigned yN3 ,
unsigned yN4 , unsigned yN5 , unsigned yN6 , unsigned yN7 >
void assert_shapes_equal_dimension(
const Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> & x ,
const Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> & y )
{
typedef Shape<xSize,xRank,xN0,xN1,xN2,xN3,xN4,xN5,xN6,xN7> x_type ;
typedef Shape<ySize,yRank,yN0,yN1,yN2,yN3,yN4,yN5,yN6,yN7> y_type ;
// Omit comparison of scalar_size.
if ( unsigned( x.rank ) != unsigned( y.rank ) ||
size_t( x.N0 ) != size_t( y.N0 ) ||
unsigned( x.N1 ) != unsigned( y.N1 ) ||
unsigned( x.N2 ) != unsigned( y.N2 ) ||
unsigned( x.N3 ) != unsigned( y.N3 ) ||
unsigned( x.N4 ) != unsigned( y.N4 ) ||
unsigned( x.N5 ) != unsigned( y.N5 ) ||
unsigned( x.N6 ) != unsigned( y.N6 ) ||
unsigned( x.N7 ) != unsigned( y.N7 ) ) {
assert_shapes_are_equal_throw(
x_type::scalar_size, x_type::rank, x.N0, x.N1, x.N2, x.N3, x.N4, x.N5, x.N6, x.N7,
y_type::scalar_size, y_type::rank, y.N0, y.N1, y.N2, y.N3, y.N4, y.N5, y.N6, y.N7 );
}
}
//----------------------------------------------------------------------------
template< class ShapeType > struct assert_shape_is_rank_zero ;
template< class ShapeType > struct assert_shape_is_rank_one ;
template< unsigned Size >
struct assert_shape_is_rank_zero< Shape<Size,0> >
: public true_type {};
template< unsigned Size , unsigned s0 >
struct assert_shape_is_rank_one< Shape<Size,1,s0> >
: public true_type {};
//----------------------------------------------------------------------------
/** \brief Array bounds assertion templated on the execution space
* to allow device-specific abort code.
*/
template< class Space >
struct AssertShapeBoundsAbort ;
template<>
struct AssertShapeBoundsAbort< Kokkos::HostSpace >
{
static void apply( const size_t rank ,
const size_t n0 , const size_t n1 ,
const size_t n2 , const size_t n3 ,
const size_t n4 , const size_t n5 ,
const size_t n6 , const size_t n7 ,
const size_t arg_rank ,
const size_t i0 , const size_t i1 ,
const size_t i2 , const size_t i3 ,
const size_t i4 , const size_t i5 ,
const size_t i6 , const size_t i7 );
};
template< class ExecutionSpace >
struct AssertShapeBoundsAbort
{
KOKKOS_INLINE_FUNCTION
static void apply( const size_t rank ,
const size_t n0 , const size_t n1 ,
const size_t n2 , const size_t n3 ,
const size_t n4 , const size_t n5 ,
const size_t n6 , const size_t n7 ,
const size_t arg_rank ,
const size_t i0 , const size_t i1 ,
const size_t i2 , const size_t i3 ,
const size_t i4 , const size_t i5 ,
const size_t i6 , const size_t i7 )
{
AssertShapeBoundsAbort< Kokkos::HostSpace >
::apply( rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 ,
arg_rank, i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
}
};
template< class ShapeType >
KOKKOS_INLINE_FUNCTION
void assert_shape_bounds( const ShapeType & shape ,
const size_t arg_rank ,
const size_t i0 ,
const size_t i1 = 0 ,
const size_t i2 = 0 ,
const size_t i3 = 0 ,
const size_t i4 = 0 ,
const size_t i5 = 0 ,
const size_t i6 = 0 ,
const size_t i7 = 0 )
{
// Must supply at least as many indices as ranks.
// Every index must be within bounds.
const bool ok = ShapeType::rank <= arg_rank &&
- i0 < shape.N0 &&
- i1 < shape.N1 &&
- i2 < shape.N2 &&
- i3 < shape.N3 &&
- i4 < shape.N4 &&
- i5 < shape.N5 &&
- i6 < shape.N6 &&
- i7 < shape.N7 ;
+ i0 < size_t(shape.N0) &&
+ i1 < size_t(shape.N1) &&
+ i2 < size_t(shape.N2) &&
+ i3 < size_t(shape.N3) &&
+ i4 < size_t(shape.N4) &&
+ i5 < size_t(shape.N5) &&
+ i6 < size_t(shape.N6) &&
+ i7 < size_t(shape.N7) ;
if ( ! ok ) {
AssertShapeBoundsAbort< Kokkos::Impl::ActiveExecutionMemorySpace >
::apply( ShapeType::rank ,
shape.N0 , shape.N1 , shape.N2 , shape.N3 ,
shape.N4 , shape.N5 , shape.N6 , shape.N7 ,
arg_rank , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
}
}
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) assert_shape_bounds(S,1,I0);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) assert_shape_bounds(S,2,I0,I1);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) assert_shape_bounds(S,3,I0,I1,I2);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) assert_shape_bounds(S,4,I0,I1,I2,I3);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) assert_shape_bounds(S,5,I0,I1,I2,I3,I4);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) assert_shape_bounds(S,6,I0,I1,I2,I3,I4,I5);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) assert_shape_bounds(S,7,I0,I1,I2,I3,I4,I5,I6);
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) assert_shape_bounds(S,8,I0,I1,I2,I3,I4,I5,I6,I7);
#else
#define KOKKOS_ASSERT_SHAPE_BOUNDS_1( S , I0 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_2( S , I0 , I1 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_3( S , I0 , I1 , I2 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_4( S , I0 , I1 , I2 , I3 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_5( S , I0 , I1 , I2 , I3 , I4 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_6( S , I0 , I1 , I2 , I3 , I4 , I5 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_7( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 ) /* */
#define KOKKOS_ASSERT_SHAPE_BOUNDS_8( S , I0 , I1 , I2 , I3 , I4 , I5 , I6 , I7 ) /* */
#endif
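
The macro pair above makes bounds assertions vanish entirely unless KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK is defined. For illustration only, a hypothetical rank-2 accessor using the macro might look like this; at() and the row-major indexing are assumptions, not code from this header.

// Hypothetical rank-2 accessor; the macro expands to nothing in release builds.
template< typename T , class ShapeType >
T & at( T * ptr , const ShapeType & shape , size_t i0 , size_t i1 )
{
  KOKKOS_ASSERT_SHAPE_BOUNDS_2( shape , i0 , i1 ); // checked only in debug builds
  return ptr[ i0 * shape.N1 + i1 ]; // row-major indexing, for illustration only
}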
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Specialization and optimization for the Rank 0 shape.
template < unsigned ScalarSize >
struct Shape< ScalarSize , 0, 1,1,1,1, 1,1,1,1 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 0 };
enum { rank = 0 };
enum { N0 = 1 };
enum { N1 = 1 };
enum { N2 = 1 };
enum { N3 = 1 };
enum { N4 = 1 };
enum { N5 = 1 };
enum { N6 = 1 };
enum { N7 = 1 };
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{}
};
//----------------------------------------------------------------------------
template< unsigned R > struct assign_shape_dimension ;
#define KOKKOS_ASSIGN_SHAPE_DIMENSION( R ) \
template<> \
struct assign_shape_dimension< R > \
{ \
template< class ShapeType > \
KOKKOS_INLINE_FUNCTION \
assign_shape_dimension( ShapeType & shape \
, typename Impl::enable_if<( R < ShapeType::rank_dynamic ), size_t >::type n \
) { shape.N ## R = n ; } \
};
KOKKOS_ASSIGN_SHAPE_DIMENSION(0)
KOKKOS_ASSIGN_SHAPE_DIMENSION(1)
KOKKOS_ASSIGN_SHAPE_DIMENSION(2)
KOKKOS_ASSIGN_SHAPE_DIMENSION(3)
KOKKOS_ASSIGN_SHAPE_DIMENSION(4)
KOKKOS_ASSIGN_SHAPE_DIMENSION(5)
KOKKOS_ASSIGN_SHAPE_DIMENSION(6)
KOKKOS_ASSIGN_SHAPE_DIMENSION(7)
#undef KOKKOS_ASSIGN_SHAPE_DIMENSION
//----------------------------------------------------------------------------
// All-static dimension array
template < unsigned ScalarSize ,
unsigned Rank ,
unsigned s0 ,
unsigned s1 ,
unsigned s2 ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape {
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 0 };
enum { rank = Rank };
enum { N0 = s0 };
enum { N1 = s1 };
enum { N2 = s2 };
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{}
};
// 1 == dynamic_rank <= rank <= 8
template < unsigned ScalarSize ,
unsigned Rank ,
unsigned s1 ,
unsigned s2 ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,s1,s2,s3, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 1 };
enum { rank = Rank };
size_t N0 ; // For 1 == dynamic_rank allow N0 > 2^32
enum { N1 = s1 };
enum { N2 = s2 };
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
size_t n0 , unsigned = 0 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; }
};
// 2 == dynamic_rank <= rank <= 8
template < unsigned ScalarSize , unsigned Rank ,
unsigned s2 ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,s2,s3, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 2 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
enum { N2 = s2 };
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned = 0 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; }
};
// 3 == dynamic_rank <= rank <= 8
template < unsigned ScalarSize , unsigned Rank ,
unsigned s3 ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,s3, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 3 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
enum { N3 = s3 };
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned = 0 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; }
};
// 4 == dynamic_rank <= rank <= 8
template < unsigned ScalarSize , unsigned Rank ,
unsigned s4 ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank, 0,0,0,0, s4,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 4 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
enum { N4 = s4 };
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned = 0 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; }
};
// 5 == dynamic_rank <= rank <= 8
template < unsigned ScalarSize , unsigned Rank ,
unsigned s5 ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,s5,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 5 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
enum { N5 = s5 };
enum { N6 = s6 };
enum { N7 = s7 };
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned = 0 , unsigned = 0 , unsigned = 0 )
{ s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ; s.N4 = n4 ; }
};
// 6 == dynamic_rank <= rank <= 8
template < unsigned ScalarSize , unsigned Rank ,
unsigned s6 ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,s6,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 6 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
unsigned N5 ;
enum { N6 = s6 };
enum { N7 = s7 };
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned n5 = 0 , unsigned = 0 , unsigned = 0 )
{
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
s.N4 = n4 ; s.N5 = n5 ;
}
};
// 7 == dynamic_rank <= rank <= 8
template < unsigned ScalarSize , unsigned Rank ,
unsigned s7 >
struct Shape< ScalarSize , Rank , 0,0,0,0, 0,0,0,s7 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 7 };
enum { rank = Rank };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
unsigned N5 ;
unsigned N6 ;
enum { N7 = s7 };
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned n5 , unsigned n6 , unsigned = 0 )
{
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ;
}
};
// 8 == dynamic_rank <= rank <= 8
template < unsigned ScalarSize >
struct Shape< ScalarSize , 8 , 0,0,0,0, 0,0,0,0 >
{
enum { scalar_size = ScalarSize };
enum { rank_dynamic = 8 };
enum { rank = 8 };
unsigned N0 ;
unsigned N1 ;
unsigned N2 ;
unsigned N3 ;
unsigned N4 ;
unsigned N5 ;
unsigned N6 ;
unsigned N7 ;
KOKKOS_INLINE_FUNCTION
static
void assign( Shape & s ,
unsigned n0 , unsigned n1 , unsigned n2 , unsigned n3 ,
unsigned n4 , unsigned n5 , unsigned n6 , unsigned n7 )
{
s.N0 = n0 ; s.N1 = n1 ; s.N2 = n2 ; s.N3 = n3 ;
s.N4 = n4 ; s.N5 = n5 ; s.N6 = n6 ; s.N7 = n7 ;
}
};
//----------------------------------------------------------------------------
template< class ShapeType , unsigned N ,
unsigned R = ShapeType::rank_dynamic >
struct ShapeInsert ;
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 0 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
N ,
ShapeType::N0 ,
ShapeType::N1 ,
ShapeType::N2 ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 1 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
N ,
ShapeType::N1 ,
ShapeType::N2 ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 2 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
N ,
ShapeType::N2 ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 3 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N3 ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 4 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N4 ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 5 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N5 ,
ShapeType::N6 > type ;
};
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 6 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
N ,
ShapeType::N6 > type ;
};
template< class ShapeType , unsigned N >
struct ShapeInsert< ShapeType , N , 7 >
{
typedef Shape< ShapeType::scalar_size ,
ShapeType::rank + 1 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
0 ,
N > type ;
};
//----------------------------------------------------------------------------
template< class DstShape , class SrcShape ,
unsigned DstRankDynamic = DstShape::rank_dynamic ,
bool DstRankDynamicOK = unsigned(DstShape::rank_dynamic) >= unsigned(SrcShape::rank_dynamic) >
struct ShapeCompatible { enum { value = false }; };
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 8 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) };
};
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 7 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 6 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 5 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 4 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 3 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 2 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 1 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
template< class DstShape , class SrcShape >
struct ShapeCompatible< DstShape , SrcShape , 0 , true >
{
enum { value = unsigned(DstShape::scalar_size) == unsigned(SrcShape::scalar_size) &&
unsigned(DstShape::N0) == unsigned(SrcShape::N0) &&
unsigned(DstShape::N1) == unsigned(SrcShape::N1) &&
unsigned(DstShape::N2) == unsigned(SrcShape::N2) &&
unsigned(DstShape::N3) == unsigned(SrcShape::N3) &&
unsigned(DstShape::N4) == unsigned(SrcShape::N4) &&
unsigned(DstShape::N5) == unsigned(SrcShape::N5) &&
unsigned(DstShape::N6) == unsigned(SrcShape::N6) &&
unsigned(DstShape::N7) == unsigned(SrcShape::N7) };
};
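// A sketch of the rule encoded above (inferred from the specializations):
// two shapes are assignment-compatible only when the destination's dynamic
// rank is at least the source's, the scalar sizes agree, and every
// statically-sized destination dimension (slots DstRankDynamic .. 7)
// matches the source extent exactly. For example, with DstRankDynamic == 6
// only N6 and N7 must agree at compile time; dimensions 0..5 are deferred
// to runtime checking.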
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< unsigned ScalarSize , unsigned Rank ,
unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 ,
typename iType >
KOKKOS_INLINE_FUNCTION
size_t dimension(
const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape ,
const iType & r )
{
return 0 == r ? shape.N0 : (
1 == r ? shape.N1 : (
2 == r ? shape.N2 : (
3 == r ? shape.N3 : (
4 == r ? shape.N4 : (
5 == r ? shape.N5 : (
6 == r ? shape.N6 : (
7 == r ? shape.N7 : 1 )))))));
}
template< unsigned ScalarSize , unsigned Rank ,
unsigned s0 , unsigned s1 , unsigned s2 , unsigned s3 ,
unsigned s4 , unsigned s5 , unsigned s6 , unsigned s7 >
KOKKOS_INLINE_FUNCTION
size_t cardinality_count(
const Shape<ScalarSize,Rank,s0,s1,s2,s3,s4,s5,s6,s7> & shape )
{
return size_t(shape.N0) * shape.N1 * shape.N2 * shape.N3 *
shape.N4 * shape.N5 * shape.N6 * shape.N7 ;
}
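// Illustrative example (hypothetical values): for a rank-2 shape with
// N0 == 2 and N1 == 3 (trailing extents equal to one),
// dimension(shape,0) == 2, dimension(shape,1) == 3, and
// cardinality_count(shape) == 2 * 3 == 6 entries; the size_t cast on N0
// keeps the product from overflowing a 32-bit intermediate.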
//----------------------------------------------------------------------------
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_CORESHAPE_HPP */
diff --git a/lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp b/lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp
new file mode 100644
index 000000000..b2aea14df
--- /dev/null
+++ b/lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp
@@ -0,0 +1,693 @@
+/*
+
+Copyright (c) 2014, NVIDIA Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef KOKKOS_SYNCHRONIC_HPP
+#define KOKKOS_SYNCHRONIC_HPP
+
+#include <impl/Kokkos_Synchronic_Config.hpp>
+
+#include <atomic>
+#include <chrono>
+#include <thread>
+#include <functional>
+#include <algorithm>
+#include <inttypes.h>
+
+namespace Kokkos {
+namespace Impl {
+
+enum notify_hint {
+ notify_all,
+ notify_one,
+ notify_none
+};
+enum expect_hint {
+ expect_urgent,
+ expect_delay
+};
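+
+// These hints are advisory (a sketch of intent): the notify_* values tell
+// the updating side how many waiters must be woken (all, one, or none),
+// while the expect_* values let a waiter trade latency against CPU usage.
+// The portable fallback below ignores them; the futex/WaitOnAddress path
+// honors the notify hints.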
+
+namespace Details {
+
+template <class S, class T>
+bool __synchronic_spin_wait_for_update(S const& arg, T const& nval, int attempts) noexcept {
+ int i = 0;
+ for(;i < __SYNCHRONIC_SPIN_RELAX(attempts); ++i)
+ if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1))
+ return true;
+ else
+ __synchronic_relax();
+ for(;i < attempts; ++i)
+ if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1))
+ return true;
+ else
+ __synchronic_yield();
+ return false;
+}
+
+struct __exponential_backoff {
+ __exponential_backoff(int arg_maximum=512) : maximum(arg_maximum), microseconds(8), x(123456789), y(362436069), z(521288629) {
+ }
+ static inline void sleep_for(std::chrono::microseconds const& time) {
+ auto t = time.count();
+ if(__builtin_expect(t > 75,0)) {
+ portable_sleep(time);
+ }
+ else if(__builtin_expect(t > 25,0))
+ __synchronic_yield();
+ else
+ __synchronic_relax();
+ }
+ void sleep_for_step() {
+ sleep_for(step());
+ }
+ std::chrono::microseconds step() {
+ float const f = ranfu();
+ int const t = int(microseconds * f);
+ if(__builtin_expect(f >= 0.95f,0))
+ microseconds = 8;
+ else
+ microseconds = (std::min)(microseconds<<1,maximum); // grow (not shrink) the window, capped at 'maximum'
+ return std::chrono::microseconds(t);
+ }
+private :
+ int maximum, microseconds, x, y, z;
+ int xorshf96() {
+ int t;
+ x ^= x << 16; x ^= x >> 5; x ^= x << 1;
+ t = x; x = y; y = z; z = t ^ x ^ y;
+ return z;
+ }
+ float ranfu() {
+ return (float)(xorshf96()&(~0u>>1)) / (float)(~0u>>1); // mask to a non-negative int before scaling into [0,1]
+ }
+};
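+
+// Illustrative use of __exponential_backoff (the 'flag' variable is
+// hypothetical):
+//
+//   std::atomic<bool> flag(false);
+//   __exponential_backoff b(256); // cap each sleep at 256 microseconds
+//   while( !flag.load(std::memory_order_acquire) )
+//     b.sleep_for_step(); // sleep a randomized, growing slice of time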
+
+template <class T, class Enable = void>
+struct __synchronic_base {
+
+protected:
+ std::atomic<T> atom;
+
+ void notify(notify_hint = notify_all) noexcept {
+ }
+ void notify(notify_hint = notify_all) volatile noexcept {
+ }
+
+public :
+ __synchronic_base() noexcept = default;
+ constexpr __synchronic_base(T v) noexcept : atom(v) { }
+ __synchronic_base(const __synchronic_base&) = delete;
+ ~__synchronic_base() { }
+ __synchronic_base& operator=(const __synchronic_base&) = delete;
+ __synchronic_base& operator=(const __synchronic_base&) volatile = delete;
+
+ void expect_update(T val, expect_hint = expect_urgent) const noexcept {
+ if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
+ return;
+ __exponential_backoff b;
+ while(atom.load(std::memory_order_relaxed) == val) {
+ __do_backoff(b);
+ if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
+ return;
+ }
+ }
+ void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept {
+ if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
+ return;
+ __exponential_backoff b;
+ while(atom.load(std::memory_order_relaxed) == val) {
+ __do_backoff(b);
+ if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
+ return;
+ }
+ }
+
+ template <class Clock, class Duration>
+ void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const {
+ if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
+ return;
+ __exponential_backoff b;
+ std::chrono::milliseconds remains = std::chrono::duration_cast<std::chrono::milliseconds>(then - Clock::now()); // explicit cast: the difference is in the caller's clock/duration, not milliseconds
+ while(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val) {
+ __do_backoff(b);
+ if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
+ return;
+ remains = std::chrono::duration_cast<std::chrono::milliseconds>(then - Clock::now());
+ }
+ }
+ template <class Clock, class Duration>
+ void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile {
+ if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A))
+ return;
+ __exponential_backoff b;
+ std::chrono::milliseconds remains = std::chrono::duration_cast<std::chrono::milliseconds>(then - Clock::now());
+ while(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val) {
+ __do_backoff(b);
+ if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B))
+ return;
+ remains = std::chrono::duration_cast<std::chrono::milliseconds>(then - Clock::now());
+ }
+ }
+};
+
+#ifdef __SYNCHRONIC_COMPATIBLE
+template <class T>
+struct __synchronic_base<T, typename std::enable_if<__SYNCHRONIC_COMPATIBLE(T)>::type> {
+
+public:
+ std::atomic<T> atom;
+
+ void notify(notify_hint hint = notify_all) noexcept {
+ if(__builtin_expect(hint == notify_none,1))
+ return;
+ auto const x = count.fetch_add(0,std::memory_order_acq_rel);
+ if(__builtin_expect(x,0)) {
+ if(__builtin_expect(hint == notify_all,1))
+ __synchronic_wake_all(&atom);
+ else
+ __synchronic_wake_one(&atom);
+ }
+ }
+ void notify(notify_hint hint = notify_all) volatile noexcept {
+ if(__builtin_expect(hint == notify_none,1))
+ return;
+ auto const x = count.fetch_add(0,std::memory_order_acq_rel);
+ if(__builtin_expect(x,0)) {
+ if(__builtin_expect(hint == notify_all,1))
+ __synchronic_wake_all_volatile(&atom);
+ else
+ __synchronic_wake_one_volatile(&atom);
+ }
+ }
+
+public :
+ __synchronic_base() noexcept : count(0) { }
+ constexpr __synchronic_base(T v) noexcept : atom(v), count(0) { }
+ __synchronic_base(const __synchronic_base&) = delete;
+ ~__synchronic_base() { }
+ __synchronic_base& operator=(const __synchronic_base&) = delete;
+ __synchronic_base& operator=(const __synchronic_base&) volatile = delete;
+
+ void expect_update(T val, expect_hint = expect_urgent) const noexcept {
+ if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
+ return;
+ while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) {
+ count.fetch_add(1,std::memory_order_release);
+ __synchronic_wait(&atom,val);
+ count.fetch_add(-1,std::memory_order_acquire);
+ }
+ }
+ void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept {
+ if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
+ return;
+ while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) {
+ count.fetch_add(1,std::memory_order_release);
+ __synchronic_wait_volatile(&atom,val);
+ count.fetch_add(-1,std::memory_order_acquire);
+ }
+ }
+
+ template <class Clock, class Duration>
+ void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const {
+ if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
+ return;
+ std::chrono::milliseconds remains = std::chrono::duration_cast<std::chrono::milliseconds>(then - Clock::now()); // explicit cast, as above
+ while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) {
+ count.fetch_add(1,std::memory_order_release);
+ __synchronic_wait_timed(&atom,val,remains);
+ count.fetch_add(-1,std::memory_order_acquire);
+ remains = std::chrono::duration_cast<std::chrono::milliseconds>(then - Clock::now());
+ }
+ }
+ template <class Clock, class Duration>
+ void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile {
+ if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1))
+ return;
+ std::chrono::milliseconds remains = std::chrono::duration_cast<std::chrono::milliseconds>(then - Clock::now());
+ while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) {
+ count.fetch_add(1,std::memory_order_release);
+ __synchronic_wait_timed_volatile(&atom,val,remains);
+ count.fetch_add(-1,std::memory_order_acquire);
+ remains = std::chrono::duration_cast<std::chrono::milliseconds>(then - Clock::now());
+ }
+ }
+private:
+ mutable std::atomic<int> count;
+};
+#endif
+
+template <class T, class Enable = void>
+struct __synchronic : public __synchronic_base<T> {
+
+ __synchronic() noexcept = default;
+ constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { }
+ __synchronic(const __synchronic&) = delete;
+ __synchronic& operator=(const __synchronic&) = delete;
+ __synchronic& operator=(const __synchronic&) volatile = delete;
+};
+
+template <class T>
+struct __synchronic<T,typename std::enable_if<std::is_integral<T>::value>::type> : public __synchronic_base<T> {
+
+ T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
+ auto const t = this->atom.fetch_add(v,m);
+ this->notify(n);
+ return t;
+ }
+ T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
+ auto const t = this->atom.fetch_add(v,m);
+ this->notify(n);
+ return t;
+ }
+ T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
+ auto const t = this->atom.fetch_sub(v,m);
+ this->notify(n);
+ return t;
+ }
+ T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
+ auto const t = this->atom.fetch_sub(v,m);
+ this->notify(n);
+ return t;
+ }
+ T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
+ auto const t = this->atom.fetch_and(v,m);
+ this->notify(n);
+ return t;
+ }
+ T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
+ auto const t = this->atom.fetch_and(v,m);
+ this->notify(n);
+ return t;
+ }
+ T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
+ auto const t = this->atom.fetch_or(v,m);
+ this->notify(n);
+ return t;
+ }
+ T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
+ auto const t = this->atom.fetch_or(v,m);
+ this->notify(n);
+ return t;
+ }
+ T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
+ auto const t = this->atom.fetch_xor(v,m);
+ this->notify(n);
+ return t;
+ }
+ T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
+ auto const t = this->atom.fetch_xor(v,m);
+ this->notify(n);
+ return t;
+ }
+
+ __synchronic() noexcept = default;
+ constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { }
+ __synchronic(const __synchronic&) = delete;
+ __synchronic& operator=(const __synchronic&) = delete;
+ __synchronic& operator=(const __synchronic&) volatile = delete;
+
+ T operator=(T v) volatile noexcept {
+ auto const t = this->atom = v;
+ this->notify();
+ return t;
+ }
+ T operator=(T v) noexcept {
+ auto const t = this->atom = v;
+ this->notify();
+ return t;
+ }
+ T operator++(int) volatile noexcept {
+ auto const t = this->atom++; // postfix: return the value before increment
+ this->notify();
+ return t;
+ }
+ T operator++(int) noexcept {
+ auto const t = this->atom++;
+ this->notify();
+ return t;
+ }
+ T operator--(int) volatile noexcept {
+ auto const t = this->atom--;
+ this->notify();
+ return t;
+ }
+ T operator--(int) noexcept {
+ auto const t = this->atom--;
+ this->notify();
+ return t;
+ }
+ T operator++() volatile noexcept {
+ auto const t = ++this->atom; // prefix: return the value after increment
+ this->notify();
+ return t;
+ }
+ T operator++() noexcept {
+ auto const t = ++this->atom;
+ this->notify();
+ return t;
+ }
+ T operator--() volatile noexcept {
+ auto const t = --this->atom;
+ this->notify();
+ return t;
+ }
+ T operator--() noexcept {
+ auto const t = --this->atom;
+ this->notify();
+ return t;
+ }
+ T operator+=(T v) volatile noexcept {
+ auto const t = this->atom += v;
+ this->notify();
+ return t;
+ }
+ T operator+=(T v) noexcept {
+ auto const t = this->atom += v;
+ this->notify();
+ return t;
+ }
+ T operator-=(T v) volatile noexcept {
+ auto const t = this->atom -= v;
+ this->notify();
+ return t;
+ }
+ T operator-=(T v) noexcept {
+ auto const t = this->atom -= v;
+ this->notify();
+ return t;
+ }
+ T operator&=(T v) volatile noexcept {
+ auto const t = this->atom &= v;
+ this->notify();
+ return t;
+ }
+ T operator&=(T v) noexcept {
+ auto const t = this->atom &= v;
+ this->notify();
+ return t;
+ }
+ T operator|=(T v) volatile noexcept {
+ auto const t = this->atom |= v;
+ this->notify();
+ return t;
+ }
+ T operator|=(T v) noexcept {
+ auto const t = this->atom |= v;
+ this->notify();
+ return t;
+ }
+ T operator^=(T v) volatile noexcept {
+ auto const t = this->atom ^= v;
+ this->notify();
+ return t;
+ }
+ T operator^=(T v) noexcept {
+ auto const t = this->atom ^= v;
+ this->notify();
+ return t;
+ }
+};
+
+template <class T>
+struct __synchronic<T*> : public __synchronic_base<T*> {
+
+ T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
+ auto const t = this->atom.fetch_add(v,m);
+ this->notify(n);
+ return t;
+ }
+ T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
+ auto const t = this->atom.fetch_add(v,m);
+ this->notify(n);
+ return t;
+ }
+ T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
+ auto const t = this->atom.fetch_sub(v,m);
+ this->notify(n);
+ return t;
+ }
+ T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
+ auto const t = this->atom.fetch_sub(v,m);
+ this->notify(n);
+ return t;
+ }
+
+ __synchronic() noexcept = default;
+ constexpr __synchronic(T* v) noexcept : __synchronic_base<T*>(v) { }
+ __synchronic(const __synchronic&) = delete;
+ __synchronic& operator=(const __synchronic&) = delete;
+ __synchronic& operator=(const __synchronic&) volatile = delete;
+
+ T* operator=(T* v) volatile noexcept {
+ auto const t = this->atom = v;
+ this->notify();
+ return t;
+ }
+ T* operator=(T* v) noexcept {
+ auto const t = this->atom = v;
+ this->notify();
+ return t;
+ }
+ T* operator++(int) volatile noexcept {
+ auto const t = this->atom++; // postfix: return the value before increment
+ this->notify();
+ return t;
+ }
+ T* operator++(int) noexcept {
+ auto const t = this->atom++;
+ this->notify();
+ return t;
+ }
+ T* operator--(int) volatile noexcept {
+ auto const t = this->atom--;
+ this->notify();
+ return t;
+ }
+ T* operator--(int) noexcept {
+ auto const t = this->atom--;
+ this->notify();
+ return t;
+ }
+ T* operator++() volatile noexcept {
+ auto const t = ++this->atom; // prefix: return the value after increment
+ this->notify();
+ return t;
+ }
+ T* operator++() noexcept {
+ auto const t = ++this->atom;
+ this->notify();
+ return t;
+ }
+ T* operator--() volatile noexcept {
+ auto const t = --this->atom;
+ this->notify();
+ return t;
+ }
+ T* operator--() noexcept {
+ auto const t = --this->atom;
+ this->notify();
+ return t;
+ }
+ T* operator+=(ptrdiff_t v) volatile noexcept {
+ auto const t = this->atom += v;
+ this->notify();
+ return t;
+ }
+ T* operator+=(ptrdiff_t v) noexcept {
+ auto const t = this->atom += v;
+ this->notify();
+ return t;
+ }
+ T* operator-=(ptrdiff_t v) volatile noexcept {
+ auto const t = this->atom -= v;
+ this->notify();
+ return t;
+ }
+ T* operator-=(ptrdiff_t v) noexcept {
+ auto const t = this->atom -= v;
+ this->notify();
+ return t;
+ }
+};
+
+} //namespace Details
+
+template <class T>
+struct synchronic : public Details::__synchronic<T> {
+
+ bool is_lock_free() const volatile noexcept { return this->atom.is_lock_free(); }
+ bool is_lock_free() const noexcept { return this->atom.is_lock_free(); }
+ void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
+ this->atom.store(v,m);
+ this->notify(n);
+ }
+ void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
+ this->atom.store(v,m);
+ this->notify(n);
+ }
+ T load(std::memory_order m = std::memory_order_seq_cst) const volatile noexcept { return this->atom.load(m); }
+ T load(std::memory_order m = std::memory_order_seq_cst) const noexcept { return this->atom.load(m); }
+
+ operator T() const volatile noexcept { return (T)this->atom; }
+ operator T() const noexcept { return (T)this->atom; }
+
+ T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
+ auto const t = this->atom.exchange(v,m);
+ this->notify(n);
+ return t;
+ }
+ T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
+ auto const t = this->atom.exchange(v,m);
+ this->notify(n);
+ return t;
+ }
+ bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept {
+ auto const t = this->atom.compare_exchange_weak(r,v,m1,m2);
+ this->notify(n);
+ return t;
+ }
+ bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept {
+ auto const t = this->atom.compare_exchange_weak(r,v,m1, m2);
+ this->notify(n);
+ return t;
+ }
+ bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept {
+ auto const t = this->atom.compare_exchange_strong(r,v,m1,m2);
+ this->notify(n);
+ return t;
+ }
+ bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept {
+ auto const t = this->atom.compare_exchange_strong(r,v,m1,m2);
+ this->notify(n);
+ return t;
+ }
+ bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
+ auto const t = this->atom.compare_exchange_weak(r,v,m);
+ this->notify(n);
+ return t;
+ }
+ bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
+ auto const t = this->atom.compare_exchange_weak(r,v,m);
+ this->notify(n);
+ return t;
+ }
+ bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept {
+ auto const t = this->atom.compare_exchange_strong(r,v,m);
+ this->notify(n);
+ return t;
+ }
+ bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept {
+ auto const t = this->atom.compare_exchange_strong(r,v,m);
+ this->notify(n);
+ return t;
+ }
+
+ synchronic() noexcept = default;
+ constexpr synchronic(T val) noexcept : Details::__synchronic<T>(val) { }
+ synchronic(const synchronic&) = delete;
+ ~synchronic() { }
+ synchronic& operator=(const synchronic&) = delete;
+ synchronic& operator=(const synchronic&) volatile = delete;
+ T operator=(T val) noexcept {
+ return Details::__synchronic<T>::operator=(val);
+ }
+ T operator=(T val) volatile noexcept {
+ return Details::__synchronic<T>::operator=(val);
+ }
+
+ T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept {
+ Details::__synchronic<T>::expect_update(val,h);
+ return load(order);
+ }
+ T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept {
+ Details::__synchronic<T>::expect_update(val,h);
+ return load(order);
+ }
+ T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept {
+ for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed))
+ Details::__synchronic<T>::expect_update(nval,h);
+ return load(order);
+ }
+ T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept {
+ for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed))
+ Details::__synchronic<T>::expect_update(nval,h); // qualify: members of a dependent base are not found by unqualified lookup
+ return load(order);
+ }
+ template <class Rep, class Period>
+ void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const {
+ Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h);
+ }
+ template < class Rep, class Period>
+ void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const volatile {
+ Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h);
+ }
+};
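+
+// Illustrative handshake (hypothetical names, not part of this header):
+//
+//   Kokkos::Impl::synchronic<int> ready(0);
+//   // consumer thread: spins briefly, then waits in the OS where supported
+//   int v = ready.load_when_not_equal(0, std::memory_order_acquire);
+//   // producer thread: publish the value and wake exactly one waiter
+//   ready.store(1, std::memory_order_release, Kokkos::Impl::notify_one);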
+
+
+typedef synchronic<char> synchronic_char;
+typedef synchronic<signed char> synchronic_schar;
+typedef synchronic<unsigned char> synchronic_uchar;
+typedef synchronic<short> synchronic_short;
+typedef synchronic<unsigned short> synchronic_ushort;
+typedef synchronic<int> synchronic_int;
+typedef synchronic<unsigned int> synchronic_uint;
+typedef synchronic<long> synchronic_long;
+typedef synchronic<unsigned long> synchronic_ulong;
+typedef synchronic<long long> synchronic_llong;
+typedef synchronic<unsigned long long> synchronic_ullong;
+//typedef synchronic<char16_t> synchronic_char16_t;
+//typedef synchronic<char32_t> synchronic_char32_t;
+typedef synchronic<wchar_t> synchronic_wchar_t;
+
+typedef synchronic<int_least8_t> synchronic_int_least8_t;
+typedef synchronic<uint_least8_t> synchronic_uint_least8_t;
+typedef synchronic<int_least16_t> synchronic_int_least16_t;
+typedef synchronic<uint_least16_t> synchronic_uint_least16_t;
+typedef synchronic<int_least32_t> synchronic_int_least32_t;
+typedef synchronic<uint_least32_t> synchronic_uint_least32_t;
+typedef synchronic<int_least64_t> synchronic_int_least64_t;
+typedef synchronic<uint_least64_t> synchronic_uint_least64_t;
+typedef synchronic<int_fast8_t> synchronic_int_fast8_t;
+typedef synchronic<uint_fast8_t> synchronic_uint_fast8_t;
+typedef synchronic<int_fast16_t> synchronic_int_fast16_t;
+typedef synchronic<uint_fast16_t> synchronic_uint_fast16_t;
+typedef synchronic<int_fast32_t> synchronic_int_fast32_t;
+typedef synchronic<uint_fast32_t> synchronic_uint_fast32_t;
+typedef synchronic<int_fast64_t> synchronic_int_fast64_t;
+typedef synchronic<uint_fast64_t> synchronic_uint_fast64_t;
+typedef synchronic<intptr_t> synchronic_intptr_t;
+typedef synchronic<uintptr_t> synchronic_uintptr_t;
+typedef synchronic<size_t> synchronic_size_t;
+typedef synchronic<ptrdiff_t> synchronic_ptrdiff_t;
+typedef synchronic<intmax_t> synchronic_intmax_t;
+typedef synchronic<uintmax_t> synchronic_uintmax_t;
+
+}
+}
+
+#endif //KOKKOS_SYNCHRONIC_HPP
diff --git a/lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp b/lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp
new file mode 100644
index 000000000..0a6dd6e71
--- /dev/null
+++ b/lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp
@@ -0,0 +1,169 @@
+/*
+
+Copyright (c) 2014, NVIDIA Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef KOKKOS_SYNCHRONIC_CONFIG_H
+#define KOKKOS_SYNCHRONIC_CONFIG_H
+
+#include <thread>
+#include <chrono>
+
+namespace Kokkos {
+namespace Impl {
+
+//the default yield function used inside the implementation is the Standard one
+#define __synchronic_yield std::this_thread::yield
+#define __synchronic_relax __synchronic_yield
+
+#if defined(_MSC_VER)
+ //this is a handy GCC optimization that I use inside the implementation
+ #define __builtin_expect(condition,common) condition
+ #if _MSC_VER <= 1800
+ //using certain keywords that VC++ temporarily doesn't support
+ #define _ALLOW_KEYWORD_MACROS
+ #define noexcept
+ #define constexpr
+ #endif
+ //yes, I define multiple assignment operators
+ #pragma warning(disable:4522)
+ //I don't understand how Windows is so bad at timing functions, but is OK
+ //with straight-up yield loops
+ #define __do_backoff(b) __synchronic_yield()
+#else
+#define __do_backoff(b) b.sleep_for_step()
+#endif
+
+//certain platforms have efficient support for spin-waiting built into the operating system
+#if defined(__linux__) || (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0602)
+#if defined(_WIN32_WINNT)
+#include <winsock2.h>
+#include <Windows.h>
+ //the combination of WaitOnAddress and WakeByAddressAll is supported on Windows 8 (_WIN32_WINNT >= 0x0602) and later
+ #define __synchronic_wait(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1)
+ #define __synchronic_wait_timed(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count())
+ #define __synchronic_wake_one(x) WakeByAddressSingle((PVOID)x)
+ #define __synchronic_wake_all(x) WakeByAddressAll((PVOID)x)
+ #define __synchronic_wait_volatile(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1)
+ #define __synchronic_wait_timed_volatile(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count())
+ #define __synchronic_wake_one_volatile(x) WakeByAddressSingle((PVOID)x)
+ #define __synchronic_wake_all_volatile(x) WakeByAddressAll((PVOID)x)
+ #define __SYNCHRONIC_COMPATIBLE(x) (std::is_pod<x>::value && (sizeof(x) <= 8))
+
+ inline void native_sleep(unsigned long microseconds)
+ {
+ // Sleep() has millisecond resolution, so requests under 1000 microseconds truncate to Sleep(0), which merely yields the timeslice.
+ Sleep(microseconds / 1000);
+ }
+
+ inline void native_yield()
+ {
+ SwitchToThread();
+ }
+#elif defined(__linux__)
+ #include <chrono>
+ #include <time.h>
+ #include <unistd.h>
+ #include <pthread.h>
+ #include <linux/futex.h>
+ #include <sys/syscall.h>
+ #include <climits>
+ #include <cassert>
+ template < class Rep, class Period>
+ inline timespec to_timespec(std::chrono::duration<Rep,Period> const& delta) {
+ struct timespec ts;
+ ts.tv_sec = static_cast<long>(std::chrono::duration_cast<std::chrono::seconds>(delta).count());
+ // keep only the sub-second remainder in tv_nsec; whole seconds belong in tv_sec
+ ts.tv_nsec = static_cast<long>(std::chrono::duration_cast<std::chrono::nanoseconds>(delta).count() % 1000000000L);
+ return ts;
+ }
+ inline long futex(void const* addr1, int op, int val1) {
+ return syscall(SYS_futex, addr1, op, val1, 0, 0, 0);
+ }
+ inline long futex(void const* addr1, int op, int val1, struct timespec timeout) {
+ return syscall(SYS_futex, addr1, op, val1, &timeout, 0, 0);
+ }
+ inline void native_sleep(unsigned long microseconds)
+ {
+ usleep(microseconds);
+ }
+ inline void native_yield()
+ {
+ pthread_yield();
+ }
+
+ //the combination of SYS_futex(WAIT) and SYS_futex(WAKE) is supported on all recent Linux distributions
+ #define __synchronic_wait(x,v) futex(x, FUTEX_WAIT_PRIVATE, v)
+ #define __synchronic_wait_timed(x,v,t) futex(x, FUTEX_WAIT_PRIVATE, v, to_timespec(t))
+ #define __synchronic_wake_one(x) futex(x, FUTEX_WAKE_PRIVATE, 1)
+ #define __synchronic_wake_all(x) futex(x, FUTEX_WAKE_PRIVATE, INT_MAX)
+ #define __synchronic_wait_volatile(x,v) futex(x, FUTEX_WAIT, v)
+ #define __synchronic_wait_timed_volatile(x,v,t) futex(x, FUTEX_WAIT, v, to_timespec(t))
+ #define __synchronic_wake_one_volatile(x) futex(x, FUTEX_WAKE, 1)
+ #define __synchronic_wake_all_volatile(x) futex(x, FUTEX_WAKE, INT_MAX)
+ #define __SYNCHRONIC_COMPATIBLE(x) (std::is_integral<x>::value && (sizeof(x) <= 4))
+
+ //the yield function on Linux is better replaced by sched_yield, which is tuned for spin-waiting
+ #undef __synchronic_yield
+ #define __synchronic_yield sched_yield
+
+ //for extremely short wait times, just let another hyper-thread run
+ #undef __synchronic_relax
+ #define __synchronic_relax() asm volatile("rep; nop" ::: "memory")
+
+#endif
+#endif
+
+#ifdef _GLIBCXX_USE_NANOSLEEP
+inline void portable_sleep(std::chrono::microseconds const& time)
+{ std::this_thread::sleep_for(time); }
+#else
+inline void portable_sleep(std::chrono::microseconds const& time)
+{ native_sleep(time.count()); }
+#endif
+
+#ifdef _GLIBCXX_USE_SCHED_YIELD
+inline void portable_yield()
+{ std::this_thread::yield(); }
+#else
+inline void portable_yield()
+{ native_yield(); }
+#endif
+
+//this is the number of times we initially spin, on the first wait attempt
+#define __SYNCHRONIC_SPIN_COUNT_A 16
+
+//this decides how many of the initial spins use the cheap 'relax' instead of a yield; 'c' is the total attempt count
+//#define __SYNCHRONIC_SPIN_YIELD(c) true
+#define __SYNCHRONIC_SPIN_RELAX(c) (c>>3)
+
+//this is the number of times we normally spin, on every subsequent wait attempt
+#define __SYNCHRONIC_SPIN_COUNT_B 8
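+
+//worked example of the policy above: the first wait attempt spins 16 times
+//(16>>3 == 2 cheap relaxes, then 14 yields) before falling back to the
+//OS-level wait; every later attempt spins only 8 times (1 relax, 7 yields)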
+
+}
+}
+
+#endif //KOKKOS_SYNCHRONIC_CONFIG_H
diff --git a/lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp b/lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp
new file mode 100644
index 000000000..facc8d6d8
--- /dev/null
+++ b/lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp
@@ -0,0 +1,162 @@
+/*
+
+Copyright (c) 2014, NVIDIA Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef KOKKOS_SYNCHRONIC_N3998_HPP
+#define KOKKOS_SYNCHRONIC_N3998_HPP
+
+#include <impl/Kokkos_Synchronic.hpp>
+#include <functional>
+
+/*
+In the section below, a synchronization point represents a point at which a
+thread may block until a given synchronization condition has been reached or
+at which it may notify other threads that a synchronization condition has
+been achieved.
+*/
+namespace Kokkos { namespace Impl {
+
+ /*
+ A latch maintains an internal counter that is initialized when the latch
+ is created. The synchronization condition is reached when the counter is
+ decremented to 0. Threads may block at a synchronization point waiting
+ for the condition to be reached. When the condition is reached, any such
+ blocked threads will be released.
+ */
+ struct latch {
+ latch(int val) : count(val), released(false) { }
+ latch(const latch&) = delete;
+ latch& operator=(const latch&) = delete;
+ ~latch( ) { }
+ void arrive( ) {
+ __arrive( );
+ }
+ void arrive_and_wait( ) {
+ if(!__arrive( ))
+ wait( );
+ }
+ void wait( ) {
+ while(!released.load_when_not_equal(false,std::memory_order_acquire))
+ ;
+ }
+ bool try_wait( ) {
+ return released.load(std::memory_order_acquire);
+ }
+ private:
+ bool __arrive( ) {
+ if(count.fetch_add(-1,std::memory_order_release)!=1)
+ return false;
+ released.store(true,std::memory_order_release);
+ return true;
+ }
+ std::atomic<int> count;
+ synchronic<bool> released;
+ };
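+
+ /*
+ Illustrative use (hypothetical threads): workers block in wait() while
+ one thread finishes setup; its arrive() drops the counter to zero and
+ releases them.
+
+ latch l(1);
+ // workers: l.wait();
+ // main thread, after setup: l.arrive();
+ */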
+
+ /*
+ A barrier is created with an initial value representing the number of threads
+ that can arrive at the synchronization point. When that many threads have
+ arrived, the synchronization condition is reached and the threads are
+ released. The barrier will then reset, and may be reused for a new cycle, in
+ which the same set of threads may arrive again at the synchronization point.
+ The same set of threads shall arrive at the barrier in each cycle, otherwise
+ the behaviour is undefined.
+ */
+ struct barrier {
+ barrier(int val) : expected(val), arrived(0), nexpected(val), epoch(0) { }
+ barrier(const barrier&) = delete;
+ barrier& operator=(const barrier&) = delete;
+ ~barrier() { }
+ void arrive_and_wait() {
+ int const myepoch = epoch.load(std::memory_order_relaxed);
+ if(!__arrive(myepoch))
+ while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch)
+ ;
+ }
+ void arrive_and_drop() {
+ nexpected.fetch_add(-1,std::memory_order_relaxed);
+ __arrive(epoch.load(std::memory_order_relaxed));
+ }
+ private:
+ bool __arrive(int const myepoch) {
+ int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1;
+ if(__builtin_expect(myresult == expected,0)) {
+ expected = nexpected.load(std::memory_order_relaxed);
+ arrived.store(0,std::memory_order_relaxed);
+ epoch.store(myepoch+1,std::memory_order_release);
+ return true;
+ }
+ return false;
+ }
+ int expected;
+ std::atomic<int> arrived, nexpected;
+ synchronic<int> epoch;
+ };
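+
+ /*
+ Illustrative use (NTHREADS, NSTEPS and do_work are hypothetical): workers
+ advancing in lock-step; the last arrival of each cycle resets the count
+ and bumps the epoch, releasing the rest.
+
+ barrier b(NTHREADS);
+ for(int s = 0; s < NSTEPS; ++s) { do_work(s); b.arrive_and_wait(); }
+ */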
+
+ /*
+ A notifying barrier behaves as a barrier, but is constructed with a callable
+ completion function that is invoked after all threads have arrived at the
+ synchronization point, and before the synchronization condition is reached.
+ The completion may modify the set of threads that arrives at the barrier in
+ each cycle.
+ */
+ struct notifying_barrier {
+ template <typename T>
+ notifying_barrier(int val, T && f) : expected(val), arrived(0), nexpected(val), epoch(0), completion(std::forward<T>(f)) { }
+ notifying_barrier(const notifying_barrier&) = delete;
+ notifying_barrier& operator=(const notifying_barrier&) = delete;
+ ~notifying_barrier( ) { }
+ void arrive_and_wait() {
+ int const myepoch = epoch.load(std::memory_order_relaxed);
+ if(!__arrive(myepoch))
+ while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch)
+ ;
+ }
+ void arrive_and_drop() {
+ nexpected.fetch_add(-1,std::memory_order_relaxed);
+ __arrive(epoch.load(std::memory_order_relaxed));
+ }
+ private:
+ bool __arrive(int const myepoch) {
+ int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1;
+ if(__builtin_expect(myresult == expected,0)) {
+ int const newexpected = completion();
+ expected = newexpected ? newexpected : nexpected.load(std::memory_order_relaxed);
+ arrived.store(0,std::memory_order_relaxed);
+ epoch.store(myepoch+1,std::memory_order_release);
+ return true;
+ }
+ return false;
+ }
+ int expected;
+ std::atomic<int> arrived, nexpected;
+ synchronic<int> epoch;
+ std::function<int()> completion;
+ };
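+
+ /*
+ Illustrative use (hypothetical NTHREADS): shrink the barrier to half its
+ participants after the first cycle; returning 0 from the completion
+ would keep the expected count unchanged.
+
+ notifying_barrier nb(NTHREADS, []{ return NTHREADS/2; });
+ */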
+}}
+
+#endif //KOKKOS_SYNCHRONIC_N3998_HPP
diff --git a/lib/kokkos/core/src/impl/Kokkos_Tags.hpp b/lib/kokkos/core/src/impl/Kokkos_Tags.hpp
index 4885d3737..b7e6ba23a 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Tags.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Tags.hpp
@@ -1,156 +1,164 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TAGS_HPP
#define KOKKOS_TAGS_HPP
#include <impl/Kokkos_Traits.hpp>
#include <Kokkos_Core_fwd.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
-namespace Kokkos {
-//----------------------------------------------------------------------------
-
-template<class ExecutionSpace, class MemorySpace>
-struct Device {
- typedef ExecutionSpace execution_space;
- typedef MemorySpace memory_space;
- typedef Device<execution_space,memory_space> device_type;
-};
-}
-
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class C , class Enable = void >
struct is_memory_space : public bool_< false > {};
template< class C , class Enable = void >
struct is_execution_space : public bool_< false > {};
template< class C , class Enable = void >
struct is_execution_policy : public bool_< false > {};
template< class C , class Enable = void >
struct is_array_layout : public Impl::false_type {};
template< class C , class Enable = void >
struct is_memory_traits : public Impl::false_type {};
template< class C >
struct is_memory_space< C , typename Impl::enable_if_type< typename C::memory_space >::type >
: public bool_< Impl::is_same< C , typename C::memory_space >::value > {};
template< class C >
struct is_execution_space< C , typename Impl::enable_if_type< typename C::execution_space >::type >
: public bool_< Impl::is_same< C , typename C::execution_space >::value > {};
template< class C >
struct is_execution_policy< C , typename Impl::enable_if_type< typename C::execution_policy >::type >
: public bool_< Impl::is_same< C , typename C::execution_policy >::value > {};
template< class C >
struct is_array_layout< C , typename Impl::enable_if_type< typename C::array_layout >::type >
: public bool_< Impl::is_same< C , typename C::array_layout >::value > {};
template< class C >
struct is_memory_traits< C , typename Impl::enable_if_type< typename C::memory_traits >::type >
: public bool_< Impl::is_same< C , typename C::memory_traits >::value > {};
+}
+}
+
+namespace Kokkos {
+//----------------------------------------------------------------------------
+
+template< class ExecutionSpace , class MemorySpace >
+struct Device {
+ static_assert( Impl::is_execution_space<ExecutionSpace>::value
+ , "Execution space is not valid" );
+ static_assert( Impl::is_memory_space<MemorySpace>::value
+ , "Memory space is not valid" );
+ typedef ExecutionSpace execution_space;
+ typedef MemorySpace memory_space;
+ typedef Device<execution_space,memory_space> device_type;
+};
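+// For example (assuming the corresponding spaces are enabled in this
+// build), Device< Kokkos::OpenMP , Kokkos::HostSpace > is well-formed,
+// while swapping the two arguments would trip both static_asserts above.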
+}
+namespace Kokkos {
+namespace Impl {
//----------------------------------------------------------------------------
template< class C , class Enable = void >
struct is_space : public Impl::false_type {};
template< class C >
struct is_space< C
, typename Impl::enable_if<(
Impl::is_same< C , typename C::execution_space >::value ||
Impl::is_same< C , typename C::memory_space >::value ||
Impl::is_same< C , Device<
typename C::execution_space,
typename C::memory_space> >::value
)>::type
>
: public Impl::true_type
{
typedef typename C::execution_space execution_space ;
typedef typename C::memory_space memory_space ;
// The host_memory_space defines a space with host-resident memory.
// If the execution space's memory space is host accessible then use that memory space,
// else use HostSpace.
typedef
typename Impl::if_c< Impl::is_same< memory_space , HostSpace >::value
#ifdef KOKKOS_HAVE_CUDA
|| Impl::is_same< memory_space , CudaUVMSpace>::value
|| Impl::is_same< memory_space , CudaHostPinnedSpace>::value
#endif
, memory_space , HostSpace >::type
host_memory_space ;
// The host_execution_space defines a space which has access to HostSpace.
// If the execution space can access HostSpace then use that execution space,
// else use the DefaultHostExecutionSpace.
#ifdef KOKKOS_HAVE_CUDA
typedef
typename Impl::if_c< Impl::is_same< execution_space , Cuda >::value
, DefaultHostExecutionSpace , execution_space >::type
host_execution_space ;
#else
typedef execution_space host_execution_space;
#endif
typedef Device<host_execution_space,host_memory_space> host_mirror_space;
};
}
}
#endif
diff --git a/lib/kokkos/core/src/impl/Kokkos_Traits.hpp b/lib/kokkos/core/src/impl/Kokkos_Traits.hpp
index 52358842f..e2e02c3fa 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Traits.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Traits.hpp
@@ -1,370 +1,406 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOSTRAITS_HPP
#define KOKKOSTRAITS_HPP
#include <stddef.h>
#include <stdint.h>
#include <Kokkos_Macros.hpp>
namespace Kokkos {
namespace Impl {
+//----------------------------------------------------------------------------
+// Help with C++11 variadic argument packs
+
+template< unsigned I , class ... Args >
+struct variadic_type { typedef void type ; };
+
+template< class T , class ... Args >
+struct variadic_type< 0 , T , Args ... >
+ { typedef T type ; };
+
+template< unsigned I , class T , class ... Args >
+struct variadic_type< I , T , Args ... >
+ { typedef typename variadic_type< I - 1 , Args ... >::type type ; };
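+
+// e.g. variadic_type< 1 , int , double , char >::type is 'double', and an
+// index past the end of the pack yields 'void' (illustrative note).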
+
+//----------------------------------------------------------------------------
/* C++11 conformal compile-time type traits utilities.
* Prefer to use C++11 when portably available.
*/
//----------------------------------------------------------------------------
// C++11 Helpers:
template < class T , T v >
struct integral_constant
{
// Declaration of 'static const' causes an unresolved linker symbol in debug
// static const T value = v ;
enum { value = T(v) };
typedef T value_type;
typedef integral_constant<T,v> type;
KOKKOS_INLINE_FUNCTION operator T() { return v ; }
};
typedef integral_constant<bool,false> false_type ;
typedef integral_constant<bool,true> true_type ;
//----------------------------------------------------------------------------
// C++11 Type relationships:
template< class X , class Y > struct is_same : public false_type {};
template< class X > struct is_same<X,X> : public true_type {};
//----------------------------------------------------------------------------
// C++11 Type properties:
template <typename T> struct is_const : public false_type {};
template <typename T> struct is_const<const T> : public true_type {};
template <typename T> struct is_const<const T & > : public true_type {};
template <typename T> struct is_array : public false_type {};
template <typename T> struct is_array< T[] > : public true_type {};
template <typename T, unsigned N > struct is_array< T[N] > : public true_type {};
//----------------------------------------------------------------------------
// C++11 Type transformations:
template <typename T> struct remove_const { typedef T type; };
template <typename T> struct remove_const<const T> { typedef T type; };
template <typename T> struct remove_const<const T & > { typedef T & type; };
template <typename T> struct add_const { typedef const T type; };
template <typename T> struct add_const<T & > { typedef const T & type; };
template <typename T> struct add_const<const T> { typedef const T type; };
template <typename T> struct add_const<const T & > { typedef const T & type; };
template <typename T> struct remove_reference { typedef T type ; };
template <typename T> struct remove_reference< T & > { typedef T type ; };
template <typename T> struct remove_reference< const T & > { typedef const T type ; };
template <typename T> struct remove_extent { typedef T type ; };
template <typename T> struct remove_extent<T[]> { typedef T type ; };
template <typename T, unsigned N > struct remove_extent<T[N]> { typedef T type ; };
//----------------------------------------------------------------------------
// C++11 Other type generators:
template< bool , class T , class F >
struct condition { typedef F type ; };
template< class T , class F >
struct condition<true,T,F> { typedef T type ; };
template< bool , class = void >
struct enable_if ;
template< class T >
struct enable_if< true , T > { typedef T type ; };
//----------------------------------------------------------------------------
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Other traits
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
template< class , class T = void >
struct enable_if_type { typedef T type ; };
//----------------------------------------------------------------------------
template< bool B >
struct bool_ : public integral_constant<bool,B> {};
template< unsigned I >
struct unsigned_ : public integral_constant<unsigned,I> {};
template< int I >
struct int_ : public integral_constant<int,I> {};
typedef bool_<true> true_;
typedef bool_<false> false_;
//----------------------------------------------------------------------------
// if_
template < bool Cond , typename TrueType , typename FalseType>
struct if_c
{
enum { value = Cond };
typedef FalseType type;
typedef typename remove_const<
typename remove_reference<type>::type >::type value_type ;
typedef typename add_const<value_type>::type const_value_type ;
static KOKKOS_INLINE_FUNCTION
const_value_type & select( const_value_type & v ) { return v ; }
static KOKKOS_INLINE_FUNCTION
value_type & select( value_type & v ) { return v ; }
template< class T >
static KOKKOS_INLINE_FUNCTION
value_type & select( const T & ) { value_type * ptr(0); return *ptr ; }
template< class T >
static KOKKOS_INLINE_FUNCTION
const_value_type & select( const T & , const_value_type & v ) { return v ; }
template< class T >
static KOKKOS_INLINE_FUNCTION
value_type & select( const T & , value_type & v ) { return v ; }
};
template <typename TrueType, typename FalseType>
struct if_c< true , TrueType , FalseType >
{
enum { value = true };
typedef TrueType type;
typedef typename remove_const<
typename remove_reference<type>::type >::type value_type ;
typedef typename add_const<value_type>::type const_value_type ;
static KOKKOS_INLINE_FUNCTION
const_value_type & select( const_value_type & v ) { return v ; }
static KOKKOS_INLINE_FUNCTION
value_type & select( value_type & v ) { return v ; }
template< class T >
static KOKKOS_INLINE_FUNCTION
value_type & select( const T & ) { value_type * ptr(0); return *ptr ; }
template< class F >
static KOKKOS_INLINE_FUNCTION
const_value_type & select( const_value_type & v , const F & ) { return v ; }
template< class F >
static KOKKOS_INLINE_FUNCTION
value_type & select( value_type & v , const F & ) { return v ; }
};
template< typename TrueType >
struct if_c< false , TrueType , void >
{
enum { value = false };
typedef void type ;
typedef void value_type ;
};
template< typename FalseType >
struct if_c< true , void , FalseType >
{
enum { value = true };
typedef void type ;
typedef void value_type ;
};
template <typename Cond, typename TrueType, typename FalseType>
struct if_ : public if_c<Cond::value, TrueType, FalseType> {};
//----------------------------------------------------------------------------
// Allows aliased types:
template< typename T >
struct is_integral : public integral_constant< bool ,
(
- Impl::is_same< T , char >::value ||
- Impl::is_same< T , unsigned char >::value ||
- Impl::is_same< T , short int >::value ||
- Impl::is_same< T , unsigned short int >::value ||
- Impl::is_same< T , int >::value ||
- Impl::is_same< T , unsigned int >::value ||
- Impl::is_same< T , long int >::value ||
- Impl::is_same< T , unsigned long int >::value ||
- Impl::is_same< T , long long int >::value ||
- Impl::is_same< T , unsigned long long int >::value ||
-
- Impl::is_same< T , int8_t >::value ||
- Impl::is_same< T , int16_t >::value ||
- Impl::is_same< T , int32_t >::value ||
- Impl::is_same< T , int64_t >::value ||
- Impl::is_same< T , uint8_t >::value ||
- Impl::is_same< T , uint16_t >::value ||
- Impl::is_same< T , uint32_t >::value ||
- Impl::is_same< T , uint64_t >::value
+ std::is_same< T , char >::value ||
+ std::is_same< T , unsigned char >::value ||
+ std::is_same< T , short int >::value ||
+ std::is_same< T , unsigned short int >::value ||
+ std::is_same< T , int >::value ||
+ std::is_same< T , unsigned int >::value ||
+ std::is_same< T , long int >::value ||
+ std::is_same< T , unsigned long int >::value ||
+ std::is_same< T , long long int >::value ||
+ std::is_same< T , unsigned long long int >::value ||
+
+ std::is_same< T , int8_t >::value ||
+ std::is_same< T , int16_t >::value ||
+ std::is_same< T , int32_t >::value ||
+ std::is_same< T , int64_t >::value ||
+ std::is_same< T , uint8_t >::value ||
+ std::is_same< T , uint16_t >::value ||
+ std::is_same< T , uint32_t >::value ||
+ std::is_same< T , uint64_t >::value
)>
{};
//----------------------------------------------------------------------------
+// These 'constexpr' functions can be used as
+// both regular functions and meta-functions.
+
+/**\brief There exists integral 'k' such that N = 2^k */
+KOKKOS_INLINE_FUNCTION
+constexpr bool is_integral_power_of_two( const size_t N )
+{ return ( 0 < N ) && ( 0 == ( N & ( N - 1 ) ) ); }
+
+/**\brief Return integral 'k' such that N = 2^k, assuming valid. */
+KOKKOS_INLINE_FUNCTION
+constexpr unsigned integral_power_of_two_assume_valid( const size_t N )
+{ return N == 1 ? 0 : 1 + integral_power_of_two_assume_valid( N >> 1 ); }
+
+/**\brief Return integral 'k' such that N = 2^k, if exists.
+ * If does not exist return ~0u.
+ */
+KOKKOS_INLINE_FUNCTION
+constexpr unsigned integral_power_of_two( const size_t N )
+{ return is_integral_power_of_two(N) ? integral_power_of_two_assume_valid(N) : ~0u ; }
+
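+// Editorial sketch, not part of the original patch: being 'constexpr', these
+// helpers serve both as compile-time meta-functions and as ordinary run-time
+// functions. Assuming only the definitions above:
+//
+//   enum { Shift = integral_power_of_two( 256 ) };   // compile-time: Shift == 8
+//   unsigned k = integral_power_of_two( n );         // run-time: ~0u if n != 2^k
+//   bool ok = is_integral_power_of_two( 1024 );      // true
+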
+//----------------------------------------------------------------------------
template < size_t N >
struct is_power_of_two
{
enum type { value = (N > 0) && !(N & (N-1)) };
};
template < size_t N , bool OK = is_power_of_two<N>::value >
struct power_of_two ;
template < size_t N >
struct power_of_two<N,true>
{
enum type { value = 1+ power_of_two<(N>>1),true>::value };
};
template <>
struct power_of_two<2,true>
{
enum type { value = 1 };
};
template <>
struct power_of_two<1,true>
{
enum type { value = 0 };
};
/** \brief If power of two then return power,
* otherwise return ~0u.
*/
static KOKKOS_FORCEINLINE_FUNCTION
unsigned power_of_two_if_valid( const unsigned N )
{
unsigned p = ~0u ;
if ( N && ! ( N & ( N - 1 ) ) ) {
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
p = __ffs(N) - 1 ;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
p = __builtin_ffs(N) - 1 ;
#elif defined( __INTEL_COMPILER )
p = _bit_scan_forward(N);
#else
p = 0 ;
for ( unsigned j = 1 ; ! ( N & j ) ; j <<= 1 ) { ++p ; }
#endif
}
return p ;
}
//----------------------------------------------------------------------------
template< typename T , T v , bool NonZero = ( v != T(0) ) >
struct integral_nonzero_constant
{
// Declaration of 'static const' causes an unresolved linker symbol in debug
// static const T value = v ;
enum { value = T(v) };
typedef T value_type ;
typedef integral_nonzero_constant<T,v> type ;
KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & ) {}
};
template< typename T , T zero >
struct integral_nonzero_constant<T,zero,false>
{
const T value ;
typedef T value_type ;
typedef integral_nonzero_constant<T,0> type ;
KOKKOS_INLINE_FUNCTION integral_nonzero_constant( const T & v ) : value(v) {}
};
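// Editorial sketch, not part of the original source: integral_nonzero_constant
// keeps a nonzero value as a compile-time enum constant and falls back to a
// run-time member only when the compile-time value is zero:
//
//   integral_nonzero_constant<int,4> a( 9 );  // a.value is the enum constant 4; '9' is ignored
//   integral_nonzero_constant<int,0> b( 7 );  // b.value is the run-time member 7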
//----------------------------------------------------------------------------
template < class C > struct is_integral_constant : public false_
{
typedef void integral_type ;
enum { integral_value = 0 };
};
template < typename T , T v >
struct is_integral_constant< integral_constant<T,v> > : public true_
{
typedef T integral_type ;
enum { integral_value = v };
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOSTRAITS_HPP */
diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp
index 8334af3a3..94c8e13c1 100644
--- a/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp
@@ -1,878 +1,886 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VIEWDEFAULT_HPP
#define KOKKOS_VIEWDEFAULT_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template<>
struct ViewAssignment< ViewDefault , ViewDefault , void >
{
typedef ViewDefault Specialize ;
//------------------------------------
/** \brief Compatible value and shape and LayoutLeft/Right to LayoutStride*/
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst ,
const View<ST,SL,SD,SM,Specialize> & src ,
const typename enable_if<(
ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,SL,SD,SM> >::value
||
( ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,SL,SD,SM> >::assignable_value
&&
ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type ,
typename ViewTraits<ST,SL,SD,SM>::shape_type >::value
&&
is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutStride>::value
&& (is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutLeft>::value ||
is_same< typename ViewTraits<ST,SL,SD,SM>::array_layout,LayoutRight>::value))
)>::type * = 0 )
{
dst.m_offset_map.assign( src.m_offset_map );
dst.m_management = src.m_management ;
dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker );
- dst.m_tracker = src.m_tracker ;
-
+ if( dst.is_managed )
+ dst.m_tracker = src.m_tracker ;
+ else {
+ dst.m_tracker = AllocationTracker();
+ dst.m_management.set_unmanaged();
+ }
}
/** \brief Assign 1D Strided View to LayoutLeft or LayoutRight if stride[0]==1 */
template< class DT , class DL , class DD , class DM ,
class ST , class SD , class SM >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View<DT,DL,DD,DM,Specialize> & dst ,
const View<ST,LayoutStride,SD,SM,Specialize> & src ,
const typename enable_if<(
(
ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,LayoutStride,SD,SM> >::value
||
( ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,LayoutStride,SD,SM> >::assignable_value
&&
ShapeCompatible< typename ViewTraits<DT,DL,DD,DM>::shape_type ,
typename ViewTraits<ST,LayoutStride,SD,SM>::shape_type >::value
)
)
&&
(View<DT,DL,DD,DM,Specialize>::rank==1)
&& (is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutLeft>::value ||
is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout,LayoutRight>::value)
)>::type * = 0 )
{
size_t strides[8];
src.stride(strides);
if(strides[0]!=1) {
- abort("Trying to assign strided 1D View to LayoutRight or LayoutLeft which is not stride-1");
+ Kokkos::abort("Trying to assign strided 1D View to LayoutRight or LayoutLeft which is not stride-1");
}
dst.m_offset_map.assign( src.dimension_0(), 0, 0, 0, 0, 0, 0, 0, 0 );
dst.m_management = src.m_management ;
dst.m_ptr_on_device = ViewDataManagement< ViewTraits<DT,DL,DD,DM> >::create_handle( src.m_ptr_on_device, src.m_tracker );
- dst.m_tracker = src.m_tracker ;
-
+ if( dst.is_managed )
+ dst.m_tracker = src.m_tracker ;
+ else {
+ dst.m_tracker = AllocationTracker();
+ dst.m_management.set_unmanaged();
+ }
}
//------------------------------------
/** \brief Deep copy data from compatible value type, layout, rank, and specialization.
* Check the dimensions and allocation lengths at runtime.
*/
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
inline static
void deep_copy( const View<DT,DL,DD,DM,Specialize> & dst ,
const View<ST,SL,SD,SM,Specialize> & src ,
const typename Impl::enable_if<(
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::value_type ,
typename ViewTraits<ST,SL,SD,SM>::non_const_value_type >::value
&&
Impl::is_same< typename ViewTraits<DT,DL,DD,DM>::array_layout ,
typename ViewTraits<ST,SL,SD,SM>::array_layout >::value
&&
( unsigned(ViewTraits<DT,DL,DD,DM>::rank) == unsigned(ViewTraits<ST,SL,SD,SM>::rank) )
)>::type * = 0 )
{
typedef typename ViewTraits<DT,DL,DD,DM>::memory_space dst_memory_space ;
typedef typename ViewTraits<ST,SL,SD,SM>::memory_space src_memory_space ;
if ( dst.ptr_on_device() != src.ptr_on_device() ) {
Impl::assert_shapes_are_equal( dst.m_offset_map , src.m_offset_map );
const size_t nbytes = dst.m_offset_map.scalar_size * dst.m_offset_map.capacity();
DeepCopy< dst_memory_space , src_memory_space >( dst.ptr_on_device() , src.ptr_on_device() , nbytes );
}
}
};
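// Editorial note, not part of the original source: this Impl deep_copy backs
// Kokkos::deep_copy for views of matching value type, layout, and rank. It
// asserts equal shapes at run time, then performs one contiguous copy of
// scalar_size * capacity() bytes. A minimal sketch:
//
//   Kokkos::View<double*> a("a",n), b("b",n);
//   Kokkos::deep_copy( a , b );   // shape-checked, single DeepCopy of n doubles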
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class ExecSpace , class DT , class DL, class DD, class DM, class DS >
struct ViewDefaultConstruct< ExecSpace , Kokkos::View<DT,DL,DD,DM,DS> , true >
{
Kokkos::View<DT,DL,DD,DM,DS> * const m_ptr ;
KOKKOS_FORCEINLINE_FUNCTION
void operator()( const typename ExecSpace::size_type& i ) const
{ new(m_ptr+i) Kokkos::View<DT,DL,DD,DM,DS>(); }
ViewDefaultConstruct( Kokkos::View<DT,DL,DD,DM,DS> * pointer , size_t capacity )
: m_ptr( pointer )
{
Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
parallel_for( range , *this );
ExecSpace::fence();
}
};
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
>
struct ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
{
private:
typedef View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > SrcViewType ;
enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
// The source view rank must be equal to the input argument rank
// Once a void argument is encountered all subsequent arguments must be void.
enum { InputRank =
Impl::StaticAssert<( SrcViewType::rank ==
( V0 ? 0 : (
V1 ? 1 : (
V2 ? 2 : (
V3 ? 3 : (
V4 ? 4 : (
V5 ? 5 : (
V6 ? 6 : (
V7 ? 7 : 8 ))))))) ))
&&
( SrcViewType::rank ==
( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
>::value ? SrcViewType::rank : 0 };
enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
// Reverse
enum { R0_rev = 0 == InputRank ? 0u : (
1 == InputRank ? unsigned(R0) : (
2 == InputRank ? unsigned(R1) : (
3 == InputRank ? unsigned(R2) : (
4 == InputRank ? unsigned(R3) : (
5 == InputRank ? unsigned(R4) : (
6 == InputRank ? unsigned(R5) : (
7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
typedef typename SrcViewType::array_layout SrcViewLayout ;
// Choose array layout, attempting to preserve original layout if at all possible.
typedef typename Impl::if_c<
( // Same Layout IF
// OutputRank 0
( OutputRank == 0 )
||
// OutputRank 1 or 2, InputLayout Left, Interval 0
  // because either a single index has stride one or the second index has a stride.
( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
  // because either a single index has stride one or the second index has a stride.
( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
  // Choose data type as a purely dynamic rank array to accommodate a runtime range.
typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
typename SrcViewType::value_type ********
>::type >::type >::type >::type >::type >::type >::type >::type OutputData ;
// Choose space.
// If the source view's template arg1 or arg2 is a space then use it,
// otherwise use the source view's execution space.
typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::device_type
>::type >::type OutputSpace ;
public:
// If keeping the layout then match non-data type arguments
// else keep execution space and memory traits.
typedef typename
Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
, Kokkos::View< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
, Kokkos::View< OutputData , OutputViewLayout , OutputSpace
, typename SrcViewType::memory_traits
, Impl::ViewDefault >
>::type type ;
};
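// Editorial sketch, not part of the original source: ViewSubview is the type
// deduction behind Kokkos::subview. The source layout survives only in the
// cases enumerated above; everything else decays to LayoutStride. For example:
//
//   Kokkos::View<double**,Kokkos::LayoutLeft> m("m",n0,n1);
//   Kokkos::subview( m , Kokkos::ALL() , 1 );  // stride-one leading range: LayoutLeft preserved
//   Kokkos::subview( m , 1 , Kokkos::ALL() );  // leading index fixed: result is LayoutStride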
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
// Construct subview of a Rank 8 view
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
>
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
, const SubArg0_type & arg0
, const SubArg1_type & arg1
, const SubArg2_type & arg2
, const SubArg3_type & arg3
, const SubArg4_type & arg4
, const SubArg5_type & arg5
, const SubArg6_type & arg6
, const SubArg7_type & arg7
)
: m_ptr_on_device( (typename traits::value_type*) NULL)
, m_offset_map()
, m_management()
, m_tracker()
{
// This constructor can only be used to construct a subview
// from the source view. This type must match the subview type
// deduced from the source view and subview arguments.
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
ViewSubviewDeduction ;
enum { is_a_valid_subview_constructor =
Impl::StaticAssert<
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
>::value
};
if ( is_a_valid_subview_constructor ) {
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
typedef Impl::ViewOffsetRange< SubArg5_type > R5 ;
typedef Impl::ViewOffsetRange< SubArg6_type > R6 ;
typedef Impl::ViewOffsetRange< SubArg7_type > R7 ;
// 'assign_subview' returns whether the subview offset_map
// introduces noncontiguity in the view.
const bool introduce_noncontiguity =
m_offset_map.assign_subview( src.m_offset_map
, R0::dimension( src.m_offset_map.N0 , arg0 )
, R1::dimension( src.m_offset_map.N1 , arg1 )
, R2::dimension( src.m_offset_map.N2 , arg2 )
, R3::dimension( src.m_offset_map.N3 , arg3 )
, R4::dimension( src.m_offset_map.N4 , arg4 )
, R5::dimension( src.m_offset_map.N5 , arg5 )
, R6::dimension( src.m_offset_map.N6 , arg6 )
, R7::dimension( src.m_offset_map.N7 , arg7 )
);
if ( m_offset_map.capacity() ) {
m_management = src.m_management ;
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
m_ptr_on_device = src.m_ptr_on_device +
src.m_offset_map( R0::begin( arg0 )
, R1::begin( arg1 )
, R2::begin( arg2 )
, R3::begin( arg3 )
, R4::begin( arg4 )
, R5::begin( arg5 )
, R6::begin( arg6 )
, R7::begin( arg7 ) );
m_tracker = src.m_tracker ;
}
}
}
// Construct subview of a Rank 7 view
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type , class SubArg5_type , class SubArg6_type
>
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
, const SubArg0_type & arg0
, const SubArg1_type & arg1
, const SubArg2_type & arg2
, const SubArg3_type & arg3
, const SubArg4_type & arg4
, const SubArg5_type & arg5
, const SubArg6_type & arg6
)
: m_ptr_on_device( (typename traits::value_type*) NULL)
, m_offset_map()
, m_management()
, m_tracker()
{
// This constructor can only be used to construct a subview
// from the source view. This type must match the subview type
// deduced from the source view and subview arguments.
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
, SubArg4_type , SubArg5_type , SubArg6_type , void >
ViewSubviewDeduction ;
enum { is_a_valid_subview_constructor =
Impl::StaticAssert<
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
>::value
};
if ( is_a_valid_subview_constructor ) {
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
typedef Impl::ViewOffsetRange< SubArg5_type > R5 ;
typedef Impl::ViewOffsetRange< SubArg6_type > R6 ;
// 'assign_subview' returns whether the subview offset_map
// introduces noncontiguity in the view.
const bool introduce_noncontiguity =
m_offset_map.assign_subview( src.m_offset_map
, R0::dimension( src.m_offset_map.N0 , arg0 )
, R1::dimension( src.m_offset_map.N1 , arg1 )
, R2::dimension( src.m_offset_map.N2 , arg2 )
, R3::dimension( src.m_offset_map.N3 , arg3 )
, R4::dimension( src.m_offset_map.N4 , arg4 )
, R5::dimension( src.m_offset_map.N5 , arg5 )
, R6::dimension( src.m_offset_map.N6 , arg6 )
, 0
);
if ( m_offset_map.capacity() ) {
m_management = src.m_management ;
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
m_ptr_on_device = src.m_ptr_on_device +
src.m_offset_map( R0::begin( arg0 )
, R1::begin( arg1 )
, R2::begin( arg2 )
, R3::begin( arg3 )
, R4::begin( arg4 )
, R5::begin( arg5 )
, R6::begin( arg6 )
);
m_tracker = src.m_tracker ;
}
}
}
// Construct subview of a Rank 6 view
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type , class SubArg5_type
>
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
, const SubArg0_type & arg0
, const SubArg1_type & arg1
, const SubArg2_type & arg2
, const SubArg3_type & arg3
, const SubArg4_type & arg4
, const SubArg5_type & arg5
)
: m_ptr_on_device( (typename traits::value_type*) NULL)
, m_offset_map()
, m_management()
, m_tracker()
{
// This constructor can only be used to construct a subview
// from the source view. This type must match the subview type
// deduced from the source view and subview arguments.
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
, SubArg4_type , SubArg5_type , void , void >
ViewSubviewDeduction ;
enum { is_a_valid_subview_constructor =
Impl::StaticAssert<
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
>::value
};
if ( is_a_valid_subview_constructor ) {
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
typedef Impl::ViewOffsetRange< SubArg5_type > R5 ;
// 'assign_subview' returns whether the subview offset_map
// introduces noncontiguity in the view.
const bool introduce_noncontiguity =
m_offset_map.assign_subview( src.m_offset_map
, R0::dimension( src.m_offset_map.N0 , arg0 )
, R1::dimension( src.m_offset_map.N1 , arg1 )
, R2::dimension( src.m_offset_map.N2 , arg2 )
, R3::dimension( src.m_offset_map.N3 , arg3 )
, R4::dimension( src.m_offset_map.N4 , arg4 )
, R5::dimension( src.m_offset_map.N5 , arg5 )
, 0
, 0
);
if ( m_offset_map.capacity() ) {
m_management = src.m_management ;
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
m_ptr_on_device = src.m_ptr_on_device +
src.m_offset_map( R0::begin( arg0 )
, R1::begin( arg1 )
, R2::begin( arg2 )
, R3::begin( arg3 )
, R4::begin( arg4 )
, R5::begin( arg5 )
);
m_tracker = src.m_tracker ;
}
}
}
// Construct subview of a Rank 5 view
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type
>
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
, const SubArg0_type & arg0
, const SubArg1_type & arg1
, const SubArg2_type & arg2
, const SubArg3_type & arg3
, const SubArg4_type & arg4
)
: m_ptr_on_device( (typename traits::value_type*) NULL)
, m_offset_map()
, m_management()
, m_tracker()
{
// This constructor can only be used to construct a subview
// from the source view. This type must match the subview type
// deduced from the source view and subview arguments.
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
, SubArg4_type , void , void , void >
ViewSubviewDeduction ;
enum { is_a_valid_subview_constructor =
Impl::StaticAssert<
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
>::value
};
if ( is_a_valid_subview_constructor ) {
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
typedef Impl::ViewOffsetRange< SubArg4_type > R4 ;
// 'assign_subview' returns whether the subview offset_map
// introduces noncontiguity in the view.
const bool introduce_noncontiguity =
m_offset_map.assign_subview( src.m_offset_map
, R0::dimension( src.m_offset_map.N0 , arg0 )
, R1::dimension( src.m_offset_map.N1 , arg1 )
, R2::dimension( src.m_offset_map.N2 , arg2 )
, R3::dimension( src.m_offset_map.N3 , arg3 )
, R4::dimension( src.m_offset_map.N4 , arg4 )
, 0
, 0
, 0
);
if ( m_offset_map.capacity() ) {
m_management = src.m_management ;
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
m_ptr_on_device = src.m_ptr_on_device +
src.m_offset_map( R0::begin( arg0 )
, R1::begin( arg1 )
, R2::begin( arg2 )
, R3::begin( arg3 )
, R4::begin( arg4 )
);
m_tracker = src.m_tracker ;
}
}
}
// Construct subview of a Rank 4 view
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
>
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
, const SubArg0_type & arg0
, const SubArg1_type & arg1
, const SubArg2_type & arg2
, const SubArg3_type & arg3
)
: m_ptr_on_device( (typename traits::value_type*) NULL)
, m_offset_map()
, m_management()
, m_tracker()
{
// This constructor can only be used to construct a subview
// from the source view. This type must match the subview type
// deduced from the source view and subview arguments.
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
, void , void , void , void >
ViewSubviewDeduction ;
enum { is_a_valid_subview_constructor =
Impl::StaticAssert<
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
>::value
};
if ( is_a_valid_subview_constructor ) {
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
typedef Impl::ViewOffsetRange< SubArg3_type > R3 ;
// 'assign_subview' returns whether the subview offset_map
// introduces noncontiguity in the view.
const bool introduce_noncontiguity =
m_offset_map.assign_subview( src.m_offset_map
, R0::dimension( src.m_offset_map.N0 , arg0 )
, R1::dimension( src.m_offset_map.N1 , arg1 )
, R2::dimension( src.m_offset_map.N2 , arg2 )
, R3::dimension( src.m_offset_map.N3 , arg3 )
, 0
, 0
, 0
, 0
);
if ( m_offset_map.capacity() ) {
m_management = src.m_management ;
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
m_ptr_on_device = src.m_ptr_on_device +
src.m_offset_map( R0::begin( arg0 )
, R1::begin( arg1 )
, R2::begin( arg2 )
, R3::begin( arg3 )
);
m_tracker = src.m_tracker ;
}
}
}
// Construct subview of a Rank 3 view
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type , class SubArg2_type
>
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
, const SubArg0_type & arg0
, const SubArg1_type & arg1
, const SubArg2_type & arg2
)
: m_ptr_on_device( (typename traits::value_type*) NULL)
, m_offset_map()
, m_management()
, m_tracker()
{
// This constructor can only be used to construct a subview
// from the source view. This type must match the subview type
// deduced from the source view and subview arguments.
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
, SubArg0_type , SubArg1_type , SubArg2_type , void , void , void , void , void >
ViewSubviewDeduction ;
enum { is_a_valid_subview_constructor =
Impl::StaticAssert<
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
>::value
};
if ( is_a_valid_subview_constructor ) {
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
typedef Impl::ViewOffsetRange< SubArg2_type > R2 ;
// 'assign_subview' returns whether the subview offset_map
// introduces noncontiguity in the view.
const bool introduce_noncontiguity =
m_offset_map.assign_subview( src.m_offset_map
, R0::dimension( src.m_offset_map.N0 , arg0 )
, R1::dimension( src.m_offset_map.N1 , arg1 )
, R2::dimension( src.m_offset_map.N2 , arg2 )
, 0 , 0 , 0 , 0 , 0);
if ( m_offset_map.capacity() ) {
m_management = src.m_management ;
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
m_ptr_on_device = src.m_ptr_on_device +
src.m_offset_map( R0::begin( arg0 )
, R1::begin( arg1 )
, R2::begin( arg2 )
);
m_tracker = src.m_tracker ;
}
}
}
// Construct subview of a Rank 2 view
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type
>
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
, const SubArg0_type & arg0
, const SubArg1_type & arg1
)
: m_ptr_on_device( (typename traits::value_type*) NULL)
, m_offset_map()
, m_management()
, m_tracker()
{
// This constructor can only be used to construct a subview
// from the source view. This type must match the subview type
// deduced from the source view and subview arguments.
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
, SubArg0_type , SubArg1_type , void , void , void , void , void , void >
ViewSubviewDeduction ;
enum { is_a_valid_subview_constructor =
Impl::StaticAssert<
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
>::value
};
if ( is_a_valid_subview_constructor ) {
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
typedef Impl::ViewOffsetRange< SubArg1_type > R1 ;
// 'assign_subview' returns whether the subview offset_map
// introduces noncontiguity in the view.
const bool introduce_noncontiguity =
m_offset_map.assign_subview( src.m_offset_map
, R0::dimension( src.m_offset_map.N0 , arg0 )
, R1::dimension( src.m_offset_map.N1 , arg1 )
, 0 , 0 , 0 , 0 , 0 , 0 );
if ( m_offset_map.capacity() ) {
m_management = src.m_management ;
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
m_ptr_on_device = src.m_ptr_on_device +
src.m_offset_map( R0::begin( arg0 )
, R1::begin( arg1 )
);
m_tracker = src.m_tracker ;
}
}
}
// Construct subview of a Rank 1 view
template< class DstDataType , class DstArg1Type , class DstArg2Type , class DstArg3Type >
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type
>
KOKKOS_INLINE_FUNCTION
View< DstDataType , DstArg1Type , DstArg2Type , DstArg3Type , Impl::ViewDefault >::
View( const View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault > & src
, const SubArg0_type & arg0
)
: m_ptr_on_device( (typename traits::value_type*) NULL)
, m_offset_map()
, m_management()
, m_tracker()
{
// This constructor can only be used to construct a subview
// from the source view. This type must match the subview type
// deduced from the source view and subview arguments.
typedef Impl::ViewSubview< View< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type , Impl::ViewDefault >
, SubArg0_type , void , void , void , void , void , void , void >
ViewSubviewDeduction ;
enum { is_a_valid_subview_constructor =
Impl::StaticAssert<
Impl::is_same< View , typename ViewSubviewDeduction::type >::value
>::value
};
if ( is_a_valid_subview_constructor ) {
typedef Impl::ViewOffsetRange< SubArg0_type > R0 ;
// 'assign_subview' returns whether the subview offset_map
// introduces noncontiguity in the view.
const bool introduce_noncontiguity =
m_offset_map.assign_subview( src.m_offset_map
, R0::dimension( src.m_offset_map.N0 , arg0 )
, 0 , 0 , 0 , 0 , 0 , 0 , 0 );
if ( m_offset_map.capacity() ) {
m_management = src.m_management ;
if ( introduce_noncontiguity ) m_management.set_noncontiguous();
m_ptr_on_device = src.m_ptr_on_device +
src.m_offset_map( R0::begin( arg0 )
);
m_tracker = src.m_tracker ;
}
}
}
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_VIEWDEFAULT_HPP */
diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp
index 006b35923..1d54b7bcc 100644
--- a/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp
@@ -1,518 +1,522 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VIEWSUPPORT_HPP
#define KOKKOS_VIEWSUPPORT_HPP
+#include <algorithm>
#include <Kokkos_ExecPolicy.hpp>
#include <impl/Kokkos_Shape.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Evaluate if LHS = RHS view assignment is allowed. */
template< class ViewLHS , class ViewRHS >
struct ViewAssignable
{
// Same memory space.
// Same value type.
// Compatible 'const' qualifier
  // Cannot assign managed = unmanaged
enum { assignable_value =
( is_same< typename ViewLHS::value_type ,
typename ViewRHS::value_type >::value
||
is_same< typename ViewLHS::value_type ,
typename ViewRHS::const_value_type >::value )
&&
is_same< typename ViewLHS::memory_space ,
typename ViewRHS::memory_space >::value
&&
( ! ( ViewLHS::is_managed && ! ViewRHS::is_managed ) )
};
enum { assignable_shape =
// Compatible shape and matching layout:
( ShapeCompatible< typename ViewLHS::shape_type ,
typename ViewRHS::shape_type >::value
&&
is_same< typename ViewLHS::array_layout ,
typename ViewRHS::array_layout >::value )
||
// Matching layout, same rank, and LHS dynamic rank
( is_same< typename ViewLHS::array_layout ,
typename ViewRHS::array_layout >::value
&&
int(ViewLHS::rank) == int(ViewRHS::rank)
&&
int(ViewLHS::rank) == int(ViewLHS::rank_dynamic) )
||
// Both rank-0, any shape and layout
( int(ViewLHS::rank) == 0 && int(ViewRHS::rank) == 0 )
||
// Both rank-1 and LHS is dynamic rank-1, any shape and layout
( int(ViewLHS::rank) == 1 && int(ViewRHS::rank) == 1 &&
int(ViewLHS::rank_dynamic) == 1 )
};
enum { value = assignable_value && assignable_shape };
};
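// Editorial sketch, not part of the original source: typical consequences of
// the rules above, for hypothetical views d and c:
//
//   Kokkos::View<double*>       d("d",n);
//   Kokkos::View<const double*> c = d ;   // OK: assignable_value allows adding 'const'
//   // Kokkos::View<double*>    d2 = c ;  // rejected: cannot drop 'const'
//   // Assigning managed = unmanaged is likewise rejected at compile time.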
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class ExecSpace , class Type , bool Initialize >
struct ViewDefaultConstruct
{ ViewDefaultConstruct( Type * , size_t ) {} };
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
/** \brief ViewDataHandle provides the type of the 'data handle' which the view
* uses to access data with the [] operator. It also provides
* an allocate function and a function to extract a raw ptr from the
 * data handle. ViewDataHandle also defines an enum ReturnTypeIsReference which
 * specifies whether references/pointers to elements can be taken and a
 * 'return_type' which is what the view operators will give back.
 * Specialization of this object allows three things depending
* on ViewTraits and compiler options:
* (i) Use special allocator (e.g. huge pages/small pages and pinned memory)
* (ii) Use special data handle type (e.g. add Cuda Texture Object)
* (iii) Use special access intrinsics (e.g. texture fetch and non-caching loads)
*/
template< class StaticViewTraits , class Enable = void >
struct ViewDataHandle {
enum { ReturnTypeIsReference = true };
typedef typename StaticViewTraits::value_type * handle_type;
typedef typename StaticViewTraits::value_type & return_type;
KOKKOS_INLINE_FUNCTION
static handle_type create_handle( typename StaticViewTraits::value_type * arg_data_ptr, AllocationTracker const & /*arg_tracker*/ )
{
return handle_type(arg_data_ptr);
}
};
template< class StaticViewTraits , class Enable = void >
class ViewDataManagement : public ViewDataHandle< StaticViewTraits > {
private:
template< class , class > friend class ViewDataManagement ;
struct PotentiallyManaged {};
struct StaticallyUnmanaged {};
  /* Statically unmanaged if the traits say so or if not executing in host-accessible memory space */
typedef typename
Impl::if_c< StaticViewTraits::is_managed &&
Impl::is_same< Kokkos::HostSpace
, Kokkos::Impl::ActiveExecutionMemorySpace >::value
, PotentiallyManaged
, StaticallyUnmanaged
>::type StaticManagementTag ;
enum { Unmanaged = 0x01
, Noncontiguous = 0x02
};
enum { DefaultTraits = Impl::is_same< StaticManagementTag , StaticallyUnmanaged >::value ? Unmanaged : 0 };
unsigned m_traits ; ///< Runtime traits
template< class T >
inline static
unsigned assign( const ViewDataManagement<T> & rhs , const PotentiallyManaged & )
{ return rhs.m_traits | ( rhs.is_managed() && Kokkos::HostSpace::in_parallel() ? unsigned(Unmanaged) : 0u ); }
template< class T >
KOKKOS_INLINE_FUNCTION static
unsigned assign( const ViewDataManagement<T> & rhs , const StaticallyUnmanaged & )
{ return rhs.m_traits | Unmanaged ; }
public:
typedef typename ViewDataHandle< StaticViewTraits >::handle_type handle_type;
KOKKOS_INLINE_FUNCTION
ViewDataManagement() : m_traits( DefaultTraits ) {}
KOKKOS_INLINE_FUNCTION
ViewDataManagement( const ViewDataManagement & rhs )
: m_traits( assign( rhs , StaticManagementTag() ) ) {}
KOKKOS_INLINE_FUNCTION
ViewDataManagement & operator = ( const ViewDataManagement & rhs )
{ m_traits = assign( rhs , StaticManagementTag() ); return *this ; }
template< class SVT >
KOKKOS_INLINE_FUNCTION
ViewDataManagement( const ViewDataManagement<SVT> & rhs )
: m_traits( assign( rhs , StaticManagementTag() ) ) {}
template< class SVT >
KOKKOS_INLINE_FUNCTION
ViewDataManagement & operator = ( const ViewDataManagement<SVT> & rhs )
{ m_traits = assign( rhs , StaticManagementTag() ); return *this ; }
KOKKOS_INLINE_FUNCTION
bool is_managed() const { return ! ( m_traits & Unmanaged ); }
KOKKOS_INLINE_FUNCTION
bool is_contiguous() const { return ! ( m_traits & Noncontiguous ); }
KOKKOS_INLINE_FUNCTION
void set_unmanaged() { m_traits |= Unmanaged ; }
KOKKOS_INLINE_FUNCTION
void set_noncontiguous() { m_traits |= Noncontiguous ; }
template< bool Initialize >
static
handle_type allocate( const std::string & label
, const Impl::ViewOffset< typename StaticViewTraits::shape_type, typename StaticViewTraits::array_layout > & offset_map
, AllocationTracker & tracker
)
{
typedef typename StaticViewTraits::execution_space execution_space ;
typedef typename StaticViewTraits::memory_space memory_space ;
typedef typename StaticViewTraits::value_type value_type ;
const size_t count = offset_map.capacity();
tracker = memory_space::allocate_and_track( label, sizeof(value_type) * count );
value_type * ptr = reinterpret_cast<value_type *>(tracker.alloc_ptr());
// Default construct within the view's execution space.
(void) ViewDefaultConstruct< execution_space , value_type , Initialize >( ptr , count );
return ViewDataHandle< StaticViewTraits >::create_handle(ptr, tracker);
}
};
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class OutputView , class InputView , unsigned Rank = OutputView::Rank >
struct ViewRemap
{
typedef typename OutputView::size_type size_type ;
const OutputView output ;
const InputView input ;
const size_type n0 ;
const size_type n1 ;
const size_type n2 ;
const size_type n3 ;
const size_type n4 ;
const size_type n5 ;
const size_type n6 ;
const size_type n7 ;
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
: output( arg_out ), input( arg_in )
, n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) )
, n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) )
, n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) )
, n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) )
, n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) )
, n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) )
, n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) )
, n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) )
{
typedef typename OutputView::execution_space execution_space ;
Kokkos::RangePolicy< execution_space > range( 0 , n0 );
parallel_for( range , *this );
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type i0 ) const
{
for ( size_type i1 = 0 ; i1 < n1 ; ++i1 ) {
for ( size_type i2 = 0 ; i2 < n2 ; ++i2 ) {
for ( size_type i3 = 0 ; i3 < n3 ; ++i3 ) {
for ( size_type i4 = 0 ; i4 < n4 ; ++i4 ) {
for ( size_type i5 = 0 ; i5 < n5 ; ++i5 ) {
for ( size_type i6 = 0 ; i6 < n6 ; ++i6 ) {
for ( size_type i7 = 0 ; i7 < n7 ; ++i7 ) {
output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input.at(i0,i1,i2,i3,i4,i5,i6,i7);
}}}}}}}
}
};
template< class OutputView , class InputView >
struct ViewRemap< OutputView , InputView , 0 >
{
typedef typename OutputView::value_type value_type ;
typedef typename OutputView::memory_space dst_space ;
typedef typename InputView ::memory_space src_space ;
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
{
DeepCopy< dst_space , src_space >( arg_out.ptr_on_device() ,
arg_in.ptr_on_device() ,
sizeof(value_type) );
}
};
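// Editorial note, not part of the original source: ViewRemap copies the
// overlapping index range between two views of possibly different extents.
// It is the worker behind resizing a view while preserving its contents, e.g.:
//
//   Kokkos::View<double**> a("a",n0,n1);
//   Kokkos::resize( a , m0 , m1 );   // min-extent overlap preserved via ViewRemap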
//----------------------------------------------------------------------------
template< class ExecSpace , class Type >
struct ViewDefaultConstruct< ExecSpace , Type , true >
{
Type * const m_ptr ;
KOKKOS_FORCEINLINE_FUNCTION
void operator()( const typename ExecSpace::size_type& i ) const
{ m_ptr[i] = Type(); }
ViewDefaultConstruct( Type * pointer , size_t capacity )
: m_ptr( pointer )
{
Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
parallel_for( range , *this );
ExecSpace::fence();
}
};
template< class OutputView , unsigned Rank = OutputView::Rank ,
class Enabled = void >
struct ViewFill
{
typedef typename OutputView::const_value_type const_value_type ;
typedef typename OutputView::size_type size_type ;
const OutputView output ;
const_value_type input ;
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
: output( arg_out ), input( arg_in )
{
typedef typename OutputView::execution_space execution_space ;
Kokkos::RangePolicy< execution_space > range( 0 , output.dimension_0() );
parallel_for( range , *this );
execution_space::fence();
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type i0 ) const
{
for ( size_type i1 = 0 ; i1 < output.dimension_1() ; ++i1 ) {
for ( size_type i2 = 0 ; i2 < output.dimension_2() ; ++i2 ) {
for ( size_type i3 = 0 ; i3 < output.dimension_3() ; ++i3 ) {
for ( size_type i4 = 0 ; i4 < output.dimension_4() ; ++i4 ) {
for ( size_type i5 = 0 ; i5 < output.dimension_5() ; ++i5 ) {
for ( size_type i6 = 0 ; i6 < output.dimension_6() ; ++i6 ) {
for ( size_type i7 = 0 ; i7 < output.dimension_7() ; ++i7 ) {
output.at(i0,i1,i2,i3,i4,i5,i6,i7) = input ;
}}}}}}}
}
};
template< class OutputView >
struct ViewFill< OutputView , 0 >
{
typedef typename OutputView::const_value_type const_value_type ;
typedef typename OutputView::memory_space dst_space ;
ViewFill( const OutputView & arg_out , const_value_type & arg_in )
{
DeepCopy< dst_space , dst_space >( arg_out.ptr_on_device() , & arg_in ,
sizeof(const_value_type) );
}
};
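// Editorial note, not part of the original source: ViewFill is the worker
// behind filling a view with a single value through Kokkos::deep_copy:
//
//   Kokkos::View<double**> a("a",n0,n1);
//   Kokkos::deep_copy( a , 3.14 );   // parallel ViewFill over a's execution space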
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
struct ViewAllocateWithoutInitializing {
const std::string label ;
ViewAllocateWithoutInitializing() : label() {}
ViewAllocateWithoutInitializing( const std::string & arg_label ) : label( arg_label ) {}
ViewAllocateWithoutInitializing( const char * const arg_label ) : label( arg_label ) {}
};
struct ViewAllocate {
const std::string label ;
ViewAllocate() : label() {}
ViewAllocate( const std::string & arg_label ) : label( arg_label ) {}
ViewAllocate( const char * const arg_label ) : label( arg_label ) {}
};
}
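// Editorial sketch, not part of the original source: these tag types choose
// allocation behavior at View construction; ViewAllocateWithoutInitializing
// maps to Initialize = false below and skips the default-construct pass:
//
//   Kokkos::View<double*> a( Kokkos::ViewAllocate("a") , n );                    // initialized
//   Kokkos::View<double*> b( Kokkos::ViewAllocateWithoutInitializing("b") , n ); // raw memory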
namespace Kokkos {
namespace Impl {
template< class Traits , class AllocationProperties , class Enable = void >
struct ViewAllocProp : public Kokkos::Impl::false_type {};
template< class Traits >
struct ViewAllocProp< Traits , Kokkos::ViewAllocate
, typename Kokkos::Impl::enable_if<(
Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
)>::type >
: public Kokkos::Impl::true_type
{
typedef size_t size_type ;
typedef const ViewAllocate & property_type ;
enum { Initialize = true };
enum { AllowPadding = false };
inline
static const std::string & label( property_type p ) { return p.label ; }
};
template< class Traits >
struct ViewAllocProp< Traits , std::string
, typename Kokkos::Impl::enable_if<(
Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
)>::type >
: public Kokkos::Impl::true_type
{
typedef size_t size_type ;
typedef const std::string & property_type ;
enum { Initialize = true };
enum { AllowPadding = false };
inline
static const std::string & label( property_type s ) { return s ; }
};
template< class Traits , unsigned N >
struct ViewAllocProp< Traits , char[N]
, typename Kokkos::Impl::enable_if<(
Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
)>::type >
: public Kokkos::Impl::true_type
{
private:
typedef char label_type[N] ;
public:
typedef size_t size_type ;
typedef const label_type & property_type ;
enum { Initialize = true };
enum { AllowPadding = false };
inline
static std::string label( property_type s ) { return std::string(s) ; }
};
template< class Traits >
struct ViewAllocProp< Traits , Kokkos::ViewAllocateWithoutInitializing
, typename Kokkos::Impl::enable_if<(
Traits::is_managed && ! Kokkos::Impl::is_const< typename Traits::value_type >::value
)>::type >
: public Kokkos::Impl::true_type
{
typedef size_t size_type ;
typedef const Kokkos::ViewAllocateWithoutInitializing & property_type ;
enum { Initialize = false };
enum { AllowPadding = false };
inline
static std::string label( property_type s ) { return s.label ; }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class Traits , class PointerProperties , class Enable = void >
struct ViewRawPointerProp : public Kokkos::Impl::false_type {};
template< class Traits , typename T >
struct ViewRawPointerProp< Traits , T ,
typename Kokkos::Impl::enable_if<(
Impl::is_same< T , typename Traits::value_type >::value ||
Impl::is_same< T , typename Traits::non_const_value_type >::value
)>::type >
: public Kokkos::Impl::true_type
{
typedef size_t size_type ;
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_VIEWSUPPORT_HPP */
diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp
index 0bbb781c8..beaa288ce 100644
--- a/lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp
@@ -1,209 +1,209 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VIEWTILELEFT_HPP
#define KOKKOS_VIEWTILELEFT_HPP
#include <impl/KokkosExp_ViewTile.hpp>
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
namespace Kokkos {
using Kokkos::Experimental::tile_subview ;
}
#else
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class T , unsigned N0 , unsigned N1 , class MemorySpace , class MemoryTraits >
struct ViewSpecialize< T , void , LayoutTileLeft<N0,N1> , MemorySpace , MemoryTraits >
{
typedef ViewDefault type ;
};
struct ViewTile {};
template< class ShapeType , unsigned N0 , unsigned N1 >
struct ViewOffset< ShapeType
, LayoutTileLeft<N0,N1,true> /* Only accept properly shaped tiles */
, typename Impl::enable_if<( 2 == ShapeType::rank
&&
2 == ShapeType::rank_dynamic
)>::type >
: public ShapeType
{
- enum { SHIFT_0 = Impl::power_of_two<N0>::value };
- enum { SHIFT_1 = Impl::power_of_two<N1>::value };
+ enum { SHIFT_0 = Impl::integral_power_of_two(N0) };
+ enum { SHIFT_1 = Impl::integral_power_of_two(N1) };
enum { MASK_0 = N0 - 1 };
enum { MASK_1 = N1 - 1 };
typedef size_t size_type ;
typedef ShapeType shape_type ;
typedef LayoutTileLeft<N0,N1,true> array_layout ;
enum { has_padding = true };
size_type tile_N0 ;
KOKKOS_INLINE_FUNCTION
void assign( const ViewOffset & rhs )
{
shape_type::N0 = rhs.N0 ;
shape_type::N1 = rhs.N1 ;
tile_N0 = ( rhs.N0 + MASK_0 ) >> SHIFT_0 ; // number of tiles in first dimension
}
KOKKOS_INLINE_FUNCTION
void assign( size_t n0 , size_t n1
, int = 0 , int = 0
, int = 0 , int = 0
, int = 0 , int = 0
, int = 0
)
{
shape_type::N0 = n0 ;
shape_type::N1 = n1 ;
tile_N0 = ( n0 + MASK_0 ) >> SHIFT_0 ; // number of tiles in first dimension
}
KOKKOS_INLINE_FUNCTION
void set_padding() {}
template< typename I0 , typename I1 >
KOKKOS_INLINE_FUNCTION
size_type operator()( I0 const & i0 , I1 const & i1
, int = 0 , int = 0
, int = 0 , int = 0
, int = 0 , int = 0
) const
{
return /* ( ( Tile offset ) * ( Tile size ) ) */
( ( (i0>>SHIFT_0) + tile_N0 * (i1>>SHIFT_1) ) << (SHIFT_0 + SHIFT_1) ) +
/* ( Offset within tile ) */
( (i0 & MASK_0) + ((i1 & MASK_1)<<SHIFT_0) ) ;
}
template< typename I0 , typename I1 >
KOKKOS_INLINE_FUNCTION
size_type tile_begin( I0 const & i_tile0 , I1 const & i_tile1 ) const
{
return ( i_tile0 + tile_N0 * i_tile1 ) << ( SHIFT_0 + SHIFT_1 );
}
KOKKOS_INLINE_FUNCTION
size_type capacity() const
{
// ( TileDim0 * ( TileDim1 ) ) * TileSize
return ( tile_N0 * ( ( shape_type::N1 + MASK_1 ) >> SHIFT_1 ) ) << ( SHIFT_0 + SHIFT_1 );
}
};
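// Editorial sketch, not part of the original source: a worked example of the
// tiled index map above with 4x4 tiles (SHIFT_0 = SHIFT_1 = 2, MASK_0 = MASK_1 = 3)
// and N0 = 8, hence tile_N0 = 2. For (i0,i1) = (5,6):
//
//   tile coordinate    : (5>>2, 6>>2) = (1,1)
//   tile base offset   : (1 + 2*1) << 4 = 48
//   offset within tile : (5&3) + ((6&3)<<2) = 1 + 8 = 9
//   operator()(5,6)    : 48 + 9 = 57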
template<>
struct ViewAssignment< ViewTile , void , void >
{
// Some compilers have type-matching issues on the integer values when using:
// template< class T , unsigned N0 , unsigned N1 , class A2 , class A3 >
template< class T , unsigned dN0 , unsigned dN1
, class A2 , class A3
, unsigned sN0 , unsigned sN1 >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View< T[dN0][dN1], LayoutLeft, A2, A3, Impl::ViewDefault > & dst
, View< T** , LayoutTileLeft<sN0,sN1,true>, A2, A3, Impl::ViewDefault > const & src
, size_t const i_tile0
, typename Impl::enable_if< unsigned(dN0) == unsigned(sN0) &&
unsigned(dN1) == unsigned(sN1)
, size_t const
>::type i_tile1
)
{
// Destination is always contiguous but source may be non-contiguous
// so don't assign the whole view management object.
// Just query and appropriately set the reference-count state.
if ( ! src.m_management.is_managed() ) dst.m_management.set_unmanaged();
dst.m_ptr_on_device = src.m_ptr_on_device + src.m_offset_map.tile_begin(i_tile0,i_tile1);
dst.m_tracker = src.m_tracker;
}
};
} /* namespace Impl */
} /* namespace Kokkos */
namespace Kokkos {
template< class T , unsigned N0, unsigned N1, class A2, class A3 >
KOKKOS_INLINE_FUNCTION
View< T[N0][N1], LayoutLeft, A2, A3, Impl::ViewDefault >
tile_subview( const View<T**,LayoutTileLeft<N0,N1,true>,A2,A3,Impl::ViewDefault> & src
, const size_t i_tile0
, const size_t i_tile1
)
{
View< T[N0][N1], LayoutLeft, A2, A3, Impl::ViewDefault > dst ;
(void) Impl::ViewAssignment< Impl::ViewTile , void , void >( dst , src , i_tile0 , i_tile1 );
return dst ;
}
} /* namespace Kokkos */
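// Editorial sketch, not part of the original source: extracting one tile as a
// contiguous LayoutLeft sub-block of a tiled view:
//
//   Kokkos::View<double**, Kokkos::LayoutTileLeft<4,4> > m("m",n0,n1);
//   Kokkos::View<double[4][4], Kokkos::LayoutLeft> t = Kokkos::tile_subview( m , 1 , 2 );
//   // t aliases tile (1,2) of m and shares its reference count via m_tracker.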
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif
#endif /* #ifndef KOKKOS_VIEWTILELEFT_HPP */
diff --git a/lib/kokkos/core/src/impl/Kokkos_hwloc.cpp b/lib/kokkos/core/src/impl/Kokkos_hwloc.cpp
index 1d173fb4f..cb561f711 100644
--- a/lib/kokkos/core/src/impl/Kokkos_hwloc.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_hwloc.cpp
@@ -1,704 +1,726 @@
/*
//@HEADER
// ************************************************************************
-//
+//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
-//
+//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
-//
+//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
-//
+//
// ************************************************************************
//@HEADER
*/
#define DEBUG_PRINT 0
#include <iostream>
#include <sstream>
+#include <algorithm>
#include <Kokkos_Macros.hpp>
#include <Kokkos_hwloc.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace hwloc {
/* Return 0 if asynchronous, 1 if synchronous (threads_coord[0] is then the process core). */
unsigned thread_mapping( const char * const label ,
const bool allow_async ,
unsigned & thread_count ,
unsigned & use_numa_count ,
unsigned & use_cores_per_numa ,
std::pair<unsigned,unsigned> threads_coord[] )
{
const bool hwloc_avail = Kokkos::hwloc::available();
const unsigned avail_numa_count = hwloc_avail ? hwloc::get_available_numa_count() : 1 ;
const unsigned avail_cores_per_numa = hwloc_avail ? hwloc::get_available_cores_per_numa() : thread_count ;
const unsigned avail_threads_per_core = hwloc_avail ? hwloc::get_available_threads_per_core() : 1 ;
// (numa,core) coordinate of the process:
const std::pair<unsigned,unsigned> proc_coord = Kokkos::hwloc::get_this_thread_coordinate();
//------------------------------------------------------------------------
// Defaults for unspecified inputs:
if ( ! use_numa_count ) {
// Default to use all NUMA regions
use_numa_count = ! thread_count ? avail_numa_count : (
thread_count < avail_numa_count ? thread_count : avail_numa_count );
}
if ( ! use_cores_per_numa ) {
// Default to use all but one core if asynchronous, all cores if synchronous.
const unsigned threads_per_numa = thread_count / use_numa_count ;
use_cores_per_numa = ! threads_per_numa ? avail_cores_per_numa - ( allow_async ? 1 : 0 ) : (
threads_per_numa < avail_cores_per_numa ? threads_per_numa : avail_cores_per_numa );
}
if ( ! thread_count ) {
thread_count = use_numa_count * use_cores_per_numa * avail_threads_per_core ;
}
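// Illustrative defaulting: a machine reporting 2 NUMA x 8 cores x 2 PUs
// with all three inputs zero and allow_async set yields use_numa_count == 2 ,
// use_cores_per_numa == 7 (one core reserved) and thread_count == 2*7*2 == 28.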
//------------------------------------------------------------------------
// Input verification:
const bool valid_numa = use_numa_count <= avail_numa_count ;
const bool valid_cores = use_cores_per_numa &&
use_cores_per_numa <= avail_cores_per_numa ;
const bool valid_threads = thread_count &&
thread_count <= use_numa_count * use_cores_per_numa * avail_threads_per_core ;
const bool balanced_numa = ! ( thread_count % use_numa_count );
const bool balanced_cores = ! ( thread_count % ( use_numa_count * use_cores_per_numa ) );
const bool valid_input = valid_numa && valid_cores && valid_threads && balanced_numa && balanced_cores ;
if ( ! valid_input ) {
std::ostringstream msg ;
msg << label << " HWLOC ERROR(s)" ;
if ( ! valid_threads ) {
msg << " : thread_count(" << thread_count
<< ") exceeds capacity("
<< use_numa_count * use_cores_per_numa * avail_threads_per_core
<< ")" ;
}
if ( ! valid_numa ) {
msg << " : use_numa_count(" << use_numa_count
<< ") exceeds capacity(" << avail_numa_count << ")" ;
}
if ( ! valid_cores ) {
msg << " : use_cores_per_numa(" << use_cores_per_numa
<< ") exceeds capacity(" << avail_cores_per_numa << ")" ;
}
if ( ! balanced_numa ) {
msg << " : thread_count(" << thread_count
<< ") imbalanced among numa(" << use_numa_count << ")" ;
}
if ( ! balanced_cores ) {
msg << " : thread_count(" << thread_count
<< ") imbalanced among cores(" << use_numa_count * use_cores_per_numa << ")" ;
}
Kokkos::Impl::throw_runtime_exception( msg.str() );
}
const unsigned thread_spawn_synchronous =
( allow_async &&
1 < thread_count &&
( use_numa_count < avail_numa_count ||
use_cores_per_numa < avail_cores_per_numa ) )
? 0 /* asynchronous */
: 1 /* synchronous, threads_coord[0] is process core */ ;
// Determine binding coordinates for to-be-spawned threads so that
// threads may be bound to cores as they are spawned.
const unsigned threads_per_core = thread_count / ( use_numa_count * use_cores_per_numa );
if ( thread_spawn_synchronous ) {
// Working synchronously, including the process core as threads_coord[0].
// Swap the NUMA coordinate of the process core with 0
// Swap the CORE coordinate of the process core with 0
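// e.g. a process on (numa,core) == (1,3), using all regions and cores:
// the slot that would be (0,0) becomes (1,3) so the spawning thread keeps
// its core, and the slot that would be (1,3) becomes (0,0).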
for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
const unsigned numa_coord = 0 == inuma ? proc_coord.first : ( proc_coord.first == inuma ? 0 : inuma );
for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
const unsigned core_coord = 0 == icore ? proc_coord.second : ( proc_coord.second == icore ? 0 : icore );
for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
threads_coord[i].first = numa_coord ;
threads_coord[i].second = core_coord ;
}
}
}
}
else if ( use_numa_count < avail_numa_count ) {
// Working asynchronously, omitting the process' NUMA region from the pool.
// Swap the NUMA coordinate of the process core with ( ( avail_numa_count - use_numa_count ) - 1 )
const unsigned numa_coord_swap = ( avail_numa_count - use_numa_count ) - 1 ;
for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
const unsigned numa_coord = proc_coord.first == inuma ? numa_coord_swap : inuma ;
for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
const unsigned core_coord = icore ;
for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
threads_coord[i].first = numa_coord ;
threads_coord[i].second = core_coord ;
}
}
}
}
else if ( use_cores_per_numa < avail_cores_per_numa ) {
// Working asynchronously, omitting the process' core from the pool.
// Swap the CORE coordinate of the process core with ( ( avail_cores_per_numa - use_cores_per_numa ) - 1 )
const unsigned core_coord_swap = ( avail_cores_per_numa - use_cores_per_numa ) - 1 ;
for ( unsigned i = 0 , inuma = avail_numa_count - use_numa_count ; inuma < avail_numa_count ; ++inuma ) {
const unsigned numa_coord = inuma ;
for ( unsigned icore = avail_cores_per_numa - use_cores_per_numa ; icore < avail_cores_per_numa ; ++icore ) {
const unsigned core_coord = proc_coord.second == icore ? core_coord_swap : icore ;
for ( unsigned ith = 0 ; ith < threads_per_core ; ++ith , ++i ) {
threads_coord[i].first = numa_coord ;
threads_coord[i].second = core_coord ;
}
}
}
}
return thread_spawn_synchronous ;
}
} /* namespace hwloc */
} /* namespace Kokkos */
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
#if defined( KOKKOS_HAVE_HWLOC )
#include <iostream>
#include <sstream>
#include <stdexcept>
/*--------------------------------------------------------------------------*/
/* Third Party Libraries */
/* Hardware locality library: http://www.open-mpi.org/projects/hwloc/ */
#include <hwloc.h>
#define REQUIRED_HWLOC_API_VERSION 0x000010300
#if HWLOC_API_VERSION < REQUIRED_HWLOC_API_VERSION
#error "Requires http://www.open-mpi.org/projects/hwloc/ Version 1.3 or greater"
#endif
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace hwloc {
namespace {
#if DEBUG_PRINT
inline
void print_bitmap( std::ostream & s , const hwloc_const_bitmap_t bitmap )
{
s << "{" ;
for ( int i = hwloc_bitmap_first( bitmap ) ;
-1 != i ; i = hwloc_bitmap_next( bitmap , i ) ) {
s << " " << i ;
}
s << " }" ;
}
#endif
enum { MAX_CORE = 1024 };
std::pair<unsigned,unsigned> s_core_topology(0,0);
unsigned s_core_capacity(0);
hwloc_topology_t s_hwloc_topology(0);
hwloc_bitmap_t s_hwloc_location(0);
hwloc_bitmap_t s_process_binding(0);
hwloc_bitmap_t s_core[ MAX_CORE ];
+bool s_can_bind_threads(true);
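+// Cleared when the process binding cannot be detected and a whole-machine
+// binding is assumed below; queried through can_bind_threads().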
struct Sentinel {
~Sentinel();
Sentinel();
};
bool sentinel()
{
static Sentinel self ;
if ( 0 == s_hwloc_topology ) {
std::cerr << "Kokkos::hwloc ERROR : Called after return from main()" << std::endl ;
std::cerr.flush();
}
return 0 != s_hwloc_topology ;
}
Sentinel::~Sentinel()
{
hwloc_topology_destroy( s_hwloc_topology );
hwloc_bitmap_free( s_process_binding );
hwloc_bitmap_free( s_hwloc_location );
s_core_topology.first = 0 ;
s_core_topology.second = 0 ;
s_core_capacity = 0 ;
s_hwloc_topology = 0 ;
s_hwloc_location = 0 ;
s_process_binding = 0 ;
}
Sentinel::Sentinel()
{
#if defined(__MIC__)
static const bool remove_core_0 = true ;
#else
static const bool remove_core_0 = false ;
#endif
s_core_topology = std::pair<unsigned,unsigned>(0,0);
s_core_capacity = 0 ;
s_hwloc_topology = 0 ;
s_hwloc_location = 0 ;
s_process_binding = 0 ;
for ( unsigned i = 0 ; i < MAX_CORE ; ++i ) s_core[i] = 0 ;
hwloc_topology_init( & s_hwloc_topology );
hwloc_topology_load( s_hwloc_topology );
s_hwloc_location = hwloc_bitmap_alloc();
s_process_binding = hwloc_bitmap_alloc();
hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );
+ if ( hwloc_bitmap_iszero( s_process_binding ) ) {
+ std::cerr << "WARNING: Cannot detect process binding -- ASSUMING ALL processing units" << std::endl;
+ const int pu_depth = hwloc_get_type_depth( s_hwloc_topology, HWLOC_OBJ_PU );
+ int num_pu = 1;
+ if ( pu_depth != HWLOC_TYPE_DEPTH_UNKNOWN ) {
+ num_pu = hwloc_get_nbobjs_by_depth( s_hwloc_topology, pu_depth );
+ }
+ else {
+ std::cerr << "WARNING: Cannot detect number of processing units -- ASSUMING 1 (serial)." << std::endl;
+ num_pu = 1;
+ }
+ hwloc_bitmap_set_range( s_process_binding, 0, num_pu-1);
+ s_can_bind_threads = false;
+ }
+
if ( remove_core_0 ) {
const hwloc_obj_t core = hwloc_get_obj_by_type( s_hwloc_topology , HWLOC_OBJ_CORE , 0 );
if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
hwloc_bitmap_t s_process_no_core_zero = hwloc_bitmap_alloc();
hwloc_bitmap_andnot( s_process_no_core_zero , s_process_binding , core->allowed_cpuset );
bool ok = 0 == hwloc_set_cpubind( s_hwloc_topology ,
s_process_no_core_zero ,
HWLOC_CPUBIND_PROCESS | HWLOC_CPUBIND_STRICT );
if ( ok ) {
hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );
ok = 0 != hwloc_bitmap_isequal( s_process_binding , s_process_no_core_zero );
}
hwloc_bitmap_free( s_process_no_core_zero );
if ( ! ok ) {
std::cerr << "WARNING: Kokkos::hwloc attempted and failed to move process off of core #0" << std::endl ;
}
}
}
// Choose a hwloc object type for the NUMA level, which may not exist.
hwloc_obj_type_t root_type = HWLOC_OBJ_TYPE_MAX ;
{
// Object types to search, in order.
static const hwloc_obj_type_t candidate_root_type[] =
{ HWLOC_OBJ_NODE /* NUMA region */
, HWLOC_OBJ_SOCKET /* hardware socket */
, HWLOC_OBJ_MACHINE /* local machine */
};
enum { CANDIDATE_ROOT_TYPE_COUNT =
sizeof(candidate_root_type) / sizeof(hwloc_obj_type_t) };
for ( int k = 0 ; k < CANDIDATE_ROOT_TYPE_COUNT && HWLOC_OBJ_TYPE_MAX == root_type ; ++k ) {
if ( 0 < hwloc_get_nbobjs_by_type( s_hwloc_topology , candidate_root_type[k] ) ) {
root_type = candidate_root_type[k] ;
}
}
}
// Determine which of these 'root' types are available to this process.
// The process may have been bound (e.g., by MPI) to a subset of these root types.
// Determine the current location of the master (calling) process.
hwloc_bitmap_t proc_cpuset_location = hwloc_bitmap_alloc();
hwloc_get_last_cpu_location( s_hwloc_topology , proc_cpuset_location , HWLOC_CPUBIND_THREAD );
const unsigned max_root = hwloc_get_nbobjs_by_type( s_hwloc_topology , root_type );
unsigned root_base = max_root ;
unsigned root_count = 0 ;
unsigned core_per_root = 0 ;
unsigned pu_per_core = 0 ;
bool symmetric = true ;
for ( unsigned i = 0 ; i < max_root ; ++i ) {
const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i );
if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {
++root_count ;
// Remember which root (NUMA) object the master thread is running on.
// This will be logical NUMA rank #0 for this process.
if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) {
root_base = i ;
}
// Count available cores:
const unsigned max_core =
hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
root->allowed_cpuset ,
HWLOC_OBJ_CORE );
unsigned core_count = 0 ;
for ( unsigned j = 0 ; j < max_core ; ++j ) {
const hwloc_obj_t core =
hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
root->allowed_cpuset ,
HWLOC_OBJ_CORE , j );
// If the process' cpuset intersects the core's cpuset then the process can access this core.
// Must use intersection instead of inclusion because the Intel-Phi
// MPI may bind the process to only one of the core's hyperthreads.
//
// Assumption: if the process can access any hyperthread of the core
// then it has ownership of the entire core.
// This assumes that it would be performance-detrimental
// to spawn more than one MPI process per core and use nested threading.
if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
++core_count ;
const unsigned pu_count =
hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
core->allowed_cpuset ,
HWLOC_OBJ_PU );
if ( pu_per_core == 0 ) pu_per_core = pu_count ;
// Enforce symmetry by taking the minimum:
pu_per_core = std::min( pu_per_core , pu_count );
if ( pu_count != pu_per_core ) symmetric = false ;
}
}
if ( 0 == core_per_root ) core_per_root = core_count ;
// Enforce symmetry by taking the minimum:
core_per_root = std::min( core_per_root , core_count );
if ( core_count != core_per_root ) symmetric = false ;
}
}
s_core_topology.first = root_count ;
s_core_topology.second = core_per_root ;
s_core_capacity = pu_per_core ;
// Fill the 's_core' array for fast mapping from a core coordinate to the
// hwloc cpuset object required for thread location querying and binding.
for ( unsigned i = 0 ; i < max_root ; ++i ) {
const unsigned root_rank = ( i + root_base ) % max_root ;
const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank );
if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {
const unsigned max_core =
hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
root->allowed_cpuset ,
HWLOC_OBJ_CORE );
unsigned core_count = 0 ;
for ( unsigned j = 0 ; j < max_core && core_count < core_per_root ; ++j ) {
const hwloc_obj_t core =
hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
root->allowed_cpuset ,
HWLOC_OBJ_CORE , j );
if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {
s_core[ core_count + core_per_root * i ] = core->allowed_cpuset ;
++core_count ;
}
}
}
}
hwloc_bitmap_free( proc_cpuset_location );
if ( ! symmetric ) {
std::cout << "Kokkos::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
<< std::endl ;
}
}
} // namespace
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
bool available()
{ return true ; }
unsigned get_available_numa_count()
{ sentinel(); return s_core_topology.first ; }
unsigned get_available_cores_per_numa()
{ sentinel(); return s_core_topology.second ; }
unsigned get_available_threads_per_core()
{ sentinel(); return s_core_capacity ; }
+bool can_bind_threads()
+{ sentinel(); return s_can_bind_threads; }
+
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
unsigned bind_this_thread(
const unsigned coordinate_count ,
std::pair<unsigned,unsigned> coordinate[] )
{
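// Matching order: exact (numa,core) match first, then any entry on the
// same NUMA region, then the first request not yet claimed; a successful
// bind overwrites its entry with (~0u,~0u) so later threads skip it.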
unsigned i = 0 ;
try {
const std::pair<unsigned,unsigned> current = get_this_thread_coordinate();
// Match one of the requests:
for ( i = 0 ; i < coordinate_count && current != coordinate[i] ; ++i );
if ( coordinate_count == i ) {
// Match the first request (typically NUMA):
for ( i = 0 ; i < coordinate_count && current.first != coordinate[i].first ; ++i );
}
if ( coordinate_count == i ) {
// Match any unclaimed request:
for ( i = 0 ; i < coordinate_count && ~0u == coordinate[i].first ; ++i );
}
if ( coordinate_count == i || ! bind_this_thread( coordinate[i] ) ) {
// Failed to bind:
i = ~0u ;
}
if ( i < coordinate_count ) {
#if DEBUG_PRINT
if ( current != coordinate[i] ) {
std::cout << " bind_this_thread: rebinding from ("
<< current.first << ","
<< current.second
<< ") to ("
<< coordinate[i].first << ","
<< coordinate[i].second
<< ")" << std::endl ;
}
#endif
coordinate[i].first = ~0u ;
coordinate[i].second = ~0u ;
}
}
catch( ... ) {
i = ~0u ;
}
return i ;
}
bool bind_this_thread( const std::pair<unsigned,unsigned> coord )
{
if ( ! sentinel() ) return false ;
#if DEBUG_PRINT
std::cout << "Kokkos::bind_this_thread() at " ;
hwloc_get_last_cpu_location( s_hwloc_topology ,
s_hwloc_location , HWLOC_CPUBIND_THREAD );
print_bitmap( std::cout , s_hwloc_location );
std::cout << " to " ;
print_bitmap( std::cout , s_core[ coord.second + coord.first * s_core_topology.second ] );
std::cout << std::endl ;
#endif
// As safe and fast as possible.
// Fast-lookup by caching the coordinate -> hwloc cpuset mapping in 's_core'.
return coord.first < s_core_topology.first &&
coord.second < s_core_topology.second &&
0 == hwloc_set_cpubind( s_hwloc_topology ,
s_core[ coord.second + coord.first * s_core_topology.second ] ,
HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT );
}
bool unbind_this_thread()
{
if ( ! sentinel() ) return false ;
#define HWLOC_DEBUG_PRINT 0
#if HWLOC_DEBUG_PRINT
std::cout << "Kokkos::unbind_this_thread() from " ;
hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD );
print_bitmap( std::cout , s_hwloc_location );
#endif
const bool result =
s_hwloc_topology &&
0 == hwloc_set_cpubind( s_hwloc_topology ,
s_process_binding ,
HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT );
#if HWLOC_DEBUG_PRINT
std::cout << " to " ;
hwloc_get_cpubind( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD );
print_bitmap( std::cout , s_hwloc_location );
std::cout << std::endl ;
#endif
return result ;
#undef HWLOC_DEBUG_PRINT
}
//----------------------------------------------------------------------------
std::pair<unsigned,unsigned> get_this_thread_coordinate()
{
std::pair<unsigned,unsigned> coord(0u,0u);
if ( ! sentinel() ) return coord ;
const unsigned n = s_core_topology.first * s_core_topology.second ;
// Using the pre-allocated 's_hwloc_location' to avoid memory
// allocation by this thread. This call is NOT thread-safe.
hwloc_get_last_cpu_location( s_hwloc_topology ,
s_hwloc_location , HWLOC_CPUBIND_THREAD );
unsigned i = 0 ;
while ( i < n && ! hwloc_bitmap_intersects( s_hwloc_location , s_core[ i ] ) ) ++i ;
if ( i < n ) {
coord.first = i / s_core_topology.second ;
coord.second = i % s_core_topology.second ;
}
return coord ;
}
//----------------------------------------------------------------------------
} /* namespace hwloc */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#else /* ! defined( KOKKOS_HAVE_HWLOC ) */
namespace Kokkos {
namespace hwloc {
bool available() { return false ; }
+bool can_bind_threads() { return false ; }
unsigned get_available_numa_count() { return 1 ; }
unsigned get_available_cores_per_numa() { return 1 ; }
unsigned get_available_threads_per_core() { return 1 ; }
unsigned bind_this_thread( const unsigned , std::pair<unsigned,unsigned>[] )
{ return ~0 ; }
bool bind_this_thread( const std::pair<unsigned,unsigned> )
{ return false ; }
bool unbind_this_thread()
{ return true ; }
std::pair<unsigned,unsigned> get_this_thread_coordinate()
{ return std::pair<unsigned,unsigned>(0,0); }
} // namespace hwloc
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif
diff --git a/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp b/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp
index e16d9c495..aff7f29f8 100644
--- a/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp
@@ -1,82 +1,89 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#include <impl/Kokkos_spinwait.hpp>
/*--------------------------------------------------------------------------*/
#if ( KOKKOS_ENABLE_ASM )
#if defined( __arm__ ) || defined( __aarch64__ )
/* No-operation instruction to idle the thread. */
#define YIELD asm volatile("nop")
#else
/* Pause instruction to prevent excess processor bus usage */
#define YIELD asm volatile("pause\n":::"memory")
#endif
#elif defined ( KOKKOS_HAVE_WINTHREAD )
#include <process.h>
#define YIELD Sleep(0)
+#elif defined( _WIN32 ) && defined( _MSC_VER )
+ /* Windows w/ Visual Studio */
+ #define NOMINMAX
+ #include <winsock2.h>
+ #include <windows.h>
+#define YIELD YieldProcessor()
#elif defined ( _WIN32 )
- #define YIELD __asm__ __volatile__("pause\n":::"memory")
+ /* Windows w/ Intel */
+ #define YIELD __asm__ __volatile__("pause\n":::"memory")
#else
#include <sched.h>
#define YIELD sched_yield()
#endif
/*--------------------------------------------------------------------------*/
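/* Illustrative use of the primitive defined below (not from the source):
     volatile int flag = 0 ;
     // ... another thread eventually stores a value != 0 ...
     Kokkos::Impl::spinwait( flag , 0 ); // returns once flag != 0
*/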
namespace Kokkos {
namespace Impl {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
void spinwait( volatile int & flag , const int value )
{
while ( value == flag ) {
YIELD ;
}
}
#endif
} /* namespace Impl */
} /* namespace Kokkos */
diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt
new file mode 100644
index 000000000..e835245e2
--- /dev/null
+++ b/lib/kokkos/core/unit_test/CMakeLists.txt
@@ -0,0 +1,102 @@
+#
+# Add test-only library for gtest to be reused by all the subpackages
+#
+
+SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest)
+
+INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR})
+TRIBITS_ADD_LIBRARY(
+ kokkos_gtest
+ HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
+ SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
+ TESTONLY
+ )
+
+#
+# Define the tests
+#
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+IF(Kokkos_ENABLE_Serial)
+ TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ UnitTest_Serial
+ SOURCES UnitTestMain.cpp TestSerial.cpp
+ COMM serial mpi
+ NUM_MPI_PROCS 1
+ FAIL_REGULAR_EXPRESSION " FAILED "
+ TESTONLYLIBS kokkos_gtest
+ )
+ENDIF()
+
+IF(Kokkos_ENABLE_Pthread)
+ TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ UnitTest_Threads
+ SOURCES UnitTestMain.cpp TestThreads.cpp
+ COMM serial mpi
+ NUM_MPI_PROCS 1
+ FAIL_REGULAR_EXPRESSION " FAILED "
+ TESTONLYLIBS kokkos_gtest
+ )
+ENDIF()
+
+IF(Kokkos_ENABLE_OpenMP)
+ TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ UnitTest_OpenMP
+ SOURCES UnitTestMain.cpp TestOpenMP.cpp
+ COMM serial mpi
+ NUM_MPI_PROCS 1
+ FAIL_REGULAR_EXPRESSION " FAILED "
+ TESTONLYLIBS kokkos_gtest
+ )
+ENDIF()
+
+IF(Kokkos_ENABLE_QTHREAD)
+ TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ UnitTest_Qthread
+ SOURCES UnitTestMain.cpp TestQthread.cpp
+ COMM serial mpi
+ NUM_MPI_PROCS 1
+ FAIL_REGULAR_EXPRESSION " FAILED "
+ TESTONLYLIBS kokkos_gtest
+ )
+ENDIF()
+
+IF(Kokkos_ENABLE_Cuda)
+ TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ UnitTest_Cuda
+ SOURCES UnitTestMain.cpp TestCuda.cpp
+ COMM serial mpi
+ NUM_MPI_PROCS 1
+ FAIL_REGULAR_EXPRESSION " FAILED "
+ TESTONLYLIBS kokkos_gtest
+ )
+ENDIF()
+
+TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ UnitTest_Default
+ SOURCES UnitTestMain.cpp TestDefaultDeviceType.cpp TestDefaultDeviceTypeInit.cpp
+ COMM serial mpi
+ NUM_MPI_PROCS 1
+ FAIL_REGULAR_EXPRESSION " FAILED "
+ TESTONLYLIBS kokkos_gtest
+)
+
+TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ UnitTest_HWLOC
+ SOURCES UnitTestMain.cpp TestHWLOC.cpp
+ COMM serial mpi
+ NUM_MPI_PROCS 1
+ FAIL_REGULAR_EXPRESSION " FAILED "
+ TESTONLYLIBS kokkos_gtest
+)
+
+TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ UnitTest_AllocationTracker
+ SOURCES UnitTestMain.cpp TestAllocationTracker.cpp
+ COMM serial mpi
+ NUM_MPI_PROCS 1
+ FAIL_REGULAR_EXPRESSION " FAILED "
+ TESTONLYLIBS kokkos_gtest
+)
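+
+# Example (illustrative): configuring with -DKokkos_ENABLE_OpenMP=ON enables
+# the UnitTest_OpenMP target above; it can then be run via `ctest -R OpenMP`.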
diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile
index b2d3d5506..5c69c4014 100644
--- a/lib/kokkos/core/unit_test/Makefile
+++ b/lib/kokkos/core/unit_test/Makefile
@@ -1,146 +1,154 @@
KOKKOS_PATH = ../..
-GTEST_PATH = ../../TPL/gtest
+GTEST_PATH = ../../tpls/gtest
vpath %.cpp ${KOKKOS_PATH}/core/unit_test
TEST_HEADERS = $(wildcard $(KOKKOS_PATH)/core/unit_test/*.hpp)
default: build_all
echo "End Build"
-
include $(KOKKOS_PATH)/Makefile.kokkos
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
- CXX = nvcc_wrapper
+ CXX = $(NVCC_WRAPPER)
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/unit_test
-TEST_TARGETS =
-TARGETS =
+TEST_TARGETS =
+TARGETS =
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
TARGETS += KokkosCore_UnitTest_Cuda
TEST_TARGETS += test-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
TARGETS += KokkosCore_UnitTest_Threads
TEST_TARGETS += test-threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
TARGETS += KokkosCore_UnitTest_OpenMP
TEST_TARGETS += test-openmp
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
TARGETS += KokkosCore_UnitTest_Serial
TEST_TARGETS += test-serial
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
OBJ_QTHREAD = TestQthread.o UnitTestMain.o gtest-all.o
TARGETS += KokkosCore_UnitTest_Qthread
TEST_TARGETS += test-qthread
endif
OBJ_HWLOC = TestHWLOC.o UnitTestMain.o gtest-all.o
TARGETS += KokkosCore_UnitTest_HWLOC
TEST_TARGETS += test-hwloc
OBJ_ALLOCATIONTRACKER = TestAllocationTracker.o UnitTestMain.o gtest-all.o
TARGETS += KokkosCore_UnitTest_AllocationTracker
TEST_TARGETS += test-allocationtracker
OBJ_DEFAULT = TestDefaultDeviceType.o UnitTestMain.o gtest-all.o
TARGETS += KokkosCore_UnitTest_Default
TEST_TARGETS += test-default
OBJ_DEFAULTINIT = TestDefaultDeviceTypeInit.o UnitTestMain.o gtest-all.o
TARGETS += KokkosCore_UnitTest_DefaultInit
TEST_TARGETS += test-default-init
+OBJ_SYNCHRONIC = TestSynchronic.o UnitTestMain.o gtest-all.o
+TARGETS += KokkosCore_UnitTest_Synchronic
+TEST_TARGETS += test-synchronic
KokkosCore_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Cuda
KokkosCore_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Threads
-
+
KokkosCore_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_OpenMP
KokkosCore_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Serial
KokkosCore_UnitTest_Qthread: $(OBJ_QTHREAD) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_QTHREAD) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Qthread
KokkosCore_UnitTest_HWLOC: $(OBJ_HWLOC) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_HWLOC) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_HWLOC
KokkosCore_UnitTest_AllocationTracker: $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_AllocationTracker
KokkosCore_UnitTest_Default: $(OBJ_DEFAULT) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_DEFAULT) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Default
KokkosCore_UnitTest_DefaultInit: $(OBJ_DEFAULTINIT) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_DEFAULTINIT) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_DefaultInit
+KokkosCore_UnitTest_Synchronic: $(OBJ_SYNCHRONIC) $(KOKKOS_LINK_DEPENDS)
+ $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SYNCHRONIC) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Synchronic
+
test-cuda: KokkosCore_UnitTest_Cuda
./KokkosCore_UnitTest_Cuda
test-threads: KokkosCore_UnitTest_Threads
./KokkosCore_UnitTest_Threads
test-openmp: KokkosCore_UnitTest_OpenMP
./KokkosCore_UnitTest_OpenMP
test-serial: KokkosCore_UnitTest_Serial
./KokkosCore_UnitTest_Serial
-
+
test-qthread: KokkosCore_UnitTest_Qthread
./KokkosCore_UnitTest_Qthread
test-hwloc: KokkosCore_UnitTest_HWLOC
./KokkosCore_UnitTest_HWLOC
-
+
test-allocationtracker: KokkosCore_UnitTest_AllocationTracker
./KokkosCore_UnitTest_AllocationTracker
-
+
test-default: KokkosCore_UnitTest_Default
./KokkosCore_UnitTest_Default
-
+
test-default-init: KokkosCore_UnitTest_DefaultInit
./KokkosCore_UnitTest_DefaultInit
+test-synchronic: KokkosCore_UnitTest_Synchronic
+ ./KokkosCore_UnitTest_Synchronic
+
build_all: $(TARGETS)
test: $(TEST_TARGETS)
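# Example invocation (illustrative): `make build_all` followed by
# `make test-synchronic` builds every enabled target and runs only the
# new synchronic unit test.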
-
-clean: kokkos-clean
+
+clean: kokkos-clean
rm -f *.o $(TARGETS)
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(TEST_HEADERS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
-gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
+gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
diff --git a/lib/kokkos/core/unit_test/TestAggregate.hpp b/lib/kokkos/core/unit_test/TestAggregate.hpp
index b16e17b4c..c106dfd87 100644
--- a/lib/kokkos/core/unit_test/TestAggregate.hpp
+++ b/lib/kokkos/core/unit_test/TestAggregate.hpp
@@ -1,753 +1,770 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef TEST_AGGREGATE_HPP
#define TEST_AGGREGATE_HPP
#include <gtest/gtest.h>
#include <stdexcept>
#include <sstream>
#include <iostream>
/*--------------------------------------------------------------------------*/
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
namespace Test {
struct EmbedArray {};
struct ArrayProxyContiguous {};
struct ArrayProxyStrided {};
template< typename T , unsigned N = 0 , class Proxy = void >
struct Array ;
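// The specializations below model, in order: a runtime-length contiguous
// proxy, a static-length contiguous proxy, a static-length strided proxy,
// a runtime-length strided proxy, an unmanaged runtime-length array, and
// embedded static-length storage.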
template< typename T >
struct Array<T,0,ArrayProxyContiguous>
{
public:
typedef T value_type ;
enum { StaticLength = 0 };
T * const value ;
const unsigned count ;
KOKKOS_INLINE_FUNCTION
Array( T * v , unsigned n ) : value(v), count(n) {}
template< class Proxy >
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array<T,0,Proxy> & rhs ) { return *this ; }
};
template< typename T , unsigned N >
struct Array<T,N,ArrayProxyContiguous>
{
public:
typedef T value_type ;
enum { StaticLength = N };
T * const value ;
KOKKOS_INLINE_FUNCTION
Array( T * v , unsigned ) : value(v) {}
template< class Proxy >
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array<T,N,Proxy> & rhs ) { return *this ; }
};
template< typename T , unsigned N >
struct Array<T,N,ArrayProxyStrided>
{
public:
typedef T value_type ;
enum { StaticLength = N };
T * const value ;
const unsigned stride ;
KOKKOS_INLINE_FUNCTION
Array( T * v , unsigned , unsigned s ) : value(v), stride(s) {}
template< class Proxy >
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array<T,N,Proxy> & rhs ) { return *this ; }
};
template< typename T >
struct Array<T,0,ArrayProxyStrided>
{
public:
typedef T value_type ;
enum { StaticLength = 0 };
T * const value ;
const unsigned count ;
const unsigned stride ;
KOKKOS_INLINE_FUNCTION
Array( T * v , unsigned n , unsigned s ) : value(v), count(n), stride(s) {}
template< class Proxy >
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array<T,0,Proxy> & rhs ) { return *this ; }
};
template< typename T >
struct Array<T,0,void>
{
public:
typedef T value_type ;
enum { StaticLength = 0 };
T * value ;
const unsigned count ;
KOKKOS_INLINE_FUNCTION
Array() : value(0) , count(0) {}
template< unsigned N , class Proxy >
KOKKOS_INLINE_FUNCTION
Array( const Array<T,N,Proxy> & rhs ) : value(rhs.value), count(N) {}
};
template< typename T , unsigned N >
struct Array<T,N,void>
{
public:
typedef T value_type ;
enum { StaticLength = N };
T value[N] ;
template< class Proxy >
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array<T,N,Proxy> & ) { return *this ; }
};
} // namespace Test
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
template< typename T , unsigned N >
struct AnalyzeShape< Test::Array< T , N > >
: public ShapeInsert< typename AnalyzeShape< T >::shape , N >::type
{
private:
typedef AnalyzeShape< T > nested ;
public:
typedef Test::EmbedArray specialize ;
typedef typename ShapeInsert< typename nested::shape , N >::type shape ;
typedef typename nested::array_intrinsic_type array_intrinsic_type[ N ];
typedef Test::Array< T , N > value_type ;
typedef Test::Array< T , N > type ;
typedef const array_intrinsic_type const_array_intrinsic_type ;
typedef const value_type const_value_type ;
typedef const type const_type ;
typedef typename nested::non_const_array_intrinsic_type non_const_array_intrinsic_type[ N ];
typedef Test::Array< typename nested::non_const_value_type , N > non_const_value_type ;
typedef Test::Array< typename nested::non_const_value_type , N > non_const_type ;
};
template< typename T >
struct AnalyzeShape< Test::Array< T , 0 > >
: public ShapeInsert< typename AnalyzeShape< T >::shape , 0 >::type
{
private:
typedef AnalyzeShape< T > nested ;
public:
typedef Test::EmbedArray specialize ;
typedef typename ShapeInsert< typename nested::shape , 0 >::type shape ;
typedef typename nested::array_intrinsic_type * array_intrinsic_type ;
typedef Test::Array< T , 0 > value_type ;
typedef Test::Array< T , 0 > type ;
typedef const array_intrinsic_type const_array_intrinsic_type ;
typedef const value_type const_value_type ;
typedef const type const_type ;
typedef typename nested::non_const_array_intrinsic_type * non_const_array_intrinsic_type ;
typedef Test::Array< typename nested::non_const_value_type , 0 > non_const_value_type ;
typedef Test::Array< typename nested::non_const_value_type , 0 > non_const_type ;
};
/*--------------------------------------------------------------------------*/
template< class ValueType , class MemorySpace , class MemoryTraits >
struct ViewSpecialize< ValueType
, Test::EmbedArray
, LayoutLeft
, MemorySpace
, MemoryTraits >
{ typedef Test::EmbedArray type ; };
template< class ValueType , class MemorySpace , class MemoryTraits >
struct ViewSpecialize< ValueType
, Test::EmbedArray
, LayoutRight
, MemorySpace
, MemoryTraits >
{ typedef Test::EmbedArray type ; };
/*--------------------------------------------------------------------------*/
template<>
struct ViewAssignment< Test::EmbedArray , Test::EmbedArray , void >
{
//------------------------------------
/** \brief Compatible value and shape */
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
KOKKOS_INLINE_FUNCTION
ViewAssignment( View<DT,DL,DD,DM,Test::EmbedArray> & dst
, const View<ST,SL,SD,SM,Test::EmbedArray> & src
, const typename enable_if<(
ViewAssignable< ViewTraits<DT,DL,DD,DM> ,
ViewTraits<ST,SL,SD,SM> >::value
)>::type * = 0
)
{
dst.m_offset_map.assign( src.m_offset_map );
dst.m_ptr_on_device = src.m_ptr_on_device ;
dst.m_tracker = src.m_tracker;
}
};
template<>
struct ViewAssignment< ViewDefault , Test::EmbedArray , void >
{
//------------------------------------
/** \brief Compatible value and shape */
template< class ST , class SL , class SD , class SM >
KOKKOS_INLINE_FUNCTION
ViewAssignment( typename View<ST,SL,SD,SM,Test::EmbedArray>::array_type & dst
, const View<ST,SL,SD,SM,Test::EmbedArray> & src
)
{
dst.m_offset_map.assign( src.m_offset_map );
dst.m_ptr_on_device = src.m_ptr_on_device ;
dst.m_tracker = src.m_tracker;
}
};
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
template< class DataType ,
class Arg1Type ,
class Arg2Type ,
class Arg3Type >
class View< DataType , Arg1Type , Arg2Type , Arg3Type , Test::EmbedArray >
: public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
{
public:
typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
private:
// Assignment of compatible views requirement:
template< class , class , class , class , class > friend class View ;
// Assignment of compatible subview requirement:
template< class , class , class > friend struct Impl::ViewAssignment ;
typedef Impl::ViewOffset< typename traits::shape_type ,
typename traits::array_layout > offset_map_type ;
typedef Impl::ViewDataManagement< traits > view_data_management ;
// traits::value_type = Test::Array< T , N >
typename traits::value_type::value_type * m_ptr_on_device ;
offset_map_type m_offset_map ;
view_data_management m_management ;
Impl::AllocationTracker m_tracker ;
public:
typedef View< typename traits::array_intrinsic_type ,
typename traits::array_layout ,
typename traits::execution_space ,
typename traits::memory_traits > array_type ;
typedef View< typename traits::non_const_data_type ,
typename traits::array_layout ,
typename traits::execution_space ,
typename traits::memory_traits > non_const_type ;
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::execution_space ,
typename traits::memory_traits > const_type ;
typedef View< typename traits::non_const_data_type ,
typename traits::array_layout ,
typename traits::host_mirror_space ,
void > HostMirror ;
//------------------------------------
// Shape
enum { Rank = traits::rank - 1 };
KOKKOS_INLINE_FUNCTION typename traits::shape_type shape() const { return m_offset_map ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const { return m_offset_map.N0 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_offset_map.N1 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_offset_map.N2 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_offset_map.N3 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_offset_map.N4 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_offset_map.N5 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_offset_map.N6 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_offset_map.N7 ; }
KOKKOS_INLINE_FUNCTION typename traits::size_type size() const
{
return m_offset_map.N0
* m_offset_map.N1
* m_offset_map.N2
* m_offset_map.N3
* m_offset_map.N4
* m_offset_map.N5
* m_offset_map.N6
* m_offset_map.N7
;
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension( const iType & i ) const
{ return Impl::dimension( m_offset_map , i ); }
//------------------------------------
// Destructor, constructors, assignment operators:
KOKKOS_INLINE_FUNCTION
~View() {}
KOKKOS_INLINE_FUNCTION
View()
: m_ptr_on_device(0)
, m_offset_map()
, m_management()
, m_tracker()
{ m_offset_map.assign(0,0,0,0,0,0,0,0); }
KOKKOS_INLINE_FUNCTION
View( const View & rhs )
: m_ptr_on_device(0)
, m_offset_map()
, m_management()
, m_tracker()
{
(void) Impl::ViewAssignment<
typename traits::specialize ,
typename traits::specialize >( *this , rhs );
}
KOKKOS_INLINE_FUNCTION
View & operator = ( const View & rhs )
{
(void) Impl::ViewAssignment<
typename traits::specialize ,
typename traits::specialize >( *this , rhs );
return *this ;
}
//------------------------------------
// Construct or assign compatible view:
template< class RT , class RL , class RD , class RM , class RS >
KOKKOS_INLINE_FUNCTION
View( const View<RT,RL,RD,RM,RS> & rhs )
: m_ptr_on_device(0)
, m_offset_map()
, m_management()
, m_tracker()
{
(void) Impl::ViewAssignment<
typename traits::specialize , RS >( *this , rhs );
}
template< class RT , class RL , class RD , class RM , class RS >
KOKKOS_INLINE_FUNCTION
View & operator = ( const View<RT,RL,RD,RM,RS> & rhs )
{
(void) Impl::ViewAssignment<
typename traits::specialize , RS >( *this , rhs );
return *this ;
}
//------------------------------------
// Allocation of a managed view with possible alignment padding.
template< class AllocationProperties >
explicit inline
View( const AllocationProperties & prop ,
const typename Impl::ViewAllocProp< traits , AllocationProperties >::size_type n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 )
: m_ptr_on_device(0)
, m_offset_map()
, m_management()
, m_tracker()
{
typedef Impl::ViewAllocProp< traits , AllocationProperties > Alloc ;
typedef typename traits::memory_space memory_space ;
typedef typename traits::value_type::value_type scalar_type ;
m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7 );
m_offset_map.set_padding();
m_tracker = memory_space::allocate_and_track( Alloc::label( prop ), sizeof(scalar_type) * m_offset_map.capacity() );
m_ptr_on_device = reinterpret_cast<scalar_type *>(m_tracker.alloc_ptr());
(void) Impl::ViewDefaultConstruct< typename traits::execution_space , scalar_type , Alloc::Initialize >( m_ptr_on_device , m_offset_map.capacity() );
}
//------------------------------------
// Assign an unmanaged View from pointer, can be called in functors.
// No alignment padding is performed.
typedef Impl::if_c< ! traits::is_managed ,
typename traits::value_type::value_type * ,
Impl::ViewError::user_pointer_constructor_requires_unmanaged >
if_user_pointer_constructor ;
View( typename if_user_pointer_constructor::type ptr ,
const size_t n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 )
: m_ptr_on_device(0)
, m_offset_map()
, m_management()
, m_tracker()
{
m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7 );
m_ptr_on_device = if_user_pointer_constructor::select( ptr );
m_management.set_unmanaged();
}
//------------------------------------
// Assign unmanaged View to portion of Device shared memory
typedef Impl::if_c< ! traits::is_managed ,
typename traits::execution_space ,
Impl::ViewError::device_shmem_constructor_requires_unmanaged >
if_device_shmem_constructor ;
explicit KOKKOS_INLINE_FUNCTION
View( typename if_device_shmem_constructor::type & dev ,
const unsigned n0 = 0 ,
const unsigned n1 = 0 ,
const unsigned n2 = 0 ,
const unsigned n3 = 0 ,
const unsigned n4 = 0 ,
const unsigned n5 = 0 ,
const unsigned n6 = 0 ,
const unsigned n7 = 0 )
: m_ptr_on_device(0)
, m_offset_map()
, m_management()
, m_tracker()
{
typedef typename traits::value_type::value_type scalar_type ;
enum { align = 8 };
enum { mask = align - 1 };
typedef Impl::if_c< ! traits::is_managed ,
scalar_type * ,
Impl::ViewError::device_shmem_constructor_requires_unmanaged >
if_device_shmem_pointer ;
m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7 );
// Select the first argument:
m_ptr_on_device = if_device_shmem_pointer::select(
(scalar_type *) dev.get_shmem( unsigned( sizeof(scalar_type) * m_offset_map.capacity() + unsigned(mask) ) & ~unsigned(mask) ) );
}
static inline
unsigned shmem_size( const unsigned n0 = 0 ,
const unsigned n1 = 0 ,
const unsigned n2 = 0 ,
const unsigned n3 = 0 ,
const unsigned n4 = 0 ,
const unsigned n5 = 0 ,
const unsigned n6 = 0 ,
const unsigned n7 = 0 )
{
enum { align = 8 };
enum { mask = align - 1 };
typedef typename traits::value_type::value_type scalar_type ;
offset_map_type offset_map ;
offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7 );
return unsigned( sizeof(scalar_type) * offset_map.capacity() + unsigned(mask) ) & ~unsigned(mask) ;
}
//------------------------------------
// Is not allocated
KOKKOS_INLINE_FUNCTION
bool is_null() const { return 0 == m_ptr_on_device ; }
//------------------------------------
// LayoutLeft, rank 2:
typedef Test::Array< typename traits::value_type::value_type ,
traits::value_type::StaticLength ,
Test::ArrayProxyStrided > LeftValue ;
template< typename iType0 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewEnableArrayOper< LeftValue , traits, LayoutLeft, 2, iType0 >::type
operator[] ( const iType0 & i0 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_offset_map, i0, 0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device );
return LeftValue( m_ptr_on_device + i0 , m_offset_map.N1 , m_offset_map.S0 );
}
template< typename iType0 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewEnableArrayOper< LeftValue , traits, LayoutLeft, 2, iType0 >::type
operator() ( const iType0 & i0 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_offset_map, i0, 0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device );
return LeftValue( m_ptr_on_device + i0 , m_offset_map.N1 , m_offset_map.S0 );
}
template< typename iType0 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewEnableArrayOper< LeftValue , traits, LayoutLeft, 2, iType0 >::type
at( const iType0 & i0 , const int , const int , const int ,
const int , const int , const int , const int ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_offset_map, i0, 0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device );
return LeftValue( m_ptr_on_device + i0 , m_offset_map.N1 , m_offset_map.S0 );
}
//------------------------------------
// LayoutRight, rank 2:
typedef Test::Array< typename traits::value_type::value_type ,
traits::value_type::StaticLength ,
Test::ArrayProxyContiguous > RightValue ;
template< typename iType0 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewEnableArrayOper< RightValue , traits, LayoutRight, 2, iType0 >::type
operator[] ( const iType0 & i0 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_offset_map, i0, 0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device );
return RightValue( m_ptr_on_device + i0 , m_offset_map.N1 );
}
template< typename iType0 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewEnableArrayOper< RightValue , traits, LayoutRight, 2, iType0 >::type
operator() ( const iType0 & i0 ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_offset_map, i0, 0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device );
return RightValue( m_ptr_on_device + i0 , m_offset_map.N1 );
}
template< typename iType0 >
KOKKOS_INLINE_FUNCTION
typename Impl::ViewEnableArrayOper< RightValue , traits, LayoutRight, 2, iType0 >::type
at( const iType0 & i0 , const int , const int , const int ,
const int , const int , const int , const int ) const
{
KOKKOS_ASSERT_SHAPE_BOUNDS_2( m_offset_map, i0, 0 );
KOKKOS_RESTRICT_EXECUTION_TO_DATA( typename traits::memory_space , m_ptr_on_device );
return RightValue( m_ptr_on_device + i0 , m_offset_map.N1 );
}
//------------------------------------
// Access to the underlying contiguous storage of this view specialization.
// These methods are specific to specialization of a view.
KOKKOS_INLINE_FUNCTION
typename traits::value_type::value_type * ptr_on_device() const { return m_ptr_on_device ; }
// Stride of physical storage, dimensioned to at least Rank
template< typename iType >
KOKKOS_INLINE_FUNCTION
void stride( iType * const s ) const
{ m_offset_map.stride( s ); }
// Count of contiguously allocated data members including padding.
KOKKOS_INLINE_FUNCTION
typename traits::size_type capacity() const
{ return m_offset_map.capacity(); }
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Test {
template< class DeviceType >
int TestViewAggregate()
{
typedef Kokkos::View< Test::Array<double,32> * , DeviceType > a32_type ;
typedef typename a32_type::array_type a32_base_type ;
typedef Kokkos::View< Test::Array<double> * , DeviceType > a0_type ;
typedef typename a0_type::array_type a0_base_type ;
a32_type a32("a32",100);
a32_base_type a32_base ;
a0_type a0("a0",100,32);
a0_base_type a0_base ;
a32_base = a32 ;
a0_base = a0 ;
return 0 ;
}
}
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
#else /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+#include <impl/KokkosExp_ViewArray.hpp>
+
namespace Test {
template< class DeviceType >
-int TestViewAggregate()
+void TestViewAggregate()
{
-/*
- typedef Kokkos::ViewTraits< Kokkos::Array<double,32> ** , DeviceType > a32_traits ;
+ typedef Kokkos::Array<double,32> value_type ;
+
+ typedef Kokkos::Experimental::Impl::
+ ViewDataAnalysis< value_type * , Kokkos::LayoutLeft , value_type >
+ analysis_1d ;
+
+ static_assert( std::is_same< typename analysis_1d::specialize , Kokkos::Array<> >::value , "" );
+
+
+ typedef Kokkos::ViewTraits< value_type ** , DeviceType > a32_traits ;
typedef Kokkos::ViewTraits< typename a32_traits::array_scalar_type , DeviceType > flat_traits ;
static_assert( std::is_same< typename a32_traits::specialize , Kokkos::Array<> >::value , "" );
+ static_assert( std::is_same< typename a32_traits::value_type , value_type >::value , "" );
static_assert( a32_traits::rank == 2 , "" );
static_assert( a32_traits::rank_dynamic == 2 , "" );
static_assert( std::is_same< typename flat_traits::specialize , void >::value , "" );
static_assert( flat_traits::rank == 3 , "" );
static_assert( flat_traits::rank_dynamic == 2 , "" );
static_assert( flat_traits::dimension::N2 == 32 , "" );
-
-
typedef Kokkos::View< Kokkos::Array<double,32> ** , DeviceType > a32_type ;
- typedef typename a32_type::array_type a32_flat_type ;
+ typedef typename a32_type::array_type a32_flat_type ;
+ static_assert( std::is_same< typename a32_type::value_type , value_type >::value , "" );
+ static_assert( std::is_same< typename a32_type::pointer_type , double * >::value , "" );
static_assert( a32_type::Rank == 2 , "" );
static_assert( a32_flat_type::Rank == 3 , "" );
-*/
- return 0 ;
+ a32_type x("test",4,5);
+ a32_flat_type y( x );
+
+ ASSERT_EQ( x.extent(0) , 4 );
+ ASSERT_EQ( x.extent(1) , 5 );
+ ASSERT_EQ( y.extent(0) , 4 );
+ ASSERT_EQ( y.extent(1) , 5 );
+ ASSERT_EQ( y.extent(2) , 32 );
}
}
#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
#endif /* #ifndef TEST_AGGREGATE_HPP */
diff --git a/lib/kokkos/core/unit_test/TestAllocationTracker.cpp b/lib/kokkos/core/unit_test/TestAllocationTracker.cpp
index 371b0ac75..16f13ff1a 100644
--- a/lib/kokkos/core/unit_test/TestAllocationTracker.cpp
+++ b/lib/kokkos/core/unit_test/TestAllocationTracker.cpp
@@ -1,145 +1,159 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <impl/Kokkos_BasicAllocators.hpp>
namespace Test {
class allocation_tracker : public ::testing::Test {
protected:
static void SetUpTestCase()
{
Kokkos::initialize();
}
static void TearDownTestCase()
{
Kokkos::finalize();
}
};
TEST_F( allocation_tracker, simple)
{
+
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
using namespace Kokkos::Impl;
{
AllocationTracker tracker;
EXPECT_FALSE( tracker.is_valid() );
}
// test ref count and label
{
int size = 100;
std::vector<AllocationTracker> trackers(size);
trackers[0] = AllocationTracker( MallocAllocator(), 128,"Test");
for (int i=0; i<size; ++i) {
trackers[i] = trackers[0];
}
EXPECT_EQ(100u, trackers[0].ref_count());
EXPECT_EQ(std::string("Test"), std::string(trackers[0].label()));
}
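// Note: the loop above assigns trackers[0] to itself as well, so all
// 100 entries share one allocation record and the expected reference
// count equals the vector size exactly.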
// test circular list
{
int num_allocs = 3000;
unsigned ref_count = 100;
std::vector<AllocationTracker> trackers(num_allocs);
for (int i=0; i<num_allocs; ++i) {
trackers[i] = AllocationTracker( MallocAllocator(), 128, "Test");
std::vector<AllocationTracker> ref_trackers(ref_count);
for (unsigned j=0; j<ref_count; ++j) {
ref_trackers[j] = trackers[i];
}
EXPECT_EQ( ref_count + 1u, trackers[i].ref_count() );
}
for (int i=0; i<num_allocs; ++i) {
EXPECT_EQ( 1u, trackers[i].ref_count() );
}
}
+
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
}
TEST_F( allocation_tracker, force_leaks)
{
// uncomment to force memory leaks
#if 0
using namespace Kokkos::Impl;
Kokkos::kokkos_malloc("Forced Leak", 4096*10);
Kokkos::kokkos_malloc<Kokkos::HostSpace>("Forced Leak", 4096*10);
#endif
}
TEST_F( allocation_tracker, disable_reference_counting)
{
+
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
using namespace Kokkos::Impl;
// test ref count and label
{
int size = 100;
std::vector<AllocationTracker> trackers(size);
trackers[0] = AllocationTracker( MallocAllocator(), 128,"Test");
for (int i=1; i<size; ++i) {
- trackers[i] = CopyWithoutTracking::apply(trackers[0]);
+ Kokkos::Impl::AllocationTracker::disable_tracking();
+ trackers[i] = trackers[0] ;
+ Kokkos::Impl::AllocationTracker::enable_tracking();
}
EXPECT_EQ(1u, trackers[0].ref_count());
EXPECT_EQ(std::string("Test"), std::string(trackers[0].label()));
}
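// Note: copies made while tracking is disabled do not bump the shared
// reference count, which is why trackers[0] still reports a count of 1
// despite the 99 aliases created above; the disable/enable calls are
// meant to tightly bracket the untracked copies.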
+
+#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
+
}
} // namespace Test
diff --git a/lib/kokkos/core/unit_test/TestAtomic.hpp b/lib/kokkos/core/unit_test/TestAtomic.hpp
index d273c287e..df8419794 100644
--- a/lib/kokkos/core/unit_test/TestAtomic.hpp
+++ b/lib/kokkos/core/unit_test/TestAtomic.hpp
@@ -1,376 +1,376 @@
/*
//@HEADER
// ************************************************************************
-//
+//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
-//
+//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
-//
+//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
-//
+//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
namespace TestAtomic {
// Struct for testing arbitrary size atomics
template<int N>
struct SuperScalar {
double val[N];
KOKKOS_INLINE_FUNCTION
SuperScalar() {
for(int i=0; i<N; i++)
val[i] = 0.0;
}
KOKKOS_INLINE_FUNCTION
SuperScalar(const SuperScalar& src) {
for(int i=0; i<N; i++)
val[i] = src.val[i];
}
KOKKOS_INLINE_FUNCTION
SuperScalar(const volatile SuperScalar& src) {
for(int i=0; i<N; i++)
val[i] = src.val[i];
}
KOKKOS_INLINE_FUNCTION
SuperScalar& operator = (const SuperScalar& src) {
for(int i=0; i<N; i++)
val[i] = src.val[i];
return *this;
}
KOKKOS_INLINE_FUNCTION
SuperScalar& operator = (const volatile SuperScalar& src) {
for(int i=0; i<N; i++)
val[i] = src.val[i];
return *this;
}
KOKKOS_INLINE_FUNCTION
volatile SuperScalar& operator = (const SuperScalar& src) volatile {
for(int i=0; i<N; i++)
val[i] = src.val[i];
return *this;
}
KOKKOS_INLINE_FUNCTION
SuperScalar operator + (const SuperScalar& src) {
SuperScalar tmp = *this;
for(int i=0; i<N; i++)
tmp.val[i] += src.val[i];
return tmp;
}
KOKKOS_INLINE_FUNCTION
SuperScalar& operator += (const double& src) {
for(int i=0; i<N; i++)
val[i] += 1.0*(i+1)*src;
return *this;
}
KOKKOS_INLINE_FUNCTION
SuperScalar& operator += (const SuperScalar& src) {
for(int i=0; i<N; i++)
val[i] += src.val[i];
return *this;
}
KOKKOS_INLINE_FUNCTION
bool operator == (const SuperScalar& src) {
bool compare = true;
for(int i=0; i<N; i++)
compare = compare && ( val[i] == src.val[i]);
return compare;
}
KOKKOS_INLINE_FUNCTION
bool operator != (const SuperScalar& src) {
bool compare = true;
for(int i=0; i<N; i++)
compare = compare && ( val[i] == src.val[i]);
return !compare;
}
KOKKOS_INLINE_FUNCTION
SuperScalar(const double& src) {
for(int i=0; i<N; i++)
val[i] = 1.0 * (i+1) * src;
}
};
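// Note: the volatile copy constructor and assignment overloads above are
// what allow SuperScalar to be used with Kokkos atomics at all; the
// lock-based fallback for types wider than the hardware atomic width
// reads and writes the destination through volatile references.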
template<int N>
std::ostream& operator<<(std::ostream& os, const SuperScalar<N>& dt)
{
os << "{ ";
for(int i=0;i<N-1;i++)
os << dt.val[i] << ", ";
os << dt.val[N-1] << "}";
return os;
}
template<class T,class DEVICE_TYPE>
struct ZeroFunctor {
typedef DEVICE_TYPE execution_space;
typedef typename Kokkos::View<T,execution_space> type;
typedef typename Kokkos::View<T,execution_space>::HostMirror h_type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int) const {
data() = 0;
}
};
//---------------------------------------------------
//--------------atomic_fetch_add---------------------
//---------------------------------------------------
template<class T,class DEVICE_TYPE>
struct AddFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int) const {
Kokkos::atomic_fetch_add(&data(),(T)1);
}
};
template<class T, class execution_space >
T AddLoop(int loop) {
struct ZeroFunctor<T,execution_space> f_zero;
typename ZeroFunctor<T,execution_space>::type data("Data");
typename ZeroFunctor<T,execution_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
execution_space::fence();
struct AddFunctor<T,execution_space> f_add;
f_add.data = data;
Kokkos::parallel_for(loop,f_add);
execution_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
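// The function above is the template for each atomic test: zero a
// device-resident scalar, launch `loop` work items that each apply one
// atomic update, fence, then copy the result back to compare against the
// serial reference below. Sketch of the expected invariant (SomeExecSpace
// stands for any enabled execution space):
//
//   AddLoop<int,SomeExecSpace>(n) == n   // n atomic increments of zero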
template<class T>
T AddLoopSerial(int loop) {
T* data = new T[1];
data[0] = 0;
for(int i=0;i<loop;i++)
*data+=(T)1;
T val = *data;
- delete data;
+ delete [] data;
return val;
}
template<class T,class DEVICE_TYPE>
struct CASFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int) const {
T old = data();
T newval, assumed;
do {
assumed = old;
newval = assumed + (T)1;
old = Kokkos::atomic_compare_exchange(&data(), assumed, newval);
}
while( old != assumed );
}
};
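// The functor above is the canonical compare-and-swap retry loop: read
// the current value, compute the desired update, attempt the swap, and
// if another thread changed the value in the meantime retry with the
// value atomic_compare_exchange returned. The loop exits only when the
// returned value matches the assumed one, i.e. the swap succeeded.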
template<class T, class execution_space >
T CASLoop(int loop) {
struct ZeroFunctor<T,execution_space> f_zero;
typename ZeroFunctor<T,execution_space>::type data("Data");
typename ZeroFunctor<T,execution_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
execution_space::fence();
struct CASFunctor<T,execution_space> f_cas;
f_cas.data = data;
Kokkos::parallel_for(loop,f_cas);
execution_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
template<class T>
T CASLoopSerial(int loop) {
T* data = new T[1];
data[0] = 0;
for(int i=0;i<loop;i++) {
T assumed;
T newval;
T old;
do {
assumed = *data;
newval = assumed + (T)1;
old = *data;
*data = newval;
}
while(!(assumed==old));
}
T val = *data;
- delete data;
+ delete [] data;
return val;
}
template<class T,class DEVICE_TYPE>
struct ExchFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data, data2;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
T old = Kokkos::atomic_exchange(&data(),(T)i);
Kokkos::atomic_fetch_add(&data2(),old);
}
};
template<class T, class execution_space >
T ExchLoop(int loop) {
struct ZeroFunctor<T,execution_space> f_zero;
typename ZeroFunctor<T,execution_space>::type data("Data");
typename ZeroFunctor<T,execution_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
execution_space::fence();
typename ZeroFunctor<T,execution_space>::type data2("Data2");
typename ZeroFunctor<T,execution_space>::h_type h_data2("HData2");
f_zero.data = data2;
Kokkos::parallel_for(1,f_zero);
execution_space::fence();
struct ExchFunctor<T,execution_space> f_exch;
f_exch.data = data;
f_exch.data2 = data2;
Kokkos::parallel_for(loop,f_exch);
execution_space::fence();
Kokkos::deep_copy(h_data,data);
Kokkos::deep_copy(h_data2,data2);
T val = h_data() + h_data2();
return val;
}
template<class T>
T ExchLoopSerial(int loop) {
T* data = new T[1];
T* data2 = new T[1];
data[0] = 0;
data2[0] = 0;
for(int i=0;i<loop;i++) {
T old = *data;
*data=(T) i;
*data2+=old;
}
T val = *data2 + *data;
- delete data;
- delete data2;
+ delete [] data;
+ delete [] data2;
return val;
}
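// Note: the exchange test's invariant is order-independent. Every value
// 0..loop-1 is stored exactly once and every displaced value (including
// the initial zero) is accumulated into data2, so data + data2 always
// equals 0 + 1 + ... + (loop-1) regardless of thread interleaving.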
template<class T, class DeviceType >
T LoopVariant(int loop, int test) {
switch (test) {
case 1: return AddLoop<T,DeviceType>(loop);
case 2: return CASLoop<T,DeviceType>(loop);
case 3: return ExchLoop<T,DeviceType>(loop);
}
return 0;
}
template<class T>
T LoopVariantSerial(int loop, int test) {
switch (test) {
case 1: return AddLoopSerial<T>(loop);
case 2: return CASLoopSerial<T>(loop);
case 3: return ExchLoopSerial<T>(loop);
}
return 0;
}
template<class T,class DeviceType>
bool Loop(int loop, int test)
{
T res = LoopVariant<T,DeviceType>(loop,test);
T resSerial = LoopVariantSerial<T>(loop,test);
bool passed = true;
if ( resSerial != res ) {
passed = false;
std::cout << "Loop<"
<< typeid(T).name()
<< ">( test = "
<< test << " ) FAILED : "
<< resSerial << " != " << res
<< std::endl ;
}
return passed ;
}
}
diff --git a/lib/kokkos/core/unit_test/TestCuda.cpp b/lib/kokkos/core/unit_test/TestCuda.cpp
index 17c757f2b..661484009 100644
--- a/lib/kokkos/core/unit_test/TestCuda.cpp
+++ b/lib/kokkos/core/unit_test/TestCuda.cpp
@@ -1,491 +1,509 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <iostream>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <impl/Kokkos_ViewTileLeft.hpp>
#include <TestTile.hpp>
//----------------------------------------------------------------------------
#include <TestSharedAlloc.hpp>
#include <TestViewMapping.hpp>
#include <TestViewImpl.hpp>
#include <TestAtomic.hpp>
#include <TestViewAPI.hpp>
#include <TestViewSubview.hpp>
+#include <TestViewOfClass.hpp>
#include <TestReduce.hpp>
#include <TestScan.hpp>
#include <TestRange.hpp>
#include <TestTeam.hpp>
#include <TestAggregate.hpp>
#include <TestAggregateReduction.hpp>
#include <TestCompilerMacros.hpp>
#include <TestMemorySpaceTracking.hpp>
#include <TestTeamVector.hpp>
#include <TestTemplateMetaFunctions.hpp>
#include <TestCXX11Deduction.hpp>
//----------------------------------------------------------------------------
class cuda : public ::testing::Test {
protected:
static void SetUpTestCase()
{
Kokkos::Cuda::print_configuration( std::cout );
Kokkos::HostSpace::execution_space::initialize();
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
}
static void TearDownTestCase()
{
Kokkos::Cuda::finalize();
Kokkos::HostSpace::execution_space::finalize();
}
};
//----------------------------------------------------------------------------
namespace Test {
__global__
void test_abort()
{
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
Kokkos::CudaSpace ,
Kokkos::HostSpace >::verify();
}
__global__
void test_cuda_spaces_int_value( int * ptr )
{
if ( *ptr == 42 ) { *ptr = 2 * 42 ; }
}
TEST_F( cuda , compiler_macros )
{
ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Cuda >() ) );
}
TEST_F( cuda , memory_space )
{
TestMemorySpace< Kokkos::Cuda >();
}
-TEST_F( cuda, spaces )
+TEST_F( cuda, uvm )
{
if ( Kokkos::CudaUVMSpace::available() ) {
- Kokkos::Impl::AllocationTracker tracker = Kokkos::CudaUVMSpace::allocate_and_track("uvm_ptr",sizeof(int));
-
- int * uvm_ptr = (int*) tracker.alloc_ptr();
+ int * uvm_ptr = (int*) Kokkos::kokkos_malloc< Kokkos::CudaUVMSpace >("uvm_ptr",sizeof(int));
*uvm_ptr = 42 ;
Kokkos::Cuda::fence();
test_cuda_spaces_int_value<<<1,1>>>(uvm_ptr);
Kokkos::Cuda::fence();
EXPECT_EQ( *uvm_ptr, int(2*42) );
+ Kokkos::kokkos_free< Kokkos::CudaUVMSpace >(uvm_ptr );
}
}
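// Note: CudaUVMSpace memory is addressable from both host and device;
// the fences above order the host write, the kernel's update, and the
// host read of *uvm_ptr. The same round trip, sketched:
//
//   int * p = (int*) Kokkos::kokkos_malloc< Kokkos::CudaUVMSpace >("p",sizeof(int));
//   *p = 1 ; Kokkos::Cuda::fence();
//   test_cuda_spaces_int_value<<<1,1>>>(p); Kokkos::Cuda::fence();
//   Kokkos::kokkos_free< Kokkos::CudaUVMSpace >(p);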
//----------------------------------------------------------------------------
TEST_F( cuda , impl_shared_alloc )
{
test_shared_alloc< Kokkos::CudaSpace , Kokkos::HostSpace::execution_space >();
test_shared_alloc< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >();
test_shared_alloc< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >();
}
TEST_F( cuda , impl_view_mapping )
{
test_view_mapping< Kokkos::Cuda >();
test_view_mapping_subview< Kokkos::Cuda >();
test_view_mapping_operator< Kokkos::Cuda >();
TestViewMappingAtomic< Kokkos::Cuda >::run();
}
+TEST_F( cuda , view_of_class )
+{
+ TestViewMappingClassValue< Kokkos::Cuda >::run();
+}
+
template< class MemSpace >
struct TestViewCudaTexture {
enum { N = 1000 };
using V = Kokkos::Experimental::View<double*,MemSpace> ;
using T = Kokkos::Experimental::View<const double*, MemSpace, Kokkos::MemoryRandomAccess > ;
V m_base ;
T m_tex ;
struct TagInit {};
struct TagTest {};
KOKKOS_INLINE_FUNCTION
void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; }
KOKKOS_INLINE_FUNCTION
void operator()( const TagTest & , const int i , long & error_count ) const
{ if ( m_tex[i] != i + 1 ) ++error_count ; }
TestViewCudaTexture()
: m_base("base",N)
, m_tex( m_base )
{}
static void run()
{
EXPECT_TRUE( ( std::is_same< typename V::reference_type
, double &
>::value ) );
EXPECT_TRUE( ( std::is_same< typename T::reference_type
, const double
>::value ) );
EXPECT_TRUE( V::reference_type_is_lvalue_reference ); // An ordinary view
EXPECT_FALSE( T::reference_type_is_lvalue_reference ); // Texture fetch returns by value
TestViewCudaTexture self ;
Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda , TagInit >(0,N) , self );
long error_count = -1 ;
Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::Cuda , TagTest >(0,N) , self , error_count );
EXPECT_EQ( error_count , 0 );
}
};
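// Note: a const View with the MemoryRandomAccess trait may be serviced
// through the CUDA texture path, which returns elements by value; that
// is why T::reference_type is 'const double' rather than an lvalue
// reference, and why m_tex is constructed as an alias of m_base.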
TEST_F( cuda , impl_view_texture )
{
TestViewCudaTexture< Kokkos::CudaSpace >::run();
TestViewCudaTexture< Kokkos::CudaUVMSpace >::run();
}
template< class MemSpace , class ExecSpace >
struct TestViewCudaAccessible {
enum { N = 1000 };
using V = Kokkos::Experimental::View<double*,MemSpace> ;
V m_base ;
struct TagInit {};
struct TagTest {};
KOKKOS_INLINE_FUNCTION
void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; }
KOKKOS_INLINE_FUNCTION
void operator()( const TagTest & , const int i , long & error_count ) const
{ if ( m_base[i] != i + 1 ) ++error_count ; }
TestViewCudaAccessible()
: m_base("base",N)
{}
static void run()
{
TestViewCudaAccessible self ;
Kokkos::parallel_for( Kokkos::RangePolicy< typename MemSpace::execution_space , TagInit >(0,N) , self );
MemSpace::execution_space::fence();
// The next access is from a different execution space; the prior kernel must complete first.
long error_count = -1 ;
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , TagTest >(0,N) , self , error_count );
EXPECT_EQ( error_count , 0 );
}
};
TEST_F( cuda , impl_view_accessible )
{
TestViewCudaAccessible< Kokkos::CudaSpace , Kokkos::Cuda >::run();
TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::Cuda >::run();
TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >::run();
TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::Cuda >::run();
TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >::run();
}
//----------------------------------------------------------------------------
TEST_F( cuda, view_impl )
{
// test_abort<<<32,32>>>(); // Aborts the kernel with CUDA version 4.1 or greater
test_view_impl< Kokkos::Cuda >();
}
TEST_F( cuda, view_api )
{
typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess > > view_texture_managed ;
typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess | Kokkos::Unmanaged > > view_texture_unmanaged ;
TestViewAPI< double , Kokkos::Cuda >();
#if 0
Kokkos::View<double, Kokkos::Cuda > x("x");
Kokkos::View<double[1], Kokkos::Cuda > y("y");
// *x = 10 ;
// x() = 10 ;
// y[0] = 10 ;
// y(0) = 10 ;
#endif
}
+
+TEST_F( cuda , view_nested_view )
+{
+ ::Test::view_nested_view< Kokkos::Cuda >();
+}
+
TEST_F( cuda, view_subview_auto_1d_left ) {
TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Cuda >();
}
TEST_F( cuda, view_subview_auto_1d_right ) {
TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Cuda >();
}
TEST_F( cuda, view_subview_auto_1d_stride ) {
TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Cuda >();
}
TEST_F( cuda, view_subview_assign_strided ) {
TestViewSubview::test_1d_strided_assignment< Kokkos::Cuda >();
}
TEST_F( cuda, view_subview_left_0 ) {
TestViewSubview::test_left_0< Kokkos::CudaUVMSpace >();
}
TEST_F( cuda, view_subview_left_1 ) {
TestViewSubview::test_left_1< Kokkos::CudaUVMSpace >();
}
TEST_F( cuda, view_subview_left_2 ) {
TestViewSubview::test_left_2< Kokkos::CudaUVMSpace >();
}
TEST_F( cuda, view_subview_left_3 ) {
TestViewSubview::test_left_3< Kokkos::CudaUVMSpace >();
}
TEST_F( cuda, view_subview_right_0 ) {
TestViewSubview::test_right_0< Kokkos::CudaUVMSpace >();
}
TEST_F( cuda, view_subview_right_1 ) {
TestViewSubview::test_right_1< Kokkos::CudaUVMSpace >();
}
TEST_F( cuda, view_subview_right_3 ) {
TestViewSubview::test_right_3< Kokkos::CudaUVMSpace >();
}
TEST_F( cuda, range_tag )
{
TestRange< Kokkos::Cuda >::test_for(1000);
TestRange< Kokkos::Cuda >::test_reduce(1000);
TestRange< Kokkos::Cuda >::test_scan(1000);
}
TEST_F( cuda, team_tag )
{
TestTeamPolicy< Kokkos::Cuda >::test_for(1000);
TestTeamPolicy< Kokkos::Cuda >::test_reduce(1000);
}
TEST_F( cuda, reduce )
{
TestReduce< long , Kokkos::Cuda >( 10000000 );
TestReduce< double , Kokkos::Cuda >( 1000000 );
}
TEST_F( cuda, reduce_team )
{
TestReduceTeam< long , Kokkos::Cuda >( 10000000 );
TestReduceTeam< double , Kokkos::Cuda >( 1000000 );
}
TEST_F( cuda, shared_team )
{
TestSharedTeam< Kokkos::Cuda >();
}
+#if defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
+TEST_F( cuda, lambda_shared_team )
+{
+ TestLambdaSharedTeam< Kokkos::Cuda >();
+}
+#endif
+
TEST_F( cuda, reduce_dynamic )
{
TestReduceDynamic< long , Kokkos::Cuda >( 10000000 );
TestReduceDynamic< double , Kokkos::Cuda >( 1000000 );
}
TEST_F( cuda, reduce_dynamic_view )
{
TestReduceDynamicView< long , Kokkos::Cuda >( 10000000 );
TestReduceDynamicView< double , Kokkos::Cuda >( 1000000 );
}
TEST_F( cuda, atomic )
{
const int loop_count = 1e3 ;
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Cuda>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Cuda>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Cuda>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Cuda>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Cuda>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Cuda>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Cuda>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Cuda>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Cuda>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Cuda>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Cuda>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Cuda>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Cuda>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Cuda>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Cuda>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Cuda>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Cuda>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Cuda>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Cuda>(100,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Cuda>(100,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Cuda>(100,3) ) );
}
//----------------------------------------------------------------------------
TEST_F( cuda, tile_layout)
{
TestTile::test< Kokkos::Cuda , 1 , 1 >( 1 , 1 );
TestTile::test< Kokkos::Cuda , 1 , 1 >( 2 , 3 );
TestTile::test< Kokkos::Cuda , 1 , 1 >( 9 , 10 );
TestTile::test< Kokkos::Cuda , 2 , 2 >( 1 , 1 );
TestTile::test< Kokkos::Cuda , 2 , 2 >( 2 , 3 );
TestTile::test< Kokkos::Cuda , 2 , 2 >( 4 , 4 );
TestTile::test< Kokkos::Cuda , 2 , 2 >( 9 , 9 );
TestTile::test< Kokkos::Cuda , 2 , 4 >( 9 , 9 );
TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 9 );
TestTile::test< Kokkos::Cuda , 4 , 4 >( 1 , 1 );
TestTile::test< Kokkos::Cuda , 4 , 4 >( 4 , 4 );
TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 9 );
TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 11 );
TestTile::test< Kokkos::Cuda , 8 , 8 >( 1 , 1 );
TestTile::test< Kokkos::Cuda , 8 , 8 >( 4 , 4 );
TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 9 );
TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 11 );
}
TEST_F( cuda , view_aggregate )
{
TestViewAggregate< Kokkos::Cuda >();
TestViewAggregateReduction< Kokkos::Cuda >();
}
TEST_F( cuda , scan )
{
TestScan< Kokkos::Cuda >::test_range( 1 , 1000 );
TestScan< Kokkos::Cuda >( 1000000 );
TestScan< Kokkos::Cuda >( 10000000 );
Kokkos::Cuda::fence();
}
TEST_F( cuda , team_scan )
{
TestScanTeam< Kokkos::Cuda >( 10 );
TestScanTeam< Kokkos::Cuda >( 10000 );
}
}
//----------------------------------------------------------------------------
TEST_F( cuda , template_meta_functions )
{
TestTemplateMetaFunctions<int, Kokkos::Cuda >();
}
//----------------------------------------------------------------------------
namespace Test {
TEST_F( cuda , reduction_deduction )
{
TestCXX11::test_reduction_deduction< Kokkos::Cuda >();
}
TEST_F( cuda , team_vector )
{
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(0) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(1) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(2) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(3) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(4) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(5) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(6) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(7) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(8) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(9) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(10) ) );
}
}
diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp
index 73e5bf85a..1f4a2e84d 100644
--- a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp
+++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp
@@ -1,248 +1,248 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if !defined(KOKKOS_HAVE_CUDA) || defined(__CUDACC__)
//----------------------------------------------------------------------------
#include <TestViewImpl.hpp>
#include <TestAtomic.hpp>
#include <TestViewAPI.hpp>
#include <TestReduce.hpp>
#include <TestScan.hpp>
#include <TestTeam.hpp>
#include <TestAggregate.hpp>
#include <TestCompilerMacros.hpp>
#include <TestCXX11.hpp>
#include <TestTeamVector.hpp>
namespace Test {
class defaultdevicetype : public ::testing::Test {
protected:
static void SetUpTestCase()
{
Kokkos::initialize();
}
static void TearDownTestCase()
{
Kokkos::finalize();
}
};
TEST_F( defaultdevicetype, view_impl) {
test_view_impl< Kokkos::DefaultExecutionSpace >();
}
TEST_F( defaultdevicetype, view_api) {
TestViewAPI< double , Kokkos::DefaultExecutionSpace >();
}
TEST_F( defaultdevicetype, long_reduce) {
TestReduce< long , Kokkos::DefaultExecutionSpace >( 100000 );
}
TEST_F( defaultdevicetype, double_reduce) {
TestReduce< double , Kokkos::DefaultExecutionSpace >( 100000 );
}
TEST_F( defaultdevicetype, long_reduce_dynamic ) {
TestReduceDynamic< long , Kokkos::DefaultExecutionSpace >( 100000 );
}
TEST_F( defaultdevicetype, double_reduce_dynamic ) {
TestReduceDynamic< double , Kokkos::DefaultExecutionSpace >( 100000 );
}
TEST_F( defaultdevicetype, long_reduce_dynamic_view ) {
TestReduceDynamicView< long , Kokkos::DefaultExecutionSpace >( 100000 );
}
TEST_F( defaultdevicetype , atomics )
{
const int loop_count = 1e4 ;
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,3) ) );
}
/*TEST_F( defaultdevicetype , view_remap )
{
enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
typedef Kokkos::View< double*[N1][N2][N3] ,
Kokkos::LayoutRight ,
Kokkos::DefaultExecutionSpace > output_type ;
typedef Kokkos::View< int**[N2][N3] ,
Kokkos::LayoutLeft ,
Kokkos::DefaultExecutionSpace > input_type ;
typedef Kokkos::View< int*[N0][N2][N3] ,
Kokkos::LayoutLeft ,
Kokkos::DefaultExecutionSpace > diff_type ;
output_type output( "output" , N0 );
input_type input ( "input" , N0 , N1 );
diff_type diff ( "diff" , N0 );
int value = 0 ;
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
input(i0,i1,i2,i3) = ++value ;
}}}}
// Kokkos::deep_copy( diff , input ); // throws with incompatible shape
Kokkos::deep_copy( output , input );
value = 0 ;
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
++value ;
ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
}}}}
}*/
//----------------------------------------------------------------------------
TEST_F( defaultdevicetype , view_aggregate )
{
TestViewAggregate< Kokkos::DefaultExecutionSpace >();
}
//----------------------------------------------------------------------------
TEST_F( defaultdevicetype , scan )
{
TestScan< Kokkos::DefaultExecutionSpace >::test_range( 1 , 1000 );
TestScan< Kokkos::DefaultExecutionSpace >( 1000000 );
TestScan< Kokkos::DefaultExecutionSpace >( 10000000 );
Kokkos::DefaultExecutionSpace::fence();
}
TEST_F( defaultdevicetype , team_scan )
{
TestScanTeam< Kokkos::DefaultExecutionSpace >( 10 );
TestScanTeam< Kokkos::DefaultExecutionSpace >( 10000 );
}
//----------------------------------------------------------------------------
TEST_F( defaultdevicetype , compiler_macros )
{
ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::DefaultExecutionSpace >() ) );
}
//----------------------------------------------------------------------------
TEST_F( defaultdevicetype , cxx11 )
{
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(1) ) );
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(2) ) );
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(3) ) );
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(4) ) );
}
TEST_F( defaultdevicetype , team_vector )
{
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(0) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(1) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(2) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(3) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(4) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(5) ) );
}
TEST_F( defaultdevicetype , malloc )
{
int* data = (int*) Kokkos::kokkos_malloc(100*sizeof(int));
ASSERT_NO_THROW(data = (int*) Kokkos::kokkos_realloc(data,120*sizeof(int)));
Kokkos::kokkos_free(data);
int* data2 = (int*) Kokkos::kokkos_malloc(0);
ASSERT_TRUE(data2==NULL);
- Kokkos::kokkos_free(data);
+ Kokkos::kokkos_free(data2);
}
} // namespace Test
#endif
diff --git a/lib/kokkos/core/unit_test/TestOpenMP.cpp b/lib/kokkos/core/unit_test/TestOpenMP.cpp
index 7f3a24567..483352d1e 100644
--- a/lib/kokkos/core/unit_test/TestOpenMP.cpp
+++ b/lib/kokkos/core/unit_test/TestOpenMP.cpp
@@ -1,373 +1,383 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestViewImpl.hpp>
#include <TestAtomic.hpp>
#include <TestViewAPI.hpp>
#include <TestViewSubview.hpp>
+#include <TestViewOfClass.hpp>
#include <TestSharedAlloc.hpp>
#include <TestViewMapping.hpp>
#include <TestRange.hpp>
#include <TestTeam.hpp>
#include <TestReduce.hpp>
#include <TestScan.hpp>
#include <TestAggregate.hpp>
#include <TestAggregateReduction.hpp>
#include <TestCompilerMacros.hpp>
#include <TestCXX11.hpp>
#include <TestCXX11Deduction.hpp>
#include <TestTeamVector.hpp>
#include <TestMemorySpaceTracking.hpp>
#include <TestTemplateMetaFunctions.hpp>
namespace Test {
class openmp : public ::testing::Test {
protected:
static void SetUpTestCase()
{
const unsigned numa_count = Kokkos::hwloc::get_available_numa_count();
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
const unsigned threads_count = std::max( 1u , numa_count ) *
std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 );
Kokkos::OpenMP::initialize( threads_count );
Kokkos::OpenMP::print_configuration( std::cout , true );
}
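// Note: the thread count chosen above is a heuristic, at least two
// threads per NUMA region and otherwise roughly half of the hardware
// threads hwloc reports, so the tests exercise real concurrency without
// oversubscribing shared machines.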
static void TearDownTestCase()
{
Kokkos::OpenMP::finalize();
omp_set_num_threads(1);
ASSERT_EQ( 1 , omp_get_max_threads() );
}
};
TEST_F( openmp , impl_shared_alloc ) {
test_shared_alloc< Kokkos::HostSpace , Kokkos::OpenMP >();
}
TEST_F( openmp , impl_view_mapping ) {
test_view_mapping< Kokkos::OpenMP >();
test_view_mapping_subview< Kokkos::OpenMP >();
test_view_mapping_operator< Kokkos::OpenMP >();
TestViewMappingAtomic< Kokkos::OpenMP >::run();
}
TEST_F( openmp, view_impl) {
test_view_impl< Kokkos::OpenMP >();
}
TEST_F( openmp, view_api) {
TestViewAPI< double , Kokkos::OpenMP >();
}
+TEST_F( openmp , view_nested_view )
+{
+ ::Test::view_nested_view< Kokkos::OpenMP >();
+}
TEST_F( openmp, view_subview_auto_1d_left ) {
TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::OpenMP >();
}
TEST_F( openmp, view_subview_auto_1d_right ) {
TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::OpenMP >();
}
TEST_F( openmp, view_subview_auto_1d_stride ) {
TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::OpenMP >();
}
TEST_F( openmp, view_subview_assign_strided ) {
TestViewSubview::test_1d_strided_assignment< Kokkos::OpenMP >();
}
TEST_F( openmp, view_subview_left_0 ) {
TestViewSubview::test_left_0< Kokkos::OpenMP >();
}
TEST_F( openmp, view_subview_left_1 ) {
TestViewSubview::test_left_1< Kokkos::OpenMP >();
}
TEST_F( openmp, view_subview_left_2 ) {
TestViewSubview::test_left_2< Kokkos::OpenMP >();
}
TEST_F( openmp, view_subview_left_3 ) {
TestViewSubview::test_left_3< Kokkos::OpenMP >();
}
TEST_F( openmp, view_subview_right_0 ) {
TestViewSubview::test_right_0< Kokkos::OpenMP >();
}
TEST_F( openmp, view_subview_right_1 ) {
TestViewSubview::test_right_1< Kokkos::OpenMP >();
}
TEST_F( openmp, view_subview_right_3 ) {
TestViewSubview::test_right_3< Kokkos::OpenMP >();
}
TEST_F( openmp , range_tag )
{
TestRange< Kokkos::OpenMP >::test_for(1000);
TestRange< Kokkos::OpenMP >::test_reduce(1000);
TestRange< Kokkos::OpenMP >::test_scan(1000);
}
TEST_F( openmp , team_tag )
{
TestTeamPolicy< Kokkos::OpenMP >::test_for(1000);
TestTeamPolicy< Kokkos::OpenMP >::test_reduce(1000);
}
TEST_F( openmp, long_reduce) {
TestReduce< long , Kokkos::OpenMP >( 1000000 );
}
TEST_F( openmp, double_reduce) {
TestReduce< double , Kokkos::OpenMP >( 1000000 );
}
TEST_F( openmp, long_reduce_dynamic ) {
TestReduceDynamic< long , Kokkos::OpenMP >( 1000000 );
}
TEST_F( openmp, double_reduce_dynamic ) {
TestReduceDynamic< double , Kokkos::OpenMP >( 1000000 );
}
TEST_F( openmp, long_reduce_dynamic_view ) {
TestReduceDynamicView< long , Kokkos::OpenMP >( 1000000 );
}
TEST_F( openmp, team_long_reduce) {
TestReduceTeam< long , Kokkos::OpenMP >( 100000 );
}
TEST_F( openmp, team_double_reduce) {
TestReduceTeam< double , Kokkos::OpenMP >( 100000 );
}
TEST_F( openmp, team_shared_request) {
TestSharedTeam< Kokkos::OpenMP >();
}
+#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) && !defined(KOKKOS_HAVE_CUDA)
+TEST_F( openmp, team_lambda_shared_request) {
+ TestLambdaSharedTeam< Kokkos::OpenMP >();
+}
+#endif
TEST_F( openmp , atomics )
{
const int loop_count = 1e4 ;
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::OpenMP>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::OpenMP>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::OpenMP>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::OpenMP>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::OpenMP>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::OpenMP>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::OpenMP>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::OpenMP>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::OpenMP>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::OpenMP>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::OpenMP>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::OpenMP>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::OpenMP>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::OpenMP>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::OpenMP>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::OpenMP>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::OpenMP>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::OpenMP>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::OpenMP>(100,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::OpenMP>(100,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::OpenMP>(100,3) ) );
#if defined( KOKKOS_ENABLE_ASM )
ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::OpenMP>(100,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::OpenMP>(100,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::OpenMP>(100,3) ) );
#endif
}
TEST_F( openmp , view_remap )
{
enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
typedef Kokkos::View< double*[N1][N2][N3] ,
Kokkos::LayoutRight ,
Kokkos::OpenMP > output_type ;
typedef Kokkos::View< int**[N2][N3] ,
Kokkos::LayoutLeft ,
Kokkos::OpenMP > input_type ;
typedef Kokkos::View< int*[N0][N2][N3] ,
Kokkos::LayoutLeft ,
Kokkos::OpenMP > diff_type ;
output_type output( "output" , N0 );
input_type input ( "input" , N0 , N1 );
diff_type diff ( "diff" , N0 );
int value = 0 ;
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
input(i0,i1,i2,i3) = ++value ;
}}}}
// Kokkos::deep_copy( diff , input ); // throws with incompatible shape
Kokkos::deep_copy( output , input );
value = 0 ;
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
++value ;
ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
}}}}
}
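// Note: the deep_copy above is legal even though output is LayoutRight
// and input is LayoutLeft, because their extents match; Kokkos performs
// the index remap. The commented-out deep_copy( diff , input ) would
// throw, since diff's second extent is N0 rather than N1.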
//----------------------------------------------------------------------------
TEST_F( openmp , view_aggregate )
{
TestViewAggregate< Kokkos::OpenMP >();
TestViewAggregateReduction< Kokkos::OpenMP >();
}
//----------------------------------------------------------------------------
TEST_F( openmp , scan )
{
TestScan< Kokkos::OpenMP >::test_range( 1 , 1000 );
TestScan< Kokkos::OpenMP >( 1000000 );
TestScan< Kokkos::OpenMP >( 10000000 );
Kokkos::OpenMP::fence();
}
TEST_F( openmp , team_scan )
{
TestScanTeam< Kokkos::OpenMP >( 10 );
TestScanTeam< Kokkos::OpenMP >( 10000 );
}
//----------------------------------------------------------------------------
TEST_F( openmp , compiler_macros )
{
ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::OpenMP >() ) );
}
//----------------------------------------------------------------------------
TEST_F( openmp , memory_space )
{
TestMemorySpace< Kokkos::OpenMP >();
}
//----------------------------------------------------------------------------
TEST_F( openmp , template_meta_functions )
{
TestTemplateMetaFunctions<int, Kokkos::OpenMP >();
}
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
TEST_F( openmp , cxx11 )
{
if ( Kokkos::Impl::is_same< Kokkos::DefaultExecutionSpace , Kokkos::OpenMP >::value ) {
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(1) ) );
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(2) ) );
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(3) ) );
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(4) ) );
}
}
#endif
TEST_F( openmp , reduction_deduction )
{
TestCXX11::test_reduction_deduction< Kokkos::OpenMP >();
}
TEST_F( openmp , team_vector )
{
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(0) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(1) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(2) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(3) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(4) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(5) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(6) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(7) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(8) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(9) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(10) ) );
}
} // namespace Test
diff --git a/lib/kokkos/core/unit_test/TestQthread.cpp b/lib/kokkos/core/unit_test/TestQthread.cpp
index ff4053623..edcf7e90e 100644
--- a/lib/kokkos/core/unit_test/TestQthread.cpp
+++ b/lib/kokkos/core/unit_test/TestQthread.cpp
@@ -1,280 +1,286 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_Qthread.hpp>
#include <Qthread/Kokkos_Qthread_TaskPolicy.hpp>
//----------------------------------------------------------------------------
#include <TestViewImpl.hpp>
#include <TestAtomic.hpp>
#include <TestViewAPI.hpp>
+#include <TestViewOfClass.hpp>
#include <TestTeam.hpp>
#include <TestRange.hpp>
#include <TestReduce.hpp>
#include <TestScan.hpp>
#include <TestAggregate.hpp>
#include <TestCompilerMacros.hpp>
#include <TestTaskPolicy.hpp>
// #include <TestTeamVector.hpp>
namespace Test {
class qthread : public ::testing::Test {
protected:
static void SetUpTestCase()
{
const unsigned numa_count = Kokkos::hwloc::get_available_numa_count();
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
int threads_count = std::max( 1u , numa_count )
* std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 );
Kokkos::Qthread::initialize( threads_count );
Kokkos::Qthread::print_configuration( std::cout , true );
}
static void TearDownTestCase()
{
Kokkos::Qthread::finalize();
}
};
TEST_F( qthread , compiler_macros )
{
ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Qthread >() ) );
}
TEST_F( qthread, view_impl) {
test_view_impl< Kokkos::Qthread >();
}
TEST_F( qthread, view_api) {
TestViewAPI< double , Kokkos::Qthread >();
}
+TEST_F( qthread , view_nested_view )
+{
+ ::Test::view_nested_view< Kokkos::Qthread >();
+}
+
TEST_F( qthread , range_tag )
{
TestRange< Kokkos::Qthread >::test_for(1000);
TestRange< Kokkos::Qthread >::test_reduce(1000);
TestRange< Kokkos::Qthread >::test_scan(1000);
}
TEST_F( qthread , team_tag )
{
TestTeamPolicy< Kokkos::Qthread >::test_for( 1000 );
TestTeamPolicy< Kokkos::Qthread >::test_reduce( 1000 );
}
TEST_F( qthread, long_reduce) {
TestReduce< long , Kokkos::Qthread >( 1000000 );
}
TEST_F( qthread, double_reduce) {
TestReduce< double , Kokkos::Qthread >( 1000000 );
}
TEST_F( qthread, long_reduce_dynamic ) {
TestReduceDynamic< long , Kokkos::Qthread >( 1000000 );
}
TEST_F( qthread, double_reduce_dynamic ) {
TestReduceDynamic< double , Kokkos::Qthread >( 1000000 );
}
TEST_F( qthread, long_reduce_dynamic_view ) {
TestReduceDynamicView< long , Kokkos::Qthread >( 1000000 );
}
TEST_F( qthread, team_long_reduce) {
TestReduceTeam< long , Kokkos::Qthread >( 1000000 );
}
TEST_F( qthread, team_double_reduce) {
TestReduceTeam< double , Kokkos::Qthread >( 1000000 );
}
TEST_F( qthread , atomics )
{
const int loop_count = 1e4 ;
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Qthread>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Qthread>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Qthread>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Qthread>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Qthread>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Qthread>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Qthread>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Qthread>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Qthread>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Qthread>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Qthread>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Qthread>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Qthread>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Qthread>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Qthread>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Qthread>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Qthread>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Qthread>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Qthread>(100,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Qthread>(100,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Qthread>(100,3) ) );
#if defined( KOKKOS_ENABLE_ASM )
ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Qthread>(100,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Qthread>(100,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Qthread>(100,3) ) );
#endif
}
TEST_F( qthread , view_remap )
{
enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
typedef Kokkos::View< double*[N1][N2][N3] ,
Kokkos::LayoutRight ,
Kokkos::Qthread > output_type ;
typedef Kokkos::View< int**[N2][N3] ,
Kokkos::LayoutLeft ,
Kokkos::Qthread > input_type ;
typedef Kokkos::View< int*[N0][N2][N3] ,
Kokkos::LayoutLeft ,
Kokkos::Qthread > diff_type ;
output_type output( "output" , N0 );
input_type input ( "input" , N0 , N1 );
diff_type diff ( "diff" , N0 );
int value = 0 ;
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
input(i0,i1,i2,i3) = ++value ;
}}}}
// Kokkos::deep_copy( diff , input ); // throws with incompatible shape
Kokkos::deep_copy( output , input );
value = 0 ;
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
++value ;
ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
}}}}
}
//----------------------------------------------------------------------------
TEST_F( qthread , view_aggregate )
{
TestViewAggregate< Kokkos::Qthread >();
}
//----------------------------------------------------------------------------
TEST_F( qthread , scan )
{
TestScan< Kokkos::Qthread >::test_range( 1 , 1000 );
TestScan< Kokkos::Qthread >( 1000000 );
TestScan< Kokkos::Qthread >( 10000000 );
Kokkos::Qthread::fence();
}
TEST_F( qthread, team_shared ) {
TestSharedTeam< Kokkos::Qthread >();
}
TEST_F( qthread , team_scan )
{
TestScanTeam< Kokkos::Qthread >( 10 );
TestScanTeam< Kokkos::Qthread >( 10000 );
}
#if 0 /* disable */
TEST_F( qthread , team_vector )
{
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(0) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(1) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(2) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(3) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(4) ) );
}
#endif
//----------------------------------------------------------------------------
TEST_F( qthread , task_policy )
{
TestTaskPolicy::test_task_dep< Kokkos::Qthread >( 10 );
for ( long i = 0 ; i < 25 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Qthread >(i);
for ( long i = 0 ; i < 35 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Qthread >(i);
}
TEST_F( qthread , task_team )
{
TestTaskPolicy::test_task_team< Kokkos::Qthread >(1000);
}
//----------------------------------------------------------------------------
} // namespace test
diff --git a/lib/kokkos/core/unit_test/TestSerial.cpp b/lib/kokkos/core/unit_test/TestSerial.cpp
index 4514492e4..212a96fdc 100644
--- a/lib/kokkos/core/unit_test/TestSerial.cpp
+++ b/lib/kokkos/core/unit_test/TestSerial.cpp
@@ -1,407 +1,413 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_ViewTileLeft.hpp>
#include <TestTile.hpp>
#include <impl/Kokkos_Serial_TaskPolicy.hpp>
//----------------------------------------------------------------------------
#include <TestSharedAlloc.hpp>
#include <TestViewMapping.hpp>
#include <TestViewImpl.hpp>
#include <TestViewAPI.hpp>
#include <TestViewOfClass.hpp>
#include <TestViewSubview.hpp>
#include <TestAtomic.hpp>
#include <TestRange.hpp>
#include <TestTeam.hpp>
#include <TestReduce.hpp>
#include <TestScan.hpp>
#include <TestAggregate.hpp>
#include <TestAggregateReduction.hpp>
#include <TestCompilerMacros.hpp>
#include <TestTaskPolicy.hpp>
#include <TestCXX11.hpp>
#include <TestCXX11Deduction.hpp>
#include <TestTeamVector.hpp>
#include <TestMemorySpaceTracking.hpp>
#include <TestTemplateMetaFunctions.hpp>
namespace Test {
class serial : public ::testing::Test {
protected:
static void SetUpTestCase()
{
Kokkos::HostSpace::execution_space::initialize();
}
static void TearDownTestCase()
{
Kokkos::HostSpace::execution_space::finalize();
}
};
TEST_F( serial , impl_shared_alloc ) {
test_shared_alloc< Kokkos::HostSpace , Kokkos::Serial >();
}
TEST_F( serial , impl_view_mapping ) {
test_view_mapping< Kokkos::Serial >();
test_view_mapping_subview< Kokkos::Serial >();
test_view_mapping_operator< Kokkos::Serial >();
TestViewMappingAtomic< Kokkos::Serial >::run();
}
TEST_F( serial, view_impl) {
test_view_impl< Kokkos::Serial >();
}
TEST_F( serial, view_api) {
TestViewAPI< double , Kokkos::Serial >();
}
TEST_F( serial , view_nested_view )
{
::Test::view_nested_view< Kokkos::Serial >();
}
TEST_F( serial, view_subview_auto_1d_left ) {
TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Serial >();
}
TEST_F( serial, view_subview_auto_1d_right ) {
TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Serial >();
}
TEST_F( serial, view_subview_auto_1d_stride ) {
TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Serial >();
}
TEST_F( serial, view_subview_assign_strided ) {
TestViewSubview::test_1d_strided_assignment< Kokkos::Serial >();
}
TEST_F( serial, view_subview_left_0 ) {
TestViewSubview::test_left_0< Kokkos::Serial >();
}
TEST_F( serial, view_subview_left_1 ) {
TestViewSubview::test_left_1< Kokkos::Serial >();
}
TEST_F( serial, view_subview_left_2 ) {
TestViewSubview::test_left_2< Kokkos::Serial >();
}
TEST_F( serial, view_subview_left_3 ) {
TestViewSubview::test_left_3< Kokkos::Serial >();
}
TEST_F( serial, view_subview_right_0 ) {
TestViewSubview::test_right_0< Kokkos::Serial >();
}
TEST_F( serial, view_subview_right_1 ) {
TestViewSubview::test_right_1< Kokkos::Serial >();
}
TEST_F( serial, view_subview_right_3 ) {
TestViewSubview::test_right_3< Kokkos::Serial >();
}
TEST_F( serial , range_tag )
{
TestRange< Kokkos::Serial >::test_for(1000);
TestRange< Kokkos::Serial >::test_reduce(1000);
TestRange< Kokkos::Serial >::test_scan(1000);
}
TEST_F( serial , team_tag )
{
TestTeamPolicy< Kokkos::Serial >::test_for( 1000 );
TestTeamPolicy< Kokkos::Serial >::test_reduce( 1000 );
}
TEST_F( serial, long_reduce) {
TestReduce< long , Kokkos::Serial >( 1000000 );
}
TEST_F( serial, double_reduce) {
TestReduce< double , Kokkos::Serial >( 1000000 );
}
TEST_F( serial, long_reduce_dynamic ) {
TestReduceDynamic< long , Kokkos::Serial >( 1000000 );
}
TEST_F( serial, double_reduce_dynamic ) {
TestReduceDynamic< double , Kokkos::Serial >( 1000000 );
}
TEST_F( serial, long_reduce_dynamic_view ) {
TestReduceDynamicView< long , Kokkos::Serial >( 1000000 );
}
TEST_F( serial , scan )
{
TestScan< Kokkos::Serial >::test_range( 1 , 1000 );
TestScan< Kokkos::Serial >( 10 );
TestScan< Kokkos::Serial >( 10000 );
}
TEST_F( serial , team_long_reduce) {
TestReduceTeam< long , Kokkos::Serial >( 100000 );
}
TEST_F( serial , team_double_reduce) {
TestReduceTeam< double , Kokkos::Serial >( 100000 );
}
TEST_F( serial , team_shared_request) {
TestSharedTeam< Kokkos::Serial >();
}
+#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) && !defined(KOKKOS_HAVE_CUDA)
+TEST_F( serial , team_lambda_shared_request) {
+ TestLambdaSharedTeam< Kokkos::Serial >();
+}
+#endif
+
TEST_F( serial , team_scan )
{
TestScanTeam< Kokkos::Serial >( 10 );
TestScanTeam< Kokkos::Serial >( 10000 );
}
TEST_F( serial , view_remap )
{
enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
typedef Kokkos::View< double*[N1][N2][N3] ,
Kokkos::LayoutRight ,
Kokkos::Serial > output_type ;
typedef Kokkos::View< int**[N2][N3] ,
Kokkos::LayoutLeft ,
Kokkos::Serial > input_type ;
typedef Kokkos::View< int*[N0][N2][N3] ,
Kokkos::LayoutLeft ,
Kokkos::Serial > diff_type ;
output_type output( "output" , N0 );
input_type input ( "input" , N0 , N1 );
diff_type diff ( "diff" , N0 );
int value = 0 ;
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
input(i0,i1,i2,i3) = ++value ;
}}}}
// Kokkos::deep_copy( diff , input ); // throw with incompatible shape
Kokkos::deep_copy( output , input );
value = 0 ;
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
++value ;
ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
}}}}
}
//----------------------------------------------------------------------------
TEST_F( serial , view_aggregate )
{
TestViewAggregate< Kokkos::Serial >();
TestViewAggregateReduction< Kokkos::Serial >();
}
//----------------------------------------------------------------------------
TEST_F( serial , atomics )
{
const int loop_count = 1e6 ;
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,3) ) );
}
//----------------------------------------------------------------------------
TEST_F( serial, tile_layout )
{
TestTile::test< Kokkos::Serial , 1 , 1 >( 1 , 1 );
TestTile::test< Kokkos::Serial , 1 , 1 >( 2 , 3 );
TestTile::test< Kokkos::Serial , 1 , 1 >( 9 , 10 );
TestTile::test< Kokkos::Serial , 2 , 2 >( 1 , 1 );
TestTile::test< Kokkos::Serial , 2 , 2 >( 2 , 3 );
TestTile::test< Kokkos::Serial , 2 , 2 >( 4 , 4 );
TestTile::test< Kokkos::Serial , 2 , 2 >( 9 , 9 );
TestTile::test< Kokkos::Serial , 2 , 4 >( 9 , 9 );
TestTile::test< Kokkos::Serial , 4 , 2 >( 9 , 9 );
TestTile::test< Kokkos::Serial , 4 , 4 >( 1 , 1 );
TestTile::test< Kokkos::Serial , 4 , 4 >( 4 , 4 );
TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 9 );
TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 11 );
TestTile::test< Kokkos::Serial , 8 , 8 >( 1 , 1 );
TestTile::test< Kokkos::Serial , 8 , 8 >( 4 , 4 );
TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 9 );
TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 11 );
}
//----------------------------------------------------------------------------
TEST_F( serial , compiler_macros )
{
ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Serial >() ) );
}
//----------------------------------------------------------------------------
TEST_F( serial , memory_space )
{
TestMemorySpace< Kokkos::Serial >();
}
//----------------------------------------------------------------------------
TEST_F( serial , task_policy )
{
TestTaskPolicy::test_task_dep< Kokkos::Serial >( 10 );
// TestTaskPolicy::test_norm2< Kokkos::Serial >( 1000 );
// for ( long i = 0 ; i < 30 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Serial >(i);
// for ( long i = 0 ; i < 40 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Serial >(i);
for ( long i = 0 ; i < 20 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Serial >(i);
for ( long i = 0 ; i < 25 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Serial >(i);
}
TEST_F( serial , task_team )
{
TestTaskPolicy::test_task_team< Kokkos::Serial >(1000);
}
//----------------------------------------------------------------------------
TEST_F( serial , template_meta_functions )
{
TestTemplateMetaFunctions<int, Kokkos::Serial >();
}
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
TEST_F( serial , cxx11 )
{
if ( Kokkos::Impl::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Serial >::value ) {
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(1) ) );
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(2) ) );
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(3) ) );
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(4) ) );
}
}
#endif
TEST_F( serial , reduction_deduction )
{
TestCXX11::test_reduction_deduction< Kokkos::Serial >();
}
TEST_F( serial , team_vector )
{
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(0) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(1) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(2) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(3) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(4) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(5) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(6) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(7) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(8) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(9) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(10) ) );
}
} // namespace test
diff --git a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp
index 060f5f460..ab5c240ac 100644
--- a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp
+++ b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp
@@ -1,204 +1,214 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <stdexcept>
#include <sstream>
#include <iostream>
#include <Kokkos_Core.hpp>
/*--------------------------------------------------------------------------*/
namespace Test {
struct SharedAllocDestroy {
volatile int * count ;
SharedAllocDestroy() = default ;
SharedAllocDestroy( int * arg ) : count( arg ) {}
void destroy_shared_allocation()
{
Kokkos::atomic_fetch_add( count , 1 );
}
};
template< class MemorySpace , class ExecutionSpace >
void test_shared_alloc()
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
typedef const Kokkos::Experimental::Impl::SharedAllocationHeader Header ;
typedef Kokkos::Experimental::Impl::SharedAllocationTracker Tracker ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordMemS ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , SharedAllocDestroy > RecordFull ;
static_assert( sizeof(Tracker) == sizeof(int*), "SharedAllocationTracker has wrong size!" );
MemorySpace s ;
const size_t N = 1200 ;
const size_t size = 8 ;
RecordMemS * rarray[ N ];
Header * harray[ N ];
RecordMemS ** const r = rarray ;
Header ** const h = harray ;
Kokkos::RangePolicy< ExecutionSpace > range(0,N);
//----------------------------------------
{
Kokkos::parallel_for( range , [=]( size_t i ){
char name[64] ;
sprintf(name,"test_%.2d",int(i));
r[i] = RecordMemS::allocate( s , name , size * ( i + 1 ) );
h[i] = Header::get_header( r[i]->data() );
ASSERT_EQ( r[i]->use_count() , 0 );
for ( size_t j = 0 ; j < ( i / 10 ) + 1 ; ++j ) RecordBase::increment( r[i] );
ASSERT_EQ( r[i]->use_count() , ( i / 10 ) + 1 );
ASSERT_EQ( r[i] , RecordMemS::get_record( r[i]->data() ) );
});
// Sanity check for the whole set of allocation records to which this record belongs.
RecordBase::is_sane( r[0] );
// RecordMemS::print_records( std::cout , s , true );
Kokkos::parallel_for( range , [=]( size_t i ){
while ( 0 != ( r[i] = static_cast< RecordMemS *>( RecordBase::decrement( r[i] ) ) ) ) {
if ( r[i]->use_count() == 1 ) RecordBase::is_sane( r[i] );
}
});
}
//----------------------------------------
{
int destroy_count = 0 ;
SharedAllocDestroy counter( & destroy_count );
Kokkos::parallel_for( range , [=]( size_t i ){
char name[64] ;
sprintf(name,"test_%.2d",int(i));
RecordFull * rec = RecordFull::allocate( s , name , size * ( i + 1 ) );
rec->m_destroy = counter ;
r[i] = rec ;
h[i] = Header::get_header( r[i]->data() );
ASSERT_EQ( r[i]->use_count() , 0 );
for ( size_t j = 0 ; j < ( i / 10 ) + 1 ; ++j ) RecordBase::increment( r[i] );
ASSERT_EQ( r[i]->use_count() , ( i / 10 ) + 1 );
ASSERT_EQ( r[i] , RecordMemS::get_record( r[i]->data() ) );
});
RecordBase::is_sane( r[0] );
Kokkos::parallel_for( range , [=]( size_t i ){
while ( 0 != ( r[i] = static_cast< RecordMemS *>( RecordBase::decrement( r[i] ) ) ) ) {
if ( r[i]->use_count() == 1 ) RecordBase::is_sane( r[i] );
}
});
ASSERT_EQ( destroy_count , int(N) );
}
//----------------------------------------
{
int destroy_count = 0 ;
{
RecordFull * rec = RecordFull::allocate( s , "test" , size );
// ... Construction of the allocated { rec->data() , rec->size() }
// Copy destruction function object into the allocation record
rec->m_destroy = SharedAllocDestroy( & destroy_count );
+ ASSERT_EQ( rec->use_count() , 0 );
+
// Start tracking; this increments the use count from 0 to 1
- Tracker track( rec );
+ Tracker track ;
+
+ track.assign_allocated_record_to_uninitialized( rec );
ASSERT_EQ( rec->use_count() , 1 );
+ ASSERT_EQ( track.use_count() , 1 );
// Verify construction / destruction increment
for ( size_t i = 0 ; i < N ; ++i ) {
ASSERT_EQ( rec->use_count() , 1 );
{
- Tracker local_tracker( rec );
+ Tracker local_tracker ;
+ local_tracker.assign_allocated_record_to_uninitialized( rec );
ASSERT_EQ( rec->use_count() , 2 );
+ ASSERT_EQ( local_tracker.use_count() , 2 );
}
ASSERT_EQ( rec->use_count() , 1 );
+ ASSERT_EQ( track.use_count() , 1 );
}
Kokkos::parallel_for( range , [=]( size_t i ){
- Tracker local_tracker( rec );
+ Tracker local_tracker ;
+ local_tracker.assign_allocated_record_to_uninitialized( rec );
ASSERT_GT( rec->use_count() , 1 );
});
ASSERT_EQ( rec->use_count() , 1 );
+ ASSERT_EQ( track.use_count() , 1 );
// Destruction of the 'track' object deallocates 'rec' and invokes the destroy function object.
}
ASSERT_EQ( destroy_count , 1 );
}
#endif /* #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) */
}
}
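//----------------------------------------------------------------------------
// Sketch of the two-phase tracker idiom introduced by this change (editorial;
// only names that appear in the patch are used). RecordFull::allocate()
// returns a record with use_count() == 0; binding a default-constructed
// Tracker via assign_allocated_record_to_uninitialized() takes ownership and
// bumps the count to 1; the last tracker to be destroyed runs m_destroy and
// releases the allocation.
//
//   RecordFull * rec = RecordFull::allocate( s , "example" , size );
//   Tracker track ;
//   track.assign_allocated_record_to_uninitialized( rec ); // use_count 0 -> 1
//   // ... ~Tracker(): use_count 1 -> 0, m_destroy fires, memory is freed
//----------------------------------------------------------------------------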
diff --git a/lib/kokkos/core/unit_test/TestSynchronic.cpp b/lib/kokkos/core/unit_test/TestSynchronic.cpp
new file mode 100644
index 000000000..9121dc15a
--- /dev/null
+++ b/lib/kokkos/core/unit_test/TestSynchronic.cpp
@@ -0,0 +1,448 @@
+/*
+
+Copyright (c) 2014, NVIDIA Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+//#undef _WIN32_WINNT
+//#define _WIN32_WINNT 0x0602
+
+#if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__APPLE__)
+
+// Skip this test on PowerPC and Apple platforms for now
+
+#else
+
+#include <gtest/gtest.h>
+
+#ifdef USEOMP
+#include <omp.h>
+#endif
+
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+#include <string>
+#include <vector>
+#include <map>
+#include <cstring>
+#include <ctime>
+
+//#include <details/config>
+//#undef __SYNCHRONIC_COMPATIBLE
+
+#include <impl/Kokkos_Synchronic.hpp>
+#include <impl/Kokkos_Synchronic_n3998.hpp>
+
+#include "TestSynchronic.hpp"
+
+// Uncomment to allow the test to dump output
+//#define VERBOSE_TEST
+
+namespace Test {
+
+unsigned next_table[] =
+ {
+ 0, 1, 2, 3, //0-3
+ 4, 4, 6, 6, //4-7
+ 8, 8, 8, 8, //8-11
+ 12, 12, 12, 12, //12-15
+ 16, 16, 16, 16, //16-19
+ 16, 16, 16, 16, //20-23
+ 24, 24, 24, 24, //24-27
+ 24, 24, 24, 24, //28-31
+ 32, 32, 32, 32, //32-35
+ 32, 32, 32, 32, //36-39
+ 40, 40, 40, 40, //40-43
+ 40, 40, 40, 40, //44-47
+ 48, 48, 48, 48, //48-51
+ 48, 48, 48, 48, //52-55
+ 56, 56, 56, 56, //56-59
+ 56, 56, 56, 56, //60-63
+ };
+
+// Change this if you want to allow oversubscription of the system; by default only the range {1..(system size)} is tested.
+#define FOR_GAUNTLET(x) for(unsigned x = (std::min)(std::thread::hardware_concurrency()*8,unsigned(sizeof(next_table)/sizeof(unsigned))); x; x = next_table[x-1])
+
+// Define this to make the barrier benchmark use OpenMP barriers instead of the n3998 std::barrier.
+//#define USEOMP
+
+#if defined(__SYNCHRONIC_COMPATIBLE)
+ #define PREFIX "futex-"
+#else
+ #define PREFIX "backoff-"
+#endif
+
+// This test uses a custom Mersenne twister to eliminate implementation variation.
+MersenneTwister mt;
+
+int dummya = 1, dummyb = 1;
+
+int dummy1 = 1;
+std::atomic<int> dummy2(1);
+std::atomic<int> dummy3(1);
+
+double time_item(int const count = (int)1E8) {
+
+ clock_t const start = clock();
+
+ for(int i = 0;i < count; ++i)
+ mt.integer();
+
+ clock_t const end = clock();
+ double elapsed_seconds = (end - start) / double(CLOCKS_PER_SEC);
+
+ return elapsed_seconds / count;
+}
+double time_nil(int const count = (int)1E08) {
+
+ clock_t const start = clock();
+
+ dummy3 = count;
+ for(int i = 0;i < (int)1E6; ++i) {
+ if(dummy1) {
+ // Do some busywork comparable to a critical section (no lock is actually held here)
+ int workunits = dummy3;//(int) (mtc.poissonInterval((float)num_items_critical) + 0.5f);
+ for (int j = 1; j < workunits; j++)
+ dummy1 &= j; // Do one work unit
+ dummy2.fetch_add(dummy1,std::memory_order_relaxed);
+ }
+ }
+
+ clock_t const end = clock();
+ double elapsed_seconds = (end - start) / double(CLOCKS_PER_SEC);
+
+ return elapsed_seconds / count;
+}
+
+
+template <class mutex_type>
+void testmutex_inner(mutex_type& m, std::atomic<int>& t,std::atomic<int>& wc,std::atomic<int>& wnc, int const num_iterations,
+ int const num_items_critical, int const num_items_noncritical, MersenneTwister& mtc, MersenneTwister& mtnc, bool skip) {
+
+ for(int k = 0; k < num_iterations; ++k) {
+
+ if(num_items_noncritical) {
+ // Do some work without holding the lock
+ int workunits = num_items_noncritical;//(int) (mtnc.poissonInterval((float)num_items_noncritical) + 0.5f);
+ for (int i = 1; i < workunits; i++)
+ mtnc.integer(); // Do one work unit
+ wnc.fetch_add(workunits,std::memory_order_relaxed);
+ }
+
+ t.fetch_add(1,std::memory_order_relaxed);
+
+ if(!skip) {
+ std::unique_lock<mutex_type> l(m);
+ if(num_items_critical) {
+ // Do some work while holding the lock
+ int workunits = num_items_critical;//(int) (mtc.poissonInterval((float)num_items_critical) + 0.5f);
+ for (int i = 1; i < workunits; i++)
+ mtc.integer(); // Do one work unit
+ wc.fetch_add(workunits,std::memory_order_relaxed);
+ }
+ }
+ }
+}
+template <class mutex_type>
+void testmutex_outer(std::map<std::string,std::vector<double>>& results, std::string const& name, double critical_fraction, double critical_duration) {
+
+ std::ostringstream truename;
+ truename << name << " (f=" << critical_fraction << ",d=" << critical_duration << ")";
+
+ std::vector<double>& data = results[truename.str()];
+
+ double const workItemTime = time_item() ,
+ nilTime = time_nil();
+
+ int const num_items_critical = (critical_duration <= 0 ? 0 : (std::max)( int(critical_duration / workItemTime + 0.5), int(100 * nilTime / workItemTime + 0.5))),
+ num_items_noncritical = (num_items_critical <= 0 ? 0 : int( ( 1 - critical_fraction ) * num_items_critical / critical_fraction + 0.5 ));
+
+ FOR_GAUNTLET(num_threads) {
+
+ //Kokkos::Impl::portable_sleep(std::chrono::microseconds(2000000));
+
+ int const num_iterations = (num_items_critical + num_items_noncritical != 0) ?
+#ifdef __SYNCHRONIC_JUST_YIELD
+ int( 1 / ( 8 * workItemTime ) / (num_items_critical + num_items_noncritical) / num_threads + 0.5 ) :
+#else
+ int( 1 / ( 8 * workItemTime ) / (num_items_critical + num_items_noncritical) / num_threads + 0.5 ) :
+#endif
+#ifdef WIN32
+ int( 1 / workItemTime / (20 * num_threads * num_threads) );
+#else
+ int( 1 / workItemTime / (200 * num_threads * num_threads) );
+#endif
+
+#ifdef VERBOSE_TEST
+ std::cerr << "running " << truename.str() << " #" << num_threads << ", " << num_iterations << " * " << num_items_noncritical << "\n" << std::flush;
+#endif
+
+
+ std::atomic<int> t[2], wc[2], wnc[2];
+
+ clock_t start[2], end[2];
+ for(int pass = 0; pass < 2; ++pass) {
+
+ t[pass] = 0;
+ wc[pass] = 0;
+ wnc[pass] = 0;
+
+ srand(num_threads);
+ std::vector<MersenneTwister> randomsnc(num_threads),
+ randomsc(num_threads);
+
+ mutex_type m;
+
+ start[pass] = clock();
+#ifdef USEOMP
+ omp_set_num_threads(num_threads);
+ std::atomic<int> _j(0);
+ #pragma omp parallel
+ {
+ int const j = _j.fetch_add(1,std::memory_order_relaxed);
+ testmutex_inner(m, t[pass], wc[pass], wnc[pass], num_iterations, num_items_critical, num_items_noncritical, randomsc[j], randomsnc[j], pass==0);
+ num_threads = omp_get_num_threads();
+ }
+#else
+ std::vector<std::thread*> threads(num_threads);
+ for(unsigned j = 0; j < num_threads; ++j)
+ threads[j] = new std::thread([&,j](){
+ testmutex_inner(m, t[pass], wc[pass], wnc[pass], num_iterations, num_items_critical, num_items_noncritical, randomsc[j], randomsnc[j], pass==0);
+ }
+ );
+ for(unsigned j = 0; j < num_threads; ++j) {
+ threads[j]->join();
+ delete threads[j];
+ }
+#endif
+ end[pass] = clock();
+ }
+ if(t[0] != t[1]) throw std::string("mismatched iteration counts");
+ if(wnc[0] != wnc[1]) throw std::string("mismatched work item counts");
+
+ double elapsed_seconds_0 = (end[0] - start[0]) / double(CLOCKS_PER_SEC),
+ elapsed_seconds_1 = (end[1] - start[1]) / double(CLOCKS_PER_SEC);
+ double time = (elapsed_seconds_1 - elapsed_seconds_0 - wc[1]*workItemTime) / num_iterations;
+
+ data.push_back(time);
+#ifdef VERBOSE_TEST
+ std::cerr << truename.str() << " : " << num_threads << "," << elapsed_seconds_1 / num_iterations << " - " << elapsed_seconds_0 / num_iterations << " - " << wc[1]*workItemTime/num_iterations << " = " << time << " \n";
+#endif
+ }
+}
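//----------------------------------------------------------------------------
// Worked sizing example (editorial, with assumed measurements): if time_item()
// reports workItemTime = 10 ns per twister draw, critical_duration = 2e-6 s,
// and the 100*nilTime floor is below that, then
//   num_items_critical    = int( 2e-6 / 1e-8 + 0.5 )           = 200
//   num_items_noncritical = int( (1-f)/f * 200 + 0.5 ), f=0.5  = 200
// so each iteration does ~2 us of locked work and ~2 us of unlocked work.
//----------------------------------------------------------------------------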
+
+template <class barrier_type>
+void testbarrier_inner(barrier_type& b, int const num_threads, int const j, std::atomic<int>& t,std::atomic<int>& w,
+ int const num_iterations_odd, int const num_iterations_even,
+ int const num_items_noncritical, MersenneTwister& arg_mt, bool skip) {
+
+ for(int k = 0; k < (std::max)(num_iterations_even,num_iterations_odd); ++k) {
+
+ if(k >= (~j & 0x1 ? num_iterations_odd : num_iterations_even )) {
+ if(!skip)
+ b.arrive_and_drop();
+ break;
+ }
+
+ if(num_items_noncritical) {
+ // Do some work without holding the lock
+ int workunits = (int) (arg_mt.poissonInterval((float)num_items_noncritical) + 0.5f);
+ for (int i = 1; i < workunits; i++)
+ arg_mt.integer(); // Do one work unit
+ w.fetch_add(workunits,std::memory_order_relaxed);
+ }
+
+ t.fetch_add(1,std::memory_order_relaxed);
+
+ if(!skip) {
+ int const thiscount = (std::min)(k+1,num_iterations_odd)*((num_threads>>1)+(num_threads&1)) + (std::min)(k+1,num_iterations_even)*(num_threads>>1);
+ if(t.load(std::memory_order_relaxed) > thiscount) {
+ std::cerr << "FAILURE: some threads have run ahead of the barrier (" << t.load(std::memory_order_relaxed) << ">" << thiscount << ").\n";
+ EXPECT_TRUE(false);
+ }
+#ifdef USEOMP
+ #pragma omp barrier
+#else
+ b.arrive_and_wait();
+#endif
+ if(t.load(std::memory_order_relaxed) < thiscount) {
+ std::cerr << "FAILURE: some threads have fallen behind the barrier (" << t.load(std::memory_order_relaxed) << "<" << thiscount << ").\n";
+ EXPECT_TRUE(false);
+ }
+ }
+ }
+}
+template <class barrier_type>
+void testbarrier_outer(std::map<std::string,std::vector<double>>& results, std::string const& name, double barrier_frequency, double phase_duration, bool randomIterations = false) {
+
+ std::vector<double>& data = results[name];
+
+ double const workItemTime = time_item();
+ int const num_items_noncritical = int( phase_duration / workItemTime + 0.5 );
+
+ FOR_GAUNTLET(num_threads) {
+
+ int const num_iterations = int( barrier_frequency );
+#ifdef VERBOSE_TEST
+ std::cerr << "running " << name << " #" << num_threads << ", " << num_iterations << " * " << num_items_noncritical << "\r" << std::flush;
+#endif
+
+ srand(num_threads);
+
+ MersenneTwister local_mt;
+ int const num_iterations_odd = randomIterations ? int(local_mt.poissonInterval((float)num_iterations)+0.5f) : num_iterations,
+ num_iterations_even = randomIterations ? int(local_mt.poissonInterval((float)num_iterations)+0.5f) : num_iterations;
+
+ std::atomic<int> t[2], w[2];
+ std::chrono::time_point<std::chrono::high_resolution_clock> start[2], end[2];
+ for(int pass = 0; pass < 2; ++pass) {
+
+ t[pass] = 0;
+ w[pass] = 0;
+
+ srand(num_threads);
+ std::vector<MersenneTwister> randoms(num_threads);
+
+ barrier_type b(num_threads);
+
+ start[pass] = std::chrono::high_resolution_clock::now();
+#ifdef USEOMP
+ omp_set_num_threads(num_threads);
+ std::atomic<int> _j(0);
+ #pragma omp parallel
+ {
+ int const j = _j.fetch_add(1,std::memory_order_relaxed);
+ testbarrier_inner(b, num_threads, j, t[pass], w[pass], num_iterations_odd, num_iterations_even, num_items_noncritical, randoms[j], pass==0);
+ num_threads = omp_get_num_threads();
+ }
+#else
+ std::vector<std::thread*> threads(num_threads);
+ for(unsigned j = 0; j < num_threads; ++j)
+ threads[j] = new std::thread([&,j](){
+ testbarrier_inner(b, num_threads, j, t[pass], w[pass], num_iterations_odd, num_iterations_even, num_items_noncritical, randoms[j], pass==0);
+ });
+ for(unsigned j = 0; j < num_threads; ++j) {
+ threads[j]->join();
+ delete threads[j];
+ }
+#endif
+ end[pass] = std::chrono::high_resolution_clock::now();
+ }
+
+ if(t[0] != t[1]) throw std::string("mismatched iteration counts");
+ if(w[0] != w[1]) throw std::string("mismatched work item counts");
+
+ int const phases = (std::max)(num_iterations_odd, num_iterations_even);
+
+ std::chrono::duration<double> elapsed_seconds_0 = end[0]-start[0],
+ elapsed_seconds_1 = end[1]-start[1];
+ double const time = (elapsed_seconds_1.count() - elapsed_seconds_0.count()) / phases;
+
+ data.push_back(time);
+#ifdef VERBOSE_TEST
+ std::cerr << name << " : " << num_threads << "," << elapsed_seconds_1.count() / phases << " - " << elapsed_seconds_0.count() / phases << " = " << time << " \n";
+#endif
+ }
+}
+
+template <class... T>
+struct mutex_tester;
+template <class F>
+struct mutex_tester<F> {
+ static void run(std::map<std::string,std::vector<double>>& results, std::string const name[], double critical_fraction, double critical_duration) {
+ testmutex_outer<F>(results, *name, critical_fraction, critical_duration);
+ }
+};
+template <class F, class... T>
+struct mutex_tester<F,T...> {
+ static void run(std::map<std::string,std::vector<double>>& results, std::string const name[], double critical_fraction, double critical_duration) {
+ mutex_tester<F>::run(results, name, critical_fraction, critical_duration);
+ mutex_tester<T...>::run(results, ++name, critical_fraction, critical_duration);
+ }
+};
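//----------------------------------------------------------------------------
// Illustrative sketch (editorial, not part of the patch): mutex_tester is the
// standard recursive type-list idiom -- peel one type off the parameter pack
// per specialization; above, ++name walks the parallel array of labels in
// step with the pack. The same idiom in isolation, with hypothetical names:

template <class... T> struct for_each_type;

template <class F>
struct for_each_type<F> {
  static void run() { /* act on the single type F */ }
};

template <class F, class... T>
struct for_each_type<F, T...> {
  static void run() {
    for_each_type<F>::run();     // handle the head of the list
    for_each_type<T...>::run();  // recurse on the tail
  }
};
//----------------------------------------------------------------------------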
+
+TEST( synchronic, main )
+{
+ //warm up
+ time_item();
+
+ //measure up
+#ifdef VERBOSE_TEST
+ std::cerr << "measuring work item speed...\r";
+ std::cerr << "work item speed is " << time_item() << " per item, nil is " << time_nil() << "\n";
+#endif
+ try {
+
+ std::pair<double,double> testpoints[] = { {1, 0}, /*{1E-1, 10E-3}, {5E-1, 2E-6}, {3E-1, 50E-9},*/ };
+ for(auto x : testpoints ) {
+
+ std::map<std::string,std::vector<double>> results;
+
+ //testbarrier_outer<std::barrier>(results, PREFIX"bar 1khz 100us", 1E3, x.second);
+
+ std::string const names[] = {
+ PREFIX"tkt", PREFIX"mcs", PREFIX"ttas", PREFIX"std"
+#ifdef WIN32
+ ,PREFIX"srw"
+#endif
+ };
+
+ //run -->
+
+ mutex_tester<
+ ticket_mutex, mcs_mutex, ttas_mutex, std::mutex
+#ifdef WIN32
+ ,srw_mutex
+#endif
+ >::run(results, names, x.first, x.second);
+
+ //<-- run
+
+#ifdef VERBOSE_TEST
+ std::cout << "threads";
+ for(auto & i : results)
+ std::cout << ",\"" << i.first << '\"';
+ std::cout << std::endl;
+ int j = 0;
+ FOR_GAUNTLET(num_threads) {
+ std::cout << num_threads;
+ for(auto & i : results)
+ std::cout << ',' << i.second[j];
+ std::cout << std::endl;
+ ++j;
+ }
+#endif
+ }
+ }
+ catch(std::string & e) {
+ std::cerr << "EXCEPTION : " << e << std::endl;
+ EXPECT_TRUE( false );
+ }
+}
+
+} // namespace Test
+
+#endif
diff --git a/lib/kokkos/core/unit_test/TestSynchronic.hpp b/lib/kokkos/core/unit_test/TestSynchronic.hpp
new file mode 100644
index 000000000..d820129e8
--- /dev/null
+++ b/lib/kokkos/core/unit_test/TestSynchronic.hpp
@@ -0,0 +1,240 @@
+/*
+
+Copyright (c) 2014, NVIDIA Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef TEST_SYNCHRONIC_HPP
+#define TEST_SYNCHRONIC_HPP
+
+#include <impl/Kokkos_Synchronic.hpp>
+#include <mutex>
+
+namespace Test {
+
+template <bool truly>
+struct dumb_mutex {
+
+ dumb_mutex () : locked(0) {
+ }
+
+ void lock() {
+ while(1) {
+ bool state = false;
+ if (locked.compare_exchange_weak(state,true,std::memory_order_acquire)) {
+ break;
+ }
+ while (locked.load(std::memory_order_relaxed)) {
+ if (!truly) {
+ Kokkos::Impl::portable_yield();
+ }
+ }
+ }
+ }
+
+ void unlock() {
+ locked.store(false,std::memory_order_release);
+ }
+
+private :
+ std::atomic<bool> locked;
+};
+
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#include <synchapi.h>
+struct srw_mutex {
+
+ srw_mutex () {
+ InitializeSRWLock(&_lock);
+ }
+
+ void lock() {
+ AcquireSRWLockExclusive(&_lock);
+ }
+ void unlock() {
+ ReleaseSRWLockExclusive(&_lock);
+ }
+
+private :
+ SRWLOCK _lock;
+};
+#endif
+
+struct ttas_mutex {
+
+ ttas_mutex() : locked(false) {
+ }
+
+ ttas_mutex(const ttas_mutex&) = delete;
+ ttas_mutex& operator=(const ttas_mutex&) = delete;
+
+ void lock() {
+ for(int i = 0;; ++i) {
+ bool state = false;
+ if(locked.compare_exchange_weak(state,true,std::memory_order_relaxed,Kokkos::Impl::notify_none))
+ break;
+ locked.expect_update(true);
+ }
+ std::atomic_thread_fence(std::memory_order_acquire);
+ }
+ void unlock() {
+ locked.store(false,std::memory_order_release);
+ }
+
+private :
+ Kokkos::Impl::synchronic<bool> locked;
+};
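//----------------------------------------------------------------------------
// For comparison (editorial sketch, not part of the patch): the classic
// test-and-test-and-set spinlock with plain std::atomic (already included
// transitively above). The synchronic expect_update() call plays the role of
// the read-only inner spin, but may block in the futex-backed configuration
// instead of burning cycles.

struct ttas_spinlock {
  std::atomic<bool> locked_flag { false };

  void lock() {
    for (;;) {
      bool expected = false;
      if (locked_flag.compare_exchange_weak(expected, true,
                                            std::memory_order_acquire))
        return;                                               // acquired
      while (locked_flag.load(std::memory_order_relaxed)) {}  // read-only spin
    }
  }
  void unlock() { locked_flag.store(false, std::memory_order_release); }
};
//----------------------------------------------------------------------------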
+
+struct ticket_mutex {
+
+ ticket_mutex() : active(0), queue(0) {
+ }
+
+ ticket_mutex(const ticket_mutex&) = delete;
+ ticket_mutex& operator=(const ticket_mutex&) = delete;
+
+ void lock() {
+ int const me = queue.fetch_add(1, std::memory_order_relaxed);
+ while(me != active.load_when_equal(me, std::memory_order_acquire))
+ ;
+ }
+
+ void unlock() {
+ active.fetch_add(1,std::memory_order_release);
+ }
+private :
+ Kokkos::Impl::synchronic<int> active;
+ std::atomic<int> queue;
+};
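//----------------------------------------------------------------------------
// Editorial sketch (not part of the patch): the same FIFO ticket discipline
// with plain std::atomic. fetch_add hands out tickets; unlock advances
// 'active'. load_when_equal() above can wait without spinning, which this
// plain polling version cannot.

struct ticket_spinlock {
  std::atomic<int> active { 0 };
  std::atomic<int> queue  { 0 };

  void lock() {
    const int me = queue.fetch_add(1, std::memory_order_relaxed); // take a ticket
    while (active.load(std::memory_order_acquire) != me) {}       // wait for my turn
  }
  void unlock() { active.fetch_add(1, std::memory_order_release); }
};
//----------------------------------------------------------------------------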
+
+struct mcs_mutex {
+
+ mcs_mutex() : head(nullptr) {
+ }
+
+ mcs_mutex(const mcs_mutex&) = delete;
+ mcs_mutex& operator=(const mcs_mutex&) = delete;
+
+ struct unique_lock {
+
+ unique_lock(mcs_mutex & arg_m) : m(arg_m), next(nullptr), ready(false) {
+
+ unique_lock * const h = m.head.exchange(this,std::memory_order_acquire);
+ if(__builtin_expect(h != nullptr,0)) {
+ h->next.store(this,std::memory_order_seq_cst,Kokkos::Impl::notify_one);
+ while(!ready.load_when_not_equal(false,std::memory_order_acquire))
+ ;
+ }
+ }
+
+ unique_lock(const unique_lock&) = delete;
+ unique_lock& operator=(const unique_lock&) = delete;
+
+ ~unique_lock() {
+ unique_lock * h = this;
+ if(__builtin_expect(!m.head.compare_exchange_strong(h,nullptr,std::memory_order_release, std::memory_order_relaxed),0)) {
+ unique_lock * n = next.load(std::memory_order_relaxed);
+ while(!n)
+ n = next.load_when_not_equal(n,std::memory_order_relaxed);
+ n->ready.store(true,std::memory_order_release,Kokkos::Impl::notify_one);
+ }
+ }
+
+ private:
+ mcs_mutex & m;
+ Kokkos::Impl::synchronic<unique_lock*> next;
+ Kokkos::Impl::synchronic<bool> ready;
+ };
+
+private :
+ std::atomic<unique_lock*> head;
+};
+
+}
+
+namespace std {
+template<>
+struct unique_lock<Test::mcs_mutex> : Test::mcs_mutex::unique_lock {
+ unique_lock(Test::mcs_mutex & arg_m) : Test::mcs_mutex::unique_lock(arg_m) {
+ }
+ unique_lock(const unique_lock&) = delete;
+ unique_lock& operator=(const unique_lock&) = delete;
+};
+
+}
+
+#include <math.h>   /* logf, used by MersenneTwister::poissonInterval below */
+#include <stdlib.h> /* rand */
+
+namespace Test {
+
+//-------------------------------------
+// MersenneTwister
+//-------------------------------------
+#define MT_IA 397
+#define MT_LEN 624
+
+class MersenneTwister
+{
+ volatile unsigned long m_buffer[MT_LEN][64/sizeof(unsigned long)];
+ volatile int m_index;
+
+public:
+ MersenneTwister() {
+ for (int i = 0; i < MT_LEN; i++)
+ m_buffer[i][0] = rand();
+ m_index = 0;
+ for (int i = 0; i < MT_LEN * 100; i++)
+ integer();
+ }
+ unsigned long integer() {
+ // Indices
+ int i = m_index;
+ int i2 = m_index + 1; if (i2 >= MT_LEN) i2 = 0; // wrap-around
+ int j = m_index + MT_IA; if (j >= MT_LEN) j -= MT_LEN; // wrap-around
+
+ // Twist
+ unsigned long s = (m_buffer[i][0] & 0x80000000) | (m_buffer[i2][0] & 0x7fffffff);
+ unsigned long r = m_buffer[j][0] ^ (s >> 1) ^ ((s & 1) * 0x9908B0DF);
+ m_buffer[m_index][0] = r;
+ m_index = i2;
+
+ // Swizzle
+ r ^= (r >> 11);
+ r ^= (r << 7) & 0x9d2c5680UL;
+ r ^= (r << 15) & 0xefc60000UL;
+ r ^= (r >> 18);
+ return r;
+ }
+ float poissonInterval(float ooLambda) {
+ return -logf(1.0f - integer() * 2.3283e-10f) * ooLambda;
+ }
+};
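//----------------------------------------------------------------------------
// Editorial note: poissonInterval() is inverse-CDF sampling of an exponential
// distribution. The constant 2.3283e-10 is approximately 2^-32, so
//   u = integer() * 2^-32       is (approximately) uniform in [0,1)
//   x = -ln(1 - u) * ooLambda   is Exponential with mean ooLambda,
// i.e. the inter-arrival time of a Poisson process, hence the name.
//----------------------------------------------------------------------------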
+
+} // namespace Test
+
+#endif //TEST_SYNCHRONIC_HPP
diff --git a/lib/kokkos/core/unit_test/TestTeam.hpp b/lib/kokkos/core/unit_test/TestTeam.hpp
index 4849f18df..fb001917a 100644
--- a/lib/kokkos/core/unit_test/TestTeam.hpp
+++ b/lib/kokkos/core/unit_test/TestTeam.hpp
@@ -1,466 +1,531 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdio.h>
#include <stdexcept>
#include <sstream>
#include <iostream>
#include <Kokkos_Core.hpp>
/*--------------------------------------------------------------------------*/
namespace Test {
namespace {
template< class ExecSpace >
struct TestTeamPolicy {
typedef typename Kokkos::TeamPolicy< ExecSpace >::member_type team_member ;
typedef Kokkos::View<int**,ExecSpace> view_type ;
view_type m_flags ;
TestTeamPolicy( const size_t league_size )
: m_flags( Kokkos::ViewAllocateWithoutInitializing("flags")
, Kokkos::TeamPolicy< ExecSpace >::team_size_max( *this )
, league_size )
{}
struct VerifyInitTag {};
KOKKOS_INLINE_FUNCTION
void operator()( const team_member & member ) const
{
const int tid = member.team_rank() + member.team_size() * member.league_rank();
m_flags( member.team_rank() , member.league_rank() ) = tid ;
}
KOKKOS_INLINE_FUNCTION
void operator()( const VerifyInitTag & , const team_member & member ) const
{
const int tid = member.team_rank() + member.team_size() * member.league_rank();
if ( tid != m_flags( member.team_rank() , member.league_rank() ) ) {
printf("TestTeamPolicy member(%d,%d) error %d != %d\n"
, member.league_rank() , member.team_rank()
, tid , m_flags( member.team_rank() , member.league_rank() ) );
}
}
static void test_for( const size_t league_size )
{
TestTeamPolicy functor( league_size );
const int team_size = Kokkos::TeamPolicy< ExecSpace >::team_size_max( functor );
Kokkos::parallel_for( Kokkos::TeamPolicy< ExecSpace >( league_size , team_size ) , functor );
Kokkos::parallel_for( Kokkos::TeamPolicy< ExecSpace , VerifyInitTag >( league_size , team_size ) , functor );
}
struct ReduceTag {};
typedef long value_type ;
KOKKOS_INLINE_FUNCTION
void operator()( const team_member & member , value_type & update ) const
{
update += member.team_rank() + member.team_size() * member.league_rank();
}
KOKKOS_INLINE_FUNCTION
void operator()( const ReduceTag & , const team_member & member , value_type & update ) const
{
update += 1 + member.team_rank() + member.team_size() * member.league_rank();
}
static void test_reduce( const size_t league_size )
{
TestTeamPolicy functor( league_size );
const int team_size = Kokkos::TeamPolicy< ExecSpace >::team_size_max( functor );
const long N = team_size * league_size ;
long total = 0 ;
Kokkos::parallel_reduce( Kokkos::TeamPolicy< ExecSpace >( league_size , team_size ) , functor , total );
ASSERT_EQ( size_t((N-1)*(N))/2 , size_t(total) );
Kokkos::parallel_reduce( Kokkos::TeamPolicy< ExecSpace , ReduceTag >( league_size , team_size ) , functor , total );
ASSERT_EQ( (size_t(N)*size_t(N+1))/2 , size_t(total) );
}
};
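// Editorial note on the asserted totals: with N = team_size * league_size
// threads, tid ranges over 0..N-1, so
//   untagged operator():  sum tid      = N(N-1)/2
//   ReduceTag operator(): sum (tid+1)  = N(N-1)/2 + N = N(N+1)/2
// which are exactly the values checked above.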
}
}
/*--------------------------------------------------------------------------*/
namespace Test {
template< typename ScalarType , class DeviceType >
class ReduceTeamFunctor
{
public:
typedef DeviceType execution_space ;
typedef Kokkos::TeamPolicy< execution_space > policy_type ;
typedef typename execution_space::size_type size_type ;
struct value_type {
ScalarType value[3] ;
};
const size_type nwork ;
ReduceTeamFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {}
ReduceTeamFunctor( const ReduceTeamFunctor & rhs )
: nwork( rhs.nwork ) {}
KOKKOS_INLINE_FUNCTION
void init( value_type & dst ) const
{
dst.value[0] = 0 ;
dst.value[1] = 0 ;
dst.value[2] = 0 ;
}
KOKKOS_INLINE_FUNCTION
void join( volatile value_type & dst ,
const volatile value_type & src ) const
{
dst.value[0] += src.value[0] ;
dst.value[1] += src.value[1] ;
dst.value[2] += src.value[2] ;
}
KOKKOS_INLINE_FUNCTION
void operator()( const typename policy_type::member_type ind , value_type & dst ) const
{
const int thread_rank = ind.team_rank() + ind.team_size() * ind.league_rank();
const int thread_size = ind.team_size() * ind.league_size();
const int chunk = ( nwork + thread_size - 1 ) / thread_size ;
size_type iwork = chunk * thread_rank ;
const size_type iwork_end = iwork + chunk < nwork ? iwork + chunk : nwork ;
for ( ; iwork < iwork_end ; ++iwork ) {
dst.value[0] += 1 ;
dst.value[1] += iwork + 1 ;
dst.value[2] += nwork - iwork ;
}
}
};
} // namespace Test
namespace {
template< typename ScalarType , class DeviceType >
class TestReduceTeam
{
public:
typedef DeviceType execution_space ;
typedef Kokkos::TeamPolicy< execution_space > policy_type ;
typedef typename execution_space::size_type size_type ;
//------------------------------------
TestReduceTeam( const size_type & nwork )
{
run_test(nwork);
}
void run_test( const size_type & nwork )
{
typedef Test::ReduceTeamFunctor< ScalarType , execution_space > functor_type ;
typedef typename functor_type::value_type value_type ;
typedef Kokkos::View< value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type ;
enum { Count = 3 };
enum { Repeat = 100 };
value_type result[ Repeat ];
const unsigned long nw = nwork ;
const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 )
: (nw/2) * ( nw + 1 );
const unsigned team_size = policy_type::team_size_recommended( functor_type(nwork) );
const unsigned league_size = ( nwork + team_size - 1 ) / team_size ;
policy_type team_exec( league_size , team_size );
for ( unsigned i = 0 ; i < Repeat ; ++i ) {
result_type tmp( & result[i] );
Kokkos::parallel_reduce( team_exec , functor_type(nwork) , tmp );
}
execution_space::fence();
for ( unsigned i = 0 ; i < Repeat ; ++i ) {
for ( unsigned j = 0 ; j < Count ; ++j ) {
const unsigned long correct = 0 == j % 3 ? nw : nsum ;
ASSERT_EQ( (ScalarType) correct , result[i].value[j] );
}
}
}
};
}
/*--------------------------------------------------------------------------*/
namespace Test {
template< class DeviceType >
class ScanTeamFunctor
{
public:
typedef DeviceType execution_space ;
typedef Kokkos::TeamPolicy< execution_space > policy_type ;
typedef long int value_type ;
Kokkos::View< value_type , execution_space > accum ;
Kokkos::View< value_type , execution_space > total ;
ScanTeamFunctor() : accum("accum"), total("total") {}
KOKKOS_INLINE_FUNCTION
void init( value_type & error ) const { error = 0 ; }
KOKKOS_INLINE_FUNCTION
void join( value_type volatile & error ,
value_type volatile const & input ) const
{ if ( input ) error = 1 ; }
struct JoinMax {
typedef long int value_type ;
KOKKOS_INLINE_FUNCTION
void join( value_type volatile & dst
, value_type volatile const & input ) const
{ if ( dst < input ) dst = input ; }
};
KOKKOS_INLINE_FUNCTION
void operator()( const typename policy_type::member_type ind , value_type & error ) const
{
if ( 0 == ind.league_rank() && 0 == ind.team_rank() ) {
const long int thread_count = ind.league_size() * ind.team_size();
total() = ( thread_count * ( thread_count + 1 ) ) / 2 ;
}
// Team max:
const long int m = ind.team_reduce( (long int) ( ind.league_rank() + ind.team_rank() ) , JoinMax() );
if ( m != ind.league_rank() + ( ind.team_size() - 1 ) ) {
printf("ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != reduce_max(%ld)\n"
, ind.league_rank(), ind.team_rank()
, ind.league_size(), ind.team_size()
, (long int)(ind.league_rank() + ( ind.team_size() - 1 )) , m );
}
// Scan:
const long int answer =
( ind.league_rank() + 1 ) * ind.team_rank() +
( ind.team_rank() * ( ind.team_rank() + 1 ) ) / 2 ;
const long int result =
ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 );
const long int result2 =
ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 );
if ( answer != result || answer != result2 ) {
printf("ScanTeamFunctor[%d.%d of %d.%d] answer(%ld) != scan_first(%ld) or scan_second(%ld)\n",
ind.league_rank(), ind.team_rank(),
ind.league_size(), ind.team_size(),
answer,result,result2);
error = 1 ;
}
const long int thread_rank = ind.team_rank() +
ind.team_size() * ind.league_rank();
ind.team_scan( 1 + thread_rank , accum.ptr_on_device() );
}
};
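// Editorial note: the expected scan value is the exclusive prefix sum within
// the team. Member r of league L contributes v = (L+1) + (r+1), so
//   answer(r) = sum_{t=0}^{r-1} ( (L+1) + (t+1) )
//             = r*(L+1) + r*(r+1)/2
// which matches the 'answer' expression in the functor above.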
template< class DeviceType >
class TestScanTeam
{
public:
typedef DeviceType execution_space ;
typedef long int value_type ;
typedef Kokkos::TeamPolicy< execution_space > policy_type ;
typedef Test::ScanTeamFunctor<DeviceType> functor_type ;
//------------------------------------
TestScanTeam( const size_t nteam )
{
run_test(nteam);
}
void run_test( const size_t nteam )
{
typedef Kokkos::View< long int , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;
const unsigned REPEAT = 100000 ;
const unsigned Repeat = ( REPEAT + nteam - 1 ) / nteam ;
functor_type functor ;
policy_type team_exec( nteam , policy_type::team_size_max( functor ) );
for ( unsigned i = 0 ; i < Repeat ; ++i ) {
long int accum = 0 ;
long int total = 0 ;
long int error = 0 ;
Kokkos::deep_copy( functor.accum , total );
Kokkos::parallel_reduce( team_exec , functor , result_type( & error ) );
DeviceType::fence();
Kokkos::deep_copy( accum , functor.accum );
Kokkos::deep_copy( total , functor.total );
ASSERT_EQ( error , 0 );
ASSERT_EQ( total , accum );
}
execution_space::fence();
}
};
} // namespace Test
/*--------------------------------------------------------------------------*/
namespace Test {
template< class ExecSpace >
struct SharedTeamFunctor {
typedef ExecSpace execution_space ;
typedef int value_type ;
typedef Kokkos::TeamPolicy< execution_space > policy_type ;
enum { SHARED_COUNT = 1000 };
typedef typename ExecSpace::scratch_memory_space shmem_space ;
// tbd: MemoryUnmanaged should be the default for shared memory space
typedef Kokkos::View<int*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ;
// Tell how much shared memory will be required by this functor:
inline
unsigned team_shmem_size( int /* team_size */ ) const
{
return shared_int_array_type::shmem_size( SHARED_COUNT ) +
shared_int_array_type::shmem_size( SHARED_COUNT );
}
KOKKOS_INLINE_FUNCTION
void operator()( const typename policy_type::member_type & ind , value_type & update ) const
{
const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT );
const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT );
if ((shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0) ||
(shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0)) {
printf ("Failed to allocate shared memory of size %lu\n",
static_cast<unsigned long> (SHARED_COUNT));
++update; // failure to allocate is an error
}
else {
for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += ind.team_size() ) {
shared_A[i] = i + ind.league_rank();
shared_B[i] = 2 * i + ind.league_rank();
}
ind.team_barrier();
if ( ind.team_rank() + 1 == ind.team_size() ) {
for ( int i = 0 ; i < SHARED_COUNT ; ++i ) {
if ( shared_A[i] != i + ind.league_rank() ) {
++update ;
}
if ( shared_B[i] != 2 * i + ind.league_rank() ) {
++update ;
}
}
}
}
}
};
}
namespace {
template< class ExecSpace >
struct TestSharedTeam {
TestSharedTeam()
{ run(); }
void run()
{
typedef Test::SharedTeamFunctor<ExecSpace> Functor ;
typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;
const size_t team_size = Kokkos::TeamPolicy< ExecSpace >::team_size_max( Functor() );
Kokkos::TeamPolicy< ExecSpace > team_exec( 8192 / team_size , team_size );
typename Functor::value_type error_count = 0 ;
Kokkos::parallel_reduce( team_exec , Functor() , result_type( & error_count ) );
ASSERT_EQ( error_count , 0 );
}
};
+#if defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
+
+template< class ExecSpace >
+struct TestLambdaSharedTeam {
+
+ TestLambdaSharedTeam()
+ { run(); }
+
+ void run()
+ {
+ typedef Test::SharedTeamFunctor<ExecSpace> Functor ;
+ typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;
+ typedef typename ExecSpace::scratch_memory_space shmem_space ;
+
+ // tbd: MemoryUnmanaged should be the default for shared memory space
+ typedef Kokkos::View<int*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ;
+
+ const int SHARED_COUNT = 1000;
+ int team_size = 1;
+#ifdef KOKKOS_HAVE_CUDA
+ if(std::is_same<ExecSpace,Kokkos::Cuda>::value)
+ team_size = 128;
+#endif
+ Kokkos::TeamPolicy< ExecSpace > team_exec( 8192 / team_size , team_size ,
+ Kokkos::Experimental::TeamScratchRequest<shmem_space>(SHARED_COUNT*2*sizeof(int)));
+
+ typename Functor::value_type error_count = 0 ;
+
+ Kokkos::parallel_reduce( team_exec , KOKKOS_LAMBDA
+ ( const typename Kokkos::TeamPolicy< ExecSpace >::member_type & ind , int & update ) {
+
+ const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT );
+ const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT );
+
+ if ((shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0) ||
+ (shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0)) {
+ printf ("Failed to allocate shared memory of size %lu\n",
+ static_cast<unsigned long> (SHARED_COUNT));
+ ++update; // failure to allocate is an error
+ } else {
+ for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += ind.team_size() ) {
+ shared_A[i] = i + ind.league_rank();
+ shared_B[i] = 2 * i + ind.league_rank();
+ }
+
+ ind.team_barrier();
+
+ if ( ind.team_rank() + 1 == ind.team_size() ) {
+ for ( int i = 0 ; i < SHARED_COUNT ; ++i ) {
+ if ( shared_A[i] != i + ind.league_rank() ) {
+ ++update ;
+ }
+ if ( shared_B[i] != 2 * i + ind.league_rank() ) {
+ ++update ;
+ }
+ }
+ }
+ }
+ }, result_type( & error_count ) );
+
+ ASSERT_EQ( error_count , 0 );
+ }
+};
+
+#endif
}
/*--------------------------------------------------------------------------*/
diff --git a/lib/kokkos/core/unit_test/TestThreads.cpp b/lib/kokkos/core/unit_test/TestThreads.cpp
index b254aacaf..772c82255 100644
--- a/lib/kokkos/core/unit_test/TestThreads.cpp
+++ b/lib/kokkos/core/unit_test/TestThreads.cpp
@@ -1,436 +1,458 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_HAVE_PTHREAD )
#include <Kokkos_Core.hpp>
#include <Threads/Kokkos_Threads_TaskPolicy.hpp>
//----------------------------------------------------------------------------
#include <TestSharedAlloc.hpp>
#include <TestViewMapping.hpp>
#include <TestViewImpl.hpp>
#include <TestViewAPI.hpp>
#include <TestViewSubview.hpp>
+#include <TestViewOfClass.hpp>
#include <TestAtomic.hpp>
#include <TestReduce.hpp>
#include <TestScan.hpp>
#include <TestRange.hpp>
#include <TestTeam.hpp>
#include <TestAggregate.hpp>
#include <TestAggregateReduction.hpp>
#include <TestCompilerMacros.hpp>
#include <TestCXX11.hpp>
#include <TestCXX11Deduction.hpp>
#include <TestTeamVector.hpp>
#include <TestMemorySpaceTracking.hpp>
#include <TestTemplateMetaFunctions.hpp>
#include <TestTaskPolicy.hpp>
namespace Test {
class threads : public ::testing::Test {
protected:
static void SetUpTestCase()
{
// Finalize without initialize is a no-op:
Kokkos::Threads::finalize();
const unsigned numa_count = Kokkos::hwloc::get_available_numa_count();
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
unsigned threads_count = 0 ;
// Initialize and finalize with no threads:
Kokkos::Threads::initialize( 1u );
Kokkos::Threads::finalize();
threads_count = std::max( 1u , numa_count )
* std::max( 2u , cores_per_numa * threads_per_core );
Kokkos::Threads::initialize( threads_count );
Kokkos::Threads::finalize();
threads_count = std::max( 1u , numa_count * 2 )
* std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 );
Kokkos::Threads::initialize( threads_count );
Kokkos::Threads::finalize();
 // Quick attempt to verify thread start/terminate don't have a race condition:
threads_count = std::max( 1u , numa_count )
* std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 );
for ( unsigned i = 0 ; i < 10 ; ++i ) {
Kokkos::Threads::initialize( threads_count );
Kokkos::Threads::sleep();
Kokkos::Threads::wake();
Kokkos::Threads::finalize();
}
Kokkos::Threads::initialize( threads_count );
Kokkos::Threads::print_configuration( std::cout , true /* detailed */ );
}
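 // Note: the initialize/finalize cycles above exercise several
 // hwloc-derived thread counts before settling on the configuration
 // used by the remaining tests.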
static void TearDownTestCase()
{
Kokkos::Threads::finalize();
}
};
TEST_F( threads , init ) {
;
}
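+// Stress repeated kernel dispatch: launch many small parallel_for
+// kernels, including zero-length ranges, on the Threads backend.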
+TEST_F( threads , dispatch )
+{
+ const int repeat = 100 ;
+ for ( int i = 0 ; i < repeat ; ++i ) {
+ for ( int j = 0 ; j < repeat ; ++j ) {
+ Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Threads >(0,j)
+ , KOKKOS_LAMBDA( int ) {} );
+ }}
+}
+
TEST_F( threads , impl_shared_alloc ) {
test_shared_alloc< Kokkos::HostSpace , Kokkos::Threads >();
}
TEST_F( threads , impl_view_mapping ) {
test_view_mapping< Kokkos::Threads >();
test_view_mapping_subview< Kokkos::Threads >();
test_view_mapping_operator< Kokkos::Threads >();
TestViewMappingAtomic< Kokkos::Threads >::run();
}
TEST_F( threads, view_impl) {
test_view_impl< Kokkos::Threads >();
}
TEST_F( threads, view_api) {
TestViewAPI< double , Kokkos::Threads >();
}
+TEST_F( threads , view_nested_view )
+{
+ ::Test::view_nested_view< Kokkos::Threads >();
+}
+
TEST_F( threads, view_subview_auto_1d_left ) {
TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Threads >();
}
TEST_F( threads, view_subview_auto_1d_right ) {
TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Threads >();
}
TEST_F( threads, view_subview_auto_1d_stride ) {
TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Threads >();
}
TEST_F( threads, view_subview_assign_strided ) {
TestViewSubview::test_1d_strided_assignment< Kokkos::Threads >();
}
TEST_F( threads, view_subview_left_0 ) {
TestViewSubview::test_left_0< Kokkos::Threads >();
}
TEST_F( threads, view_subview_left_1 ) {
TestViewSubview::test_left_1< Kokkos::Threads >();
}
TEST_F( threads, view_subview_left_2 ) {
TestViewSubview::test_left_2< Kokkos::Threads >();
}
TEST_F( threads, view_subview_left_3 ) {
TestViewSubview::test_left_3< Kokkos::Threads >();
}
TEST_F( threads, view_subview_right_0 ) {
TestViewSubview::test_right_0< Kokkos::Threads >();
}
TEST_F( threads, view_subview_right_1 ) {
TestViewSubview::test_right_1< Kokkos::Threads >();
}
TEST_F( threads, view_subview_right_3 ) {
TestViewSubview::test_right_3< Kokkos::Threads >();
}
TEST_F( threads, view_aggregate ) {
TestViewAggregate< Kokkos::Threads >();
TestViewAggregateReduction< Kokkos::Threads >();
}
TEST_F( threads , range_tag )
{
TestRange< Kokkos::Threads >::test_for(1000);
TestRange< Kokkos::Threads >::test_reduce(1000);
TestRange< Kokkos::Threads >::test_scan(1000);
}
TEST_F( threads , team_tag )
{
TestTeamPolicy< Kokkos::Threads >::test_for(1000);
TestTeamPolicy< Kokkos::Threads >::test_reduce(1000);
}
TEST_F( threads, long_reduce) {
TestReduce< long , Kokkos::Threads >( 1000000 );
}
TEST_F( threads, double_reduce) {
TestReduce< double , Kokkos::Threads >( 1000000 );
}
TEST_F( threads, team_long_reduce) {
TestReduceTeam< long , Kokkos::Threads >( 100000 );
}
TEST_F( threads, team_double_reduce) {
TestReduceTeam< double , Kokkos::Threads >( 100000 );
}
TEST_F( threads, long_reduce_dynamic ) {
TestReduceDynamic< long , Kokkos::Threads >( 1000000 );
}
TEST_F( threads, double_reduce_dynamic ) {
TestReduceDynamic< double , Kokkos::Threads >( 1000000 );
}
TEST_F( threads, long_reduce_dynamic_view ) {
TestReduceDynamicView< long , Kokkos::Threads >( 1000000 );
}
TEST_F( threads, team_shared_request) {
TestSharedTeam< Kokkos::Threads >();
}
+#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) && !defined(KOKKOS_HAVE_CUDA)
+TEST_F( threads, team_lambda_shared_request) {
+ TestLambdaSharedTeam< Kokkos::Threads >();
+}
+#endif
+
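// view_remap: deep_copy from a LayoutLeft int view into a LayoutRight
// double view must remap the layout and convert the value type; the
// result is then verified element by element.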
TEST_F( threads , view_remap )
{
enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 };
typedef Kokkos::View< double*[N1][N2][N3] ,
Kokkos::LayoutRight ,
Kokkos::Threads > output_type ;
typedef Kokkos::View< int**[N2][N3] ,
Kokkos::LayoutLeft ,
Kokkos::Threads > input_type ;
typedef Kokkos::View< int*[N0][N2][N3] ,
Kokkos::LayoutLeft ,
Kokkos::Threads > diff_type ;
output_type output( "output" , N0 );
input_type input ( "input" , N0 , N1 );
diff_type diff ( "diff" , N0 );
int value = 0 ;
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
input(i0,i1,i2,i3) = ++value ;
}}}}
 // Kokkos::deep_copy( diff , input ); // would throw due to incompatible shapes
Kokkos::deep_copy( output , input );
value = 0 ;
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) {
++value ;
ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) );
}}}}
}
//----------------------------------------------------------------------------
TEST_F( threads , atomics )
{
 const int loop_count = 1000000 ;
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,3) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,3) ) );
#if defined( KOKKOS_ENABLE_ASM )
ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,2) ) );
ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,3) ) );
#endif
ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<3>, Kokkos::Threads>(loop_count,1) ) );
ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<3>, Kokkos::Threads>(loop_count,2) ) );
}
//----------------------------------------------------------------------------
#if 0
TEST_F( threads , scan_small )
{
typedef TestScan< Kokkos::Threads , Kokkos::Impl::ThreadsExecUseScanSmall > TestScanFunctor ;
for ( int i = 0 ; i < 1000 ; ++i ) {
TestScanFunctor( 10 );
TestScanFunctor( 10000 );
}
TestScanFunctor( 1000000 );
TestScanFunctor( 10000000 );
Kokkos::Threads::fence();
}
#endif
TEST_F( threads , scan )
{
TestScan< Kokkos::Threads >::test_range( 1 , 1000 );
TestScan< Kokkos::Threads >( 1000000 );
TestScan< Kokkos::Threads >( 10000000 );
Kokkos::Threads::fence();
}
//----------------------------------------------------------------------------
TEST_F( threads , team_scan )
{
TestScanTeam< Kokkos::Threads >( 10 );
TestScanTeam< Kokkos::Threads >( 10000 );
}
//----------------------------------------------------------------------------
TEST_F( threads , compiler_macros )
{
ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Threads >() ) );
}
TEST_F( threads , memory_space )
{
TestMemorySpace< Kokkos::Threads >();
}
//----------------------------------------------------------------------------
TEST_F( threads , template_meta_functions )
{
TestTemplateMetaFunctions<int, Kokkos::Threads >();
}
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
TEST_F( threads , cxx11 )
{
if ( Kokkos::Impl::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Threads >::value ) {
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(1) ) );
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(2) ) );
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(3) ) );
ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(4) ) );
}
}
TEST_F( threads , reduction_deduction )
{
TestCXX11::test_reduction_deduction< Kokkos::Threads >();
}
#endif /* #if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) */
TEST_F( threads , team_vector )
{
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(0) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(1) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(2) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(3) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(4) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(5) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(6) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(7) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(8) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(9) ) );
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(10) ) );
}
TEST_F( threads , task_policy )
{
TestTaskPolicy::test_task_dep< Kokkos::Threads >( 10 );
for ( long i = 0 ; i < 25 ; ++i ) TestTaskPolicy::test_fib< Kokkos::Threads >(i);
for ( long i = 0 ; i < 35 ; ++i ) TestTaskPolicy::test_fib2< Kokkos::Threads >(i);
}
TEST_F( threads , task_team )
{
TestTaskPolicy::test_task_team< Kokkos::Threads >(1000);
}
} // namespace Test
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
diff --git a/lib/kokkos/core/unit_test/TestViewAPI.hpp b/lib/kokkos/core/unit_test/TestViewAPI.hpp
index 1aeab1e41..7b4dac679 100644
--- a/lib/kokkos/core/unit_test/TestViewAPI.hpp
+++ b/lib/kokkos/core/unit_test/TestViewAPI.hpp
@@ -1,1370 +1,1406 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <stdexcept>
#include <sstream>
#include <iostream>
/*--------------------------------------------------------------------------*/
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
namespace Test {
template< typename T, class DeviceType >
class TestViewAPI {
public:
TestViewAPI() {}
};
}
#else
/*--------------------------------------------------------------------------*/
namespace Test {
template< class T , class L , class D , class M , class S >
size_t allocation_count( const Kokkos::View<T,L,D,M,S> & view )
{
const size_t card = Kokkos::Impl::cardinality_count( view.shape() );
const size_t alloc = view.capacity();
return card <= alloc ? alloc : 0 ;
}
/*--------------------------------------------------------------------------*/
template< typename T, class DeviceType>
struct TestViewOperator
{
typedef DeviceType execution_space ;
static const unsigned N = 100 ;
static const unsigned D = 3 ;
typedef Kokkos::View< T*[D] , execution_space > view_type ;
const view_type v1 ;
const view_type v2 ;
TestViewOperator()
: v1( "v1" , N )
, v2( "v2" , N )
{}
static void testit()
{
Kokkos::parallel_for( N , TestViewOperator() );
}
KOKKOS_INLINE_FUNCTION
void operator()( const unsigned i ) const
{
const unsigned X = 0 ;
const unsigned Y = 1 ;
const unsigned Z = 2 ;
v2(i,X) = v1(i,X);
v2(i,Y) = v1(i,Y);
v2(i,Z) = v1(i,Z);
}
};
/*--------------------------------------------------------------------------*/
template< class DataType >
struct rank {
private:
typedef typename Kokkos::Impl::AnalyzeShape<DataType>::shape shape ;
public:
static const unsigned value = shape::rank ;
};
template< class DataType ,
class DeviceType ,
unsigned Rank = rank< DataType >::value >
struct TestViewOperator_LeftAndRight ;
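// Each rank specialization walks a LayoutLeft and a LayoutRight view in
// their respective fastest-index orders and reduces an error bitmask:
// bit 1 = non-increasing or out-of-range offset in the left view,
// bit 2 = the same for the right view, bits 4 and 8 = an address
// mismatch against the LayoutStride aliases, and |= 3 marks at()
// mismatches where checked. join() ORs the per-thread masks together.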
template< class DataType , class DeviceType >
struct TestViewOperator_LeftAndRight< DataType , DeviceType , 8 >
{
typedef DeviceType execution_space ;
typedef typename execution_space::memory_space memory_space ;
typedef typename execution_space::size_type size_type ;
typedef int value_type ;
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
const volatile value_type & input )
{ update |= input ; }
KOKKOS_INLINE_FUNCTION
static void init( value_type & update )
{ update = 0 ; }
typedef Kokkos::
View< DataType, Kokkos::LayoutLeft, execution_space > left_view ;
typedef Kokkos::
View< DataType, Kokkos::LayoutRight, execution_space > right_view ;
typedef Kokkos::
View< DataType, Kokkos::LayoutStride, execution_space > stride_view ;
typedef typename left_view ::shape_type left_shape ;
typedef typename right_view::shape_type right_shape ;
left_shape lsh ;
right_shape rsh ;
left_view left ;
right_view right ;
stride_view left_stride ;
stride_view right_stride ;
long left_alloc ;
long right_alloc ;
TestViewOperator_LeftAndRight()
: lsh()
, rsh()
, left( "left" )
, right( "right" )
, left_stride( left )
, right_stride( right )
, left_alloc( allocation_count( left ) )
, right_alloc( allocation_count( right ) )
{}
static void testit()
{
TestViewOperator_LeftAndRight driver ;
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.lsh ) <= driver.left_alloc );
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.rsh ) <= driver.right_alloc );
int error_flag = 0 ;
Kokkos::parallel_reduce( 1 , driver , error_flag );
ASSERT_EQ( error_flag , 0 );
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type , value_type & update ) const
{
long offset ;
offset = -1 ;
for ( unsigned i7 = 0 ; i7 < unsigned(lsh.N7) ; ++i7 )
for ( unsigned i6 = 0 ; i6 < unsigned(lsh.N6) ; ++i6 )
for ( unsigned i5 = 0 ; i5 < unsigned(lsh.N5) ; ++i5 )
for ( unsigned i4 = 0 ; i4 < unsigned(lsh.N4) ; ++i4 )
for ( unsigned i3 = 0 ; i3 < unsigned(lsh.N3) ; ++i3 )
for ( unsigned i2 = 0 ; i2 < unsigned(lsh.N2) ; ++i2 )
for ( unsigned i1 = 0 ; i1 < unsigned(lsh.N1) ; ++i1 )
for ( unsigned i0 = 0 ; i0 < unsigned(lsh.N0) ; ++i0 )
{
const long j = & left( i0, i1, i2, i3, i4, i5, i6, i7 ) -
& left( 0, 0, 0, 0, 0, 0, 0, 0 );
if ( j <= offset || left_alloc <= j ) { update |= 1 ; }
offset = j ;
if ( & left(i0,i1,i2,i3,i4,i5,i6,i7) !=
& left_stride(i0,i1,i2,i3,i4,i5,i6,i7) ) {
update |= 4 ;
}
}
offset = -1 ;
for ( unsigned i0 = 0 ; i0 < unsigned(rsh.N0) ; ++i0 )
for ( unsigned i1 = 0 ; i1 < unsigned(rsh.N1) ; ++i1 )
for ( unsigned i2 = 0 ; i2 < unsigned(rsh.N2) ; ++i2 )
for ( unsigned i3 = 0 ; i3 < unsigned(rsh.N3) ; ++i3 )
for ( unsigned i4 = 0 ; i4 < unsigned(rsh.N4) ; ++i4 )
for ( unsigned i5 = 0 ; i5 < unsigned(rsh.N5) ; ++i5 )
for ( unsigned i6 = 0 ; i6 < unsigned(rsh.N6) ; ++i6 )
for ( unsigned i7 = 0 ; i7 < unsigned(rsh.N7) ; ++i7 )
{
const long j = & right( i0, i1, i2, i3, i4, i5, i6, i7 ) -
& right( 0, 0, 0, 0, 0, 0, 0, 0 );
if ( j <= offset || right_alloc <= j ) { update |= 2 ; }
offset = j ;
if ( & right(i0,i1,i2,i3,i4,i5,i6,i7) !=
& right_stride(i0,i1,i2,i3,i4,i5,i6,i7) ) {
update |= 8 ;
}
}
}
};
template< class DataType , class DeviceType >
struct TestViewOperator_LeftAndRight< DataType , DeviceType , 7 >
{
typedef DeviceType execution_space ;
typedef typename execution_space::memory_space memory_space ;
typedef typename execution_space::size_type size_type ;
typedef int value_type ;
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
const volatile value_type & input )
{ update |= input ; }
KOKKOS_INLINE_FUNCTION
static void init( value_type & update )
{ update = 0 ; }
typedef Kokkos::
View< DataType, Kokkos::LayoutLeft, execution_space > left_view ;
typedef Kokkos::
View< DataType, Kokkos::LayoutRight, execution_space > right_view ;
typedef typename left_view ::shape_type left_shape ;
typedef typename right_view::shape_type right_shape ;
left_shape lsh ;
right_shape rsh ;
left_view left ;
right_view right ;
long left_alloc ;
long right_alloc ;
TestViewOperator_LeftAndRight()
: lsh()
, rsh()
, left( "left" )
, right( "right" )
, left_alloc( allocation_count( left ) )
, right_alloc( allocation_count( right ) )
{}
static void testit()
{
TestViewOperator_LeftAndRight driver ;
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.lsh ) <= driver.left_alloc );
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.rsh ) <= driver.right_alloc );
int error_flag = 0 ;
Kokkos::parallel_reduce( 1 , driver , error_flag );
ASSERT_EQ( error_flag , 0 );
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type , value_type & update ) const
{
long offset ;
offset = -1 ;
for ( unsigned i6 = 0 ; i6 < unsigned(lsh.N6) ; ++i6 )
for ( unsigned i5 = 0 ; i5 < unsigned(lsh.N5) ; ++i5 )
for ( unsigned i4 = 0 ; i4 < unsigned(lsh.N4) ; ++i4 )
for ( unsigned i3 = 0 ; i3 < unsigned(lsh.N3) ; ++i3 )
for ( unsigned i2 = 0 ; i2 < unsigned(lsh.N2) ; ++i2 )
for ( unsigned i1 = 0 ; i1 < unsigned(lsh.N1) ; ++i1 )
for ( unsigned i0 = 0 ; i0 < unsigned(lsh.N0) ; ++i0 )
{
const long j = & left( i0, i1, i2, i3, i4, i5, i6 ) -
& left( 0, 0, 0, 0, 0, 0, 0 );
if ( j <= offset || left_alloc <= j ) { update |= 1 ; }
offset = j ;
}
offset = -1 ;
for ( unsigned i0 = 0 ; i0 < unsigned(rsh.N0) ; ++i0 )
for ( unsigned i1 = 0 ; i1 < unsigned(rsh.N1) ; ++i1 )
for ( unsigned i2 = 0 ; i2 < unsigned(rsh.N2) ; ++i2 )
for ( unsigned i3 = 0 ; i3 < unsigned(rsh.N3) ; ++i3 )
for ( unsigned i4 = 0 ; i4 < unsigned(rsh.N4) ; ++i4 )
for ( unsigned i5 = 0 ; i5 < unsigned(rsh.N5) ; ++i5 )
for ( unsigned i6 = 0 ; i6 < unsigned(rsh.N6) ; ++i6 )
{
const long j = & right( i0, i1, i2, i3, i4, i5, i6 ) -
& right( 0, 0, 0, 0, 0, 0, 0 );
if ( j <= offset || right_alloc <= j ) { update |= 2 ; }
offset = j ;
}
}
};
template< class DataType , class DeviceType >
struct TestViewOperator_LeftAndRight< DataType , DeviceType , 6 >
{
typedef DeviceType execution_space ;
typedef typename execution_space::memory_space memory_space ;
typedef typename execution_space::size_type size_type ;
typedef int value_type ;
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
const volatile value_type & input )
{ update |= input ; }
KOKKOS_INLINE_FUNCTION
static void init( value_type & update )
{ update = 0 ; }
typedef Kokkos::
View< DataType, Kokkos::LayoutLeft, execution_space > left_view ;
typedef Kokkos::
View< DataType, Kokkos::LayoutRight, execution_space > right_view ;
typedef typename left_view ::shape_type left_shape ;
typedef typename right_view::shape_type right_shape ;
left_shape lsh ;
right_shape rsh ;
left_view left ;
right_view right ;
long left_alloc ;
long right_alloc ;
TestViewOperator_LeftAndRight()
: lsh()
, rsh()
, left( "left" )
, right( "right" )
, left_alloc( allocation_count( left ) )
, right_alloc( allocation_count( right ) )
{}
static void testit()
{
TestViewOperator_LeftAndRight driver ;
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.lsh ) <= driver.left_alloc );
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.rsh ) <= driver.right_alloc );
int error_flag = 0 ;
Kokkos::parallel_reduce( 1 , driver , error_flag );
ASSERT_EQ( error_flag , 0 );
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type , value_type & update ) const
{
long offset ;
offset = -1 ;
for ( unsigned i5 = 0 ; i5 < unsigned(lsh.N5) ; ++i5 )
for ( unsigned i4 = 0 ; i4 < unsigned(lsh.N4) ; ++i4 )
for ( unsigned i3 = 0 ; i3 < unsigned(lsh.N3) ; ++i3 )
for ( unsigned i2 = 0 ; i2 < unsigned(lsh.N2) ; ++i2 )
for ( unsigned i1 = 0 ; i1 < unsigned(lsh.N1) ; ++i1 )
for ( unsigned i0 = 0 ; i0 < unsigned(lsh.N0) ; ++i0 )
{
const long j = & left( i0, i1, i2, i3, i4, i5 ) -
& left( 0, 0, 0, 0, 0, 0 );
if ( j <= offset || left_alloc <= j ) { update |= 1 ; }
offset = j ;
}
offset = -1 ;
for ( unsigned i0 = 0 ; i0 < unsigned(rsh.N0) ; ++i0 )
for ( unsigned i1 = 0 ; i1 < unsigned(rsh.N1) ; ++i1 )
for ( unsigned i2 = 0 ; i2 < unsigned(rsh.N2) ; ++i2 )
for ( unsigned i3 = 0 ; i3 < unsigned(rsh.N3) ; ++i3 )
for ( unsigned i4 = 0 ; i4 < unsigned(rsh.N4) ; ++i4 )
for ( unsigned i5 = 0 ; i5 < unsigned(rsh.N5) ; ++i5 )
{
const long j = & right( i0, i1, i2, i3, i4, i5 ) -
& right( 0, 0, 0, 0, 0, 0 );
if ( j <= offset || right_alloc <= j ) { update |= 2 ; }
offset = j ;
}
}
};
template< class DataType , class DeviceType >
struct TestViewOperator_LeftAndRight< DataType , DeviceType , 5 >
{
typedef DeviceType execution_space ;
typedef typename execution_space::memory_space memory_space ;
typedef typename execution_space::size_type size_type ;
typedef int value_type ;
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
const volatile value_type & input )
{ update |= input ; }
KOKKOS_INLINE_FUNCTION
static void init( value_type & update )
{ update = 0 ; }
typedef Kokkos::
View< DataType, Kokkos::LayoutLeft, execution_space > left_view ;
typedef Kokkos::
View< DataType, Kokkos::LayoutRight, execution_space > right_view ;
typedef Kokkos::
View< DataType, Kokkos::LayoutStride, execution_space > stride_view ;
typedef typename left_view ::shape_type left_shape ;
typedef typename right_view::shape_type right_shape ;
left_shape lsh ;
right_shape rsh ;
left_view left ;
right_view right ;
stride_view left_stride ;
stride_view right_stride ;
long left_alloc ;
long right_alloc ;
TestViewOperator_LeftAndRight()
: lsh()
, rsh()
, left( "left" )
, right( "right" )
, left_stride( left )
, right_stride( right )
, left_alloc( allocation_count( left ) )
, right_alloc( allocation_count( right ) )
{}
static void testit()
{
TestViewOperator_LeftAndRight driver ;
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.lsh ) <= driver.left_alloc );
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.rsh ) <= driver.right_alloc );
int error_flag = 0 ;
Kokkos::parallel_reduce( 1 , driver , error_flag );
ASSERT_EQ( error_flag , 0 );
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type , value_type & update ) const
{
long offset ;
offset = -1 ;
for ( unsigned i4 = 0 ; i4 < unsigned(lsh.N4) ; ++i4 )
for ( unsigned i3 = 0 ; i3 < unsigned(lsh.N3) ; ++i3 )
for ( unsigned i2 = 0 ; i2 < unsigned(lsh.N2) ; ++i2 )
for ( unsigned i1 = 0 ; i1 < unsigned(lsh.N1) ; ++i1 )
for ( unsigned i0 = 0 ; i0 < unsigned(lsh.N0) ; ++i0 )
{
const long j = & left( i0, i1, i2, i3, i4 ) -
& left( 0, 0, 0, 0, 0 );
if ( j <= offset || left_alloc <= j ) { update |= 1 ; }
offset = j ;
if ( & left( i0, i1, i2, i3, i4 ) !=
& left_stride( i0, i1, i2, i3, i4 ) ) { update |= 4 ; }
}
offset = -1 ;
for ( unsigned i0 = 0 ; i0 < unsigned(rsh.N0) ; ++i0 )
for ( unsigned i1 = 0 ; i1 < unsigned(rsh.N1) ; ++i1 )
for ( unsigned i2 = 0 ; i2 < unsigned(rsh.N2) ; ++i2 )
for ( unsigned i3 = 0 ; i3 < unsigned(rsh.N3) ; ++i3 )
for ( unsigned i4 = 0 ; i4 < unsigned(rsh.N4) ; ++i4 )
{
const long j = & right( i0, i1, i2, i3, i4 ) -
& right( 0, 0, 0, 0, 0 );
if ( j <= offset || right_alloc <= j ) { update |= 2 ; }
offset = j ;
if ( & right( i0, i1, i2, i3, i4 ) !=
& right_stride( i0, i1, i2, i3, i4 ) ) { update |= 8 ; }
}
}
};
template< class DataType , class DeviceType >
struct TestViewOperator_LeftAndRight< DataType , DeviceType , 4 >
{
typedef DeviceType execution_space ;
typedef typename execution_space::memory_space memory_space ;
typedef typename execution_space::size_type size_type ;
typedef int value_type ;
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
const volatile value_type & input )
{ update |= input ; }
KOKKOS_INLINE_FUNCTION
static void init( value_type & update )
{ update = 0 ; }
typedef Kokkos::
View< DataType, Kokkos::LayoutLeft, execution_space > left_view ;
typedef Kokkos::
View< DataType, Kokkos::LayoutRight, execution_space > right_view ;
typedef typename left_view ::shape_type left_shape ;
typedef typename right_view::shape_type right_shape ;
left_shape lsh ;
right_shape rsh ;
left_view left ;
right_view right ;
long left_alloc ;
long right_alloc ;
TestViewOperator_LeftAndRight()
: lsh()
, rsh()
, left( "left" )
, right( "right" )
, left_alloc( allocation_count( left ) )
, right_alloc( allocation_count( right ) )
{}
static void testit()
{
TestViewOperator_LeftAndRight driver ;
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.lsh ) <= driver.left_alloc );
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.rsh ) <= driver.right_alloc );
int error_flag = 0 ;
Kokkos::parallel_reduce( 1 , driver , error_flag );
ASSERT_EQ( error_flag , 0 );
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type , value_type & update ) const
{
long offset ;
offset = -1 ;
for ( unsigned i3 = 0 ; i3 < unsigned(lsh.N3) ; ++i3 )
for ( unsigned i2 = 0 ; i2 < unsigned(lsh.N2) ; ++i2 )
for ( unsigned i1 = 0 ; i1 < unsigned(lsh.N1) ; ++i1 )
for ( unsigned i0 = 0 ; i0 < unsigned(lsh.N0) ; ++i0 )
{
const long j = & left( i0, i1, i2, i3 ) -
& left( 0, 0, 0, 0 );
if ( j <= offset || left_alloc <= j ) { update |= 1 ; }
offset = j ;
}
offset = -1 ;
for ( unsigned i0 = 0 ; i0 < unsigned(rsh.N0) ; ++i0 )
for ( unsigned i1 = 0 ; i1 < unsigned(rsh.N1) ; ++i1 )
for ( unsigned i2 = 0 ; i2 < unsigned(rsh.N2) ; ++i2 )
for ( unsigned i3 = 0 ; i3 < unsigned(rsh.N3) ; ++i3 )
{
const long j = & right( i0, i1, i2, i3 ) -
& right( 0, 0, 0, 0 );
if ( j <= offset || right_alloc <= j ) { update |= 2 ; }
offset = j ;
}
}
};
template< class DataType , class DeviceType >
struct TestViewOperator_LeftAndRight< DataType , DeviceType , 3 >
{
typedef DeviceType execution_space ;
typedef typename execution_space::memory_space memory_space ;
typedef typename execution_space::size_type size_type ;
typedef int value_type ;
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
const volatile value_type & input )
{ update |= input ; }
KOKKOS_INLINE_FUNCTION
static void init( value_type & update )
{ update = 0 ; }
typedef Kokkos::
View< DataType, Kokkos::LayoutLeft, execution_space > left_view ;
typedef Kokkos::
View< DataType, Kokkos::LayoutRight, execution_space > right_view ;
typedef Kokkos::
View< DataType, Kokkos::LayoutStride, execution_space > stride_view ;
typedef typename left_view ::shape_type left_shape ;
typedef typename right_view::shape_type right_shape ;
left_shape lsh ;
right_shape rsh ;
left_view left ;
right_view right ;
stride_view left_stride ;
stride_view right_stride ;
long left_alloc ;
long right_alloc ;
TestViewOperator_LeftAndRight()
: lsh()
, rsh()
, left( std::string("left") )
, right( std::string("right") )
, left_stride( left )
, right_stride( right )
, left_alloc( allocation_count( left ) )
, right_alloc( allocation_count( right ) )
{}
static void testit()
{
TestViewOperator_LeftAndRight driver ;
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.lsh ) <= driver.left_alloc );
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.rsh ) <= driver.right_alloc );
int error_flag = 0 ;
Kokkos::parallel_reduce( 1 , driver , error_flag );
ASSERT_EQ( error_flag , 0 );
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type , value_type & update ) const
{
long offset ;
offset = -1 ;
for ( unsigned i2 = 0 ; i2 < unsigned(lsh.N2) ; ++i2 )
for ( unsigned i1 = 0 ; i1 < unsigned(lsh.N1) ; ++i1 )
for ( unsigned i0 = 0 ; i0 < unsigned(lsh.N0) ; ++i0 )
{
const long j = & left( i0, i1, i2 ) -
& left( 0, 0, 0 );
if ( j <= offset || left_alloc <= j ) { update |= 1 ; }
offset = j ;
if ( & left(i0,i1,i2) != & left_stride(i0,i1,i2) ) { update |= 4 ; }
}
offset = -1 ;
for ( unsigned i0 = 0 ; i0 < unsigned(rsh.N0) ; ++i0 )
for ( unsigned i1 = 0 ; i1 < unsigned(rsh.N1) ; ++i1 )
for ( unsigned i2 = 0 ; i2 < unsigned(rsh.N2) ; ++i2 )
{
const long j = & right( i0, i1, i2 ) -
& right( 0, 0, 0 );
if ( j <= offset || right_alloc <= j ) { update |= 2 ; }
offset = j ;
if ( & right(i0,i1,i2) != & right_stride(i0,i1,i2) ) { update |= 8 ; }
}
for ( unsigned i0 = 0 ; i0 < unsigned(lsh.N0) ; ++i0 )
for ( unsigned i1 = 0 ; i1 < unsigned(lsh.N1) ; ++i1 )
for ( unsigned i2 = 0 ; i2 < unsigned(lsh.N2) ; ++i2 )
{
if ( & left(i0,i1,i2) != & left.at(i0,i1,i2,0,0,0,0,0) ) { update |= 3 ; }
if ( & right(i0,i1,i2) != & right.at(i0,i1,i2,0,0,0,0,0) ) { update |= 3 ; }
}
}
};
template< class DataType , class DeviceType >
struct TestViewOperator_LeftAndRight< DataType , DeviceType , 2 >
{
typedef DeviceType execution_space ;
typedef typename execution_space::memory_space memory_space ;
typedef typename execution_space::size_type size_type ;
typedef int value_type ;
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
const volatile value_type & input )
{ update |= input ; }
KOKKOS_INLINE_FUNCTION
static void init( value_type & update )
{ update = 0 ; }
typedef Kokkos::
View< DataType, Kokkos::LayoutLeft, execution_space > left_view ;
typedef Kokkos::
View< DataType, Kokkos::LayoutRight, execution_space > right_view ;
typedef typename left_view ::shape_type left_shape ;
typedef typename right_view::shape_type right_shape ;
left_shape lsh ;
right_shape rsh ;
left_view left ;
right_view right ;
long left_alloc ;
long right_alloc ;
TestViewOperator_LeftAndRight()
: lsh()
, rsh()
, left( Kokkos::ViewAllocate("left") )
, right( Kokkos::ViewAllocate("right") )
, left_alloc( allocation_count( left ) )
, right_alloc( allocation_count( right ) )
{}
static void testit()
{
TestViewOperator_LeftAndRight driver ;
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.lsh ) <= driver.left_alloc );
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.rsh ) <= driver.right_alloc );
int error_flag = 0 ;
Kokkos::parallel_reduce( 1 , driver , error_flag );
ASSERT_EQ( error_flag , 0 );
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type , value_type & update ) const
{
long offset ;
offset = -1 ;
for ( unsigned i1 = 0 ; i1 < unsigned(lsh.N1) ; ++i1 )
for ( unsigned i0 = 0 ; i0 < unsigned(lsh.N0) ; ++i0 )
{
const long j = & left( i0, i1 ) -
& left( 0, 0 );
if ( j <= offset || left_alloc <= j ) { update |= 1 ; }
offset = j ;
}
offset = -1 ;
for ( unsigned i0 = 0 ; i0 < unsigned(rsh.N0) ; ++i0 )
for ( unsigned i1 = 0 ; i1 < unsigned(rsh.N1) ; ++i1 )
{
const long j = & right( i0, i1 ) -
& right( 0, 0 );
if ( j <= offset || right_alloc <= j ) { update |= 2 ; }
offset = j ;
}
for ( unsigned i0 = 0 ; i0 < unsigned(lsh.N0) ; ++i0 )
for ( unsigned i1 = 0 ; i1 < unsigned(lsh.N1) ; ++i1 )
{
if ( & left(i0,i1) != & left.at(i0,i1,0,0,0,0,0,0) ) { update |= 3 ; }
if ( & right(i0,i1) != & right.at(i0,i1,0,0,0,0,0,0) ) { update |= 3 ; }
}
}
};
template< class DataType , class DeviceType >
struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 >
{
typedef DeviceType execution_space ;
typedef typename execution_space::memory_space memory_space ;
typedef typename execution_space::size_type size_type ;
typedef int value_type ;
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
const volatile value_type & input )
{ update |= input ; }
KOKKOS_INLINE_FUNCTION
static void init( value_type & update )
{ update = 0 ; }
typedef Kokkos::
View< DataType, Kokkos::LayoutLeft, execution_space > left_view ;
typedef Kokkos::
View< DataType, Kokkos::LayoutRight, execution_space > right_view ;
typedef Kokkos::
View< DataType, Kokkos::LayoutStride, execution_space > stride_view ;
typedef typename left_view ::shape_type left_shape ;
typedef typename right_view::shape_type right_shape ;
left_shape lsh ;
right_shape rsh ;
left_view left ;
right_view right ;
stride_view left_stride ;
stride_view right_stride ;
long left_alloc ;
long right_alloc ;
TestViewOperator_LeftAndRight()
: lsh()
, rsh()
, left( Kokkos::ViewAllocate() )
, right( Kokkos::ViewAllocate() )
, left_stride( left )
, right_stride( right )
, left_alloc( allocation_count( left ) )
, right_alloc( allocation_count( right ) )
{}
static void testit()
{
TestViewOperator_LeftAndRight driver ;
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.lsh ) <= driver.left_alloc );
ASSERT_TRUE( (long) Kokkos::Impl::cardinality_count( driver.rsh ) <= driver.right_alloc );
int error_flag = 0 ;
Kokkos::parallel_reduce( 1 , driver , error_flag );
ASSERT_EQ( error_flag , 0 );
}
KOKKOS_INLINE_FUNCTION
void operator()( const size_type , value_type & update ) const
{
for ( unsigned i0 = 0 ; i0 < unsigned(lsh.N0) ; ++i0 )
{
if ( & left(i0) != & left.at(i0,0,0,0,0,0,0,0) ) { update |= 3 ; }
if ( & right(i0) != & right.at(i0,0,0,0,0,0,0,0) ) { update |= 3 ; }
if ( & left(i0) != & left_stride(i0) ) { update |= 4 ; }
if ( & right(i0) != & right_stride(i0) ) { update |= 8 ; }
}
}
};
/*--------------------------------------------------------------------------*/
template< typename T, class DeviceType >
class TestViewAPI
{
public:
typedef DeviceType device ;
enum { N0 = 1000 ,
N1 = 3 ,
N2 = 5 ,
N3 = 7 };
typedef Kokkos::View< T , device > dView0 ;
typedef Kokkos::View< T* , device > dView1 ;
typedef Kokkos::View< T*[N1] , device > dView2 ;
typedef Kokkos::View< T*[N1][N2] , device > dView3 ;
typedef Kokkos::View< T*[N1][N2][N3] , device > dView4 ;
typedef Kokkos::View< const T*[N1][N2][N3] , device > const_dView4 ;
typedef Kokkos::View< T****, device, Kokkos::MemoryUnmanaged > dView4_unmanaged ;
typedef typename dView0::host_mirror_space host ;
TestViewAPI()
{
run_test_mirror();
run_test();
run_test_scalar();
run_test_const();
run_test_subview();
run_test_subview_strided();
run_test_vector();
TestViewOperator< T , device >::testit();
TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2][3] , device >::testit();
TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2] , device >::testit();
TestViewOperator_LeftAndRight< int[2][3][4][2][3][4] , device >::testit();
TestViewOperator_LeftAndRight< int[2][3][4][2][3] , device >::testit();
TestViewOperator_LeftAndRight< int[2][3][4][2] , device >::testit();
TestViewOperator_LeftAndRight< int[2][3][4] , device >::testit();
TestViewOperator_LeftAndRight< int[2][3] , device >::testit();
TestViewOperator_LeftAndRight< int[2] , device >::testit();
}
static void run_test_mirror()
{
typedef Kokkos::View< int , host > view_type ;
typedef typename view_type::HostMirror mirror_type ;
view_type a("a");
mirror_type am = Kokkos::create_mirror_view(a);
mirror_type ax = Kokkos::create_mirror(a);
ASSERT_EQ( & a() , & am() );
}
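 // Note: create_mirror_view aliases the original allocation when the
 // view is already host-accessible, hence &a() == &am(); create_mirror
 // always allocates a separate copy.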
static void run_test_scalar()
{
typedef typename dView0::HostMirror hView0 ;
dView0 dx , dy ;
hView0 hx , hy ;
dx = dView0( "dx" );
dy = dView0( "dy" );
hx = Kokkos::create_mirror( dx );
hy = Kokkos::create_mirror( dy );
hx = 1 ;
Kokkos::deep_copy( dx , hx );
Kokkos::deep_copy( dy , dx );
Kokkos::deep_copy( hy , dy );
ASSERT_EQ( hx(), hy() );
}
static void run_test()
{
 // mfh 14 Feb 2014: This test doesn't actually use instances of
 // these types. In order to avoid "declared but unused typedef"
 // warnings, we declare empty instances of these types, with the
 // usual "(void)" cast to avoid compiler warnings for unused
 // variables.
typedef typename dView0::HostMirror hView0 ;
typedef typename dView1::HostMirror hView1 ;
typedef typename dView2::HostMirror hView2 ;
typedef typename dView3::HostMirror hView3 ;
typedef typename dView4::HostMirror hView4 ;
{
hView0 thing;
(void) thing;
}
{
hView1 thing;
(void) thing;
}
{
hView2 thing;
(void) thing;
}
{
hView3 thing;
(void) thing;
}
{
hView4 thing;
(void) thing;
}
dView4 dx , dy , dz ;
hView4 hx , hy , hz ;
ASSERT_TRUE( dx.is_null() );
ASSERT_TRUE( dy.is_null() );
ASSERT_TRUE( dz.is_null() );
ASSERT_TRUE( hx.is_null() );
ASSERT_TRUE( hy.is_null() );
ASSERT_TRUE( hz.is_null() );
ASSERT_EQ( dx.dimension_0() , 0u );
ASSERT_EQ( dy.dimension_0() , 0u );
ASSERT_EQ( dz.dimension_0() , 0u );
ASSERT_EQ( hx.dimension_0() , 0u );
ASSERT_EQ( hy.dimension_0() , 0u );
ASSERT_EQ( hz.dimension_0() , 0u );
ASSERT_EQ( dx.dimension_1() , unsigned(N1) );
ASSERT_EQ( dy.dimension_1() , unsigned(N1) );
ASSERT_EQ( dz.dimension_1() , unsigned(N1) );
ASSERT_EQ( hx.dimension_1() , unsigned(N1) );
ASSERT_EQ( hy.dimension_1() , unsigned(N1) );
ASSERT_EQ( hz.dimension_1() , unsigned(N1) );
dx = dView4( "dx" , N0 );
dy = dView4( "dy" , N0 );
-
+ #ifndef KOKKOS_USING_EXPERIMENTAL_VIEW
+ ASSERT_EQ( dx.tracker().ref_count() , size_t(1) );
+ #endif
dView4_unmanaged unmanaged_dx = dx;
+ #ifndef KOKKOS_USING_EXPERIMENTAL_VIEW
+ ASSERT_EQ( dx.tracker().ref_count() , size_t(1) );
+ #endif
+
dView4_unmanaged unmanaged_from_ptr_dx = dView4_unmanaged(dx.ptr_on_device(),
dx.dimension_0(),
dx.dimension_1(),
dx.dimension_2(),
dx.dimension_3());
{
// Destruction of this view should be harmless
const_dView4 unmanaged_from_ptr_const_dx( dx.ptr_on_device() ,
dx.dimension_0() ,
dx.dimension_1() ,
dx.dimension_2() ,
dx.dimension_3() );
}
const_dView4 const_dx = dx ;
+ #ifndef KOKKOS_USING_EXPERIMENTAL_VIEW
+ ASSERT_EQ( dx.tracker().ref_count() , size_t(2) );
+ #endif
+
+ {
+ const_dView4 const_dx2;
+ const_dx2 = const_dx;
+ #ifndef KOKKOS_USING_EXPERIMENTAL_VIEW
+ ASSERT_EQ( dx.tracker().ref_count() , size_t(3) );
+ #endif
+
+ const_dx2 = dy;
+ #ifndef KOKKOS_USING_EXPERIMENTAL_VIEW
+ ASSERT_EQ( dx.tracker().ref_count() , size_t(2) );
+ #endif
+
+ const_dView4 const_dx3(dx);
+ #ifndef KOKKOS_USING_EXPERIMENTAL_VIEW
+ ASSERT_EQ( dx.tracker().ref_count() , size_t(3) );
+ #endif
+
+ dView4_unmanaged dx4_unmanaged(dx);
+ #ifndef KOKKOS_USING_EXPERIMENTAL_VIEW
+ ASSERT_EQ( dx.tracker().ref_count() , size_t(3) );
+ #endif
+ }
+
+ #ifndef KOKKOS_USING_EXPERIMENTAL_VIEW
+ ASSERT_EQ( dx.tracker().ref_count() , size_t(2) );
+ #endif
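+ // Note: each managed copy above (const_dx2, const_dx3) bumps the
+ // allocation's reference count, while unmanaged aliases leave it
+ // unchanged; the count drops back to 2 once the block's copies
+ // are destroyed.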
ASSERT_FALSE( dx.is_null() );
ASSERT_FALSE( const_dx.is_null() );
ASSERT_FALSE( unmanaged_dx.is_null() );
ASSERT_FALSE( unmanaged_from_ptr_dx.is_null() );
ASSERT_FALSE( dy.is_null() );
ASSERT_NE( dx , dy );
ASSERT_EQ( dx.dimension_0() , unsigned(N0) );
ASSERT_EQ( dx.dimension_1() , unsigned(N1) );
ASSERT_EQ( dx.dimension_2() , unsigned(N2) );
ASSERT_EQ( dx.dimension_3() , unsigned(N3) );
ASSERT_EQ( dy.dimension_0() , unsigned(N0) );
ASSERT_EQ( dy.dimension_1() , unsigned(N1) );
ASSERT_EQ( dy.dimension_2() , unsigned(N2) );
ASSERT_EQ( dy.dimension_3() , unsigned(N3) );
ASSERT_EQ( unmanaged_from_ptr_dx.capacity(),unsigned(N0)*unsigned(N1)*unsigned(N2)*unsigned(N3) );
hx = Kokkos::create_mirror( dx );
hy = Kokkos::create_mirror( dy );
// T v1 = hx() ; // Generates compile error as intended
// T v2 = hx(0,0) ; // Generates compile error as intended
// hx(0,0) = v2 ; // Generates compile error as intended
// Testing with asynchronous deep copy with respect to device
{
size_t count = 0 ;
for ( size_t ip = 0 ; ip < N0 ; ++ip ) {
for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) {
for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) {
for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) {
hx(ip,i1,i2,i3) = ++count ;
}}}}
Kokkos::deep_copy(typename hView4::execution_space(), dx , hx );
Kokkos::deep_copy(typename hView4::execution_space(), dy , dx );
Kokkos::deep_copy(typename hView4::execution_space(), hy , dy );
for ( size_t ip = 0 ; ip < N0 ; ++ip ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
{ ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); }
}}}}
Kokkos::deep_copy(typename hView4::execution_space(), dx , T(0) );
Kokkos::deep_copy(typename hView4::execution_space(), hx , dx );
for ( size_t ip = 0 ; ip < N0 ; ++ip ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
{ ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); }
}}}}
}
// Testing with asynchronous deep copy with respect to host
{
size_t count = 0 ;
for ( size_t ip = 0 ; ip < N0 ; ++ip ) {
for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) {
for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) {
for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) {
hx(ip,i1,i2,i3) = ++count ;
}}}}
Kokkos::deep_copy(typename dView4::execution_space(), dx , hx );
Kokkos::deep_copy(typename dView4::execution_space(), dy , dx );
Kokkos::deep_copy(typename dView4::execution_space(), hy , dy );
for ( size_t ip = 0 ; ip < N0 ; ++ip ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
{ ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); }
}}}}
Kokkos::deep_copy(typename dView4::execution_space(), dx , T(0) );
Kokkos::deep_copy(typename dView4::execution_space(), hx , dx );
for ( size_t ip = 0 ; ip < N0 ; ++ip ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
{ ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); }
}}}}
}
// Testing with synchronous deep copy
{
size_t count = 0 ;
for ( size_t ip = 0 ; ip < N0 ; ++ip ) {
for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) {
for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) {
for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) {
hx(ip,i1,i2,i3) = ++count ;
}}}}
Kokkos::deep_copy( dx , hx );
Kokkos::deep_copy( dy , dx );
Kokkos::deep_copy( hy , dy );
for ( size_t ip = 0 ; ip < N0 ; ++ip ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
{ ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); }
}}}}
Kokkos::deep_copy( dx , T(0) );
Kokkos::deep_copy( hx , dx );
for ( size_t ip = 0 ; ip < N0 ; ++ip ) {
for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) {
for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) {
for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) {
{ ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); }
}}}}
}
dz = dx ; ASSERT_EQ( dx, dz); ASSERT_NE( dy, dz);
dz = dy ; ASSERT_EQ( dy, dz); ASSERT_NE( dx, dz);
dx = dView4();
ASSERT_TRUE( dx.is_null() );
ASSERT_FALSE( dy.is_null() );
ASSERT_FALSE( dz.is_null() );
dy = dView4();
ASSERT_TRUE( dx.is_null() );
ASSERT_TRUE( dy.is_null() );
ASSERT_FALSE( dz.is_null() );
dz = dView4();
ASSERT_TRUE( dx.is_null() );
ASSERT_TRUE( dy.is_null() );
ASSERT_TRUE( dz.is_null() );
}
typedef T DataType[2] ;
static void
check_auto_conversion_to_const(
const Kokkos::View< const DataType , device > & arg_const ,
const Kokkos::View< DataType , device > & arg )
{
ASSERT_TRUE( arg_const == arg );
}
static void run_test_const()
{
typedef Kokkos::View< DataType , device > typeX ;
typedef Kokkos::View< const DataType , device > const_typeX ;
typedef Kokkos::View< const DataType , device , Kokkos::MemoryRandomAccess > const_typeR ;
typeX x( "X" );
const_typeX xc = x ;
const_typeR xr = x ;
ASSERT_TRUE( xc == x );
ASSERT_TRUE( x == xc );
ASSERT_TRUE( x.ptr_on_device() == xr.ptr_on_device() );
// typeX xf = xc ; // setting non-const from const must not compile
check_auto_conversion_to_const( x , x );
}
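 // Note: conversion from a non-const to a const View is implicit and
 // both alias the same allocation; the reverse direction must fail to
 // compile, as the commented line above documents.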
static void run_test_subview()
{
typedef Kokkos::View< const T , device > sView ;
dView0 d0( "d0" );
dView1 d1( "d1" , N0 );
dView2 d2( "d2" , N0 );
dView3 d3( "d3" , N0 );
dView4 d4( "d4" , N0 );
sView s0 = d0 ;
sView s1 = Kokkos::subview( d1 , 1 );
sView s2 = Kokkos::subview( d2 , 1 , 1 );
sView s3 = Kokkos::subview( d3 , 1 , 1 , 1 );
sView s4 = Kokkos::subview( d4 , 1 , 1 , 1 , 1 );
}
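 // Fixing one index of a rank-2 view yields a rank-1 LayoutStride
 // subview; the address checks below verify that each subview aliases
 // the expected row or column of its parent, and likewise for the
 // rank-4 cases.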
static void run_test_subview_strided()
{
typedef Kokkos::View< int **** , Kokkos::LayoutLeft , host > view_left_4 ;
typedef Kokkos::View< int **** , Kokkos::LayoutRight , host > view_right_4 ;
typedef Kokkos::View< int ** , Kokkos::LayoutLeft , host > view_left_2 ;
typedef Kokkos::View< int ** , Kokkos::LayoutRight , host > view_right_2 ;
typedef Kokkos::View< int * , Kokkos::LayoutStride , host > view_stride_1 ;
typedef Kokkos::View< int ** , Kokkos::LayoutStride , host > view_stride_2 ;
view_left_2 xl2("xl2", 100 , 200 );
view_right_2 xr2("xr2", 100 , 200 );
view_stride_1 yl1 = Kokkos::subview( xl2 , 0 , Kokkos::ALL() );
view_stride_1 yl2 = Kokkos::subview( xl2 , 1 , Kokkos::ALL() );
view_stride_1 yr1 = Kokkos::subview( xr2 , 0 , Kokkos::ALL() );
view_stride_1 yr2 = Kokkos::subview( xr2 , 1 , Kokkos::ALL() );
ASSERT_EQ( yl1.dimension_0() , xl2.dimension_1() );
ASSERT_EQ( yl2.dimension_0() , xl2.dimension_1() );
ASSERT_EQ( yr1.dimension_0() , xr2.dimension_1() );
ASSERT_EQ( yr2.dimension_0() , xr2.dimension_1() );
ASSERT_EQ( & yl1(0) - & xl2(0,0) , 0 );
ASSERT_EQ( & yl2(0) - & xl2(1,0) , 0 );
ASSERT_EQ( & yr1(0) - & xr2(0,0) , 0 );
ASSERT_EQ( & yr2(0) - & xr2(1,0) , 0 );
view_left_4 xl4( "xl4" , 10 , 20 , 30 , 40 );
view_right_4 xr4( "xr4" , 10 , 20 , 30 , 40 );
view_stride_2 yl4 = Kokkos::subview( xl4 , 1 , Kokkos::ALL() , 2 , Kokkos::ALL() );
view_stride_2 yr4 = Kokkos::subview( xr4 , 1 , Kokkos::ALL() , 2 , Kokkos::ALL() );
ASSERT_EQ( yl4.dimension_0() , xl4.dimension_1() );
ASSERT_EQ( yl4.dimension_1() , xl4.dimension_3() );
ASSERT_EQ( yr4.dimension_0() , xr4.dimension_1() );
ASSERT_EQ( yr4.dimension_1() , xr4.dimension_3() );
ASSERT_EQ( & yl4(4,4) - & xl4(1,4,2,4) , 0 );
ASSERT_EQ( & yr4(4,4) - & xr4(1,4,2,4) , 0 );
}
static void run_test_vector()
{
static const unsigned Length = 1000 , Count = 8 ;
typedef Kokkos::View< T* , Kokkos::LayoutLeft , host > vector_type ;
typedef Kokkos::View< T** , Kokkos::LayoutLeft , host > multivector_type ;
typedef Kokkos::View< T* , Kokkos::LayoutRight , host > vector_right_type ;
typedef Kokkos::View< T** , Kokkos::LayoutRight , host > multivector_right_type ;
typedef Kokkos::View< const T* , Kokkos::LayoutRight, host > const_vector_right_type ;
typedef Kokkos::View< const T* , Kokkos::LayoutLeft , host > const_vector_type ;
typedef Kokkos::View< const T** , Kokkos::LayoutLeft , host > const_multivector_type ;
multivector_type mv = multivector_type( "mv" , Length , Count );
multivector_right_type mv_right = multivector_right_type( "mv" , Length , Count );
vector_type v1 = Kokkos::subview( mv , Kokkos::ALL() , 0 );
vector_type v2 = Kokkos::subview( mv , Kokkos::ALL() , 1 );
vector_type v3 = Kokkos::subview( mv , Kokkos::ALL() , 2 );
vector_type rv1 = Kokkos::subview( mv_right , 0 , Kokkos::ALL() );
vector_type rv2 = Kokkos::subview( mv_right , 1 , Kokkos::ALL() );
vector_type rv3 = Kokkos::subview( mv_right , 2 , Kokkos::ALL() );
multivector_type mv1 = Kokkos::subview( mv , std::make_pair( 1 , 998 ) ,
std::make_pair( 2 , 5 ) );
multivector_right_type mvr1 =
Kokkos::subview( mv_right ,
std::make_pair( 1 , 998 ) ,
std::make_pair( 2 , 5 ) );
const_vector_type cv1 = Kokkos::subview( mv , Kokkos::ALL(), 0 );
const_vector_type cv2 = Kokkos::subview( mv , Kokkos::ALL(), 1 );
const_vector_type cv3 = Kokkos::subview( mv , Kokkos::ALL(), 2 );
vector_right_type vr1 = Kokkos::subview( mv , Kokkos::ALL() , 0 );
vector_right_type vr2 = Kokkos::subview( mv , Kokkos::ALL() , 1 );
vector_right_type vr3 = Kokkos::subview( mv , Kokkos::ALL() , 2 );
const_vector_right_type cvr1 = Kokkos::subview( mv , Kokkos::ALL() , 0 );
const_vector_right_type cvr2 = Kokkos::subview( mv , Kokkos::ALL() , 1 );
const_vector_right_type cvr3 = Kokkos::subview( mv , Kokkos::ALL() , 2 );
ASSERT_TRUE( & v1[0] == & v1(0) );
ASSERT_TRUE( & v1[0] == & mv(0,0) );
ASSERT_TRUE( & v2[0] == & mv(0,1) );
ASSERT_TRUE( & v3[0] == & mv(0,2) );
ASSERT_TRUE( & cv1[0] == & mv(0,0) );
ASSERT_TRUE( & cv2[0] == & mv(0,1) );
ASSERT_TRUE( & cv3[0] == & mv(0,2) );
ASSERT_TRUE( & vr1[0] == & mv(0,0) );
ASSERT_TRUE( & vr2[0] == & mv(0,1) );
ASSERT_TRUE( & vr3[0] == & mv(0,2) );
ASSERT_TRUE( & cvr1[0] == & mv(0,0) );
ASSERT_TRUE( & cvr2[0] == & mv(0,1) );
ASSERT_TRUE( & cvr3[0] == & mv(0,2) );
ASSERT_TRUE( & mv1(0,0) == & mv( 1 , 2 ) );
ASSERT_TRUE( & mv1(1,1) == & mv( 2 , 3 ) );
ASSERT_TRUE( & mv1(3,2) == & mv( 4 , 4 ) );
ASSERT_TRUE( & mvr1(0,0) == & mv_right( 1 , 2 ) );
ASSERT_TRUE( & mvr1(1,1) == & mv_right( 2 , 3 ) );
ASSERT_TRUE( & mvr1(3,2) == & mv_right( 4 , 4 ) );
const_vector_type c_cv1( v1 );
typename vector_type::const_type c_cv2( v2 );
typename const_vector_type::const_type c_ccv2( v2 );
const_multivector_type cmv( mv );
typename multivector_type::const_type cmvX( cmv );
typename const_multivector_type::const_type ccmvX( cmv );
}
};
} // namespace Test
#endif
/*--------------------------------------------------------------------------*/
diff --git a/lib/kokkos/core/unit_test/TestViewMapping.hpp b/lib/kokkos/core/unit_test/TestViewMapping.hpp
index e38098410..a184b70e6 100644
--- a/lib/kokkos/core/unit_test/TestViewMapping.hpp
+++ b/lib/kokkos/core/unit_test/TestViewMapping.hpp
@@ -1,1080 +1,1256 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <stdexcept>
#include <sstream>
#include <iostream>
#include <Kokkos_Core.hpp>
/*--------------------------------------------------------------------------*/
namespace Test {
-template< class RangeType >
-void test_view_range( const size_t N , const RangeType & range , const size_t begin , const size_t dim )
-{
- typedef Kokkos::Experimental::Impl::ViewOffsetRange< RangeType > query ;
-
- ASSERT_EQ( query::begin( range ) , begin );
- ASSERT_EQ( query::dimension( N , range ) , dim );
- ASSERT_EQ( query::is_range , dim != 0 );
-}
-
-
template< class ExecSpace >
void test_view_mapping()
{
typedef Kokkos::Experimental::Impl::ViewDimension<> dim_0 ;
typedef Kokkos::Experimental::Impl::ViewDimension<2> dim_s2 ;
typedef Kokkos::Experimental::Impl::ViewDimension<2,3> dim_s2_s3 ;
typedef Kokkos::Experimental::Impl::ViewDimension<2,3,4> dim_s2_s3_s4 ;
typedef Kokkos::Experimental::Impl::ViewDimension<0> dim_s0 ;
typedef Kokkos::Experimental::Impl::ViewDimension<0,3> dim_s0_s3 ;
typedef Kokkos::Experimental::Impl::ViewDimension<0,3,4> dim_s0_s3_s4 ;
typedef Kokkos::Experimental::Impl::ViewDimension<0,0> dim_s0_s0 ;
typedef Kokkos::Experimental::Impl::ViewDimension<0,0,4> dim_s0_s0_s4 ;
typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0> dim_s0_s0_s0 ;
typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0> dim_s0_s0_s0_s0 ;
typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0> dim_s0_s0_s0_s0_s0 ;
typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0,0> dim_s0_s0_s0_s0_s0_s0 ;
typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0,0,0> dim_s0_s0_s0_s0_s0_s0_s0 ;
typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0,0,0,0> dim_s0_s0_s0_s0_s0_s0_s0_s0 ;
// Fully static dimensions should not be larger than an int
ASSERT_LE( sizeof(dim_0) , sizeof(int) );
ASSERT_LE( sizeof(dim_s2) , sizeof(int) );
ASSERT_LE( sizeof(dim_s2_s3) , sizeof(int) );
ASSERT_LE( sizeof(dim_s2_s3_s4) , sizeof(int) );
// Rank 1 is size_t
ASSERT_EQ( sizeof(dim_s0) , sizeof(size_t) );
ASSERT_EQ( sizeof(dim_s0_s3) , sizeof(size_t) );
ASSERT_EQ( sizeof(dim_s0_s3_s4) , sizeof(size_t) );
// Allow for padding
ASSERT_LE( sizeof(dim_s0_s0) , 2 * sizeof(size_t) );
ASSERT_LE( sizeof(dim_s0_s0_s4) , 2 * sizeof(size_t) );
ASSERT_LE( sizeof(dim_s0_s0_s0) , 4 * sizeof(size_t) );
ASSERT_EQ( sizeof(dim_s0_s0_s0_s0) , 4 * sizeof(unsigned) );
ASSERT_LE( sizeof(dim_s0_s0_s0_s0_s0) , 6 * sizeof(unsigned) );
ASSERT_EQ( sizeof(dim_s0_s0_s0_s0_s0_s0) , 6 * sizeof(unsigned) );
ASSERT_LE( sizeof(dim_s0_s0_s0_s0_s0_s0_s0) , 8 * sizeof(unsigned) );
ASSERT_EQ( sizeof(dim_s0_s0_s0_s0_s0_s0_s0_s0) , 8 * sizeof(unsigned) );
ASSERT_EQ( int(dim_0::rank) , int(0) );
ASSERT_EQ( int(dim_0::rank_dynamic) , int(0) );
ASSERT_EQ( int(dim_s2::rank) , int(1) );
ASSERT_EQ( int(dim_s2::rank_dynamic) , int(0) );
ASSERT_EQ( int(dim_s2_s3::rank) , int(2) );
ASSERT_EQ( int(dim_s2_s3::rank_dynamic) , int(0) );
ASSERT_EQ( int(dim_s2_s3_s4::rank) , int(3) );
ASSERT_EQ( int(dim_s2_s3_s4::rank_dynamic) , int(0) );
ASSERT_EQ( int(dim_s0::rank) , int(1) );
ASSERT_EQ( int(dim_s0::rank_dynamic) , int(1) );
ASSERT_EQ( int(dim_s0_s3::rank) , int(2) );
ASSERT_EQ( int(dim_s0_s3::rank_dynamic) , int(1) );
ASSERT_EQ( int(dim_s0_s3_s4::rank) , int(3) );
ASSERT_EQ( int(dim_s0_s3_s4::rank_dynamic) , int(1) );
ASSERT_EQ( int(dim_s0_s0_s4::rank) , int(3) );
ASSERT_EQ( int(dim_s0_s0_s4::rank_dynamic) , int(2) );
ASSERT_EQ( int(dim_s0_s0_s0::rank) , int(3) );
ASSERT_EQ( int(dim_s0_s0_s0::rank_dynamic) , int(3) );
ASSERT_EQ( int(dim_s0_s0_s0_s0::rank) , int(4) );
ASSERT_EQ( int(dim_s0_s0_s0_s0::rank_dynamic) , int(4) );
ASSERT_EQ( int(dim_s0_s0_s0_s0_s0::rank) , int(5) );
ASSERT_EQ( int(dim_s0_s0_s0_s0_s0::rank_dynamic) , int(5) );
ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0::rank) , int(6) );
ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0::rank_dynamic) , int(6) );
ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0::rank) , int(7) );
ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) , int(7) );
ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank) , int(8) );
ASSERT_EQ( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) , int(8) );
dim_s0 d1( 2, 3, 4, 5, 6, 7, 8, 9 );
dim_s0_s0 d2( 2, 3, 4, 5, 6, 7, 8, 9 );
dim_s0_s0_s0 d3( 2, 3, 4, 5, 6, 7, 8, 9 );
dim_s0_s0_s0_s0 d4( 2, 3, 4, 5, 6, 7, 8, 9 );
ASSERT_EQ( d1.N0 , 2 );
ASSERT_EQ( d2.N0 , 2 );
ASSERT_EQ( d3.N0 , 2 );
ASSERT_EQ( d4.N0 , 2 );
ASSERT_EQ( d1.N1 , 1 );
ASSERT_EQ( d2.N1 , 3 );
ASSERT_EQ( d3.N1 , 3 );
ASSERT_EQ( d4.N1 , 3 );
ASSERT_EQ( d1.N2 , 1 );
ASSERT_EQ( d2.N2 , 1 );
ASSERT_EQ( d3.N2 , 4 );
ASSERT_EQ( d4.N2 , 4 );
ASSERT_EQ( d1.N3 , 1 );
ASSERT_EQ( d2.N3 , 1 );
ASSERT_EQ( d3.N3 , 1 );
ASSERT_EQ( d4.N3 , 5 );
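  // Reading the asserts above (editor sketch): each constructor consumes one
  // argument per dynamic rank and ignores the rest, and any extent beyond the
  // rank reads back as 1 -- dim_s0 keeps only {2}, dim_s0_s0_s0_s0 keeps {2,3,4,5}.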
//----------------------------------------
typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s0 , Kokkos::LayoutStride > stride_s0_s0_s0 ;
//----------------------------------------
// Static dimension
{
typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4 , Kokkos::LayoutLeft > left_s2_s3_s4 ;
ASSERT_EQ( sizeof(left_s2_s3_s4) , sizeof(dim_s2_s3_s4) );
left_s2_s3_s4 off3 ;
stride_s0_s0_s0 stride3( off3 );
ASSERT_EQ( off3.stride_0() , 1 );
ASSERT_EQ( off3.stride_1() , 2 );
ASSERT_EQ( off3.stride_2() , 6 );
ASSERT_EQ( off3.span() , 24 );
ASSERT_EQ( off3.stride_0() , stride3.stride_0() );
ASSERT_EQ( off3.stride_1() , stride3.stride_1() );
ASSERT_EQ( off3.stride_2() , stride3.stride_2() );
ASSERT_EQ( off3.span() , stride3.span() );
int offset = 0 ;
for ( int k = 0 ; k < 4 ; ++k ){
for ( int j = 0 ; j < 3 ; ++j ){
for ( int i = 0 ; i < 2 ; ++i , ++offset ){
ASSERT_EQ( off3(i,j,k) , offset );
ASSERT_EQ( stride3(i,j,k) , off3(i,j,k) );
}}}
}
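  // Hedged sketch of the LayoutLeft map checked above: with static extents
  // {2,3,4} the offset is offset(i,j,k) = i + 2*( j + 3*k ), which yields
  // exactly the strides {1,2,6} and span 2*3*4 = 24 asserted in the block.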
//----------------------------------------
// Small dimension is unpadded
{
typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutLeft > left_s0_s0_s4 ;
left_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>(), 2, 3, 0, 0, 0, 0, 0, 0 );
stride_s0_s0_s0 stride3( dyn_off3 );
ASSERT_EQ( dyn_off3.m_dim.rank , 3 );
ASSERT_EQ( dyn_off3.m_dim.N0 , 2 );
ASSERT_EQ( dyn_off3.m_dim.N1 , 3 );
ASSERT_EQ( dyn_off3.m_dim.N2 , 4 );
ASSERT_EQ( dyn_off3.m_dim.N3 , 1 );
ASSERT_EQ( dyn_off3.size() , 2 * 3 * 4 );
ASSERT_EQ( stride3.m_dim.rank , 3 );
ASSERT_EQ( stride3.m_dim.N0 , 2 );
ASSERT_EQ( stride3.m_dim.N1 , 3 );
ASSERT_EQ( stride3.m_dim.N2 , 4 );
ASSERT_EQ( stride3.m_dim.N3 , 1 );
ASSERT_EQ( stride3.size() , 2 * 3 * 4 );
int offset = 0 ;
for ( int k = 0 ; k < 4 ; ++k ){
for ( int j = 0 ; j < 3 ; ++j ){
for ( int i = 0 ; i < 2 ; ++i , ++offset ){
ASSERT_EQ( offset , dyn_off3(i,j,k) );
ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) );
}}}
ASSERT_EQ( dyn_off3.span() , offset );
ASSERT_EQ( stride3.span() , dyn_off3.span() );
}
// Large dimension is likely padded
{
constexpr int N0 = 2000 ;
constexpr int N1 = 300 ;
typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutLeft > left_s0_s0_s4 ;
left_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>(), N0, N1, 0, 0, 0, 0, 0, 0 );
stride_s0_s0_s0 stride3( dyn_off3 );
ASSERT_EQ( dyn_off3.m_dim.rank , 3 );
ASSERT_EQ( dyn_off3.m_dim.N0 , N0 );
ASSERT_EQ( dyn_off3.m_dim.N1 , N1 );
ASSERT_EQ( dyn_off3.m_dim.N2 , 4 );
ASSERT_EQ( dyn_off3.m_dim.N3 , 1 );
ASSERT_EQ( dyn_off3.size() , N0 * N1 * 4 );
ASSERT_EQ( stride3.m_dim.rank , 3 );
ASSERT_EQ( stride3.m_dim.N0 , N0 );
ASSERT_EQ( stride3.m_dim.N1 , N1 );
ASSERT_EQ( stride3.m_dim.N2 , 4 );
ASSERT_EQ( stride3.m_dim.N3 , 1 );
ASSERT_EQ( stride3.size() , N0 * N1 * 4 );
ASSERT_EQ( stride3.span() , dyn_off3.span() );
int offset = 0 ;
for ( int k = 0 ; k < 4 ; ++k ){
for ( int j = 0 ; j < N1 ; ++j ){
for ( int i = 0 ; i < N0 ; ++i ){
ASSERT_LE( offset , dyn_off3(i,j,k) );
ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) );
offset = dyn_off3(i,j,k) + 1 ;
}}}
ASSERT_LE( offset , dyn_off3.span() );
}
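  // Hedged note: for large extents the allocation may pad the leading
  // dimension to an alignment boundary, so the loop above only requires the
  // offsets to increase monotonically and stay within span(), not to be dense.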
//----------------------------------------
// Static dimension
{
typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4 , Kokkos::LayoutRight > right_s2_s3_s4 ;
ASSERT_EQ( sizeof(right_s2_s3_s4) , sizeof(dim_s2_s3_s4) );
right_s2_s3_s4 off3 ;
stride_s0_s0_s0 stride3( off3 );
ASSERT_EQ( off3.stride_0() , 12 );
ASSERT_EQ( off3.stride_1() , 4 );
ASSERT_EQ( off3.stride_2() , 1 );
ASSERT_EQ( off3.dimension_0() , stride3.dimension_0() );
ASSERT_EQ( off3.dimension_1() , stride3.dimension_1() );
ASSERT_EQ( off3.dimension_2() , stride3.dimension_2() );
ASSERT_EQ( off3.stride_0() , stride3.stride_0() );
ASSERT_EQ( off3.stride_1() , stride3.stride_1() );
ASSERT_EQ( off3.stride_2() , stride3.stride_2() );
ASSERT_EQ( off3.span() , stride3.span() );
int offset = 0 ;
for ( int i = 0 ; i < 2 ; ++i ){
for ( int j = 0 ; j < 3 ; ++j ){
for ( int k = 0 ; k < 4 ; ++k , ++offset ){
ASSERT_EQ( off3(i,j,k) , offset );
ASSERT_EQ( off3(i,j,k) , stride3(i,j,k) );
}}}
ASSERT_EQ( off3.span() , offset );
}
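  // Hedged sketch of the LayoutRight map checked above: with extents {2,3,4}
  // the offset is offset(i,j,k) = k + 4*( j + 3*i ), giving the strides
  // {12,4,1} asserted in the block.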
//----------------------------------------
// Small dimension is unpadded
{
typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutRight > right_s0_s0_s4 ;
right_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>(), 2, 3, 0, 0, 0, 0, 0, 0 );
stride_s0_s0_s0 stride3( dyn_off3 );
ASSERT_EQ( dyn_off3.m_dim.rank , 3 );
ASSERT_EQ( dyn_off3.m_dim.N0 , 2 );
ASSERT_EQ( dyn_off3.m_dim.N1 , 3 );
ASSERT_EQ( dyn_off3.m_dim.N2 , 4 );
ASSERT_EQ( dyn_off3.m_dim.N3 , 1 );
ASSERT_EQ( dyn_off3.size() , 2 * 3 * 4 );
ASSERT_EQ( dyn_off3.dimension_0() , stride3.dimension_0() );
ASSERT_EQ( dyn_off3.dimension_1() , stride3.dimension_1() );
ASSERT_EQ( dyn_off3.dimension_2() , stride3.dimension_2() );
ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() );
ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() );
ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() );
ASSERT_EQ( dyn_off3.span() , stride3.span() );
int offset = 0 ;
for ( int i = 0 ; i < 2 ; ++i ){
for ( int j = 0 ; j < 3 ; ++j ){
for ( int k = 0 ; k < 4 ; ++k , ++offset ){
ASSERT_EQ( offset , dyn_off3(i,j,k) );
ASSERT_EQ( dyn_off3(i,j,k) , stride3(i,j,k) );
}}}
ASSERT_EQ( dyn_off3.span() , offset );
}
// Large dimension is likely padded
{
constexpr int N0 = 2000 ;
constexpr int N1 = 300 ;
typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutRight > right_s0_s0_s4 ;
right_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>(), N0, N1, 0, 0, 0, 0, 0, 0 );
stride_s0_s0_s0 stride3( dyn_off3 );
ASSERT_EQ( dyn_off3.m_dim.rank , 3 );
ASSERT_EQ( dyn_off3.m_dim.N0 , N0 );
ASSERT_EQ( dyn_off3.m_dim.N1 , N1 );
ASSERT_EQ( dyn_off3.m_dim.N2 , 4 );
ASSERT_EQ( dyn_off3.m_dim.N3 , 1 );
ASSERT_EQ( dyn_off3.size() , N0 * N1 * 4 );
ASSERT_EQ( dyn_off3.dimension_0() , stride3.dimension_0() );
ASSERT_EQ( dyn_off3.dimension_1() , stride3.dimension_1() );
ASSERT_EQ( dyn_off3.dimension_2() , stride3.dimension_2() );
ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() );
ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() );
ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() );
ASSERT_EQ( dyn_off3.span() , stride3.span() );
int offset = 0 ;
for ( int i = 0 ; i < N0 ; ++i ){
for ( int j = 0 ; j < N1 ; ++j ){
for ( int k = 0 ; k < 4 ; ++k ){
ASSERT_LE( offset , dyn_off3(i,j,k) );
ASSERT_EQ( dyn_off3(i,j,k) , stride3(i,j,k) );
offset = dyn_off3(i,j,k) + 1 ;
}}}
ASSERT_LE( offset , dyn_off3.span() );
}
//----------------------------------------
// Subview
+ {
+ // Mapping rank 4 to rank 3
+ typedef Kokkos::Experimental::Impl::SubviewExtents<4,3> SubviewExtents ;
+
+ constexpr int N0 = 1000 ;
+ constexpr int N1 = 2000 ;
+ constexpr int N2 = 3000 ;
+ constexpr int N3 = 4000 ;
+
+ Kokkos::Experimental::Impl::ViewDimension<N0,N1,N2,N3> dim ;
+
+ SubviewExtents tmp( dim
+ , N0 / 2
+ , Kokkos::Experimental::ALL
+ , std::pair<int,int>( N2 / 4 , 10 + N2 / 4 )
+ , Kokkos::pair<int,int>( N3 / 4 , 20 + N3 / 4 )
+ );
+
+ ASSERT_EQ( tmp.domain_offset(0) , N0 / 2 );
+ ASSERT_EQ( tmp.domain_offset(1) , 0 );
+ ASSERT_EQ( tmp.domain_offset(2) , N2 / 4 );
+ ASSERT_EQ( tmp.domain_offset(3) , N3 / 4 );
+
+ ASSERT_EQ( tmp.range_index(0) , 1 );
+ ASSERT_EQ( tmp.range_index(1) , 2 );
+ ASSERT_EQ( tmp.range_index(2) , 3 );
+
+ ASSERT_EQ( tmp.range_extent(0) , N1 );
+ ASSERT_EQ( tmp.range_extent(1) , 10 );
+ ASSERT_EQ( tmp.range_extent(2) , 20 );
+ }
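+ // Reading the block above (editor sketch): the scalar argument N0/2 is
+ // absorbed as an offset and drops a rank, while ALL and the two pairs
+ // survive as the three ranges of the result; range_index() maps each
+ // result rank back to its source rank and range_extent() gives the
+ // trimmed length.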
+ //----------------------------------------
{
constexpr int N0 = 2000 ;
constexpr int N1 = 300 ;
constexpr int sub_N0 = 1000 ;
constexpr int sub_N1 = 200 ;
constexpr int sub_N2 = 4 ;
typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutLeft > left_s0_s0_s4 ;
left_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>(), N0, N1, 0, 0, 0, 0, 0, 0 );
- stride_s0_s0_s0 stride3( dyn_off3 , sub_N0 , sub_N1 , sub_N2 , 0 , 0 , 0 , 0 , 0 );
+ Kokkos::Experimental::Impl::SubviewExtents< 3 , 3 >
+ sub( dyn_off3.m_dim
+ , Kokkos::pair<int,int>(0,sub_N0)
+ , Kokkos::pair<int,int>(0,sub_N1)
+ , Kokkos::pair<int,int>(0,sub_N2)
+ );
+
+ stride_s0_s0_s0 stride3( dyn_off3 , sub );
ASSERT_EQ( stride3.dimension_0() , sub_N0 );
ASSERT_EQ( stride3.dimension_1() , sub_N1 );
ASSERT_EQ( stride3.dimension_2() , sub_N2 );
ASSERT_EQ( stride3.size() , sub_N0 * sub_N1 * sub_N2 );
ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() );
ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() );
ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() );
ASSERT_GE( dyn_off3.span() , stride3.span() );
for ( int k = 0 ; k < sub_N2 ; ++k ){
for ( int j = 0 ; j < sub_N1 ; ++j ){
for ( int i = 0 ; i < sub_N0 ; ++i ){
ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) );
}}}
}
{
constexpr int N0 = 2000 ;
constexpr int N1 = 300 ;
constexpr int sub_N0 = 1000 ;
constexpr int sub_N1 = 200 ;
constexpr int sub_N2 = 4 ;
typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutRight > right_s0_s0_s4 ;
right_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>(), N0, N1, 0, 0, 0, 0, 0, 0 );
- stride_s0_s0_s0 stride3( dyn_off3 , sub_N0 , sub_N1 , sub_N2 , 0 , 0 , 0 , 0 , 0 );
+ Kokkos::Experimental::Impl::SubviewExtents< 3 , 3 >
+ sub( dyn_off3.m_dim
+ , Kokkos::pair<int,int>(0,sub_N0)
+ , Kokkos::pair<int,int>(0,sub_N1)
+ , Kokkos::pair<int,int>(0,sub_N2)
+ );
+
+ stride_s0_s0_s0 stride3( dyn_off3 , sub );
ASSERT_EQ( stride3.dimension_0() , sub_N0 );
ASSERT_EQ( stride3.dimension_1() , sub_N1 );
ASSERT_EQ( stride3.dimension_2() , sub_N2 );
ASSERT_EQ( stride3.size() , sub_N0 * sub_N1 * sub_N2 );
ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() );
ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() );
ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() );
ASSERT_GE( dyn_off3.span() , stride3.span() );
for ( int i = 0 ; i < sub_N0 ; ++i ){
for ( int j = 0 ; j < sub_N1 ; ++j ){
for ( int k = 0 ; k < sub_N2 ; ++k ){
ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) );
}}}
}
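  // Hedged note: both subview blocks above begin every range at 0, so the
  // parent strides carry over unchanged and only the extents shrink -- which
  // is why the parent span() may exceed (ASSERT_GE) the subview span().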
//----------------------------------------
+ // view data analysis
{
- constexpr int N = 1000 ;
-
- test_view_range( N , N / 2 , N / 2 , 0 );
- test_view_range( N , Kokkos::Experimental::ALL , 0 , N );
- test_view_range( N , std::pair<int,int>( N / 4 , 10 + N / 4 ) , N / 4 , 10 );
- test_view_range( N , Kokkos::pair<int,int>( N / 4 , 10 + N / 4 ) , N / 4 , 10 );
+ using namespace Kokkos::Experimental::Impl ;
+ static_assert( rank_dynamic<>::value == 0 , "" );
+ static_assert( rank_dynamic<1>::value == 0 , "" );
+ static_assert( rank_dynamic<0>::value == 1 , "" );
+ static_assert( rank_dynamic<0,1>::value == 1 , "" );
+ static_assert( rank_dynamic<0,0,1>::value == 2 , "" );
}
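  // Reading the asserts above: a '0' template argument denotes a runtime
  // extent, so rank_dynamic counts the leading zeros -- e.g. <0,0,1> carries
  // two dynamic extents followed by a static extent of 1.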
- //----------------------------------------
- // view data analysis
{
using namespace Kokkos::Experimental::Impl ;
typedef ViewArrayAnalysis< int[] > a_int_r1 ;
typedef ViewArrayAnalysis< int**[4][5][6] > a_int_r5 ;
typedef ViewArrayAnalysis< const int[] > a_const_int_r1 ;
typedef ViewArrayAnalysis< const int**[4][5][6] > a_const_int_r5 ;
static_assert( a_int_r1::dimension::rank == 1 , "" );
static_assert( a_int_r1::dimension::rank_dynamic == 1 , "" );
static_assert( std::is_same< typename a_int_r1::dimension , ViewDimension<0> >::value , "" );
static_assert( std::is_same< typename a_int_r1::non_const_value_type , int >::value , "" );
static_assert( a_const_int_r1::dimension::rank == 1 , "" );
static_assert( a_const_int_r1::dimension::rank_dynamic == 1 , "" );
static_assert( std::is_same< typename a_const_int_r1::dimension , ViewDimension<0> >::value , "" );
static_assert( std::is_same< typename a_const_int_r1::non_const_value_type , int >::value , "" );
static_assert( a_const_int_r5::dimension::rank == 5 , "" );
static_assert( a_const_int_r5::dimension::rank_dynamic == 2 , "" );
+
static_assert( std::is_same< typename a_const_int_r5::dimension , ViewDimension<0,0,4,5,6> >::value , "" );
+
static_assert( std::is_same< typename a_const_int_r5::non_const_value_type , int >::value , "" );
static_assert( a_int_r5::dimension::rank == 5 , "" );
static_assert( a_int_r5::dimension::rank_dynamic == 2 , "" );
static_assert( std::is_same< typename a_int_r5::dimension , ViewDimension<0,0,4,5,6> >::value , "" );
static_assert( std::is_same< typename a_int_r5::non_const_value_type , int >::value , "" );
}
{
using namespace Kokkos::Experimental::Impl ;
typedef int t_i4[4] ;
  // Dimensions of t_i4 are appended to the multidimensional array.
typedef ViewArrayAnalysis< t_i4 ***[3] > a_int_r5 ;
static_assert( a_int_r5::dimension::rank == 5 , "" );
static_assert( a_int_r5::dimension::rank_dynamic == 3 , "" );
- static_assert( a_int_r5::dimension::arg_N0 == 0 , "" );
- static_assert( a_int_r5::dimension::arg_N1 == 0 , "" );
- static_assert( a_int_r5::dimension::arg_N2 == 0 , "" );
- static_assert( a_int_r5::dimension::arg_N3 == 3 , "" );
- static_assert( a_int_r5::dimension::arg_N4 == 4 , "" );
+ static_assert( a_int_r5::dimension::ArgN0 == 0 , "" );
+ static_assert( a_int_r5::dimension::ArgN1 == 0 , "" );
+ static_assert( a_int_r5::dimension::ArgN2 == 0 , "" );
+ static_assert( a_int_r5::dimension::ArgN3 == 3 , "" );
+ static_assert( a_int_r5::dimension::ArgN4 == 4 , "" );
static_assert( std::is_same< typename a_int_r5::non_const_value_type , int >::value , "" );
}
{
using namespace Kokkos::Experimental::Impl ;
- typedef ViewDataAnalysis< const int[] , typename ViewArrayAnalysis<const int[]>::non_const_value_type , void > a_const_int_r1 ;
+ typedef ViewDataAnalysis< const int[] , void > a_const_int_r1 ;
static_assert( std::is_same< typename a_const_int_r1::specialize , void >::value , "" );
static_assert( std::is_same< typename a_const_int_r1::dimension , Kokkos::Experimental::Impl::ViewDimension<0> >::value , "" );
static_assert( std::is_same< typename a_const_int_r1::type , const int * >::value , "" );
static_assert( std::is_same< typename a_const_int_r1::value_type , const int >::value , "" );
static_assert( std::is_same< typename a_const_int_r1::array_scalar_type , const int * >::value , "" );
static_assert( std::is_same< typename a_const_int_r1::const_type , const int * >::value , "" );
static_assert( std::is_same< typename a_const_int_r1::const_value_type , const int >::value , "" );
static_assert( std::is_same< typename a_const_int_r1::const_array_scalar_type , const int * >::value , "" );
static_assert( std::is_same< typename a_const_int_r1::non_const_type , int * >::value , "" );
static_assert( std::is_same< typename a_const_int_r1::non_const_value_type , int >::value , "" );
- typedef ViewDataAnalysis< const int**[4] , typename ViewArrayAnalysis< const int **[4] >::non_const_value_type , void > a_const_int_r3 ;
+ typedef ViewDataAnalysis< const int**[4] , void > a_const_int_r3 ;
static_assert( std::is_same< typename a_const_int_r3::specialize , void >::value , "" );
+
static_assert( std::is_same< typename a_const_int_r3::dimension , Kokkos::Experimental::Impl::ViewDimension<0,0,4> >::value , "" );
+
static_assert( std::is_same< typename a_const_int_r3::type , const int**[4] >::value , "" );
static_assert( std::is_same< typename a_const_int_r3::value_type , const int >::value , "" );
static_assert( std::is_same< typename a_const_int_r3::array_scalar_type , const int**[4] >::value , "" );
static_assert( std::is_same< typename a_const_int_r3::const_type , const int**[4] >::value , "" );
static_assert( std::is_same< typename a_const_int_r3::const_value_type , const int >::value , "" );
static_assert( std::is_same< typename a_const_int_r3::const_array_scalar_type , const int**[4] >::value , "" );
static_assert( std::is_same< typename a_const_int_r3::non_const_type , int**[4] >::value , "" );
static_assert( std::is_same< typename a_const_int_r3::non_const_value_type , int >::value , "" );
static_assert( std::is_same< typename a_const_int_r3::non_const_array_scalar_type , int**[4] >::value , "" );
// std::cout << "typeid(const int**[4]).name() = " << typeid(const int**[4]).name() << std::endl ;
}
//----------------------------------------
{
constexpr int N = 10 ;
typedef Kokkos::Experimental::View<int*,ExecSpace> T ;
typedef Kokkos::Experimental::View<const int*,ExecSpace> C ;
int data[N] ;
- T vr1(data,N);
- C cr1(vr1);
+ T vr1(data,N); // view of non-const
+ C cr1(vr1); // view of const from view of non-const
+ C cr2( (const int *) data , N );
// Generate static_assert error:
// T tmp( cr1 );
ASSERT_EQ( vr1.span() , N );
ASSERT_EQ( cr1.span() , N );
ASSERT_EQ( vr1.data() , & data[0] );
ASSERT_EQ( cr1.data() , & data[0] );
ASSERT_TRUE( ( std::is_same< typename T::data_type , int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::const_data_type , const int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::non_const_data_type , int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::array_scalar_type , int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::const_array_scalar_type , const int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::non_const_array_scalar_type , int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::value_type , int >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::const_value_type , const int >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::non_const_value_type , int >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::memory_space , typename ExecSpace::memory_space >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::reference_type , int & >::value ) );
ASSERT_EQ( T::Rank , 1 );
ASSERT_TRUE( ( std::is_same< typename C::data_type , const int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename C::const_data_type , const int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename C::non_const_data_type , int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename C::array_scalar_type , const int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename C::const_array_scalar_type , const int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename C::non_const_array_scalar_type , int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename C::value_type , const int >::value ) );
ASSERT_TRUE( ( std::is_same< typename C::const_value_type , const int >::value ) );
ASSERT_TRUE( ( std::is_same< typename C::non_const_value_type , int >::value ) );
ASSERT_TRUE( ( std::is_same< typename C::memory_space , typename ExecSpace::memory_space >::value ) );
ASSERT_TRUE( ( std::is_same< typename C::reference_type , const int & >::value ) );
ASSERT_EQ( C::Rank , 1 );
ASSERT_EQ( vr1.dimension_0() , N );
if ( Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename ExecSpace::memory_space , Kokkos::HostSpace >::value ) {
for ( int i = 0 ; i < N ; ++i ) data[i] = i + 1 ;
for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 1 );
for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( cr1[i] , i + 1 );
{
T tmp( vr1 );
for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 1 );
for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 2 ;
for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 2 );
}
for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 2 );
}
}
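  // Hedged note: constructing a view from (pointer,extent) as above wraps
  // user memory without reference counting, so vr1, cr1, and cr2 all alias
  // data[] directly -- the data() and element asserts are that aliasing.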
{
constexpr int N = 10 ;
typedef Kokkos::Experimental::View<int*,ExecSpace> T ;
typedef Kokkos::Experimental::View<const int*,ExecSpace> C ;
T vr1("vr1",N);
C cr1(vr1);
ASSERT_TRUE( ( std::is_same< typename T::data_type , int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::const_data_type , const int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::non_const_data_type , int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::array_scalar_type , int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::const_array_scalar_type , const int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::non_const_array_scalar_type , int* >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::value_type , int >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::const_value_type , const int >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::non_const_value_type , int >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::memory_space , typename ExecSpace::memory_space >::value ) );
ASSERT_TRUE( ( std::is_same< typename T::reference_type , int & >::value ) );
ASSERT_EQ( T::Rank , 1 );
ASSERT_EQ( vr1.dimension_0() , N );
if ( Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename ExecSpace::memory_space , Kokkos::HostSpace >::value ) {
for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 1 ;
for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 1 );
for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( cr1[i] , i + 1 );
{
T tmp( vr1 );
for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 1 );
for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 2 ;
for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 2 );
}
for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 2 );
}
}
 // Test allocation with an explicit memory-space instance.
 // The execution space of that memory space must be available for view data initialization.
if ( std::is_same< ExecSpace , typename ExecSpace::memory_space::execution_space >::value ) {
using namespace Kokkos::Experimental ;
typedef typename ExecSpace::memory_space memory_space ;
typedef View<int*,memory_space> V ;
constexpr int N = 10 ;
memory_space mem_space ;
V v( "v" , N );
V va( view_alloc() , N );
V vb( view_alloc( "vb" ) , N );
V vc( view_alloc( "vc" , AllowPadding ) , N );
V vd( view_alloc( "vd" , WithoutInitializing ) , N );
V ve( view_alloc( "ve" , WithoutInitializing , AllowPadding ) , N );
V vf( view_alloc( "vf" , mem_space , WithoutInitializing , AllowPadding ) , N );
V vg( view_alloc( mem_space , "vg" , WithoutInitializing , AllowPadding ) , N );
V vh( view_alloc( WithoutInitializing , AllowPadding ) , N );
V vi( view_alloc( WithoutInitializing ) , N );
V vj( view_alloc( std::string("vj") , AllowPadding ) , N );
V vk( view_alloc( mem_space , std::string("vk") , AllowPadding ) , N );
}
{
typedef Kokkos::Experimental::ViewTraits<int***,Kokkos::LayoutStride,ExecSpace> traits_t ;
typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0> dims_t ;
typedef Kokkos::Experimental::Impl::ViewOffset< dims_t , Kokkos::LayoutStride > offset_t ;
Kokkos::LayoutStride stride ;
stride.dimension[0] = 3 ;
stride.dimension[1] = 4 ;
stride.dimension[2] = 5 ;
stride.stride[0] = 4 ;
stride.stride[1] = 1 ;
stride.stride[2] = 12 ;
const offset_t offset( stride );
ASSERT_EQ( offset.dimension_0() , 3 );
ASSERT_EQ( offset.dimension_1() , 4 );
ASSERT_EQ( offset.dimension_2() , 5 );
ASSERT_EQ( offset.stride_0() , 4 );
ASSERT_EQ( offset.stride_1() , 1 );
ASSERT_EQ( offset.stride_2() , 12 );
ASSERT_EQ( offset.span() , 60 );
ASSERT_TRUE( offset.span_is_contiguous() );
Kokkos::Experimental::Impl::ViewMapping< traits_t , void > v( (int*) 0 , std::false_type() , stride );
}
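  // Arithmetic behind span_is_contiguous() above: ordering strides {1,4,12}
  // with their dimensions {4,3,5} gives 1*4 == 4, 4*3 == 12, and 12*5 == 60
  // == span(), so the strided layout still tiles memory with no gaps.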
{
typedef Kokkos::Experimental::View<int**,ExecSpace> V ;
typedef typename V::HostMirror M ;
constexpr int N0 = 10 ;
constexpr int N1 = 11 ;
V a("a",N0,N1);
M b = Kokkos::Experimental::create_mirror(a);
M c = Kokkos::Experimental::create_mirror_view(a);
M d ;
for ( int i0 = 0 ; i0 < N0 ; ++i0 )
for ( int i1 = 0 ; i1 < N1 ; ++i1 )
b(i0,i1) = 1 + i0 + i1 * N0 ;
Kokkos::Experimental::deep_copy( a , b );
Kokkos::Experimental::deep_copy( c , a );
for ( int i0 = 0 ; i0 < N0 ; ++i0 )
for ( int i1 = 0 ; i1 < N1 ; ++i1 )
ASSERT_EQ( b(i0,i1) , c(i0,i1) );
Kokkos::Experimental::resize( b , 5 , 6 );
Kokkos::Experimental::realloc( c , 5 , 6 );
Kokkos::Experimental::realloc( d , 5 , 6 );
ASSERT_EQ( b.dimension_0() , 5 );
ASSERT_EQ( b.dimension_1() , 6 );
ASSERT_EQ( c.dimension_0() , 5 );
ASSERT_EQ( c.dimension_1() , 6 );
ASSERT_EQ( d.dimension_0() , 5 );
ASSERT_EQ( d.dimension_1() , 6 );
}
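  // Hedged note: create_mirror() always allocates a fresh host view, while
  // create_mirror_view() may alias 'a' when its memory is host-accessible;
  // the deep_copy round trip above makes the b/c comparison valid either way.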
+
+#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+ /* Only works when experimental view is activated */
+ {
+ typedef Kokkos::Experimental::View<int*,ExecSpace> V ;
+ typedef Kokkos::Experimental::View<int*,ExecSpace,Kokkos::MemoryUnmanaged> U ;
+
+
+ V a("a",10);
+
+ ASSERT_EQ( a.use_count() , 1 );
+
+ V b = a ;
+
+ ASSERT_EQ( a.use_count() , 2 );
+ ASSERT_EQ( b.use_count() , 2 );
+
+ {
+ U c = b ; // 'c' is compile-time unmanaged
+
+ ASSERT_EQ( a.use_count() , 2 );
+ ASSERT_EQ( b.use_count() , 2 );
+ ASSERT_EQ( c.use_count() , 2 );
+
+ V d = c ; // 'd' is run-time unmanaged
+
+ ASSERT_EQ( a.use_count() , 2 );
+ ASSERT_EQ( b.use_count() , 2 );
+ ASSERT_EQ( c.use_count() , 2 );
+ ASSERT_EQ( d.use_count() , 2 );
+ }
+
+ ASSERT_EQ( a.use_count() , 2 );
+ ASSERT_EQ( b.use_count() , 2 );
+
+ b = V();
+
+ ASSERT_EQ( a.use_count() , 1 );
+ ASSERT_EQ( b.use_count() , 0 );
+
+ Kokkos::parallel_for(
+ Kokkos::RangePolicy< Kokkos::DefaultHostExecutionSpace >(0,10) ,
+ [=]( int i ){
+ // 'a' is captured by copy, and the capture mechanism
+ // converts 'a' to an unmanaged copy.
+ // When the parallel dispatch accepts a move for the lambda,
+ // this count should become 1.
+ ASSERT_EQ( a.use_count() , 2 );
+ V x = a ;
+ ASSERT_EQ( a.use_count() , 2 );
+ ASSERT_EQ( x.use_count() , 2 );
+ });
+ }
+#endif /* #if defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
}
template< class ExecSpace >
struct TestViewMappingSubview {
- constexpr static int AN = 10 ;
+ typedef Kokkos::pair<int,int> range ;
+
+ enum { AN = 10 };
typedef Kokkos::Experimental::View<int*,ExecSpace> AT ;
- typedef Kokkos::Experimental::Subview< AT , true > AS ;
+ typedef Kokkos::Experimental::View<const int*,ExecSpace> ACT ;
+ typedef Kokkos::Experimental::Subview< AT , range > AS ;
- constexpr static int BN0 = 10 , BN1 = 11 , BN2 = 12 ;
+ enum { BN0 = 10 , BN1 = 11 , BN2 = 12 };
typedef Kokkos::Experimental::View<int***,ExecSpace> BT ;
- typedef Kokkos::Experimental::Subview< BT , true , true , true > BS ;
+ typedef Kokkos::Experimental::Subview< BT , range , range , range > BS ;
- constexpr static int CN0 = 10 , CN1 = 11 , CN2 = 12 ;
+ enum { CN0 = 10 , CN1 = 11 , CN2 = 12 };
typedef Kokkos::Experimental::View<int***[13][14],ExecSpace> CT ;
- typedef Kokkos::Experimental::Subview< CT , true , true , true , false , false > CS ;
+ typedef Kokkos::Experimental::Subview< CT , range , range , range , int , int > CS ;
- constexpr static int DN0 = 10 , DN1 = 11 , DN2 = 12 ;
- typedef Kokkos::Experimental::View<int***[13][14],ExecSpace> DT ;
- typedef Kokkos::Experimental::Subview< DT , false , true , true , true , false > DS ;
+ enum { DN0 = 10 , DN1 = 11 , DN2 = 12 , DN3 = 13 , DN4 = 14 };
+ typedef Kokkos::Experimental::View<int***[DN3][DN4],ExecSpace> DT ;
+ typedef Kokkos::Experimental::Subview< DT , int , range , range , range , int > DS ;
typedef Kokkos::Experimental::View<int***[13][14],Kokkos::LayoutLeft,ExecSpace> DLT ;
- typedef Kokkos::Experimental::Subview< DLT , true , false , false , false , false > DLS1 ;
+ typedef Kokkos::Experimental::Subview< DLT , range , int , int , int , int > DLS1 ;
static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout , Kokkos::LayoutLeft >::value
, "Subview layout error for rank 1 subview of left-most range of LayoutLeft" );
typedef Kokkos::Experimental::View<int***[13][14],Kokkos::LayoutRight,ExecSpace> DRT ;
- typedef Kokkos::Experimental::Subview< DRT , false , false , false , false , true > DRS1 ;
+ typedef Kokkos::Experimental::Subview< DRT , int , int , int , int , range > DRS1 ;
static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout , Kokkos::LayoutRight >::value
, "Subview layout error for rank 1 subview of right-most range of LayoutRight" );
AT Aa ;
AS Ab ;
+ ACT Ac ;
BT Ba ;
BS Bb ;
CT Ca ;
CS Cb ;
DT Da ;
DS Db ;
TestViewMappingSubview()
: Aa("Aa",AN)
, Ab( Kokkos::Experimental::subview( Aa , std::pair<int,int>(1,AN-1) ) )
+ , Ac( Aa , std::pair<int,int>(1,AN-1) )
, Ba("Ba",BN0,BN1,BN2)
, Bb( Kokkos::Experimental::subview( Ba
, std::pair<int,int>(1,BN0-1)
, std::pair<int,int>(1,BN1-1)
, std::pair<int,int>(1,BN2-1)
) )
, Ca("Ca",CN0,CN1,CN2)
, Cb( Kokkos::Experimental::subview( Ca
, std::pair<int,int>(1,CN0-1)
, std::pair<int,int>(1,CN1-1)
, std::pair<int,int>(1,CN2-1)
, 1
, 2
) )
, Da("Da",DN0,DN1,DN2)
, Db( Kokkos::Experimental::subview( Da
, 1
- , std::pair<int,int>(1,DN0-1)
, std::pair<int,int>(1,DN1-1)
, std::pair<int,int>(1,DN2-1)
+ , std::pair<int,int>(1,DN3-1)
, 2
) )
{
}
KOKKOS_INLINE_FUNCTION
void operator()( const int , long & error_count ) const
{
+ auto Ad = Kokkos::Experimental::subview< Kokkos::MemoryUnmanaged >( Aa , Kokkos::pair<int,int>(1,AN-1) );
+
for ( int i = 1 ; i < AN-1 ; ++i ) if( & Aa[i] != & Ab[i-1] ) ++error_count ;
+ for ( int i = 1 ; i < AN-1 ; ++i ) if( & Aa[i] != & Ac[i-1] ) ++error_count ;
+ for ( int i = 1 ; i < AN-1 ; ++i ) if( & Aa[i] != & Ad[i-1] ) ++error_count ;
for ( int i2 = 1 ; i2 < BN2-1 ; ++i2 ) {
for ( int i1 = 1 ; i1 < BN1-1 ; ++i1 ) {
for ( int i0 = 1 ; i0 < BN0-1 ; ++i0 ) {
if ( & Ba(i0,i1,i2) != & Bb(i0-1,i1-1,i2-1) ) ++error_count ;
}}}
for ( int i2 = 1 ; i2 < CN2-1 ; ++i2 ) {
for ( int i1 = 1 ; i1 < CN1-1 ; ++i1 ) {
for ( int i0 = 1 ; i0 < CN0-1 ; ++i0 ) {
if ( & Ca(i0,i1,i2,1,2) != & Cb(i0-1,i1-1,i2-1) ) ++error_count ;
}}}
- for ( int i2 = 1 ; i2 < DN2-1 ; ++i2 ) {
- for ( int i1 = 1 ; i1 < DN1-1 ; ++i1 ) {
- for ( int i0 = 1 ; i0 < DN0-1 ; ++i0 ) {
+ for ( int i2 = 1 ; i2 < DN3-1 ; ++i2 ) {
+ for ( int i1 = 1 ; i1 < DN2-1 ; ++i1 ) {
+ for ( int i0 = 1 ; i0 < DN1-1 ; ++i0 ) {
if ( & Da(1,i0,i1,i2,2) != & Db(i0-1,i1-1,i2-1) ) ++error_count ;
}}}
}
static void run()
{
TestViewMappingSubview self ;
+ ASSERT_EQ( self.Aa.dimension_0() , AN );
+ ASSERT_EQ( self.Ab.dimension_0() , AN - 2 );
+ ASSERT_EQ( self.Ac.dimension_0() , AN - 2 );
+ ASSERT_EQ( self.Ba.dimension_0() , BN0 );
+ ASSERT_EQ( self.Ba.dimension_1() , BN1 );
+ ASSERT_EQ( self.Ba.dimension_2() , BN2 );
+ ASSERT_EQ( self.Bb.dimension_0() , BN0 - 2 );
+ ASSERT_EQ( self.Bb.dimension_1() , BN1 - 2 );
+ ASSERT_EQ( self.Bb.dimension_2() , BN2 - 2 );
+
+ ASSERT_EQ( self.Ca.dimension_0() , CN0 );
+ ASSERT_EQ( self.Ca.dimension_1() , CN1 );
+ ASSERT_EQ( self.Ca.dimension_2() , CN2 );
+ ASSERT_EQ( self.Ca.dimension_3() , 13 );
+ ASSERT_EQ( self.Ca.dimension_4() , 14 );
+ ASSERT_EQ( self.Cb.dimension_0() , CN0 - 2 );
+ ASSERT_EQ( self.Cb.dimension_1() , CN1 - 2 );
+ ASSERT_EQ( self.Cb.dimension_2() , CN2 - 2 );
+
+ ASSERT_EQ( self.Da.dimension_0() , DN0 );
+ ASSERT_EQ( self.Da.dimension_1() , DN1 );
+ ASSERT_EQ( self.Da.dimension_2() , DN2 );
+ ASSERT_EQ( self.Da.dimension_3() , DN3 );
+ ASSERT_EQ( self.Da.dimension_4() , DN4 );
+
+ ASSERT_EQ( self.Db.dimension_0() , DN1 - 2 );
+ ASSERT_EQ( self.Db.dimension_1() , DN2 - 2 );
+ ASSERT_EQ( self.Db.dimension_2() , DN3 - 2 );
+
ASSERT_EQ( self.Da.stride_1() , self.Db.stride_0() );
ASSERT_EQ( self.Da.stride_2() , self.Db.stride_1() );
ASSERT_EQ( self.Da.stride_3() , self.Db.stride_2() );
long error_count = -1 ;
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >(0,1) , self , error_count );
ASSERT_EQ( error_count , 0 );
}
};
template< class ExecSpace >
void test_view_mapping_subview()
{
TestViewMappingSubview< ExecSpace >::run();
}
/*--------------------------------------------------------------------------*/
template< class ViewType >
struct TestViewMapOperator {
static_assert( ViewType::reference_type_is_lvalue_reference
, "Test only valid for lvalue reference type" );
const ViewType v ;
KOKKOS_INLINE_FUNCTION
void test_left( size_t i0 , long & error_count ) const
{
typename ViewType::value_type * const base_ptr = & v(0,0,0,0,0,0,0,0);
const size_t n1 = v.dimension_1();
const size_t n2 = v.dimension_2();
const size_t n3 = v.dimension_3();
const size_t n4 = v.dimension_4();
const size_t n5 = v.dimension_5();
const size_t n6 = v.dimension_6();
const size_t n7 = v.dimension_7();
long offset = 0 ;
for ( size_t i7 = 0 ; i7 < n7 ; ++i7 )
for ( size_t i6 = 0 ; i6 < n6 ; ++i6 )
for ( size_t i5 = 0 ; i5 < n5 ; ++i5 )
for ( size_t i4 = 0 ; i4 < n4 ; ++i4 )
for ( size_t i3 = 0 ; i3 < n3 ; ++i3 )
for ( size_t i2 = 0 ; i2 < n2 ; ++i2 )
for ( size_t i1 = 0 ; i1 < n1 ; ++i1 )
{
const long d = & v(i0,i1,i2,i3,i4,i5,i6,i7) - base_ptr ;
if ( d < offset ) ++error_count ;
offset = d ;
}
if ( v.span() <= size_t(offset) ) ++error_count ;
}
KOKKOS_INLINE_FUNCTION
void test_right( size_t i0 , long & error_count ) const
{
typename ViewType::value_type * const base_ptr = & v(0,0,0,0,0,0,0,0);
const size_t n1 = v.dimension_1();
const size_t n2 = v.dimension_2();
const size_t n3 = v.dimension_3();
const size_t n4 = v.dimension_4();
const size_t n5 = v.dimension_5();
const size_t n6 = v.dimension_6();
const size_t n7 = v.dimension_7();
long offset = 0 ;
for ( size_t i1 = 0 ; i1 < n1 ; ++i1 )
for ( size_t i2 = 0 ; i2 < n2 ; ++i2 )
for ( size_t i3 = 0 ; i3 < n3 ; ++i3 )
for ( size_t i4 = 0 ; i4 < n4 ; ++i4 )
for ( size_t i5 = 0 ; i5 < n5 ; ++i5 )
for ( size_t i6 = 0 ; i6 < n6 ; ++i6 )
for ( size_t i7 = 0 ; i7 < n7 ; ++i7 )
{
const long d = & v(i0,i1,i2,i3,i4,i5,i6,i7) - base_ptr ;
if ( d < offset ) ++error_count ;
offset = d ;
}
if ( v.span() <= size_t(offset) ) ++error_count ;
}
KOKKOS_INLINE_FUNCTION
void operator()( size_t i , long & error_count ) const
{
if ( std::is_same< typename ViewType::array_layout , Kokkos::LayoutLeft >::value )
test_left(i,error_count);
else if ( std::is_same< typename ViewType::array_layout , Kokkos::LayoutRight >::value )
test_right(i,error_count);
}
constexpr static size_t N0 = 10 ;
constexpr static size_t N1 = 9 ;
constexpr static size_t N2 = 8 ;
constexpr static size_t N3 = 7 ;
constexpr static size_t N4 = 6 ;
constexpr static size_t N5 = 5 ;
constexpr static size_t N6 = 4 ;
constexpr static size_t N7 = 3 ;
TestViewMapOperator() : v( "Test" , N0, N1, N2, N3, N4, N5, N6, N7 ) {}
static void run()
{
TestViewMapOperator self ;
ASSERT_EQ( self.v.dimension_0() , ( 0 < ViewType::rank ? N0 : 1 ) );
ASSERT_EQ( self.v.dimension_1() , ( 1 < ViewType::rank ? N1 : 1 ) );
ASSERT_EQ( self.v.dimension_2() , ( 2 < ViewType::rank ? N2 : 1 ) );
ASSERT_EQ( self.v.dimension_3() , ( 3 < ViewType::rank ? N3 : 1 ) );
ASSERT_EQ( self.v.dimension_4() , ( 4 < ViewType::rank ? N4 : 1 ) );
ASSERT_EQ( self.v.dimension_5() , ( 5 < ViewType::rank ? N5 : 1 ) );
ASSERT_EQ( self.v.dimension_6() , ( 6 < ViewType::rank ? N6 : 1 ) );
ASSERT_EQ( self.v.dimension_7() , ( 7 < ViewType::rank ? N7 : 1 ) );
ASSERT_LE( self.v.dimension_0()*
self.v.dimension_1()*
self.v.dimension_2()*
self.v.dimension_3()*
self.v.dimension_4()*
self.v.dimension_5()*
self.v.dimension_6()*
self.v.dimension_7()
, self.v.span() );
long error_count ;
Kokkos::RangePolicy< typename ViewType::execution_space > range(0,self.v.dimension_0());
Kokkos::parallel_reduce( range , self , error_count );
ASSERT_EQ( 0 , error_count );
}
};
template< class ExecSpace >
void test_view_mapping_operator()
{
TestViewMapOperator< Kokkos::Experimental::View<int,Kokkos::LayoutLeft,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int*,Kokkos::LayoutLeft,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int**,Kokkos::LayoutLeft,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int***,Kokkos::LayoutLeft,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int****,Kokkos::LayoutLeft,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int*****,Kokkos::LayoutLeft,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int******,Kokkos::LayoutLeft,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int*******,Kokkos::LayoutLeft,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int,Kokkos::LayoutRight,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int*,Kokkos::LayoutRight,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int**,Kokkos::LayoutRight,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int***,Kokkos::LayoutRight,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int****,Kokkos::LayoutRight,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int*****,Kokkos::LayoutRight,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int******,Kokkos::LayoutRight,ExecSpace> >::run();
TestViewMapOperator< Kokkos::Experimental::View<int*******,Kokkos::LayoutRight,ExecSpace> >::run();
}
/*--------------------------------------------------------------------------*/
template< class ExecSpace >
struct TestViewMappingAtomic {
typedef Kokkos::MemoryTraits< Kokkos::Atomic > mem_trait ;
typedef Kokkos::Experimental::View< int * , ExecSpace > T ;
typedef Kokkos::Experimental::View< int * , ExecSpace , mem_trait > T_atom ;
T x ;
T_atom x_atom ;
constexpr static size_t N = 100000 ;
struct TagInit {};
struct TagUpdate {};
struct TagVerify {};
KOKKOS_INLINE_FUNCTION
void operator()( const TagInit & , const int i ) const
{ x(i) = i ; }
KOKKOS_INLINE_FUNCTION
void operator()( const TagUpdate & , const int i ) const
{ x_atom(i%2) += 1 ; }
KOKKOS_INLINE_FUNCTION
void operator()( const TagVerify & , const int i , long & error_count ) const
{
if ( i < 2 ) { if ( x(i) != int(i + N / 2) ) ++error_count ; }
else { if ( x(i) != int(i) ) ++error_count ; }
}
TestViewMappingAtomic()
: x("x",N)
, x_atom( x )
{}
static void run()
{
ASSERT_TRUE( T::reference_type_is_lvalue_reference );
ASSERT_FALSE( T_atom::reference_type_is_lvalue_reference );
TestViewMappingAtomic self ;
Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace , TagInit >(0,N) , self );
Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace , TagUpdate >(0,N) , self );
long error_count = -1 ;
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , TagVerify >(0,N) , self , error_count );
ASSERT_EQ( 0 , error_count );
}
};
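// Worked count for the atomic test above: TagUpdate performs N atomic
// increments on x_atom(i%2), so entries 0 and 1 each absorb N/2 of them on
// top of their TagInit values 0 and 1 -- exactly the i + N/2 that TagVerify
// checks; a data race would show up as a lost update.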
+/*--------------------------------------------------------------------------*/
+
+template< class ExecSpace >
+struct TestViewMappingClassValue {
+
+ struct ValueType {
+ KOKKOS_INLINE_FUNCTION
+ ValueType()
+ {
+#if 0
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA )
+ printf("TestViewMappingClassValue construct on Cuda\n");
+#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+ printf("TestViewMappingClassValue construct on Host\n");
+#else
+ printf("TestViewMappingClassValue construct unknown\n");
+#endif
+#endif
+ }
+ KOKKOS_INLINE_FUNCTION
+ ~ValueType()
+ {
+#if 0
+#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA )
+ printf("TestViewMappingClassValue destruct on Cuda\n");
+#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
+ printf("TestViewMappingClassValue destruct on Host\n");
+#else
+ printf("TestViewMappingClassValue destruct unknown\n");
+#endif
+#endif
+ }
+ };
+
+ static void run()
+ {
+ using namespace Kokkos::Experimental ;
+ ExecSpace::fence();
+ {
+ View< ValueType , ExecSpace > a("a");
+ ExecSpace::fence();
+ }
+ ExecSpace::fence();
+ }
+};
} /* namespace Test */
/*--------------------------------------------------------------------------*/
diff --git a/lib/kokkos/core/unit_test/TestViewOfClass.hpp b/lib/kokkos/core/unit_test/TestViewOfClass.hpp
index 09abacd80..9fe3fabbd 100644
--- a/lib/kokkos/core/unit_test/TestViewOfClass.hpp
+++ b/lib/kokkos/core/unit_test/TestViewOfClass.hpp
@@ -1,126 +1,163 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <stdexcept>
#include <sstream>
#include <iostream>
/*--------------------------------------------------------------------------*/
namespace Test {
-namespace {
-volatile int nested_view_count ;
-}
-
template< class Space >
-class NestedView {
-private:
+struct NestedView {
+
Kokkos::View<int*,Space> member ;
public:
KOKKOS_INLINE_FUNCTION
- NestedView()
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
- : member("member",2)
- { Kokkos::atomic_increment( & nested_view_count ); }
-#else
- : member(){}
-#endif
+ NestedView() : member()
+ {}
+
+ KOKKOS_INLINE_FUNCTION
+ NestedView & operator = ( const Kokkos::View<int*,Space> & lhs )
+ {
+ member = lhs ;
+ if ( member.dimension_0() ) Kokkos::atomic_add( & member(0) , 1 );
+ return *this ;
+ }
+ KOKKOS_INLINE_FUNCTION
~NestedView()
-#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
- { Kokkos::atomic_decrement( & nested_view_count ); }
-#else
- {}
-#endif
+ {
+ if ( member.dimension_0() ) {
+ Kokkos::atomic_add( & member(0) , -1 );
+ }
+ }
+};
+template< class Space >
+struct NestedViewFunctor {
+
+ Kokkos::View< NestedView<Space> * , Space > nested ;
+ Kokkos::View<int*,Space> array ;
+
+ NestedViewFunctor(
+ const Kokkos::View< NestedView<Space> * , Space > & arg_nested ,
+ const Kokkos::View<int*,Space> & arg_array )
+ : nested( arg_nested )
+ , array( arg_array )
+ {}
+
+ KOKKOS_INLINE_FUNCTION
+ void operator()( int i ) const
+ { nested[i] = array ; }
};
template< class Space >
void view_nested_view()
{
- ASSERT_EQ( 0 , nested_view_count );
+ Kokkos::View<int*,Space> tracking("tracking",1);
+
+ typename Kokkos::View<int*,Space>::HostMirror
+ host_tracking = Kokkos::create_mirror( tracking );
+
{
Kokkos::View< NestedView<Space> * , Space > a("a_nested_view",2);
- ASSERT_EQ( 2 , nested_view_count );
+
+ Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,2) , NestedViewFunctor<Space>( a , tracking ) );
+ Kokkos::deep_copy( host_tracking , tracking );
+ ASSERT_EQ( 2 , host_tracking(0) );
+
Kokkos::View< NestedView<Space> * , Space > b("b_nested_view",2);
- ASSERT_EQ( 4 , nested_view_count );
+ Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,2) , NestedViewFunctor<Space>( b , tracking ) );
+ Kokkos::deep_copy( host_tracking , tracking );
+ ASSERT_EQ( 4 , host_tracking(0) );
+
}
- // ASSERT_EQ( 0 , nested_view_count );
+ Kokkos::deep_copy( host_tracking , tracking );
+
+#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+ ASSERT_EQ( 0 , host_tracking(0) );
+#endif
+
}
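// Reading the test above (editor sketch): each assignment of 'array' into a
// NestedView adds +1 to tracking(0) and each ~NestedView subtracts 1, so two
// length-2 views drive the count to 4; once both views die it should return
// to 0, asserted only when the experimental view runs constructors and
// destructors on the device data.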
}
+#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
+
namespace Kokkos {
namespace Impl {
template< class ExecSpace , class S >
struct ViewDefaultConstruct< ExecSpace , Test::NestedView<S> , true >
{
typedef Test::NestedView<S> type ;
type * const m_ptr ;
KOKKOS_FORCEINLINE_FUNCTION
void operator()( const typename ExecSpace::size_type& i ) const
{ new(m_ptr+i) type(); }
ViewDefaultConstruct( type * pointer , size_t capacity )
: m_ptr( pointer )
{
Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
parallel_for( range , *this );
ExecSpace::fence();
}
};
} // namespace Impl
} // namespace Kokkos
+#endif
+
/*--------------------------------------------------------------------------*/
diff --git a/lib/kokkos/core/unit_test/TestViewSubview.hpp b/lib/kokkos/core/unit_test/TestViewSubview.hpp
index e0c00d3a8..39f286e53 100644
--- a/lib/kokkos/core/unit_test/TestViewSubview.hpp
+++ b/lib/kokkos/core/unit_test/TestViewSubview.hpp
@@ -1,646 +1,655 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <stdexcept>
#include <sstream>
#include <iostream>
/*--------------------------------------------------------------------------*/
namespace TestViewSubview {
template<class Layout, class Space>
struct getView {
static
Kokkos::View<double**,Layout,Space> get(int n, int m) {
return Kokkos::View<double**,Layout,Space>("G",n,m);
}
};
template<class Space>
struct getView<Kokkos::LayoutStride,Space> {
static
Kokkos::View<double**,Kokkos::LayoutStride,Space> get(int n, int m) {
const int rank = 2 ;
const int order[] = { 0, 1 };
const unsigned dim[] = { unsigned(n), unsigned(m) };
Kokkos::LayoutStride stride = Kokkos::LayoutStride::order_dimensions( rank , order , dim );
return Kokkos::View<double**,Kokkos::LayoutStride,Space>("G",stride);
}
};
template<class ViewType, class Space>
struct fill_1D {
typedef typename Space::execution_space execution_space;
typedef typename ViewType::size_type size_type;
ViewType a;
double val;
fill_1D(ViewType a_, double val_):a(a_),val(val_) {
}
KOKKOS_INLINE_FUNCTION
void operator() (const int i) const {
a(i) = val;
}
};
template<class ViewType, class Space>
struct fill_2D {
typedef typename Space::execution_space execution_space;
typedef typename ViewType::size_type size_type;
ViewType a;
double val;
fill_2D(ViewType a_, double val_):a(a_),val(val_) {
}
KOKKOS_INLINE_FUNCTION
void operator() (const int i) const{
for(int j = 0; j < static_cast<int>(a.dimension_1()); j++)
a(i,j) = val;
}
};
template<class Layout, class Space>
void test_auto_1d ()
{
typedef Kokkos::View<double**, Layout, Space> mv_type;
typedef typename mv_type::size_type size_type;
const double ZERO = 0.0;
const double ONE = 1.0;
const double TWO = 2.0;
const size_type numRows = 10;
const size_type numCols = 3;
mv_type X = getView<Layout,Space>::get(numRows, numCols);
typename mv_type::HostMirror X_h = Kokkos::create_mirror_view (X);
fill_2D<mv_type,Space> f1(X, ONE);
Kokkos::parallel_for(X.dimension_0(),f1);
Kokkos::deep_copy (X_h, X);
for (size_type j = 0; j < numCols; ++j) {
for (size_type i = 0; i < numRows; ++i) {
ASSERT_TRUE(X_h(i,j) == ONE);
}
}
fill_2D<mv_type,Space> f2(X, 0.0);
Kokkos::parallel_for(X.dimension_0(),f2);
Kokkos::deep_copy (X_h, X);
for (size_type j = 0; j < numCols; ++j) {
for (size_type i = 0; i < numRows; ++i) {
ASSERT_TRUE(X_h(i,j) == ZERO);
}
}
fill_2D<mv_type,Space> f3(X, TWO);
Kokkos::parallel_for(X.dimension_0(),f3);
Kokkos::deep_copy (X_h, X);
for (size_type j = 0; j < numCols; ++j) {
for (size_type i = 0; i < numRows; ++i) {
ASSERT_TRUE(X_h(i,j) == TWO);
}
}
for (size_type j = 0; j < numCols; ++j) {
auto X_j = Kokkos::subview (X, Kokkos::ALL(), j);
fill_1D<decltype(X_j),Space> f4(X_j, ZERO);
Kokkos::parallel_for(X_j.dimension_0(),f4);
Kokkos::deep_copy (X_h, X);
for (size_type i = 0; i < numRows; ++i) {
ASSERT_TRUE(X_h(i,j) == ZERO);
}
for (size_type jj = 0; jj < numCols; ++jj) {
auto X_jj = Kokkos::subview (X, Kokkos::ALL(), jj);
fill_1D<decltype(X_jj),Space> f5(X_jj, ONE);
Kokkos::parallel_for(X_jj.dimension_0(),f5);
Kokkos::deep_copy (X_h, X);
for (size_type i = 0; i < numRows; ++i) {
ASSERT_TRUE(X_h(i,jj) == ONE);
}
}
}
}
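// Hedged note: subview(X, Kokkos::ALL(), j) above yields a rank-1 column
// whose layout is deduced via 'auto'; for LayoutLeft it is unit-stride, for
// LayoutRight it strides by the row length, and LayoutStride covers both.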
template<class LD, class LS, class Space>
void test_1d_strided_assignment_impl(bool a, bool b, bool c, bool d, int n, int m) {
Kokkos::View<double**,LS,Space> l2d("l2d",n,m);
int col = n>2?2:0;
int row = m>2?2:0;
if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
if(a) {
Kokkos::View<double*,LD,Space> l1da = Kokkos::subview(l2d,Kokkos::ALL(),row);
ASSERT_TRUE( & l1da(0) == & l2d(0,row) );
if(n>1)
ASSERT_TRUE( & l1da(1) == & l2d(1,row) );
}
if(b && n>13) {
Kokkos::View<double*,LD,Space> l1db = Kokkos::subview(l2d,std::pair<unsigned,unsigned>(2,13),row);
ASSERT_TRUE( & l1db(0) == & l2d(2,row) );
ASSERT_TRUE( & l1db(1) == & l2d(3,row) );
}
if(c) {
Kokkos::View<double*,LD,Space> l1dc = Kokkos::subview(l2d,col,Kokkos::ALL());
ASSERT_TRUE( & l1dc(0) == & l2d(col,0) );
if(m>1)
ASSERT_TRUE( & l1dc(1) == & l2d(col,1) );
}
if(d && m>13) {
Kokkos::View<double*,LD,Space> l1dd = Kokkos::subview(l2d,col,std::pair<unsigned,unsigned>(2,13));
ASSERT_TRUE( & l1dd(0) == & l2d(col,2) );
ASSERT_TRUE( & l1dd(1) == & l2d(col,3) );
}
}
}
template<class Space >
void test_1d_strided_assignment() {
test_1d_strided_assignment_impl<Kokkos::LayoutStride,Kokkos::LayoutLeft,Space>(true,true,true,true,17,3);
test_1d_strided_assignment_impl<Kokkos::LayoutStride,Kokkos::LayoutRight,Space>(true,true,true,true,17,3);
test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutLeft,Space>(true,true,false,false,17,3);
test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutLeft,Space>(true,true,false,false,17,3);
test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutRight,Space>(false,false,true,true,17,3);
test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutRight,Space>(false,false,true,true,17,3);
test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutLeft,Space>(true,true,false,false,17,1);
test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutLeft,Space>(true,true,true,true,1,17);
test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutLeft,Space>(true,true,true,true,1,17);
test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutLeft,Space>(true,true,false,false,17,1);
test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutRight,Space>(true,true,true,true,17,1);
test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutRight,Space>(false,false,true,true,1,17);
test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutRight,Space>(false,false,true,true,1,17);
test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutRight,Space>(true,true,true,true,17,1);
}
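// Hedged reading of the flags above: (a,b,c,d) select whole-column,
// ranged-column, whole-row, and ranged-row subviews; a LayoutLeft column is
// unit-stride and assignable to a contiguous rank-1 view, a LayoutLeft row
// is not, and a LayoutStride destination accepts all four.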
template< class Space >
void test_left_0()
{
typedef Kokkos::View< int [2][3][4][5][2][3][4][5] , Kokkos::LayoutLeft , Space >
view_static_8_type ;
if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
view_static_8_type x_static_8("x_static_left_8");
ASSERT_TRUE( x_static_8.is_contiguous() );
Kokkos::View<int,Kokkos::LayoutLeft,Space> x0 = Kokkos::subview( x_static_8 , 0, 0, 0, 0, 0, 0, 0, 0 );
ASSERT_TRUE( x0.is_contiguous() );
ASSERT_TRUE( & x0() == & x_static_8(0,0,0,0,0,0,0,0) );
Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 =
Kokkos::subview( x_static_8, Kokkos::pair<int,int>(0,2), 1, 2, 3, 0, 1, 2, 3 );
ASSERT_TRUE( x1.is_contiguous() );
ASSERT_TRUE( & x1(0) == & x_static_8(0,1,2,3,0,1,2,3) );
ASSERT_TRUE( & x1(1) == & x_static_8(1,1,2,3,0,1,2,3) );
Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 =
Kokkos::subview( x_static_8, Kokkos::pair<int,int>(0,2), 1, 2, 3
, Kokkos::pair<int,int>(0,2), 1, 2, 3 );
ASSERT_TRUE( ! x2.is_contiguous() );
ASSERT_TRUE( & x2(0,0) == & x_static_8(0,1,2,3,0,1,2,3) );
ASSERT_TRUE( & x2(1,0) == & x_static_8(1,1,2,3,0,1,2,3) );
ASSERT_TRUE( & x2(0,1) == & x_static_8(0,1,2,3,1,1,2,3) );
ASSERT_TRUE( & x2(1,1) == & x_static_8(1,1,2,3,1,1,2,3) );
// Kokkos::View<int**,Kokkos::LayoutLeft,Space> error_2 =
Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 =
Kokkos::subview( x_static_8, 1, Kokkos::pair<int,int>(0,2), 2, 3
, Kokkos::pair<int,int>(0,2), 1, 2, 3 );
ASSERT_TRUE( ! sx2.is_contiguous() );
ASSERT_TRUE( & sx2(0,0) == & x_static_8(1,0,2,3,0,1,2,3) );
ASSERT_TRUE( & sx2(1,0) == & x_static_8(1,1,2,3,0,1,2,3) );
ASSERT_TRUE( & sx2(0,1) == & x_static_8(1,0,2,3,1,1,2,3) );
ASSERT_TRUE( & sx2(1,1) == & x_static_8(1,1,2,3,1,1,2,3) );
Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 =
Kokkos::subview( x_static_8, 0, Kokkos::pair<int,int>(0,2) /* of [3] */
, 1, Kokkos::pair<int,int>(1,3) /* of [5] */
, 1, Kokkos::pair<int,int>(0,2) /* of [3] */
, 2, Kokkos::pair<int,int>(2,4) /* of [5] */
);
ASSERT_TRUE( ! sx4.is_contiguous() );
for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 )
for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 )
for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 )
for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) {
ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x_static_8(0,0+i0, 1,1+i1, 1,0+i2, 2,2+i3) );
}
}
}
template< class Space >
void test_left_1()
{
typedef Kokkos::View< int ****[2][3][4][5] , Kokkos::LayoutLeft , Space >
view_type ;
if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
view_type x8("x_left_8",2,3,4,5);
ASSERT_TRUE( x8.is_contiguous() );
Kokkos::View<int,Kokkos::LayoutLeft,Space> x0 = Kokkos::subview( x8 , 0, 0, 0, 0, 0, 0, 0, 0 );
ASSERT_TRUE( x0.is_contiguous() );
ASSERT_TRUE( & x0() == & x8(0,0,0,0,0,0,0,0) );
Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 =
Kokkos::subview( x8, Kokkos::pair<int,int>(0,2), 1, 2, 3, 0, 1, 2, 3 );
ASSERT_TRUE( x1.is_contiguous() );
ASSERT_TRUE( & x1(0) == & x8(0,1,2,3,0,1,2,3) );
ASSERT_TRUE( & x1(1) == & x8(1,1,2,3,0,1,2,3) );
Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 =
Kokkos::subview( x8, Kokkos::pair<int,int>(0,2), 1, 2, 3
, Kokkos::pair<int,int>(0,2), 1, 2, 3 );
ASSERT_TRUE( ! x2.is_contiguous() );
ASSERT_TRUE( & x2(0,0) == & x8(0,1,2,3,0,1,2,3) );
ASSERT_TRUE( & x2(1,0) == & x8(1,1,2,3,0,1,2,3) );
ASSERT_TRUE( & x2(0,1) == & x8(0,1,2,3,1,1,2,3) );
ASSERT_TRUE( & x2(1,1) == & x8(1,1,2,3,1,1,2,3) );
// Kokkos::View<int**,Kokkos::LayoutLeft,Space> error_2 =
Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 =
Kokkos::subview( x8, 1, Kokkos::pair<int,int>(0,2), 2, 3
, Kokkos::pair<int,int>(0,2), 1, 2, 3 );
ASSERT_TRUE( ! sx2.is_contiguous() );
ASSERT_TRUE( & sx2(0,0) == & x8(1,0,2,3,0,1,2,3) );
ASSERT_TRUE( & sx2(1,0) == & x8(1,1,2,3,0,1,2,3) );
ASSERT_TRUE( & sx2(0,1) == & x8(1,0,2,3,1,1,2,3) );
ASSERT_TRUE( & sx2(1,1) == & x8(1,1,2,3,1,1,2,3) );
Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 =
Kokkos::subview( x8, 0, Kokkos::pair<int,int>(0,2) /* of [3] */
, 1, Kokkos::pair<int,int>(1,3) /* of [5] */
, 1, Kokkos::pair<int,int>(0,2) /* of [3] */
, 2, Kokkos::pair<int,int>(2,4) /* of [5] */
);
ASSERT_TRUE( ! sx4.is_contiguous() );
for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 )
for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 )
for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 )
for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) {
ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x8(0,0+i0, 1,1+i1, 1,0+i2, 2,2+i3) );
}
}
}
template< class Space >
void test_left_2()
{
typedef Kokkos::View< int **** , Kokkos::LayoutLeft , Space > view_type ;
if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
view_type x4("x4",2,3,4,5);
ASSERT_TRUE( x4.is_contiguous() );
Kokkos::View<int,Kokkos::LayoutLeft,Space> x0 = Kokkos::subview( x4 , 0, 0, 0, 0 );
ASSERT_TRUE( x0.is_contiguous() );
ASSERT_TRUE( & x0() == & x4(0,0,0,0) );
Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 =
Kokkos::subview( x4, Kokkos::pair<int,int>(0,2), 1, 2, 3 );
ASSERT_TRUE( x1.is_contiguous() );
ASSERT_TRUE( & x1(0) == & x4(0,1,2,3) );
ASSERT_TRUE( & x1(1) == & x4(1,1,2,3) );
Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 =
Kokkos::subview( x4, Kokkos::pair<int,int>(0,2), 1, Kokkos::pair<int,int>(1,3), 2 );
ASSERT_TRUE( ! x2.is_contiguous() );
ASSERT_TRUE( & x2(0,0) == & x4(0,1,1,2) );
ASSERT_TRUE( & x2(1,0) == & x4(1,1,1,2) );
ASSERT_TRUE( & x2(0,1) == & x4(0,1,2,2) );
ASSERT_TRUE( & x2(1,1) == & x4(1,1,2,2) );
// Kokkos::View<int**,Kokkos::LayoutLeft,Space> error_2 =
Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 =
Kokkos::subview( x4, 1, Kokkos::pair<int,int>(0,2)
, 2, Kokkos::pair<int,int>(1,4) );
ASSERT_TRUE( ! sx2.is_contiguous() );
ASSERT_TRUE( & sx2(0,0) == & x4(1,0,2,1) );
ASSERT_TRUE( & sx2(1,0) == & x4(1,1,2,1) );
ASSERT_TRUE( & sx2(0,1) == & x4(1,0,2,2) );
ASSERT_TRUE( & sx2(1,1) == & x4(1,1,2,2) );
ASSERT_TRUE( & sx2(0,2) == & x4(1,0,2,3) );
ASSERT_TRUE( & sx2(1,2) == & x4(1,1,2,3) );
Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 =
Kokkos::subview( x4, Kokkos::pair<int,int>(1,2) /* of [2] */
, Kokkos::pair<int,int>(1,3) /* of [3] */
, Kokkos::pair<int,int>(0,4) /* of [4] */
, Kokkos::pair<int,int>(2,4) /* of [5] */
);
ASSERT_TRUE( ! sx4.is_contiguous() );
for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 )
for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 )
for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 )
for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) {
ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x4( 1+i0, 1+i1, 0+i2, 2+i3 ) );
}
}
}
template< class Space >
void test_left_3()
{
typedef Kokkos::View< int ** , Kokkos::LayoutLeft , Space > view_type ;
if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
view_type xm("x4",10,5);
ASSERT_TRUE( xm.is_contiguous() );
Kokkos::View<int,Kokkos::LayoutLeft,Space> x0 = Kokkos::subview( xm , 5, 3 );
ASSERT_TRUE( x0.is_contiguous() );
ASSERT_TRUE( & x0() == & xm(5,3) );
Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 =
Kokkos::subview( xm, Kokkos::ALL(), 3 );
ASSERT_TRUE( x1.is_contiguous() );
for ( int i = 0 ; i < int(xm.dimension_0()) ; ++i ) {
ASSERT_TRUE( & x1(i) == & xm(i,3) );
}
Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 =
Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL() );
ASSERT_TRUE( ! x2.is_contiguous() );
for ( int j = 0 ; j < int(x2.dimension_1()) ; ++j )
for ( int i = 0 ; i < int(x2.dimension_0()) ; ++i ) {
ASSERT_TRUE( & x2(i,j) == & xm(1+i,j) );
}
Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2c =
Kokkos::subview( xm, Kokkos::ALL(), std::pair<int,int>(2,4) );
ASSERT_TRUE( x2c.is_contiguous() );
for ( int j = 0 ; j < int(x2c.dimension_1()) ; ++j )
for ( int i = 0 ; i < int(x2c.dimension_0()) ; ++i ) {
ASSERT_TRUE( & x2c(i,j) == & xm(i,2+j) );
}
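// An empty range such as (1,1) must yield a zero-extent dimension.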
Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2_n1 =
Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL() );
ASSERT_TRUE( x2_n1.dimension_0() == 0 );
ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() );
Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2_n2 =
Kokkos::subview( xm , Kokkos::ALL() , std::pair<int,int>(1,1) );
ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() );
ASSERT_TRUE( x2_n2.dimension_1() == 0 );
}
}
//----------------------------------------------------------------------------
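// The same battery of subview checks, now applied to LayoutRight sources.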
template< class Space >
void test_right_0()
{
typedef Kokkos::View< int [2][3][4][5][2][3][4][5] , Kokkos::LayoutRight , Space >
view_static_8_type ;
if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
view_static_8_type x_static_8("x_static_right_8");
Kokkos::View<int,Kokkos::LayoutRight,Space> x0 = Kokkos::subview( x_static_8 , 0, 0, 0, 0, 0, 0, 0, 0 );
ASSERT_TRUE( & x0() == & x_static_8(0,0,0,0,0,0,0,0) );
Kokkos::View<int*,Kokkos::LayoutRight,Space> x1 =
Kokkos::subview( x_static_8, 0, 1, 2, 3, 0, 1, 2, Kokkos::pair<int,int>(1,3) );
ASSERT_TRUE( x1.dimension_0() == 2 );
ASSERT_TRUE( & x1(0) == & x_static_8(0,1,2,3,0,1,2,1) );
ASSERT_TRUE( & x1(1) == & x_static_8(0,1,2,3,0,1,2,2) );
Kokkos::View<int**,Kokkos::LayoutRight,Space> x2 =
Kokkos::subview( x_static_8, 0, 1, 2, Kokkos::pair<int,int>(1,3)
, 0, 1, 2, Kokkos::pair<int,int>(1,3) );
ASSERT_TRUE( x2.dimension_0() == 2 );
ASSERT_TRUE( x2.dimension_1() == 2 );
ASSERT_TRUE( & x2(0,0) == & x_static_8(0,1,2,1,0,1,2,1) );
ASSERT_TRUE( & x2(1,0) == & x_static_8(0,1,2,2,0,1,2,1) );
ASSERT_TRUE( & x2(0,1) == & x_static_8(0,1,2,1,0,1,2,2) );
ASSERT_TRUE( & x2(1,1) == & x_static_8(0,1,2,2,0,1,2,2) );
// Kokkos::View<int**,Kokkos::LayoutRight,Space> error_2 =
Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 =
Kokkos::subview( x_static_8, 1, Kokkos::pair<int,int>(0,2), 2, 3
, Kokkos::pair<int,int>(0,2), 1, 2, 3 );
ASSERT_TRUE( sx2.dimension_0() == 2 );
ASSERT_TRUE( sx2.dimension_1() == 2 );
ASSERT_TRUE( & sx2(0,0) == & x_static_8(1,0,2,3,0,1,2,3) );
ASSERT_TRUE( & sx2(1,0) == & x_static_8(1,1,2,3,0,1,2,3) );
ASSERT_TRUE( & sx2(0,1) == & x_static_8(1,0,2,3,1,1,2,3) );
ASSERT_TRUE( & sx2(1,1) == & x_static_8(1,1,2,3,1,1,2,3) );
Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 =
Kokkos::subview( x_static_8, 0, Kokkos::pair<int,int>(0,2) /* of [3] */
, 1, Kokkos::pair<int,int>(1,3) /* of [5] */
, 1, Kokkos::pair<int,int>(0,2) /* of [3] */
, 2, Kokkos::pair<int,int>(2,4) /* of [5] */
);
ASSERT_TRUE( sx4.dimension_0() == 2 );
ASSERT_TRUE( sx4.dimension_1() == 2 );
ASSERT_TRUE( sx4.dimension_2() == 2 );
ASSERT_TRUE( sx4.dimension_3() == 2 );
for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 )
for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 )
for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 )
for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) {
ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x_static_8(0, 0+i0, 1, 1+i1, 1, 0+i2, 2, 2+i3) );
}
}
}
template< class Space >
void test_right_1()
{
typedef Kokkos::View< int ****[2][3][4][5] , Kokkos::LayoutRight , Space >
view_type ;
if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
view_type x8("x_right_8",2,3,4,5);
Kokkos::View<int,Kokkos::LayoutRight,Space> x0 = Kokkos::subview( x8 , 0, 0, 0, 0, 0, 0, 0, 0 );
ASSERT_TRUE( & x0() == & x8(0,0,0,0,0,0,0,0) );
Kokkos::View<int*,Kokkos::LayoutRight,Space> x1 =
Kokkos::subview( x8, 0, 1, 2, 3, 0, 1, 2, Kokkos::pair<int,int>(1,3) );
ASSERT_TRUE( & x1(0) == & x8(0,1,2,3,0,1,2,1) );
ASSERT_TRUE( & x1(1) == & x8(0,1,2,3,0,1,2,2) );
Kokkos::View<int**,Kokkos::LayoutRight,Space> x2 =
Kokkos::subview( x8, 0, 1, 2, Kokkos::pair<int,int>(1,3)
, 0, 1, 2, Kokkos::pair<int,int>(1,3) );
ASSERT_TRUE( & x2(0,0) == & x8(0,1,2,1,0,1,2,1) );
ASSERT_TRUE( & x2(1,0) == & x8(0,1,2,2,0,1,2,1) );
ASSERT_TRUE( & x2(0,1) == & x8(0,1,2,1,0,1,2,2) );
ASSERT_TRUE( & x2(1,1) == & x8(0,1,2,2,0,1,2,2) );
// Kokkos::View<int**,Kokkos::LayoutRight,Space> error_2 =
Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 =
Kokkos::subview( x8, 1, Kokkos::pair<int,int>(0,2), 2, 3
, Kokkos::pair<int,int>(0,2), 1, 2, 3 );
ASSERT_TRUE( & sx2(0,0) == & x8(1,0,2,3,0,1,2,3) );
ASSERT_TRUE( & sx2(1,0) == & x8(1,1,2,3,0,1,2,3) );
ASSERT_TRUE( & sx2(0,1) == & x8(1,0,2,3,1,1,2,3) );
ASSERT_TRUE( & sx2(1,1) == & x8(1,1,2,3,1,1,2,3) );
Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 =
Kokkos::subview( x8, 0, Kokkos::pair<int,int>(0,2) /* of [3] */
, 1, Kokkos::pair<int,int>(1,3) /* of [5] */
, 1, Kokkos::pair<int,int>(0,2) /* of [3] */
, 2, Kokkos::pair<int,int>(2,4) /* of [5] */
);
for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 )
for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 )
for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 )
for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) {
ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x8(0,0+i0, 1,1+i1, 1,0+i2, 2,2+i3) );
}
}
}
template< class Space >
void test_right_3()
{
typedef Kokkos::View< int ** , Kokkos::LayoutRight , Space > view_type ;
if(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<Kokkos::HostSpace,Space>::value) {
view_type xm("x4",10,5);
ASSERT_TRUE( xm.is_contiguous() );
Kokkos::View<int,Kokkos::LayoutRight,Space> x0 = Kokkos::subview( xm , 5, 3 );
ASSERT_TRUE( x0.is_contiguous() );
ASSERT_TRUE( & x0() == & xm(5,3) );
Kokkos::View<int*,Kokkos::LayoutRight,Space> x1 =
Kokkos::subview( xm, 3, Kokkos::ALL() );
ASSERT_TRUE( x1.is_contiguous() );
for ( int i = 0 ; i < int(xm.dimension_1()) ; ++i ) {
ASSERT_TRUE( & x1(i) == & xm(3,i) );
}
Kokkos::View<int**,Kokkos::LayoutRight,Space> x2c =
Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL() );
ASSERT_TRUE( x2c.is_contiguous() );
for ( int j = 0 ; j < int(x2c.dimension_1()) ; ++j )
for ( int i = 0 ; i < int(x2c.dimension_0()) ; ++i ) {
ASSERT_TRUE( & x2c(i,j) == & xm(1+i,j) );
}
Kokkos::View<int**,Kokkos::LayoutRight,Space> x2 =
Kokkos::subview( xm, Kokkos::ALL(), std::pair<int,int>(2,4) );
ASSERT_TRUE( ! x2.is_contiguous() );
for ( int j = 0 ; j < int(x2.dimension_1()) ; ++j )
for ( int i = 0 ; i < int(x2.dimension_0()) ; ++i ) {
ASSERT_TRUE( & x2(i,j) == & xm(i,2+j) );
}
Kokkos::View<int**,Kokkos::LayoutRight,Space> x2_n1 =
Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL() );
ASSERT_TRUE( x2_n1.dimension_0() == 0 );
ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() );
Kokkos::View<int**,Kokkos::LayoutRight,Space> x2_n2 =
Kokkos::subview( xm , Kokkos::ALL() , std::pair<int,int>(1,1) );
ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() );
ASSERT_TRUE( x2_n2.dimension_1() == 0 );
}
}
//----------------------------------------------------------------------------
}
diff --git a/lib/kokkos/doc/Doxyfile b/lib/kokkos/doc/Doxyfile
new file mode 100644
index 000000000..bc5c7486b
--- /dev/null
+++ b/lib/kokkos/doc/Doxyfile
@@ -0,0 +1,127 @@
+#
+# Include the global look and feel options
+#
+@INCLUDE = ../../common/Doxyfile
+#
+# Package options
+#
+PROJECT_NAME = "Kokkos Core Kernels Package"
+PROJECT_NUMBER = "Version of the Day"
+OUTPUT_DIRECTORY = .
+OUTPUT_LANGUAGE = English
+
+EXTRACT_ALL = NO
+EXTRACT_PRIVATE = NO
+EXTRACT_STATIC = YES
+HIDE_UNDOC_MEMBERS = YES
+HIDE_UNDOC_CLASSES = YES
+BRIEF_MEMBER_DESC = YES
+REPEAT_BRIEF = YES
+ALWAYS_DETAILED_SEC = YES
+FULL_PATH_NAMES = NO
+STRIP_FROM_PATH =
+INTERNAL_DOCS = NO
+CLASS_DIAGRAMS = YES
+SOURCE_BROWSER = YES
+INLINE_SOURCES = NO
+STRIP_CODE_COMMENTS = YES
+REFERENCED_BY_RELATION = NO
+REFERENCES_RELATION = NO
+CASE_SENSE_NAMES = YES
+HIDE_SCOPE_NAMES = NO
+VERBATIM_HEADERS = YES
+SHOW_INCLUDE_FILES = YES
+#JAVADOC_AUTOBRIEF = YES
+INHERIT_DOCS = YES
+INLINE_INHERITED_MEMB = YES
+INLINE_INFO = YES
+SORT_MEMBER_DOCS = NO
+TAB_SIZE = 2
+ENABLED_SECTIONS =
+SORT_BRIEF_DOCS = NO
+GENERATE_TODOLIST = YES
+GENERATE_TESTLIST = YES
+QUIET = NO
+WARNINGS = YES
+WARN_IF_UNDOCUMENTED = YES
+WARN_FORMAT = "$file:$line: $text"
+
+#
+# INPUT: Where to find files that Doxygen should process. ../classic
+# has a doc/ subdirectory with its own Doxyfile that points to its own
+# files. The other Kokkos subpackages don't currently have their own
+# Doxyfile files, so we have to do it manually here.
+#
+# mfh 26 Sep 2013: I've only added those directories in the Core
+# subpackage that constitute the "public interface" of that
+# subpackage. Please feel free to include additional subdirectories
+# of ../core if you want to generate their documentation as well.
+#
+# mfh 26 Sep 2013: I've only added the Kokkos subpackages here that I
+# think are ready for Doxygen documentation generation. Please feel
+# free to amend this list as you see fit.
+#
+
+INPUT = index.doc ../classic ../core/src ../containers/src ../linalg/src
+FILE_PATTERNS = *.hpp *.cpp *.cuh *.cu
+RECURSIVE = NO
+EXCLUDE_PATTERNS = *.x *.o *.out
+EXAMPLE_PATH =
+EXAMPLE_RECURSIVE = YES
+EXAMPLE_PATTERNS = *.cpp *.hpp
+IMAGE_PATH =
+INPUT_FILTER =
+ALPHABETICAL_INDEX = YES
+COLS_IN_ALPHA_INDEX = 4
+IGNORE_PREFIX =
+#
+# What diagrams are created
+#
+CLASS_GRAPH = YES
+COLLABORATION_GRAPH = NO
+INCLUDE_GRAPH = NO
+INCLUDED_BY_GRAPH = NO
+GRAPHICAL_HIERARCHY = YES
+#
+# Preprocessing
+#
+ENABLE_PREPROCESSING = YES
+MACRO_EXPANSION = YES
+EXPAND_ONLY_PREDEF = YES
+SEARCH_INCLUDES = YES
+INCLUDE_FILE_PATTERNS =
+PREDEFINED = DOXYGEN_SHOULD_SKIP_THIS DOXYGEN_USE_ONLY
+INCLUDE_PATH = ../src
+EXPAND_AS_DEFINED =
+#
+# Links to other packages
+#
+TAGFILES = ../../common/tag_files/teuchos.tag=../../../teuchos/doc/html ../../common/tag_files/epetra.tag=../../../epetra/doc/html \
+ ../../common/tag_files/belos.tag=../../../belos/doc/html ../../common/tag_files/anasazi.tag=../../../anasazi/doc/html \
+ ../../common/tag_files/kokkos.tag=../../../kokkos/doc/html
+GENERATE_TAGFILE = ../../common/tag_files/tpetra.tag
+ALLEXTERNALS = NO
+EXTERNAL_GROUPS = NO
+#
+# Environment
+#
+PERL_PATH = /usr/bin/perl
+HAVE_DOT = YES
+DOT_PATH =
+MAX_DOT_GRAPH_WIDTH = 1024
+MAX_DOT_GRAPH_HEIGHT = 1024
+#
+# What kind of documentation is generated
+#
+#GENERATE_HTML = YES
+#HTML_OUTPUT = html
+#HTML_HEADER = includes/header.html
+#HTML_FOOTER = includes/footer.html
+#HTML_STYLESHEET = includes/stylesheet.css
+#HTML_ALIGN_MEMBERS = YES
+GENERATE_HTMLHELP = NO
+DISABLE_INDEX = NO
+GENERATE_LATEX = NO
+GENERATE_RTF = NO
+GENERATE_MAN = NO
+GENERATE_XML = NO
diff --git a/lib/kokkos/doc/Kokkos_PG.pdf b/lib/kokkos/doc/Kokkos_PG.pdf
new file mode 100644
index 000000000..3c415698c
Binary files /dev/null and b/lib/kokkos/doc/Kokkos_PG.pdf differ
diff --git a/lib/kokkos/doc/README b/lib/kokkos/doc/README
new file mode 100644
index 000000000..31e75f365
--- /dev/null
+++ b/lib/kokkos/doc/README
@@ -0,0 +1,32 @@
+Kokkos uses the Doxygen tool to provide three documentation
+sources:
+- man pages
+- LaTeX User Guide
+- HTML Online User Guide
+
+Man Pages
+
+Man pages are available for all files and functions in the directory
+TRILINOS_HOME/doc/kokkos/man, where TRILINOS_HOME is the location of your
+copy of Trilinos. To use these pages with the Unix man utility, add
+the directory to your man path as follows:
+
+setenv MANPATH `echo $MANPATH`:TRILINOS_HOME/doc/kokkos/man
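+
+or, for a Bourne-style shell such as bash:
+
+export MANPATH=$MANPATH:TRILINOS_HOME/doc/kokkos/man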
+
+
+LaTeX User Guide
+
+A PostScript version of this guide is in
+TRILINOS_HOME/doc/kokkos/latex/user_guide.ps. The LaTeX source is in the
+directory TRILINOS_HOME/doc/kokkos/latex.
+
+HTML Online User Guide
+
+The online guide can be viewed by pointing your browser to
+TRILINOS_HOME/doc/kokkos/html/index.html
+
+Any questions, comments, or suggestions are welcome. Please send them to
+Mike Heroux at
+
+320-845-7695
+maherou@sandia.gov
diff --git a/lib/kokkos/doc/build_docs b/lib/kokkos/doc/build_docs
new file mode 100755
index 000000000..da1d3e4f6
--- /dev/null
+++ b/lib/kokkos/doc/build_docs
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+if [ $TRILINOS_HOME ]; then
+ echo "TRILINOS_HOME has already been set!"
+else
+ echo "TRILINOS_HOME has not been set. Setting it!"
+ export TRILINOS_HOME=`pwd`/../../..
+fi
+
+echo
+echo "Generating main Kokkos doxygen documentation ..."
+echo
+
+doxygen Doxyfile
+
diff --git a/lib/kokkos/doc/index.doc b/lib/kokkos/doc/index.doc
new file mode 100644
index 000000000..27a9e4f2e
--- /dev/null
+++ b/lib/kokkos/doc/index.doc
@@ -0,0 +1,72 @@
+/*!
+\mainpage Trilinos/Kokkos: Shared-memory programming interface and computational kernels
+
+\section Kokkos_Intro Introduction
+
+The %Kokkos package has two main components. The first, sometimes
+called "%Kokkos Array" or just "%Kokkos," implements a
+performance-portable shared-memory parallel programming model and data
+containers. The second, called "%Kokkos Classic," consists of
+computational kernels that support the %Tpetra package.
+
+\section Kokkos_Kokkos The %Kokkos programming model
+
+%Kokkos implements a performance-portable shared-memory parallel
+programming model and data containers. It lets you write an algorithm
+once, and just change a template parameter to get the optimal data
+layout for your hardware. %Kokkos has back-ends for the following
+parallel programming models:
+
+- Kokkos::Threads: POSIX Threads (Pthreads)
+- Kokkos::OpenMP: OpenMP
+- Kokkos::Cuda: NVIDIA's CUDA programming model for graphics
+ processing units (GPUs)
+- Kokkos::Serial: No thread parallelism
+
+%Kokkos also has optimizations for shared-memory parallel systems with
+nonuniform memory access (NUMA). Its containers can hold data of any
+primitive ("plain old") data type (and some aggregate types). %Kokkos
+Array may be used as a stand-alone programming model.
+
+%Kokkos' parallel operations include the following:
+
+- parallel_for: a thread-parallel "for loop"
+- parallel_reduce: a thread-parallel reduction
+- parallel_scan: a thread-parallel prefix scan operation
+
+as well as expert-level platform-independent interfaces to thread
+"teams," per-team "shared memory," synchronization, and atomic update
+operations.
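+
+As a brief illustration (this sketch is not part of the package
+sources; it assumes the C++11 lambda interface via KOKKOS_LAMBDA is
+enabled), a parallel loop and reduction look like this:
+
+\code
+#include <Kokkos_Core.hpp>
+#include <cstdio>
+
+int main (int argc, char* argv[]) {
+  Kokkos::initialize (argc, argv);
+  {
+    const int N = 100;
+    Kokkos::View<double*> x ("x", N);
+
+    // Thread-parallel "for loop": x(i) = i.
+    Kokkos::parallel_for (N, KOKKOS_LAMBDA (const int i) {
+      x(i) = double (i);
+    });
+
+    // Thread-parallel reduction: sum = x(0) + ... + x(N-1).
+    double sum = 0.0;
+    Kokkos::parallel_reduce (N, KOKKOS_LAMBDA (const int i, double& lsum) {
+      lsum += x(i);
+    }, sum);
+    printf ("Sum of 0..%d is %f\n", N - 1, sum);
+  }
+  // Views must be destroyed before finalize; hence the inner scope.
+  Kokkos::finalize ();
+  return 0;
+}
+\endcode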
+
+%Kokkos' data containers include the following:
+
+- Kokkos::View: A multidimensional array suitable for thread-parallel
+ operations. Its layout (e.g., row-major or column-major) is
+ optimized by default for the particular thread-parallel device.
+- Kokkos::Vector: A drop-in replacement for std::vector that eases
+ porting from standard sequential C++ data structures to %Kokkos'
+ parallel data structures.
+- Kokkos::UnorderedMap: A parallel lookup table comparable in
+ functionality to std::unordered_map.
+
+%Kokkos also uses the above basic containers to implement higher-level
+data structures, like sparse graphs and matrices.
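+
+As a small sketch (the extent N and the names here are illustrative
+only), a View can be sliced with Kokkos::subview without copying:
+
+\code
+// A 2-D N x 3 array of doubles in the default layout for the device.
+Kokkos::View<double*[3]> points ("points", N);
+
+// A 1-D slice holding column 0 of points; the slice aliases the
+// parent's memory, so writes through col0 are visible in points.
+Kokkos::View<double*, Kokkos::LayoutStride> col0 =
+  Kokkos::subview (points, Kokkos::ALL (), 0);
+\endcode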
+
+A good place to start learning about %Kokkos would be <a href="http://trilinos.sandia.gov/events/trilinos_user_group_2013/presentations/2013-11-TUG-Kokkos-Tutorial.pdf">these tutorial slides</a> from the 2013 Trilinos Users' Group meeting.
+
+\section Kokkos_Classic %Kokkos Classic
+
+"%Kokkos Classic" consists of computational kernels that support the
+%Tpetra package. These kernels include sparse matrix-vector multiply,
+sparse triangular solve, Gauss-Seidel, and dense vector operations.
+They are templated on the type of objects (\c Scalar) on which they
+operate. This component was not meant to be visible to users; it is
+an implementation detail of the %Tpetra distributed linear algebra
+package.
+
+%Kokkos Classic also implements a shared-memory parallel programming
+model. This inspired and preceded the %Kokkos programming model
+described in the previous section. Users should consider the %Kokkos
+Classic programming model deprecated, and prefer the new %Kokkos
+programming model.
+*/
diff --git a/lib/kokkos/example/CMakeLists.txt b/lib/kokkos/example/CMakeLists.txt
new file mode 100644
index 000000000..3809cc2ea
--- /dev/null
+++ b/lib/kokkos/example/CMakeLists.txt
@@ -0,0 +1,20 @@
+
+
+# Subpackage name must match what appears in kokkos/cmake/Dependencies.cmake
+#
+TRIBITS_SUBPACKAGE(Example)
+
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(query_device)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(fixture)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(feint)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(fenl)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(multi_fem)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(md_skeleton)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(global_2_local_ids)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(grow_array)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(sort_array)
+if(NOT Kokkos_ENABLE_Cuda)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(tutorial)
+endif()
+TRIBITS_SUBPACKAGE_POSTPROCESS()
+
diff --git a/lib/kokkos/example/cmake/Dependencies.cmake b/lib/kokkos/example/cmake/Dependencies.cmake
new file mode 100644
index 000000000..0d86e7871
--- /dev/null
+++ b/lib/kokkos/example/cmake/Dependencies.cmake
@@ -0,0 +1,4 @@
+TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
+ LIB_REQUIRED_DEP_PACKAGES KokkosCore KokkosContainers KokkosAlgorithms
+ TEST_OPTIONAL_DEP_TPLS CUSPARSE MKL
+ )
diff --git a/lib/kokkos/example/feint/CMakeLists.txt b/lib/kokkos/example/feint/CMakeLists.txt
new file mode 100644
index 000000000..0018b9f9f
--- /dev/null
+++ b/lib/kokkos/example/feint/CMakeLists.txt
@@ -0,0 +1,18 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common)
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../fixture)
+
+SET(SOURCES "")
+
+FILE(GLOB SOURCES *.cpp)
+
+LIST( APPEND SOURCES ../fixture/BoxElemPart.cpp)
+
+TRIBITS_ADD_EXECUTABLE(
+ feint
+ SOURCES ${SOURCES}
+ COMM serial mpi
+ )
+
diff --git a/lib/kokkos/example/fenl/CMakeLists.txt b/lib/kokkos/example/fenl/CMakeLists.txt
new file mode 100644
index 000000000..150656b16
--- /dev/null
+++ b/lib/kokkos/example/fenl/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common)
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../fixture)
+
+SET(SOURCES "")
+
+FILE( GLOB SOURCES *.cpp )
+
+LIST( APPEND SOURCES ../fixture/BoxElemPart.cpp )
+
+TRIBITS_ADD_EXECUTABLE(
+ fenl
+ SOURCES ${SOURCES}
+ COMM serial mpi
+ )
diff --git a/lib/kokkos/example/fenl/Makefile b/lib/kokkos/example/fenl/Makefile
index 491ed4ee6..2e64e11e1 100644
--- a/lib/kokkos/example/fenl/Makefile
+++ b/lib/kokkos/example/fenl/Makefile
@@ -1,50 +1,57 @@
-KOKKOS_PATH = ../..
+KOKKOS_PATH ?= ../..
-vpath %.cpp ${KOKKOS_PATH}/example/fixture ${KOKKOS_PATH}/example/fenl
+MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+SRC_DIR := $(dir $(MAKEFILE_PATH))
-EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp ${KOKKOS_PATH}/example/fenl/*.hpp)
+vpath %.cpp ${SRC_DIR}/../fixture ${SRC_DIR}
+
+EXAMPLE_HEADERS = $(wildcard $(SRC_DIR)/../common/*.hpp ${SRC_DIR}/../fixture/*.hpp ${SRC_DIR}/*.hpp)
default: build_all
echo "End Build"
-
+
include $(KOKKOS_PATH)/Makefile.kokkos
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
+# KOKKOS_INTERNAL_USE_CUDA is not exported to installed Makefile.kokkos
+# use KOKKOS_DEVICE here
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
KOKKOS_CXXFLAGS += \
- -I${KOKKOS_PATH}/example/common \
- -I${KOKKOS_PATH}/example/fixture \
- -I${KOKKOS_PATH}/example/fenl
+ -I${SRC_DIR}/../common \
+ -I${SRC_DIR}/../fixture \
+ -I${SRC_DIR}
EXE_EXAMPLE_FENL = KokkosExample_Fenl
OBJ_EXAMPLE_FENL = BoxElemPart.o main.o fenl.o
TARGETS = $(EXE_EXAMPLE_FENL)
#TEST_TARGETS =
$(EXE_EXAMPLE_FENL) : $(OBJ_EXAMPLE_FENL) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_EXAMPLE_FENL) $(KOKKOS_LIBS) $(LIB) -o $(EXE_EXAMPLE_FENL)
build_all : $(TARGETS)
test : build_all
+clean:
+ rm -f *.o $(EXE_EXAMPLE_FENL) KokkosCore_config.*
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
diff --git a/lib/kokkos/example/fixture/CMakeLists.txt b/lib/kokkos/example/fixture/CMakeLists.txt
new file mode 100644
index 000000000..298c54c5b
--- /dev/null
+++ b/lib/kokkos/example/fixture/CMakeLists.txt
@@ -0,0 +1,13 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common)
+
+SET(SOURCES_TEST Main.cpp TestFixture.cpp BoxElemPart.cpp )
+
+# Automatically picks up 'kokkosexample_fixture'
+TRIBITS_ADD_EXECUTABLE_AND_TEST(
+ TestFixture
+ SOURCES ${SOURCES_TEST}
+ )
+
diff --git a/lib/kokkos/example/global_2_local_ids/CMakeLists.txt b/lib/kokkos/example/global_2_local_ids/CMakeLists.txt
new file mode 100644
index 000000000..9f32fe580
--- /dev/null
+++ b/lib/kokkos/example/global_2_local_ids/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+SET(SOURCES "")
+
+SET(SOURCES
+ G2L_Main.cpp
+ )
+
+TRIBITS_ADD_EXECUTABLE(
+ global_2_local_ids
+ SOURCES ${SOURCES}
+ COMM serial mpi
+ )
+
+
diff --git a/lib/kokkos/example/global_2_local_ids/Makefile b/lib/kokkos/example/global_2_local_ids/Makefile
new file mode 100644
index 000000000..bf8fbea3e
--- /dev/null
+++ b/lib/kokkos/example/global_2_local_ids/Makefile
@@ -0,0 +1,53 @@
+KOKKOS_PATH ?= ../..
+
+MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+SRC_DIR := $(dir $(MAKEFILE_PATH))
+
+SRC = $(wildcard $(SRC_DIR)/*.cpp)
+OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
+
+#SRC = $(wildcard *.cpp)
+#OBJ = $(SRC:%.cpp=%.o)
+
+default: build
+ echo "Start Build"
+
+# use installed Makefile.kokkos
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+CXX = $(NVCC_WRAPPER)
+CXXFLAGS = -I$(SRC_DIR) -O3
+LINK = $(CXX)
+LINKFLAGS =
+EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
+#KOKKOS_DEVICES = "Cuda,OpenMP"
+#KOKKOS_ARCH = "SNB,Kepler35"
+else
+CXX = g++
+CXXFLAGS = -I$(SRC_DIR) -O3
+LINK = $(CXX)
+LINKFLAGS =
+EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
+#KOKKOS_DEVICES = "OpenMP"
+#KOKKOS_ARCH = "SNB"
+endif
+
+DEPFLAGS = -M
+
+LIB =
+
+
+build: $(EXE)
+
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+ $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean:
+ rm -f *.a *.o *.cuda *.host
+
+# Compilation rules
+
+%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
+ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+
diff --git a/lib/kokkos/example/grow_array/CMakeLists.txt b/lib/kokkos/example/grow_array/CMakeLists.txt
new file mode 100644
index 000000000..d9ff17049
--- /dev/null
+++ b/lib/kokkos/example/grow_array/CMakeLists.txt
@@ -0,0 +1,14 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+SET(SOURCES "")
+
+FILE(GLOB SOURCES *.cpp)
+
+TRIBITS_ADD_EXECUTABLE(
+ grow_array
+ SOURCES ${SOURCES}
+ COMM serial mpi
+ )
+
diff --git a/lib/kokkos/example/grow_array/Makefile b/lib/kokkos/example/grow_array/Makefile
new file mode 100644
index 000000000..bf8fbea3e
--- /dev/null
+++ b/lib/kokkos/example/grow_array/Makefile
@@ -0,0 +1,53 @@
+KOKKOS_PATH ?= ../..
+
+MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+SRC_DIR := $(dir $(MAKEFILE_PATH))
+
+SRC = $(wildcard $(SRC_DIR)/*.cpp)
+OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
+
+#SRC = $(wildcard *.cpp)
+#OBJ = $(SRC:%.cpp=%.o)
+
+default: build
+ echo "Start Build"
+
+# use installed Makefile.kokkos
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+CXX = $(NVCC_WRAPPER)
+CXXFLAGS = -I$(SRC_DIR) -O3
+LINK = $(CXX)
+LINKFLAGS =
+EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
+#KOKKOS_DEVICES = "Cuda,OpenMP"
+#KOKKOS_ARCH = "SNB,Kepler35"
+else
+CXX = g++
+CXXFLAGS = -I$(SRC_DIR) -O3
+LINK = $(CXX)
+LINKFLAGS =
+EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
+#KOKKOS_DEVICES = "OpenMP"
+#KOKKOS_ARCH = "SNB"
+endif
+
+DEPFLAGS = -M
+
+LIB =
+
+
+build: $(EXE)
+
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+ $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean:
+ rm -f *.a *.o *.cuda *.host
+
+# Compilation rules
+
+%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
+ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+
diff --git a/lib/kokkos/example/md_skeleton/CMakeLists.txt b/lib/kokkos/example/md_skeleton/CMakeLists.txt
new file mode 100644
index 000000000..28412c378
--- /dev/null
+++ b/lib/kokkos/example/md_skeleton/CMakeLists.txt
@@ -0,0 +1,16 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+SET(SOURCES "")
+SET(LIBRARIES "")
+
+FILE(GLOB SOURCES *.cpp )
+
+TRIBITS_ADD_EXECUTABLE(
+ md_skeleton
+ SOURCES ${SOURCES}
+ COMM serial mpi
+ DEPLIBS ${LIBRARIES}
+ )
+
diff --git a/lib/kokkos/example/md_skeleton/Makefile b/lib/kokkos/example/md_skeleton/Makefile
new file mode 100644
index 000000000..bf8fbea3e
--- /dev/null
+++ b/lib/kokkos/example/md_skeleton/Makefile
@@ -0,0 +1,53 @@
+KOKKOS_PATH ?= ../..
+
+MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+SRC_DIR := $(dir $(MAKEFILE_PATH))
+
+SRC = $(wildcard $(SRC_DIR)/*.cpp)
+OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
+
+#SRC = $(wildcard *.cpp)
+#OBJ = $(SRC:%.cpp=%.o)
+
+default: build
+ echo "Start Build"
+
+# use installed Makefile.kokkos
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+CXX = $(NVCC_WRAPPER)
+CXXFLAGS = -I$(SRC_DIR) -O3
+LINK = $(CXX)
+LINKFLAGS =
+EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
+#KOKKOS_DEVICES = "Cuda,OpenMP"
+#KOKKOS_ARCH = "SNB,Kepler35"
+else
+CXX = g++
+CXXFLAGS = -I$(SRC_DIR) -O3
+LINK = $(CXX)
+LINKFLAGS =
+EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
+#KOKKOS_DEVICES = "OpenMP"
+#KOKKOS_ARCH = "SNB"
+endif
+
+DEPFLAGS = -M
+
+LIB =
+
+
+build: $(EXE)
+
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+ $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean:
+ rm -f *.a *.o *.cuda *.host
+
+# Compilation rules
+
+%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
+ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+
diff --git a/lib/kokkos/example/multi_fem/CMakeLists.txt b/lib/kokkos/example/multi_fem/CMakeLists.txt
new file mode 100644
index 000000000..e3a40bc26
--- /dev/null
+++ b/lib/kokkos/example/multi_fem/CMakeLists.txt
@@ -0,0 +1,16 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+SET(SOURCES "")
+
+FILE(GLOB SOURCES *.cpp)
+
+SET(LIBRARIES kokkoscore)
+
+TRIBITS_ADD_EXECUTABLE(
+ multi_fem
+ SOURCES ${SOURCES}
+ COMM serial mpi
+ )
+
diff --git a/lib/kokkos/example/multi_fem/Makefile b/lib/kokkos/example/multi_fem/Makefile
new file mode 100644
index 000000000..72e1768fc
--- /dev/null
+++ b/lib/kokkos/example/multi_fem/Makefile
@@ -0,0 +1,53 @@
+KOKKOS_PATH ?= ../..
+
+MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+SRC_DIR := $(dir $(MAKEFILE_PATH))
+
+SRC = $(wildcard $(SRC_DIR)/*.cpp)
+OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
+
+#SRC = $(wildcard *.cpp)
+#OBJ = $(SRC:%.cpp=%.o)
+
+default: build
+ echo "Start Build"
+
+# use installed Makefile.kokkos
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+CXX = $(NVCC_WRAPPER)
+CXXFLAGS = -I$(SRC_DIR) -I$(CUDA_PATH) -O3
+LINK = $(CXX)
+LINKFLAGS = -L$(CUDA_PATH)/lib64 -lcusparse
+EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
+#KOKKOS_DEVICES = "Cuda,OpenMP"
+#KOKKOS_ARCH = "SNB,Kepler35"
+else
+CXX = g++
+CXXFLAGS = -I$(SRC_DIR) -O3
+LINK = $(CXX)
+LINKFLAGS =
+EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
+#KOKKOS_DEVICES = "OpenMP"
+#KOKKOS_ARCH = "SNB"
+endif
+
+DEPFLAGS = -M
+
+LIB =
+
+
+build: $(EXE)
+
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+ $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean:
+ rm -f *.a *.o *.cuda *.host
+
+# Compilation rules
+
+%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
+ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+
diff --git a/lib/kokkos/example/query_device/CMakeLists.txt b/lib/kokkos/example/query_device/CMakeLists.txt
new file mode 100644
index 000000000..dade7f01f
--- /dev/null
+++ b/lib/kokkos/example/query_device/CMakeLists.txt
@@ -0,0 +1,14 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+SET(SOURCES "")
+
+FILE(GLOB SOURCES *.cpp)
+
+TRIBITS_ADD_EXECUTABLE(
+ query_device
+ SOURCES ${SOURCES}
+ COMM serial mpi
+ )
+
diff --git a/lib/kokkos/example/query_device/Makefile b/lib/kokkos/example/query_device/Makefile
new file mode 100644
index 000000000..bf8fbea3e
--- /dev/null
+++ b/lib/kokkos/example/query_device/Makefile
@@ -0,0 +1,53 @@
+KOKKOS_PATH ?= ../..
+
+MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+SRC_DIR := $(dir $(MAKEFILE_PATH))
+
+SRC = $(wildcard $(SRC_DIR)/*.cpp)
+OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
+
+#SRC = $(wildcard *.cpp)
+#OBJ = $(SRC:%.cpp=%.o)
+
+default: build
+ echo "Start Build"
+
+# use installed Makefile.kokkos
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+CXX = $(NVCC_WRAPPER)
+CXXFLAGS = -I$(SRC_DIR) -O3
+LINK = $(CXX)
+LINKFLAGS =
+EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
+#KOKKOS_DEVICES = "Cuda,OpenMP"
+#KOKKOS_ARCH = "SNB,Kepler35"
+else
+CXX = g++
+CXXFLAGS = -I$(SRC_DIR) -O3
+LINK = $(CXX)
+LINKFLAGS =
+EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
+#KOKKOS_DEVICES = "OpenMP"
+#KOKKOS_ARCH = "SNB"
+endif
+
+DEPFLAGS = -M
+
+LIB =
+
+
+build: $(EXE)
+
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+ $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean:
+ rm -f *.a *.o *.cuda *.host
+
+# Compilation rules
+
+%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
+ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+
diff --git a/lib/kokkos/example/sort_array/CMakeLists.txt b/lib/kokkos/example/sort_array/CMakeLists.txt
new file mode 100644
index 000000000..3e58198d7
--- /dev/null
+++ b/lib/kokkos/example/sort_array/CMakeLists.txt
@@ -0,0 +1,15 @@
+INCLUDE(TribitsAddExecutableAndTest)
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+SET(SOURCES "")
+
+FILE(GLOB SOURCES *.cpp)
+
+TRIBITS_ADD_EXECUTABLE(
+ sort_array
+ SOURCES ${SOURCES}
+ COMM serial mpi
+ )
+
diff --git a/lib/kokkos/example/sort_array/Makefile b/lib/kokkos/example/sort_array/Makefile
new file mode 100644
index 000000000..bf8fbea3e
--- /dev/null
+++ b/lib/kokkos/example/sort_array/Makefile
@@ -0,0 +1,53 @@
+KOKKOS_PATH ?= ../..
+
+MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+SRC_DIR := $(dir $(MAKEFILE_PATH))
+
+SRC = $(wildcard $(SRC_DIR)/*.cpp)
+OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
+
+#SRC = $(wildcard *.cpp)
+#OBJ = $(SRC:%.cpp=%.o)
+
+default: build
+ echo "Start Build"
+
+# use installed Makefile.kokkos
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+CXX = $(NVCC_WRAPPER)
+CXXFLAGS = -I$(SRC_DIR) -O3
+LINK = $(CXX)
+LINKFLAGS =
+EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
+#KOKKOS_DEVICES = "Cuda,OpenMP"
+#KOKKOS_ARCH = "SNB,Kepler35"
+else
+CXX = g++
+CXXFLAGS = -I$(SRC_DIR) -O3
+LINK = $(CXX)
+LINKFLAGS =
+EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
+#KOKKOS_DEVICES = "OpenMP"
+#KOKKOS_ARCH = "SNB"
+endif
+
+DEPFLAGS = -M
+
+LIB =
+
+
+build: $(EXE)
+
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+ $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean:
+ rm -f *.a *.o *.cuda *.host
+
+# Compilation rules
+
+%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
+ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
+
diff --git a/lib/kokkos/example/tutorial/01_hello_world/CMakeLists.txt b/lib/kokkos/example/tutorial/01_hello_world/CMakeLists.txt
new file mode 100644
index 000000000..5e5b1fcb4
--- /dev/null
+++ b/lib/kokkos/example/tutorial/01_hello_world/CMakeLists.txt
@@ -0,0 +1,11 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_01_hello_world
+ SOURCES hello_world.cpp
+ COMM serial mpi
+ )
+
diff --git a/lib/kokkos/example/tutorial/01_hello_world_lambda/CMakeLists.txt b/lib/kokkos/example/tutorial/01_hello_world_lambda/CMakeLists.txt
new file mode 100644
index 000000000..3fcca4bce
--- /dev/null
+++ b/lib/kokkos/example/tutorial/01_hello_world_lambda/CMakeLists.txt
@@ -0,0 +1,13 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+IF (Kokkos_ENABLE_CXX11)
+ # This is a tutorial, not a test, so we don't ask CTest to run it.
+ TRIBITS_ADD_EXECUTABLE(
+ tutorial_01_hello_world_lambda
+ SOURCES hello_world_lambda.cpp
+ COMM serial mpi
+ )
+ENDIF ()
+
diff --git a/lib/kokkos/example/tutorial/02_simple_reduce/CMakeLists.txt b/lib/kokkos/example/tutorial/02_simple_reduce/CMakeLists.txt
new file mode 100644
index 000000000..7c78db840
--- /dev/null
+++ b/lib/kokkos/example/tutorial/02_simple_reduce/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_02_simple_reduce
+ SOURCES simple_reduce.cpp
+ COMM serial mpi
+ )
diff --git a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt
new file mode 100644
index 000000000..e2e3a929f
--- /dev/null
+++ b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt
@@ -0,0 +1,12 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+IF (Kokkos_ENABLE_CXX11)
+ # This is a tutorial, not a test, so we don't ask CTest to run it.
+ TRIBITS_ADD_EXECUTABLE(
+ tutorial_02_simple_reduce_lambda
+ SOURCES simple_reduce_lambda.cpp
+ COMM serial mpi
+ )
+ENDIF ()
diff --git a/lib/kokkos/example/tutorial/03_simple_view/CMakeLists.txt b/lib/kokkos/example/tutorial/03_simple_view/CMakeLists.txt
new file mode 100644
index 000000000..7475a99e4
--- /dev/null
+++ b/lib/kokkos/example/tutorial/03_simple_view/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_03_simple_view
+ SOURCES simple_view.cpp
+ COMM serial mpi
+ )
diff --git a/lib/kokkos/example/tutorial/03_simple_view_lambda/CMakeLists.txt b/lib/kokkos/example/tutorial/03_simple_view_lambda/CMakeLists.txt
new file mode 100644
index 000000000..601fe452a
--- /dev/null
+++ b/lib/kokkos/example/tutorial/03_simple_view_lambda/CMakeLists.txt
@@ -0,0 +1,12 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+IF (Kokkos_ENABLE_CXX11)
+ # This is a tutorial, not a test, so we don't ask CTest to run it.
+ TRIBITS_ADD_EXECUTABLE(
+ tutorial_03_simple_view_lambda
+ SOURCES simple_view_lambda.cpp
+ COMM serial mpi
+ )
+ENDIF ()
diff --git a/lib/kokkos/example/tutorial/04_simple_memoryspaces/CMakeLists.txt b/lib/kokkos/example/tutorial/04_simple_memoryspaces/CMakeLists.txt
new file mode 100644
index 000000000..09f209077
--- /dev/null
+++ b/lib/kokkos/example/tutorial/04_simple_memoryspaces/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_04_simple_memoryspaces
+ SOURCES simple_memoryspaces.cpp
+ COMM serial mpi
+ )
diff --git a/lib/kokkos/example/tutorial/05_simple_atomics/CMakeLists.txt b/lib/kokkos/example/tutorial/05_simple_atomics/CMakeLists.txt
new file mode 100644
index 000000000..5a5790fb0
--- /dev/null
+++ b/lib/kokkos/example/tutorial/05_simple_atomics/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_05_simple_atomics
+ SOURCES simple_atomics.cpp
+ COMM serial mpi
+ )
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/CMakeLists.txt b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/CMakeLists.txt
new file mode 100644
index 000000000..2eb3a8f6c
--- /dev/null
+++ b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_advancedviews_01_data_layouts
+ SOURCES data_layouts.cpp
+ COMM serial mpi
+ )
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/CMakeLists.txt b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/CMakeLists.txt
new file mode 100644
index 000000000..1963e544d
--- /dev/null
+++ b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_advancedviews_02_memory_traits
+ SOURCES memory_traits.cpp
+ COMM serial mpi
+ )
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/CMakeLists.txt b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/CMakeLists.txt
new file mode 100644
index 000000000..cbe394c78
--- /dev/null
+++ b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_advancedviews_03_subviews
+ SOURCES subviews.cpp
+ COMM serial mpi
+ )
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/CMakeLists.txt b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/CMakeLists.txt
new file mode 100644
index 000000000..300dab128
--- /dev/null
+++ b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_advancedviews_04_dualviews
+ SOURCES dual_view.cpp
+ COMM serial mpi
+ )
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/CMakeLists.txt b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/CMakeLists.txt
new file mode 100644
index 000000000..f0ed569f9
--- /dev/null
+++ b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/CMakeLists.txt
@@ -0,0 +1,13 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+IF (Kokkos_ENABLE_Cuda_UVM)
+ # This is a tutorial, not a test, so we don't ask CTest to run it.
+ TRIBITS_ADD_EXECUTABLE(
+ tutorial_advancedviews_05_nvidia_uvm
+ SOURCES uvm_example.cpp
+ COMM serial mpi
+ DEPLIBS kokkoscontainers kokkoscore
+ )
+ENDIF ()
diff --git a/lib/kokkos/example/tutorial/Advanced_Views/CMakeLists.txt b/lib/kokkos/example/tutorial/Advanced_Views/CMakeLists.txt
new file mode 100644
index 000000000..f4f1addc5
--- /dev/null
+++ b/lib/kokkos/example/tutorial/Advanced_Views/CMakeLists.txt
@@ -0,0 +1,9 @@
+
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_data_layouts)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(02_memory_traits)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(03_subviews)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(04_dualviews)
+
+IF (Kokkos_ENABLE_Cuda_UVM)
+ TRIBITS_ADD_EXAMPLE_DIRECTORIES(05_NVIDIA_UVM)
+ENDIF ()
diff --git a/lib/kokkos/example/tutorial/CMakeLists.txt b/lib/kokkos/example/tutorial/CMakeLists.txt
new file mode 100644
index 000000000..d1fd4c0ae
--- /dev/null
+++ b/lib/kokkos/example/tutorial/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_hello_world)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(02_simple_reduce)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(03_simple_view)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(04_simple_memoryspaces)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(05_simple_atomics)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(Advanced_Views)
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(Hierarchical_Parallelism)
+
+IF (Kokkos_ENABLE_CXX11)
+ TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_hello_world_lambda)
+ TRIBITS_ADD_EXAMPLE_DIRECTORIES(02_simple_reduce_lambda)
+ TRIBITS_ADD_EXAMPLE_DIRECTORIES(03_simple_view_lambda)
+ENDIF ()
+
+
+
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/CMakeLists.txt b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/CMakeLists.txt
new file mode 100644
index 000000000..2d8a514a4
--- /dev/null
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_hierarchicalparallelism_01_thread_teams
+ SOURCES thread_teams.cpp
+ COMM serial mpi
+ )
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/CMakeLists.txt b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/CMakeLists.txt
new file mode 100644
index 000000000..ec7f1e115
--- /dev/null
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/CMakeLists.txt
@@ -0,0 +1,13 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+IF (Kokkos_ENABLE_CXX11)
+ # This is a tutorial, not a test, so we don't ask CTest to run it.
+ TRIBITS_ADD_EXECUTABLE(
+ tutorial_hierarchical_01_thread_teams_lambda
+ SOURCES thread_teams_lambda.cpp
+ COMM serial mpi
+ )
+ENDIF ()
+
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/CMakeLists.txt b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/CMakeLists.txt
new file mode 100644
index 000000000..e66040534
--- /dev/null
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_hierarchicalparallelism_02_nested_parallel_for
+ SOURCES nested_parallel_for.cpp
+ COMM serial mpi
+ )
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/CMakeLists.txt b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/CMakeLists.txt
new file mode 100644
index 000000000..ea6b0b1e4
--- /dev/null
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/CMakeLists.txt
@@ -0,0 +1,16 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+
+IF(Kokkos_ENABLE_CXX11)
+
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_hierarchicalparallelism_03_vectorization
+ SOURCES vectorization.cpp
+ COMM serial mpi
+ )
+
+ENDIF()
+
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/CMakeLists.txt b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/CMakeLists.txt
new file mode 100644
index 000000000..15ad5d780
--- /dev/null
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+
+# This is a tutorial, not a test, so we don't ask CTest to run it.
+TRIBITS_ADD_EXECUTABLE(
+ tutorial_hierarchicalparallelism_04_team_scan
+ SOURCES team_scan.cpp
+ COMM serial mpi
+ )
diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/CMakeLists.txt b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/CMakeLists.txt
new file mode 100644
index 000000000..e03d7aeb9
--- /dev/null
+++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/CMakeLists.txt
@@ -0,0 +1,8 @@
+
+TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_thread_teams)
+
+IF (Kokkos_ENABLE_CXX11)
+ TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_thread_teams_lambda)
+ TRIBITS_ADD_EXAMPLE_DIRECTORIES(02_nested_parallel_for)
+ TRIBITS_ADD_EXAMPLE_DIRECTORIES(03_vectorization)
+ENDIF ()
diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash
index e9e103e74..f60bc7be8 100755
--- a/lib/kokkos/generate_makefile.bash
+++ b/lib/kokkos/generate_makefile.bash
@@ -1,212 +1,220 @@
#!/bin/bash
KOKKOS_DEVICES=""
while [[ $# > 0 ]]
do
key="$1"
case $key in
--kokkos-path*)
KOKKOS_PATH="${key#*=}"
;;
--prefix*)
PREFIX="${key#*=}"
;;
--with-cuda)
KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda"
CUDA_PATH_NVCC=`which nvcc`
CUDA_PATH=${CUDA_PATH_NVCC%/bin/nvcc}
;;
--with-cuda*)
KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda"
CUDA_PATH="${key#*=}"
;;
--with-openmp)
KOKKOS_DEVICES="${KOKKOS_DEVICES},OpenMP"
;;
--with-pthread)
KOKKOS_DEVICES="${KOKKOS_DEVICES},Pthread"
;;
--with-serial)
KOKKOS_DEVICES="${KOKKOS_DEVICES},Serial"
;;
--with-qthread*)
KOKKOS_DEVICES="${KOKKOS_DEVICES},Qthread"
QTHREAD_PATH="${key#*=}"
;;
--with-devices*)
DEVICES="${key#*=}"
KOKKOS_DEVICES="${KOKKOS_DEVICES},${DEVICES}"
;;
--with-gtest*)
GTEST_PATH="${key#*=}"
;;
--with-hwloc*)
HWLOC_PATH="${key#*=}"
;;
--arch*)
KOKKOS_ARCH="${key#*=}"
;;
--cxxflags*)
CXXFLAGS="${key#*=}"
;;
--ldflags*)
LDFLAGS="${key#*=}"
;;
--debug|-dbg)
KOKKOS_DEBUG=yes
;;
--compiler*)
COMPILER="${key#*=}"
;;
+ --with-options*)
+ KOKKOS_OPT="${key#*=}"
+ ;;
--help)
echo "Kokkos configure options:"
echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory"
echo ""
echo "--with-cuda[=/Path/To/Cuda]: enable Cuda and set path to Cuda Toolkit"
echo "--with-openmp: enable OpenMP backend"
echo "--with-pthread: enable Pthreads backend"
echo "--with-serial: enable Serial backend"
echo "--with-qthread=/Path/To/Qthread: enable Qthread backend"
echo "--with-devices: explicitly add a set of backends"
echo ""
echo "--arch=[OPTIONS]: set target architectures. Options are:"
echo " SNB = Intel Sandy/Ivy Bridge CPUs"
echo " HSW = Intel Haswell CPUs"
echo " KNC = Intel Knights Corner Xeon Phi"
echo " Kepler30 = NVIDIA Kepler generation CC 3.0"
echo " Kepler35 = NVIDIA Kepler generation CC 3.5"
echo " Kepler37 = NVIDIA Kepler generation CC 3.7"
echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0"
echo " Power8 = IBM Power 8 CPUs"
echo ""
echo "--compiler=/Path/To/Compiler set the compiler"
echo "--debug,-dbg: enable Debugging"
echo "--cxxflags=[FLAGS] overwrite CXXFLAGS for library build and test build"
echo " This will still set certain required flags via"
echo " KOKKOS_CXXFLAGS (such as -fopenmp, --std=c++11, etc.)"
echo "--ldflags=[FLAGS] overwrite LDFLAGS for library build and test build"
echo " This will still set certain required flags via"
echo " KOKKOS_LDFLAGS (such as -fopenmp, -lpthread, etc.)"
echo "--with-gtest=/Path/To/Gtest: set path to gtest (used in unit and performance tests"
echo "--with-hwloc=/Path/To/Hwloc: set path to hwloc"
+ echo "--with-options=[OPTIONS]: additional options to Kokkos:"
+ echo " aggressive_vectorization = add ivdep on loops"
exit 0
;;
*)
# unknown option
;;
esac
shift
done
# If KOKKOS_PATH undefined, assume parent dir of this
# script is the KOKKOS_PATH
if [ -z "$KOKKOS_PATH" ]; then
KOKKOS_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
else
# Ensure KOKKOS_PATH is abs path
KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
fi
KOKKOS_OPTIONS="KOKKOS_PATH=${KOKKOS_PATH}"
if [ ${#COMPILER} -gt 0 ]; then
KOKKOS_OPTIONS="${KOKKOS_OPTIONS} CXX=${COMPILER}"
fi
if [ ${#PREFIX} -gt 0 ]; then
KOKKOS_OPTIONS="${KOKKOS_OPTIONS} PREFIX=${PREFIX}"
fi
if [ ${#KOKKOS_DEVICES} -gt 0 ]; then
KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_DEVICES=${KOKKOS_DEVICES}"
fi
if [ ${#KOKKOS_ARCH} -gt 0 ]; then
KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_ARCH=${KOKKOS_ARCH}"
fi
if [ ${#KOKKOS_DEBUG} -gt 0 ]; then
KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_DEBUG=${KOKKOS_DEBUG}"
fi
if [ ${#CUDA_PATH} -gt 0 ]; then
KOKKOS_OPTIONS="${KOKKOS_OPTIONS} CUDA_PATH=${CUDA_PATH}"
fi
if [ ${#CXXFLAGS} -gt 0 ]; then
KOKKOS_OPTIONS="${KOKKOS_OPTIONS} CXXFLAGS=\"${CXXFLAGS}\""
fi
if [ ${#LDFLAGS} -gt 0 ]; then
KOKKOS_OPTIONS="${KOKKOS_OPTIONS} LDFLAGS=\"${LDFLAGS}\""
fi
if [ ${#GTEST_PATH} -gt 0 ]; then
KOKKOS_OPTIONS="${KOKKOS_OPTIONS} GTEST_PATH=${GTEST_PATH}"
else
GTEST_PATH=${KOKKOS_PATH}/tpls/gtest
KOKKOS_OPTIONS="${KOKKOS_OPTIONS} GTEST_PATH=${GTEST_PATH}"
fi
if [ ${#HWLOC_PATH} -gt 0 ]; then
KOKKOS_OPTIONS="${KOKKOS_OPTIONS} HWLOC_PATH=${HWLOC_PATH} KOKKOS_USE_TPLS=hwloc"
fi
if [ ${#QTHREAD_PATH} -gt 0 ]; then
KOKKOS_OPTIONS="${KOKKOS_OPTIONS} QTHREAD_PATH=${QTHREAD_PATH}"
fi
+if [ ${#KOKKOS_OPT} -gt 0 ]; then
+KOKKOS_OPTIONS="${KOKKOS_OPTIONS} KOKKOS_OPTIONS=${KOKKOS_OPT}"
+fi
mkdir core
mkdir core/unit_test
mkdir core/perf_test
mkdir containers
mkdir containers/unit_tests
mkdir containers/performance_tests
mkdir algorithms
mkdir algorithms/unit_tests
mkdir algorithms/performance_tests
mkdir example
mkdir example/fixture
mkdir example/feint
mkdir example/fenl
echo "Generating Makefile with options " ${KOKKOS_OPTIONS}
echo "KOKKOS_OPTIONS=${KOKKOS_OPTIONS}" > Makefile
echo "" >> Makefile
echo "lib:" >> Makefile
echo -e "\tcd core; \\" >> Makefile
echo -e "\tmake -j -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_OPTIONS}" >> Makefile
echo "" >> Makefile
echo "install: lib" >> Makefile
echo -e "\tcd core; \\" >> Makefile
echo -e "\tmake -j -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_OPTIONS} install" >> Makefile
echo "" >> Makefile
echo "build-test:" >> Makefile
echo -e "\tcd core/unit_test; \\" >> Makefile
echo -e "\tmake -j -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_OPTIONS}" >> Makefile
echo -e "\tcd core/perf_test; \\" >> Makefile
echo -e "\tmake -j -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_OPTIONS}" >> Makefile
echo -e "\tcd containers/unit_tests; \\" >> Makefile
echo -e "\tmake -j -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_OPTIONS}" >> Makefile
echo -e "\tcd containers/performance_tests; \\" >> Makefile
echo -e "\tmake -j -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_OPTIONS}" >> Makefile
echo -e "\tcd algorithms/unit_tests; \\" >> Makefile
echo -e "\tmake -j -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_OPTIONS}" >> Makefile
echo -e "\tcd example/fixture; \\" >> Makefile
echo -e "\tmake -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_OPTIONS}" >> Makefile
echo -e "\tcd example/feint; \\" >> Makefile
echo -e "\tmake -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_OPTIONS}" >> Makefile
echo -e "\tcd example/fenl; \\" >> Makefile
echo -e "\tmake -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_OPTIONS}" >> Makefile
echo "" >> Makefile
echo "test: build-test" >> Makefile
echo -e "\tcd core/unit_test; \\" >> Makefile
echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_OPTIONS} test" >> Makefile
echo -e "\tcd core/perf_test; \\" >> Makefile
echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_OPTIONS} test" >> Makefile
echo -e "\tcd containers/unit_tests; \\" >> Makefile
echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_OPTIONS} test" >> Makefile
echo -e "\tcd containers/performance_tests; \\" >> Makefile
echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_OPTIONS} test" >> Makefile
echo -e "\tcd algorithms/unit_tests; \\" >> Makefile
echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_OPTIONS} test" >> Makefile
echo -e "\tcd example/fixture; \\" >> Makefile
echo -e "\tmake -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_OPTIONS} test" >> Makefile
echo -e "\tcd example/feint; \\" >> Makefile
echo -e "\tmake -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_OPTIONS} test" >> Makefile
echo -e "\tcd example/fenl; \\" >> Makefile
echo -e "\tmake -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_OPTIONS} test" >> Makefile
diff --git a/lib/kokkos/tpls/gtest/gtest/LICENSE b/lib/kokkos/tpls/gtest/gtest/LICENSE
new file mode 100644
index 000000000..1941a11f8
--- /dev/null
+++ b/lib/kokkos/tpls/gtest/gtest/LICENSE
@@ -0,0 +1,28 @@
+Copyright 2008, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/lib/kokkos/tpls/gtest/gtest/README b/lib/kokkos/tpls/gtest/gtest/README
new file mode 100644
index 000000000..82964ecc3
--- /dev/null
+++ b/lib/kokkos/tpls/gtest/gtest/README
@@ -0,0 +1,13 @@
+This is a fused source version of gtest 1.7.0. All that should be necessary to
+start using gtest in your package is to declare the dependency and include
+gtest/gtest.h.
+
+However, because some of the packages that are developed in Sierra do not use a
+fused source version of gtest, we need to make it possible for them to build with
+this version as well as with their native build. To facilitate this we have
+created symlinks from the other gtest headers that they use to the fused source
+gtest.h. This makes it possible for them to find the headers while still using
+the fused source version. This should have no ill effects, since the header is
+guarded against multiple inclusion and allows using only the non-gtest.h headers.
+
+
diff --git a/lib/kokkos/tpls/gtest/gtest/gtest-all.cc b/lib/kokkos/tpls/gtest/gtest/gtest-all.cc
new file mode 100644
index 000000000..538c78db9
--- /dev/null
+++ b/lib/kokkos/tpls/gtest/gtest/gtest-all.cc
@@ -0,0 +1,9594 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: mheule@google.com (Markus Heule)
+//
+// Google C++ Testing Framework (Google Test)
+//
+// Sometimes it's desirable to build Google Test by compiling a single file.
+// This file serves this purpose.
+
+// This line ensures that gtest.h can be compiled on its own, even
+// when it's fused.
+#include "gtest/gtest.h"
+
+// The following lines pull in the real gtest *.cc files.
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// The Google C++ Testing Framework (Google Test)
+
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// Utilities for testing Google Test itself and code that uses Google Test
+// (e.g. frameworks built on top of Google Test).
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_
+#define GTEST_INCLUDE_GTEST_GTEST_SPI_H_
+
+
+namespace testing {
+
+// This helper class can be used to mock out Google Test failure reporting
+// so that we can test Google Test or code that builds on Google Test.
+//
+// An object of this class appends a TestPartResult object to the
+// TestPartResultArray object given in the constructor whenever a Google Test
+// failure is reported. It can either intercept only failures that are
+// generated in the same thread that created this object or it can intercept
+// all generated failures. The scope of this mock object can be controlled with
+// the second argument to the two-argument constructor.
+class GTEST_API_ ScopedFakeTestPartResultReporter
+ : public TestPartResultReporterInterface {
+ public:
+ // The two possible mocking modes of this object.
+ enum InterceptMode {
+ INTERCEPT_ONLY_CURRENT_THREAD, // Intercepts only thread local failures.
+ INTERCEPT_ALL_THREADS // Intercepts all failures.
+ };
+
+ // The c'tor sets this object as the test part result reporter used
+ // by Google Test. The 'result' parameter specifies where to report the
+ // results. This reporter will only catch failures generated in the current
+ // thread. DEPRECATED
+ explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result);
+
+ // Same as above, but you can choose the interception scope of this object.
+ ScopedFakeTestPartResultReporter(InterceptMode intercept_mode,
+ TestPartResultArray* result);
+
+ // The d'tor restores the previous test part result reporter.
+ virtual ~ScopedFakeTestPartResultReporter();
+
+ // Appends the TestPartResult object to the TestPartResultArray
+ // received in the constructor.
+ //
+ // This method is from the TestPartResultReporterInterface
+ // interface.
+ virtual void ReportTestPartResult(const TestPartResult& result);
+ private:
+ void Init();
+
+ const InterceptMode intercept_mode_;
+ TestPartResultReporterInterface* old_reporter_;
+ TestPartResultArray* const result_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedFakeTestPartResultReporter);
+};
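+
+// Illustrative sketch (not part of the original fused sources): intercepting
+// failures into a TestPartResultArray, assuming a helper Fails() that
+// triggers a single EXPECT_* failure.
+//
+//   ::testing::TestPartResultArray failures;
+//   {
+//     ::testing::ScopedFakeTestPartResultReporter reporter(
+//         ::testing::ScopedFakeTestPartResultReporter::
+//             INTERCEPT_ONLY_CURRENT_THREAD,
+//         &failures);
+//     Fails();  // The failure is appended to 'failures' instead of reported.
+//   }
+//   // failures.size() is now 1.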
+
+namespace internal {
+
+// A helper class for implementing EXPECT_FATAL_FAILURE() and
+// EXPECT_NONFATAL_FAILURE(). Its destructor verifies that the given
+// TestPartResultArray contains exactly one failure that has the given
+// type and contains the given substring. If that's not the case, a
+// non-fatal failure will be generated.
+class GTEST_API_ SingleFailureChecker {
+ public:
+ // The constructor remembers the arguments.
+ SingleFailureChecker(const TestPartResultArray* results,
+ TestPartResult::Type type,
+ const string& substr);
+ ~SingleFailureChecker();
+ private:
+ const TestPartResultArray* const results_;
+ const TestPartResult::Type type_;
+ const string substr_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker);
+};
+
+} // namespace internal
+
+} // namespace testing
+
+// A set of macros for testing Google Test assertions or code that's expected
+// to generate Google Test fatal failures. It verifies that the given
+// statement will cause exactly one fatal Google Test failure with 'substr'
+// being part of the failure message.
+//
+// There are two different versions of this macro. EXPECT_FATAL_FAILURE only
+// affects and considers failures generated in the current thread and
+// EXPECT_FATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
+//
+// The verification of the assertion is done correctly even when the statement
+// throws an exception or aborts the current function.
+//
+// Known restrictions:
+// - 'statement' cannot reference local non-static variables or
+// non-static members of the current object.
+// - 'statement' cannot return a value.
+// - You cannot stream a failure message to this macro.
+//
+// Note that even though the implementations of the following two
+// macros are much alike, we cannot refactor them to use a common
+// helper macro, due to some peculiarity in how the preprocessor
+// works. The AcceptsMacroThatExpandsToUnprotectedComma test in
+// gtest_unittest.cc will fail to compile if we do that.
+#define EXPECT_FATAL_FAILURE(statement, substr) \
+ do { \
+ class GTestExpectFatalFailureHelper {\
+ public:\
+ static void Execute() { statement; }\
+ };\
+ ::testing::TestPartResultArray gtest_failures;\
+ ::testing::internal::SingleFailureChecker gtest_checker(\
+ &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
+ {\
+ ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
+ ::testing::ScopedFakeTestPartResultReporter:: \
+ INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
+ GTestExpectFatalFailureHelper::Execute();\
+ }\
+ } while (::testing::internal::AlwaysFalse())
+
+#define EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
+ do { \
+ class GTestExpectFatalFailureHelper {\
+ public:\
+ static void Execute() { statement; }\
+ };\
+ ::testing::TestPartResultArray gtest_failures;\
+ ::testing::internal::SingleFailureChecker gtest_checker(\
+ &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
+ {\
+ ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
+ ::testing::ScopedFakeTestPartResultReporter:: \
+ INTERCEPT_ALL_THREADS, &gtest_failures);\
+ GTestExpectFatalFailureHelper::Execute();\
+ }\
+ } while (::testing::internal::AlwaysFalse())
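+
+// Illustrative sketch (not part of the original fused sources): a typical use
+// of EXPECT_FATAL_FAILURE. Per the restrictions above, the tested statement is
+// executed in a static helper, so it must not touch local variables.
+//
+//   TEST(FatalFailureTest, AssertTrueFails) {
+//     EXPECT_FATAL_FAILURE(ASSERT_TRUE(false), "false");
+//   }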
+
+// A macro for testing Google Test assertions or code that's expected to
+// generate Google Test non-fatal failures. It asserts that the given
+// statement will cause exactly one non-fatal Google Test failure with 'substr'
+// being part of the failure message.
+//
+// There are two different versions of this macro. EXPECT_NONFATAL_FAILURE only
+// affects and considers failures generated in the current thread and
+// EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
+//
+// 'statement' is allowed to reference local variables and members of
+// the current object.
+//
+// The verification of the assertion is done correctly even when the statement
+// throws an exception or aborts the current function.
+//
+// Known restrictions:
+// - You cannot stream a failure message to this macro.
+//
+// Note that even though the implementations of the following two
+// macros are much alike, we cannot refactor them to use a common
+// helper macro, due to some peculiarity in how the preprocessor
+// works. If we do that, the code won't compile when the user gives
+// EXPECT_NONFATAL_FAILURE() a statement that contains a macro that
+// expands to code containing an unprotected comma. The
+// AcceptsMacroThatExpandsToUnprotectedComma test in gtest_unittest.cc
+// catches that.
+//
+// For the same reason, we have to write
+// if (::testing::internal::AlwaysTrue()) { statement; }
+// instead of
+// GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
+// to avoid an MSVC warning on unreachable code.
+#define EXPECT_NONFATAL_FAILURE(statement, substr) \
+ do {\
+ ::testing::TestPartResultArray gtest_failures;\
+ ::testing::internal::SingleFailureChecker gtest_checker(\
+ &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
+ (substr));\
+ {\
+ ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
+ ::testing::ScopedFakeTestPartResultReporter:: \
+ INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
+ if (::testing::internal::AlwaysTrue()) { statement; }\
+ }\
+ } while (::testing::internal::AlwaysFalse())
+
+#define EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
+ do {\
+ ::testing::TestPartResultArray gtest_failures;\
+ ::testing::internal::SingleFailureChecker gtest_checker(\
+ &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
+ (substr));\
+ {\
+ ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
+ ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, \
+ &gtest_failures);\
+ if (::testing::internal::AlwaysTrue()) { statement; }\
+ }\
+ } while (::testing::internal::AlwaysFalse())
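+
+// Illustrative sketch (not part of the original fused sources): unlike the
+// fatal variant, the statement may reference locals of the enclosing test.
+//
+//   TEST(NonfatalFailureTest, ExpectEqFails) {
+//     const int actual = 2;
+//     EXPECT_NONFATAL_FAILURE(EXPECT_EQ(1, actual), "actual");
+//   }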
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_SPI_H_
+
+#include <ctype.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include <algorithm>
+#include <iomanip>
+#include <limits>
+#include <ostream> // NOLINT
+#include <sstream>
+#include <vector>
+
+#if GTEST_OS_LINUX
+
+// TODO(kenton@google.com): Use autoconf to detect availability of
+// gettimeofday().
+# define GTEST_HAS_GETTIMEOFDAY_ 1
+
+# include <fcntl.h> // NOLINT
+# include <limits.h> // NOLINT
+# include <sched.h> // NOLINT
+// Declares vsnprintf(). This header is not available on Windows.
+# include <strings.h> // NOLINT
+# include <sys/mman.h> // NOLINT
+# include <sys/time.h> // NOLINT
+# include <unistd.h> // NOLINT
+# include <string>
+
+#elif GTEST_OS_SYMBIAN
+# define GTEST_HAS_GETTIMEOFDAY_ 1
+# include <sys/time.h> // NOLINT
+
+#elif GTEST_OS_ZOS
+# define GTEST_HAS_GETTIMEOFDAY_ 1
+# include <sys/time.h> // NOLINT
+
+// On z/OS we additionally need strings.h for strcasecmp.
+# include <strings.h> // NOLINT
+
+#elif GTEST_OS_WINDOWS_MOBILE // We are on Windows CE.
+
+# include <windows.h> // NOLINT
+
+#elif GTEST_OS_WINDOWS // We are on Windows proper.
+
+# include <io.h> // NOLINT
+# include <sys/timeb.h> // NOLINT
+# include <sys/types.h> // NOLINT
+# include <sys/stat.h> // NOLINT
+
+# if GTEST_OS_WINDOWS_MINGW
+// MinGW has gettimeofday() but not _ftime64().
+// TODO(kenton@google.com): Use autoconf to detect availability of
+// gettimeofday().
+// TODO(kenton@google.com): There are other ways to get the time on
+// Windows, like GetTickCount() or GetSystemTimeAsFileTime(). MinGW
+// supports these; consider using them instead.
+# define GTEST_HAS_GETTIMEOFDAY_ 1
+# include <sys/time.h> // NOLINT
+# endif // GTEST_OS_WINDOWS_MINGW
+
+// cpplint thinks that the header is already included, so we want to
+// silence it.
+# include <windows.h> // NOLINT
+
+#else
+
+// Assume other platforms have gettimeofday().
+// TODO(kenton@google.com): Use autoconf to detect availability of
+// gettimeofday().
+# define GTEST_HAS_GETTIMEOFDAY_ 1
+
+// cpplint thinks that the header is already included, so we want to
+// silence it.
+# include <sys/time.h> // NOLINT
+# include <unistd.h> // NOLINT
+
+#endif // GTEST_OS_LINUX
+
+#if GTEST_HAS_EXCEPTIONS
+# include <stdexcept>
+#endif
+
+#if GTEST_CAN_STREAM_RESULTS_
+# include <arpa/inet.h> // NOLINT
+# include <netdb.h> // NOLINT
+#endif
+
+// Indicates that this translation unit is part of Google Test's
+// implementation. It must come before gtest-internal-inl.h is
+// included, or there will be a compiler error. This trick is to
+// prevent a user from accidentally including gtest-internal-inl.h in
+// his code.
+#define GTEST_IMPLEMENTATION_ 1
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Utility functions and classes used by the Google C++ testing framework.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// This file contains purely Google Test's internal implementation. Please
+// DO NOT #INCLUDE IT IN A USER PROGRAM.
+
+#ifndef GTEST_SRC_GTEST_INTERNAL_INL_H_
+#define GTEST_SRC_GTEST_INTERNAL_INL_H_
+
+// GTEST_IMPLEMENTATION_ is defined to 1 iff the current translation unit is
+// part of Google Test's implementation; otherwise it's undefined.
+#if !GTEST_IMPLEMENTATION_
+// A user is trying to include this from his code - just say no.
+# error "gtest-internal-inl.h is part of Google Test's internal implementation."
+# error "It must not be included except by Google Test itself."
+#endif // GTEST_IMPLEMENTATION_
+
+#ifndef _WIN32_WCE
+# include <errno.h>
+#endif // !_WIN32_WCE
+#include <stddef.h>
+#include <stdlib.h> // For strtoll/_strtoul64/malloc/free.
+#include <string.h> // For memmove.
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+
+#if GTEST_CAN_STREAM_RESULTS_
+# include <arpa/inet.h> // NOLINT
+# include <netdb.h> // NOLINT
+#endif
+
+#if GTEST_OS_WINDOWS
+# include <windows.h> // NOLINT
+#endif // GTEST_OS_WINDOWS
+
+
+namespace testing {
+
+// Declares the flags.
+//
+// We don't want the users to modify this flag in the code, but want
+// Google Test's own unit tests to be able to access it. Therefore we
+// declare it here as opposed to in gtest.h.
+GTEST_DECLARE_bool_(death_test_use_fork);
+
+namespace internal {
+
+// The value of GetTestTypeId() as seen from within the Google Test
+// library. This is solely for testing GetTestTypeId().
+GTEST_API_ extern const TypeId kTestTypeIdInGoogleTest;
+
+// Names of the flags (needed for parsing Google Test flags).
+const char kAlsoRunDisabledTestsFlag[] = "also_run_disabled_tests";
+const char kBreakOnFailureFlag[] = "break_on_failure";
+const char kCatchExceptionsFlag[] = "catch_exceptions";
+const char kColorFlag[] = "color";
+const char kFilterFlag[] = "filter";
+const char kListTestsFlag[] = "list_tests";
+const char kOutputFlag[] = "output";
+const char kPrintTimeFlag[] = "print_time";
+const char kRandomSeedFlag[] = "random_seed";
+const char kRepeatFlag[] = "repeat";
+const char kShuffleFlag[] = "shuffle";
+const char kStackTraceDepthFlag[] = "stack_trace_depth";
+const char kStreamResultToFlag[] = "stream_result_to";
+const char kThrowOnFailureFlag[] = "throw_on_failure";
+
+// A valid random seed must be in [1, kMaxRandomSeed].
+const int kMaxRandomSeed = 99999;
+
+// g_help_flag is true iff the --help flag or an equivalent form is
+// specified on the command line.
+GTEST_API_ extern bool g_help_flag;
+
+// Returns the current time in milliseconds.
+GTEST_API_ TimeInMillis GetTimeInMillis();
+
+// Returns true iff Google Test should use colors in the output.
+GTEST_API_ bool ShouldUseColor(bool stdout_is_tty);
+
+// Formats the given time in milliseconds as seconds.
+GTEST_API_ std::string FormatTimeInMillisAsSeconds(TimeInMillis ms);
+
+// Converts the given time in milliseconds to a date string in the ISO 8601
+// format, without the timezone information. N.B.: due to the use of the
+// non-reentrant localtime() function, this function is not thread safe. Do
+// not use it in any code that can be called from multiple threads.
+GTEST_API_ std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms);
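+
+// For example, a timestamp would format as "2011-10-31T18:52:42" (no timezone
+// suffix), interpreted in the local timezone.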
+
+// Parses a string for an Int32 flag, in the form of "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true. On failure, returns false without changing *value.
+GTEST_API_ bool ParseInt32Flag(
+ const char* str, const char* flag, Int32* value);
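+
+// Illustrative sketch (not part of the original fused sources): parsing a
+// repeat flag. The flag name is given without the "--gtest_" prefix.
+//
+//   Int32 repeat = 1;
+//   if (ParseInt32Flag("--gtest_repeat=3", "repeat", &repeat)) {
+//     // repeat == 3 here; a malformed value would leave it at 1.
+//   }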
+
+// Returns a random seed in range [1, kMaxRandomSeed] based on the
+// given --gtest_random_seed flag value.
+inline int GetRandomSeedFromFlag(Int32 random_seed_flag) {
+ const unsigned int raw_seed = (random_seed_flag == 0) ?
+ static_cast<unsigned int>(GetTimeInMillis()) :
+ static_cast<unsigned int>(random_seed_flag);
+
+ // Normalizes the actual seed to range [1, kMaxRandomSeed] such that
+ // it's easy to type.
+ const int normalized_seed =
+ static_cast<int>((raw_seed - 1U) %
+ static_cast<unsigned int>(kMaxRandomSeed)) + 1;
+ return normalized_seed;
+}
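+
+// For example, a --gtest_random_seed value of 100000 normalizes to
+// ((100000 - 1) % 99999) + 1 == 1, while 0 requests a time-based seed.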
+
+// Returns the first valid random seed after 'seed'. The behavior is
+// undefined if 'seed' is invalid. The seed after kMaxRandomSeed is
+// considered to be 1.
+inline int GetNextRandomSeed(int seed) {
+ GTEST_CHECK_(1 <= seed && seed <= kMaxRandomSeed)
+ << "Invalid random seed " << seed << " - must be in [1, "
+ << kMaxRandomSeed << "].";
+ const int next_seed = seed + 1;
+ return (next_seed > kMaxRandomSeed) ? 1 : next_seed;
+}
+
+// This class saves the values of all Google Test flags in its c'tor, and
+// restores them in its d'tor.
+class GTestFlagSaver {
+ public:
+ // The c'tor.
+ GTestFlagSaver() {
+ also_run_disabled_tests_ = GTEST_FLAG(also_run_disabled_tests);
+ break_on_failure_ = GTEST_FLAG(break_on_failure);
+ catch_exceptions_ = GTEST_FLAG(catch_exceptions);
+ color_ = GTEST_FLAG(color);
+ death_test_style_ = GTEST_FLAG(death_test_style);
+ death_test_use_fork_ = GTEST_FLAG(death_test_use_fork);
+ filter_ = GTEST_FLAG(filter);
+ internal_run_death_test_ = GTEST_FLAG(internal_run_death_test);
+ list_tests_ = GTEST_FLAG(list_tests);
+ output_ = GTEST_FLAG(output);
+ print_time_ = GTEST_FLAG(print_time);
+ random_seed_ = GTEST_FLAG(random_seed);
+ repeat_ = GTEST_FLAG(repeat);
+ shuffle_ = GTEST_FLAG(shuffle);
+ stack_trace_depth_ = GTEST_FLAG(stack_trace_depth);
+ stream_result_to_ = GTEST_FLAG(stream_result_to);
+ throw_on_failure_ = GTEST_FLAG(throw_on_failure);
+ }
+
+ // The d'tor is not virtual. DO NOT INHERIT FROM THIS CLASS.
+ ~GTestFlagSaver() {
+ GTEST_FLAG(also_run_disabled_tests) = also_run_disabled_tests_;
+ GTEST_FLAG(break_on_failure) = break_on_failure_;
+ GTEST_FLAG(catch_exceptions) = catch_exceptions_;
+ GTEST_FLAG(color) = color_;
+ GTEST_FLAG(death_test_style) = death_test_style_;
+ GTEST_FLAG(death_test_use_fork) = death_test_use_fork_;
+ GTEST_FLAG(filter) = filter_;
+ GTEST_FLAG(internal_run_death_test) = internal_run_death_test_;
+ GTEST_FLAG(list_tests) = list_tests_;
+ GTEST_FLAG(output) = output_;
+ GTEST_FLAG(print_time) = print_time_;
+ GTEST_FLAG(random_seed) = random_seed_;
+ GTEST_FLAG(repeat) = repeat_;
+ GTEST_FLAG(shuffle) = shuffle_;
+ GTEST_FLAG(stack_trace_depth) = stack_trace_depth_;
+ GTEST_FLAG(stream_result_to) = stream_result_to_;
+ GTEST_FLAG(throw_on_failure) = throw_on_failure_;
+ }
+
+ private:
+ // Fields for saving the original values of flags.
+ bool also_run_disabled_tests_;
+ bool break_on_failure_;
+ bool catch_exceptions_;
+ std::string color_;
+ std::string death_test_style_;
+ bool death_test_use_fork_;
+ std::string filter_;
+ std::string internal_run_death_test_;
+ bool list_tests_;
+ std::string output_;
+ bool print_time_;
+ internal::Int32 random_seed_;
+ internal::Int32 repeat_;
+ bool shuffle_;
+ internal::Int32 stack_trace_depth_;
+ std::string stream_result_to_;
+ bool throw_on_failure_;
+} GTEST_ATTRIBUTE_UNUSED_;
+
+// Converts a Unicode code point to a narrow string in UTF-8 encoding.
+// code_point parameter is of type UInt32 because wchar_t may not be
+// wide enough to contain a code point.
+// If the code_point is not a valid Unicode code point
+// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
+// to "(Invalid Unicode 0xXXXXXXXX)".
+GTEST_API_ std::string CodePointToUtf8(UInt32 code_point);
+
+// Converts a wide string to a narrow string in UTF-8 encoding.
+// The wide string is assumed to have the following encoding:
+// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
+// UTF-32 if sizeof(wchar_t) == 4 (on Linux)
+// Parameter str points to a null-terminated wide string.
+// Parameter num_chars may additionally limit the number
+// of wchar_t characters processed. -1 is used when the entire string
+// should be processed.
+// If the string contains code points that are not valid Unicode code points
+// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
+// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF-16 encoding
+// and contains invalid UTF-16 surrogate pairs, values in those pairs
+// will be encoded as individual characters from the Basic Multilingual Plane.
+GTEST_API_ std::string WideStringToUtf8(const wchar_t* str, int num_chars);
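+
+// Illustrative sketch (not part of the original fused sources), assuming a
+// 4-byte wchar_t as on Linux:
+//
+//   const std::string utf8 = WideStringToUtf8(L"h\x4E2D", -1);  // 'h', U+4E2D
+//   // utf8 == "h\xE4\xB8\xAD": U+4E2D becomes a three-byte UTF-8 sequence.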
+
+// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
+// if the variable is present. If a file already exists at this location, this
+// function will write over it. If the variable is present, but the file cannot
+// be created, prints an error and exits.
+void WriteToShardStatusFileIfNeeded();
+
+// Checks whether sharding is enabled by examining the relevant
+// environment variable values. If the variables are present,
+// but inconsistent (e.g., shard_index >= total_shards), prints
+// an error and exits. If in_subprocess_for_death_test, sharding is
+// disabled because it must only be applied to the original test
+// process. Otherwise, we could filter out death tests we intended to execute.
+GTEST_API_ bool ShouldShard(const char* total_shards_str,
+ const char* shard_index_str,
+ bool in_subprocess_for_death_test);
+
+// Parses the environment variable var as an Int32. If it is unset,
+// returns default_val. If it is not an Int32, prints an error and
+// aborts.
+GTEST_API_ Int32 Int32FromEnvOrDie(const char* env_var, Int32 default_val);
+
+// Given the total number of shards, the shard index, and the test id,
+// returns true iff the test should be run on this shard. The test id is
+// some arbitrary but unique non-negative integer assigned to each test
+// method. Assumes that 0 <= shard_index < total_shards.
+GTEST_API_ bool ShouldRunTestOnShard(
+ int total_shards, int shard_index, int test_id);
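+
+// Illustrative note: the partition is a round-robin over test ids, so with
+// total_shards == 3 a test with id 7 runs only on the shard where
+// 7 % 3 == shard_index, i.e. shard 1.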
+
+// STL container utilities.
+
+// Returns the number of elements in the given container that satisfy
+// the given predicate.
+template <class Container, typename Predicate>
+inline int CountIf(const Container& c, Predicate predicate) {
+ // Implemented as an explicit loop since std::count_if() in libCstd on
+ // Solaris has a non-standard signature.
+ int count = 0;
+ for (typename Container::const_iterator it = c.begin(); it != c.end(); ++it) {
+ if (predicate(*it))
+ ++count;
+ }
+ return count;
+}
+
+// Applies a function/functor to each element in the container.
+template <class Container, typename Functor>
+void ForEach(const Container& c, Functor functor) {
+ std::for_each(c.begin(), c.end(), functor);
+}
+
+// Returns the i-th element of the vector, or default_value if i is not
+// in range [0, v.size()).
+template <typename E>
+inline E GetElementOr(const std::vector<E>& v, int i, E default_value) {
+ return (i < 0 || i >= static_cast<int>(v.size())) ? default_value : v[i];
+}
+
+// Performs an in-place shuffle of a range of the vector's elements.
+// 'begin' and 'end' are element indices as an STL-style range;
+// i.e. [begin, end) are shuffled, where 'end' == size() means to
+// shuffle to the end of the vector.
+template <typename E>
+void ShuffleRange(internal::Random* random, int begin, int end,
+ std::vector<E>* v) {
+ const int size = static_cast<int>(v->size());
+ GTEST_CHECK_(0 <= begin && begin <= size)
+ << "Invalid shuffle range start " << begin << ": must be in range [0, "
+ << size << "].";
+ GTEST_CHECK_(begin <= end && end <= size)
+ << "Invalid shuffle range finish " << end << ": must be in range ["
+ << begin << ", " << size << "].";
+
+ // Fisher-Yates shuffle, from
+ // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle
+ for (int range_width = end - begin; range_width >= 2; range_width--) {
+ const int last_in_range = begin + range_width - 1;
+ const int selected = begin + random->Generate(range_width);
+ std::swap((*v)[selected], (*v)[last_in_range]);
+ }
+}
+
+// Performs an in-place shuffle of the vector's elements.
+template <typename E>
+inline void Shuffle(internal::Random* random, std::vector<E>* v) {
+ ShuffleRange(random, 0, static_cast<int>(v->size()), v);
+}
+
+// A function for deleting an object. Handy for being used as a
+// functor.
+template <typename T>
+static void Delete(T* x) {
+ delete x;
+}
+
+// A predicate that checks the key of a TestProperty against a known key.
+//
+// TestPropertyKeyIs is copyable.
+class TestPropertyKeyIs {
+ public:
+ // Constructor.
+ //
+ // TestPropertyKeyIs has NO default constructor.
+ explicit TestPropertyKeyIs(const std::string& key) : key_(key) {}
+
+ // Returns true iff the test name of test property matches on key_.
+ bool operator()(const TestProperty& test_property) const {
+ return test_property.key() == key_;
+ }
+
+ private:
+ std::string key_;
+};
+
+// Class UnitTestOptions.
+//
+// This class contains functions for processing options the user
+// specifies when running the tests. It has only static members.
+//
+// In most cases, the user can specify an option using either an
+// environment variable or a command line flag. E.g. you can set the
+// test filter using either GTEST_FILTER or --gtest_filter. If both
+// the variable and the flag are present, the latter overrides the
+// former.
+class GTEST_API_ UnitTestOptions {
+ public:
+ // Functions for processing the gtest_output flag.
+
+ // Returns the output format, or "" for normal printed output.
+ static std::string GetOutputFormat();
+
+ // Returns the absolute path of the requested output file, or the
+ // default (test_detail.xml in the original working directory) if
+ // none was explicitly specified.
+ static std::string GetAbsolutePathToOutputFile();
+
+ // Functions for processing the gtest_filter flag.
+
+ // Returns true iff the wildcard pattern matches the string. The
+ // first ':' or '\0' character in pattern marks the end of it.
+ //
+ // This recursive algorithm isn't very efficient, but is clear and
+ // works well enough for matching test names, which are short.
+ static bool PatternMatchesString(const char *pattern, const char *str);
+
+ // Returns true iff the user-specified filter matches the test case
+ // name and the test name.
+ static bool FilterMatchesTest(const std::string &test_case_name,
+ const std::string &test_name);
+
+#if GTEST_OS_WINDOWS
+ // Function for supporting the gtest_catch_exception flag.
+
+ // Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the
+ // given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise.
+ // This function is useful as an __except condition.
+ static int GTestShouldProcessSEH(DWORD exception_code);
+#endif // GTEST_OS_WINDOWS
+
+ // Returns true if "name" matches the ':' separated list of glob-style
+ // filters in "filter".
+ static bool MatchesFilter(const std::string& name, const char* filter);
+};
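+
+// Illustrative sketch (not part of the original fused sources): filters are
+// ':'-separated glob patterns, so
+//
+//   UnitTestOptions::MatchesFilter("FooTest.Bar", "FooTest.*:BarTest.*")
+//
+// returns true, while a filter of "BazTest.*" would not match.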
+
+// Returns the current application's name, removing directory path if that
+// is present. Used by UnitTestOptions::GetAbsolutePathToOutputFile.
+GTEST_API_ FilePath GetCurrentExecutableName();
+
+// The role interface for getting the OS stack trace as a string.
+class OsStackTraceGetterInterface {
+ public:
+ OsStackTraceGetterInterface() {}
+ virtual ~OsStackTraceGetterInterface() {}
+
+ // Returns the current OS stack trace as an std::string. Parameters:
+ //
+ // max_depth - the maximum number of stack frames to be included
+ // in the trace.
+ // skip_count - the number of top frames to be skipped; doesn't count
+ // against max_depth.
+ virtual string CurrentStackTrace(int max_depth, int skip_count) = 0;
+
+ // UponLeavingGTest() should be called immediately before Google Test calls
+ // user code. It saves some information about the current stack that
+ // CurrentStackTrace() will use to find and hide Google Test stack frames.
+ virtual void UponLeavingGTest() = 0;
+
+ private:
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetterInterface);
+};
+
+// A working implementation of the OsStackTraceGetterInterface interface.
+class OsStackTraceGetter : public OsStackTraceGetterInterface {
+ public:
+ OsStackTraceGetter() : caller_frame_(NULL) {}
+
+ virtual string CurrentStackTrace(int max_depth, int skip_count)
+ GTEST_LOCK_EXCLUDED_(mutex_);
+
+ virtual void UponLeavingGTest() GTEST_LOCK_EXCLUDED_(mutex_);
+
+ // This string is inserted in place of stack frames that are part of
+ // Google Test's implementation.
+ static const char* const kElidedFramesMarker;
+
+ private:
+ Mutex mutex_; // protects all internal state
+
+ // We save the stack frame below the frame that calls user code.
+ // We do this because the address of the frame immediately below
+ // the user code changes between the call to UponLeavingGTest()
+ // and any calls to CurrentStackTrace() from within the user code.
+ void* caller_frame_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetter);
+};
+
+// Information about a Google Test trace point.
+struct TraceInfo {
+ const char* file;
+ int line;
+ std::string message;
+};
+
+// This is the default global test part result reporter used in UnitTestImpl.
+// This class should only be used by UnitTestImpl.
+class DefaultGlobalTestPartResultReporter
+ : public TestPartResultReporterInterface {
+ public:
+ explicit DefaultGlobalTestPartResultReporter(UnitTestImpl* unit_test);
+ // Implements the TestPartResultReporterInterface. Reports the test part
+ // result in the current test.
+ virtual void ReportTestPartResult(const TestPartResult& result);
+
+ private:
+ UnitTestImpl* const unit_test_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultGlobalTestPartResultReporter);
+};
+
+// This is the default per thread test part result reporter used in
+// UnitTestImpl. This class should only be used by UnitTestImpl.
+class DefaultPerThreadTestPartResultReporter
+ : public TestPartResultReporterInterface {
+ public:
+ explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl* unit_test);
+ // Implements the TestPartResultReporterInterface. The implementation just
+ // delegates to the current global test part result reporter of *unit_test_.
+ virtual void ReportTestPartResult(const TestPartResult& result);
+
+ private:
+ UnitTestImpl* const unit_test_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultPerThreadTestPartResultReporter);
+};
+
+// The private implementation of the UnitTest class. We don't protect
+// the methods under a mutex, as this class is not accessible by a
+// user and the UnitTest class that delegates work to this class does
+// proper locking.
+class GTEST_API_ UnitTestImpl {
+ public:
+ explicit UnitTestImpl(UnitTest* parent);
+ virtual ~UnitTestImpl();
+
+ // There are two different ways to register your own TestPartResultReporter.
+  // You can register your own reporter to listen either only for test results
+  // from the current thread or for results from all threads.
+  // By default, each per-thread test part result reporter just passes a new
+ // TestPartResult to the global test result reporter, which registers the
+ // test part result for the currently running test.
+
+ // Returns the global test part result reporter.
+ TestPartResultReporterInterface* GetGlobalTestPartResultReporter();
+
+ // Sets the global test part result reporter.
+ void SetGlobalTestPartResultReporter(
+ TestPartResultReporterInterface* reporter);
+
+ // Returns the test part result reporter for the current thread.
+ TestPartResultReporterInterface* GetTestPartResultReporterForCurrentThread();
+
+ // Sets the test part result reporter for the current thread.
+ void SetTestPartResultReporterForCurrentThread(
+ TestPartResultReporterInterface* reporter);
+
+ // Gets the number of successful test cases.
+ int successful_test_case_count() const;
+
+ // Gets the number of failed test cases.
+ int failed_test_case_count() const;
+
+ // Gets the number of all test cases.
+ int total_test_case_count() const;
+
+ // Gets the number of all test cases that contain at least one test
+ // that should run.
+ int test_case_to_run_count() const;
+
+ // Gets the number of successful tests.
+ int successful_test_count() const;
+
+ // Gets the number of failed tests.
+ int failed_test_count() const;
+
+ // Gets the number of disabled tests that will be reported in the XML report.
+ int reportable_disabled_test_count() const;
+
+ // Gets the number of disabled tests.
+ int disabled_test_count() const;
+
+ // Gets the number of tests to be printed in the XML report.
+ int reportable_test_count() const;
+
+ // Gets the number of all tests.
+ int total_test_count() const;
+
+ // Gets the number of tests that should run.
+ int test_to_run_count() const;
+
+ // Gets the time of the test program start, in ms from the start of the
+ // UNIX epoch.
+ TimeInMillis start_timestamp() const { return start_timestamp_; }
+
+ // Gets the elapsed time, in milliseconds.
+ TimeInMillis elapsed_time() const { return elapsed_time_; }
+
+ // Returns true iff the unit test passed (i.e. all test cases passed).
+ bool Passed() const { return !Failed(); }
+
+ // Returns true iff the unit test failed (i.e. some test case failed
+ // or something outside of all tests failed).
+ bool Failed() const {
+ return failed_test_case_count() > 0 || ad_hoc_test_result()->Failed();
+ }
+
+ // Gets the i-th test case among all the test cases. i can range from 0 to
+ // total_test_case_count() - 1. If i is not in that range, returns NULL.
+ const TestCase* GetTestCase(int i) const {
+ const int index = GetElementOr(test_case_indices_, i, -1);
+    return index < 0 ? NULL : test_cases_[index];
+ }
+
+ // Gets the i-th test case among all the test cases. i can range from 0 to
+ // total_test_case_count() - 1. If i is not in that range, returns NULL.
+ TestCase* GetMutableTestCase(int i) {
+ const int index = GetElementOr(test_case_indices_, i, -1);
+ return index < 0 ? NULL : test_cases_[index];
+ }
+
+ // Provides access to the event listener list.
+ TestEventListeners* listeners() { return &listeners_; }
+
+ // Returns the TestResult for the test that's currently running, or
+ // the TestResult for the ad hoc test if no test is running.
+ TestResult* current_test_result();
+
+ // Returns the TestResult for the ad hoc test.
+ const TestResult* ad_hoc_test_result() const { return &ad_hoc_test_result_; }
+
+ // Sets the OS stack trace getter.
+ //
+ // Does nothing if the input and the current OS stack trace getter
+ // are the same; otherwise, deletes the old getter and makes the
+ // input the current getter.
+ void set_os_stack_trace_getter(OsStackTraceGetterInterface* getter);
+
+ // Returns the current OS stack trace getter if it is not NULL;
+ // otherwise, creates an OsStackTraceGetter, makes it the current
+ // getter, and returns it.
+ OsStackTraceGetterInterface* os_stack_trace_getter();
+
+ // Returns the current OS stack trace as an std::string.
+ //
+ // The maximum number of stack frames to be included is specified by
+ // the gtest_stack_trace_depth flag. The skip_count parameter
+ // specifies the number of top frames to be skipped, which doesn't
+ // count against the number of frames to be included.
+ //
+ // For example, if Foo() calls Bar(), which in turn calls
+ // CurrentOsStackTraceExceptTop(1), Foo() will be included in the
+ // trace but Bar() and CurrentOsStackTraceExceptTop() won't.
+ std::string CurrentOsStackTraceExceptTop(int skip_count) GTEST_NO_INLINE_;
+
+ // Finds and returns a TestCase with the given name. If one doesn't
+ // exist, creates one and returns it.
+ //
+ // Arguments:
+ //
+ // test_case_name: name of the test case
+ // type_param: the name of the test's type parameter, or NULL if
+ // this is not a typed or a type-parameterized test.
+ // set_up_tc: pointer to the function that sets up the test case
+ // tear_down_tc: pointer to the function that tears down the test case
+ TestCase* GetTestCase(const char* test_case_name,
+ const char* type_param,
+ Test::SetUpTestCaseFunc set_up_tc,
+ Test::TearDownTestCaseFunc tear_down_tc);
+
+ // Adds a TestInfo to the unit test.
+ //
+ // Arguments:
+ //
+ // set_up_tc: pointer to the function that sets up the test case
+ // tear_down_tc: pointer to the function that tears down the test case
+ // test_info: the TestInfo object
+ void AddTestInfo(Test::SetUpTestCaseFunc set_up_tc,
+ Test::TearDownTestCaseFunc tear_down_tc,
+ TestInfo* test_info) {
+ // In order to support thread-safe death tests, we need to
+ // remember the original working directory when the test program
+ // was first invoked. We cannot do this in RUN_ALL_TESTS(), as
+ // the user may have changed the current directory before calling
+ // RUN_ALL_TESTS(). Therefore we capture the current directory in
+ // AddTestInfo(), which is called to register a TEST or TEST_F
+ // before main() is reached.
+ if (original_working_dir_.IsEmpty()) {
+ original_working_dir_.Set(FilePath::GetCurrentDir());
+ GTEST_CHECK_(!original_working_dir_.IsEmpty())
+ << "Failed to get the current working directory.";
+ }
+
+ GetTestCase(test_info->test_case_name(),
+ test_info->type_param(),
+ set_up_tc,
+ tear_down_tc)->AddTestInfo(test_info);
+ }
+
+#if GTEST_HAS_PARAM_TEST
+ // Returns ParameterizedTestCaseRegistry object used to keep track of
+ // value-parameterized tests and instantiate and register them.
+ internal::ParameterizedTestCaseRegistry& parameterized_test_registry() {
+ return parameterized_test_registry_;
+ }
+#endif // GTEST_HAS_PARAM_TEST
+
+ // Sets the TestCase object for the test that's currently running.
+ void set_current_test_case(TestCase* a_current_test_case) {
+ current_test_case_ = a_current_test_case;
+ }
+
+ // Sets the TestInfo object for the test that's currently running. If
+ // current_test_info is NULL, the assertion results will be stored in
+ // ad_hoc_test_result_.
+ void set_current_test_info(TestInfo* a_current_test_info) {
+ current_test_info_ = a_current_test_info;
+ }
+
+ // Registers all parameterized tests defined using TEST_P and
+ // INSTANTIATE_TEST_CASE_P, creating regular tests for each test/parameter
+  // combination. This method can be called more than once; it has guards
+  // protecting from registering the tests more than once. If
+ // value-parameterized tests are disabled, RegisterParameterizedTests is
+ // present but does nothing.
+ void RegisterParameterizedTests();
+
+ // Runs all tests in this UnitTest object, prints the result, and
+ // returns true if all tests are successful. If any exception is
+ // thrown during a test, this test is considered to be failed, but
+ // the rest of the tests will still be run.
+ bool RunAllTests();
+
+ // Clears the results of all tests, except the ad hoc tests.
+ void ClearNonAdHocTestResult() {
+ ForEach(test_cases_, TestCase::ClearTestCaseResult);
+ }
+
+ // Clears the results of ad-hoc test assertions.
+ void ClearAdHocTestResult() {
+ ad_hoc_test_result_.Clear();
+ }
+
+ // Adds a TestProperty to the current TestResult object when invoked in a
+ // context of a test or a test case, or to the global property set. If the
+ // result already contains a property with the same key, the value will be
+ // updated.
+ void RecordProperty(const TestProperty& test_property);
+
+ enum ReactionToSharding {
+ HONOR_SHARDING_PROTOCOL,
+ IGNORE_SHARDING_PROTOCOL
+ };
+
+ // Matches the full name of each test against the user-specified
+ // filter to decide whether the test should run, then records the
+ // result in each TestCase and TestInfo object.
+ // If shard_tests == HONOR_SHARDING_PROTOCOL, further filters tests
+ // based on sharding variables in the environment.
+ // Returns the number of tests that should run.
+ int FilterTests(ReactionToSharding shard_tests);
+
+ // Prints the names of the tests matching the user-specified filter flag.
+ void ListTestsMatchingFilter();
+
+ const TestCase* current_test_case() const { return current_test_case_; }
+ TestInfo* current_test_info() { return current_test_info_; }
+ const TestInfo* current_test_info() const { return current_test_info_; }
+
+  // Returns the vector of environments that need to be set up/torn down
+ // before/after the tests are run.
+ std::vector<Environment*>& environments() { return environments_; }
+
+ // Getters for the per-thread Google Test trace stack.
+ std::vector<TraceInfo>& gtest_trace_stack() {
+ return *(gtest_trace_stack_.pointer());
+ }
+ const std::vector<TraceInfo>& gtest_trace_stack() const {
+ return gtest_trace_stack_.get();
+ }
+
+#if GTEST_HAS_DEATH_TEST
+ void InitDeathTestSubprocessControlInfo() {
+ internal_run_death_test_flag_.reset(ParseInternalRunDeathTestFlag());
+ }
+ // Returns a pointer to the parsed --gtest_internal_run_death_test
+ // flag, or NULL if that flag was not specified.
+ // This information is useful only in a death test child process.
+ // Must not be called before a call to InitGoogleTest.
+ const InternalRunDeathTestFlag* internal_run_death_test_flag() const {
+ return internal_run_death_test_flag_.get();
+ }
+
+ // Returns a pointer to the current death test factory.
+ internal::DeathTestFactory* death_test_factory() {
+ return death_test_factory_.get();
+ }
+
+ void SuppressTestEventsIfInSubprocess();
+
+ friend class ReplaceDeathTestFactory;
+#endif // GTEST_HAS_DEATH_TEST
+
+ // Initializes the event listener performing XML output as specified by
+ // UnitTestOptions. Must not be called before InitGoogleTest.
+ void ConfigureXmlOutput();
+
+#if GTEST_CAN_STREAM_RESULTS_
+ // Initializes the event listener for streaming test results to a socket.
+ // Must not be called before InitGoogleTest.
+ void ConfigureStreamingOutput();
+#endif
+
+ // Performs initialization dependent upon flag values obtained in
+ // ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to
+ // ParseGoogleTestFlagsOnly. In case a user neglects to call InitGoogleTest
+ // this function is also called from RunAllTests. Since this function can be
+ // called more than once, it has to be idempotent.
+ void PostFlagParsingInit();
+
+ // Gets the random seed used at the start of the current test iteration.
+ int random_seed() const { return random_seed_; }
+
+ // Gets the random number generator.
+ internal::Random* random() { return &random_; }
+
+ // Shuffles all test cases, and the tests within each test case,
+ // making sure that death tests are still run first.
+ void ShuffleTests();
+
+ // Restores the test cases and tests to their order before the first shuffle.
+ void UnshuffleTests();
+
+ // Returns the value of GTEST_FLAG(catch_exceptions) at the moment
+ // UnitTest::Run() starts.
+ bool catch_exceptions() const { return catch_exceptions_; }
+
+ private:
+ friend class ::testing::UnitTest;
+
+ // Used by UnitTest::Run() to capture the state of
+ // GTEST_FLAG(catch_exceptions) at the moment it starts.
+ void set_catch_exceptions(bool value) { catch_exceptions_ = value; }
+
+ // The UnitTest object that owns this implementation object.
+ UnitTest* const parent_;
+
+ // The working directory when the first TEST() or TEST_F() was
+ // executed.
+ internal::FilePath original_working_dir_;
+
+ // The default test part result reporters.
+ DefaultGlobalTestPartResultReporter default_global_test_part_result_reporter_;
+ DefaultPerThreadTestPartResultReporter
+ default_per_thread_test_part_result_reporter_;
+
+ // Points to (but doesn't own) the global test part result reporter.
+ TestPartResultReporterInterface* global_test_part_result_repoter_;
+
+ // Protects read and write access to global_test_part_result_reporter_.
+ internal::Mutex global_test_part_result_reporter_mutex_;
+
+ // Points to (but doesn't own) the per-thread test part result reporter.
+ internal::ThreadLocal<TestPartResultReporterInterface*>
+ per_thread_test_part_result_reporter_;
+
+  // The vector of environments that need to be set up/torn down
+ // before/after the tests are run.
+ std::vector<Environment*> environments_;
+
+ // The vector of TestCases in their original order. It owns the
+ // elements in the vector.
+ std::vector<TestCase*> test_cases_;
+
+ // Provides a level of indirection for the test case list to allow
+ // easy shuffling and restoring the test case order. The i-th
+ // element of this vector is the index of the i-th test case in the
+ // shuffled order.
+ std::vector<int> test_case_indices_;
+
+#if GTEST_HAS_PARAM_TEST
+ // ParameterizedTestRegistry object used to register value-parameterized
+ // tests.
+ internal::ParameterizedTestCaseRegistry parameterized_test_registry_;
+
+ // Indicates whether RegisterParameterizedTests() has been called already.
+ bool parameterized_tests_registered_;
+#endif // GTEST_HAS_PARAM_TEST
+
+ // Index of the last death test case registered. Initially -1.
+ int last_death_test_case_;
+
+ // This points to the TestCase for the currently running test. It
+ // changes as Google Test goes through one test case after another.
+ // When no test is running, this is set to NULL and Google Test
+ // stores assertion results in ad_hoc_test_result_. Initially NULL.
+ TestCase* current_test_case_;
+
+ // This points to the TestInfo for the currently running test. It
+ // changes as Google Test goes through one test after another. When
+ // no test is running, this is set to NULL and Google Test stores
+ // assertion results in ad_hoc_test_result_. Initially NULL.
+ TestInfo* current_test_info_;
+
+ // Normally, a user only writes assertions inside a TEST or TEST_F,
+ // or inside a function called by a TEST or TEST_F. Since Google
+ // Test keeps track of which test is current running, it can
+ // associate such an assertion with the test it belongs to.
+ //
+ // If an assertion is encountered when no TEST or TEST_F is running,
+ // Google Test attributes the assertion result to an imaginary "ad hoc"
+ // test, and records the result in ad_hoc_test_result_.
+ TestResult ad_hoc_test_result_;
+
+ // The list of event listeners that can be used to track events inside
+ // Google Test.
+ TestEventListeners listeners_;
+
+ // The OS stack trace getter. Will be deleted when the UnitTest
+ // object is destructed. By default, an OsStackTraceGetter is used,
+ // but the user can set this field to use a custom getter if that is
+ // desired.
+ OsStackTraceGetterInterface* os_stack_trace_getter_;
+
+ // True iff PostFlagParsingInit() has been called.
+ bool post_flag_parse_init_performed_;
+
+ // The random number seed used at the beginning of the test run.
+ int random_seed_;
+
+ // Our random number generator.
+ internal::Random random_;
+
+ // The time of the test program start, in ms from the start of the
+ // UNIX epoch.
+ TimeInMillis start_timestamp_;
+
+ // How long the test took to run, in milliseconds.
+ TimeInMillis elapsed_time_;
+
+#if GTEST_HAS_DEATH_TEST
+ // The decomposed components of the gtest_internal_run_death_test flag,
+ // parsed when RUN_ALL_TESTS is called.
+ internal::scoped_ptr<InternalRunDeathTestFlag> internal_run_death_test_flag_;
+ internal::scoped_ptr<internal::DeathTestFactory> death_test_factory_;
+#endif // GTEST_HAS_DEATH_TEST
+
+ // A per-thread stack of traces created by the SCOPED_TRACE() macro.
+ internal::ThreadLocal<std::vector<TraceInfo> > gtest_trace_stack_;
+
+ // The value of GTEST_FLAG(catch_exceptions) at the moment RunAllTests()
+ // starts.
+ bool catch_exceptions_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTestImpl);
+}; // class UnitTestImpl
+
+// Convenience function for accessing the global UnitTest
+// implementation object.
+inline UnitTestImpl* GetUnitTestImpl() {
+ return UnitTest::GetInstance()->impl();
+}
+
+#if GTEST_USES_SIMPLE_RE
+
+// Internal helper functions for implementing the simple regular
+// expression matcher.
+GTEST_API_ bool IsInSet(char ch, const char* str);
+GTEST_API_ bool IsAsciiDigit(char ch);
+GTEST_API_ bool IsAsciiPunct(char ch);
+GTEST_API_ bool IsRepeat(char ch);
+GTEST_API_ bool IsAsciiWhiteSpace(char ch);
+GTEST_API_ bool IsAsciiWordChar(char ch);
+GTEST_API_ bool IsValidEscape(char ch);
+GTEST_API_ bool AtomMatchesChar(bool escaped, char pattern, char ch);
+GTEST_API_ bool ValidateRegex(const char* regex);
+GTEST_API_ bool MatchRegexAtHead(const char* regex, const char* str);
+GTEST_API_ bool MatchRepetitionAndRegexAtHead(
+ bool escaped, char ch, char repeat, const char* regex, const char* str);
+GTEST_API_ bool MatchRegexAnywhere(const char* regex, const char* str);
+
+#endif // GTEST_USES_SIMPLE_RE
+
+// Parses the command line for Google Test flags, without initializing
+// other parts of Google Test.
+GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, char** argv);
+GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv);
+
+#if GTEST_HAS_DEATH_TEST
+
+// Returns the message describing the last system error, regardless of the
+// platform.
+GTEST_API_ std::string GetLastErrnoDescription();
+
+# if GTEST_OS_WINDOWS
+// Provides leak-safe Windows kernel handle ownership.
+class AutoHandle {
+ public:
+ AutoHandle() : handle_(INVALID_HANDLE_VALUE) {}
+ explicit AutoHandle(HANDLE handle) : handle_(handle) {}
+
+ ~AutoHandle() { Reset(); }
+
+ HANDLE Get() const { return handle_; }
+ void Reset() { Reset(INVALID_HANDLE_VALUE); }
+ void Reset(HANDLE handle) {
+ if (handle != handle_) {
+ if (handle_ != INVALID_HANDLE_VALUE)
+ ::CloseHandle(handle_);
+ handle_ = handle;
+ }
+ }
+
+ private:
+ HANDLE handle_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(AutoHandle);
+};
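+
+// A minimal usage sketch (illustrative only; not part of the library).
+// The handle is closed when the AutoHandle goes out of scope, so early
+// returns cannot leak it:
+//
+//   AutoHandle event(::CreateEvent(NULL, FALSE, FALSE, NULL));
+//   if (event.Get() == NULL) return;  // nothing to clean up on failure
+//   ::SetEvent(event.Get());          // use the raw HANDLE as usual
+//   // ~AutoHandle() calls ::CloseHandle() when 'event' is destroyed.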
+# endif // GTEST_OS_WINDOWS
+
+// Attempts to parse a string into a non-negative integer pointed to by the
+// number parameter. Returns true if that is possible.
+// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can use
+// it here.
+template <typename Integer>
+bool ParseNaturalNumber(const ::std::string& str, Integer* number) {
+ // Fail fast if the given string does not begin with a digit;
+ // this bypasses strtoXXX's "optional leading whitespace and plus
+ // or minus sign" semantics, which are undesirable here.
+ if (str.empty() || !IsDigit(str[0])) {
+ return false;
+ }
+ errno = 0;
+
+ char* end;
+ // BiggestConvertible is the largest integer type that system-provided
+ // string-to-number conversion routines can return.
+
+# if GTEST_OS_WINDOWS && !defined(__GNUC__)
+
+ // MSVC and C++ Builder define __int64 instead of the standard long long.
+ typedef unsigned __int64 BiggestConvertible;
+ const BiggestConvertible parsed = _strtoui64(str.c_str(), &end, 10);
+
+# else
+
+ typedef unsigned long long BiggestConvertible; // NOLINT
+ const BiggestConvertible parsed = strtoull(str.c_str(), &end, 10);
+
+# endif // GTEST_OS_WINDOWS && !defined(__GNUC__)
+
+ const bool parse_success = *end == '\0' && errno == 0;
+
+ // TODO(vladl@google.com): Convert this to compile time assertion when it is
+ // available.
+ GTEST_CHECK_(sizeof(Integer) <= sizeof(parsed));
+
+ const Integer result = static_cast<Integer>(parsed);
+ if (parse_success && static_cast<BiggestConvertible>(result) == parsed) {
+ *number = result;
+ return true;
+ }
+ return false;
+}
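+
+// Illustrative behavior of ParseNaturalNumber (a sketch, not part of the
+// library): leading whitespace, signs, trailing junk, and values that
+// overflow the destination type are all rejected.
+//
+//   int n = 0;
+//   ParseNaturalNumber("123", &n);   // true, n == 123
+//   ParseNaturalNumber(" 123", &n);  // false: leading whitespace
+//   ParseNaturalNumber("-5", &n);    // false: '-' is not a digit
+//   ParseNaturalNumber("12x", &n);   // false: trailing non-digit
+//   char c = 0;
+//   ParseNaturalNumber("300", &c);   // false: 300 does not fit in a char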
+#endif // GTEST_HAS_DEATH_TEST
+
+// TestResult contains some private methods that should be hidden from the
+// Google Test user but are required for testing. This class allows our
+// tests to access them.
+//
+// This class is supplied only for the purpose of testing Google Test's own
+// constructs. Do not use it in user tests, either directly or indirectly.
+class TestResultAccessor {
+ public:
+ static void RecordProperty(TestResult* test_result,
+ const std::string& xml_element,
+ const TestProperty& property) {
+ test_result->RecordProperty(xml_element, property);
+ }
+
+ static void ClearTestPartResults(TestResult* test_result) {
+ test_result->ClearTestPartResults();
+ }
+
+ static const std::vector<testing::TestPartResult>& test_part_results(
+ const TestResult& test_result) {
+ return test_result.test_part_results();
+ }
+};
+
+#if GTEST_CAN_STREAM_RESULTS_
+
+// Streams test results to the given port on the given host machine.
+class StreamingListener : public EmptyTestEventListener {
+ public:
+ // Abstract base class for writing strings to a socket.
+ class AbstractSocketWriter {
+ public:
+ virtual ~AbstractSocketWriter() {}
+
+ // Sends a string to the socket.
+ virtual void Send(const string& message) = 0;
+
+ // Closes the socket.
+ virtual void CloseConnection() {}
+
+ // Sends a string and a newline to the socket.
+ void SendLn(const string& message) {
+ Send(message + "\n");
+ }
+ };
+
+ // Concrete class for actually writing strings to a socket.
+ class SocketWriter : public AbstractSocketWriter {
+ public:
+ SocketWriter(const string& host, const string& port)
+ : sockfd_(-1), host_name_(host), port_num_(port) {
+ MakeConnection();
+ }
+
+ virtual ~SocketWriter() {
+ if (sockfd_ != -1)
+ CloseConnection();
+ }
+
+ // Sends a string to the socket.
+ virtual void Send(const string& message) {
+ GTEST_CHECK_(sockfd_ != -1)
+ << "Send() can be called only when there is a connection.";
+
+ const int len = static_cast<int>(message.length());
+ if (write(sockfd_, message.c_str(), len) != len) {
+ GTEST_LOG_(WARNING)
+ << "stream_result_to: failed to stream to "
+ << host_name_ << ":" << port_num_;
+ }
+ }
+
+ private:
+ // Creates a client socket and connects to the server.
+ void MakeConnection();
+
+ // Closes the socket.
+ void CloseConnection() {
+ GTEST_CHECK_(sockfd_ != -1)
+ << "CloseConnection() can be called only when there is a connection.";
+
+ close(sockfd_);
+ sockfd_ = -1;
+ }
+
+ int sockfd_; // socket file descriptor
+ const string host_name_;
+ const string port_num_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(SocketWriter);
+ }; // class SocketWriter
+
+ // Escapes '=', '&', '%', and '\n' characters in str as "%xx".
+ static string UrlEncode(const char* str);
+
+ StreamingListener(const string& host, const string& port)
+ : socket_writer_(new SocketWriter(host, port)) { Start(); }
+
+ explicit StreamingListener(AbstractSocketWriter* socket_writer)
+ : socket_writer_(socket_writer) { Start(); }
+
+ void OnTestProgramStart(const UnitTest& /* unit_test */) {
+ SendLn("event=TestProgramStart");
+ }
+
+ void OnTestProgramEnd(const UnitTest& unit_test) {
+ // Note that Google Test currently only reports elapsed time for each
+ // test iteration, not for the entire test program.
+ SendLn("event=TestProgramEnd&passed=" + FormatBool(unit_test.Passed()));
+
+ // Notify the streaming server to stop.
+ socket_writer_->CloseConnection();
+ }
+
+ void OnTestIterationStart(const UnitTest& /* unit_test */, int iteration) {
+ SendLn("event=TestIterationStart&iteration=" +
+ StreamableToString(iteration));
+ }
+
+ void OnTestIterationEnd(const UnitTest& unit_test, int /* iteration */) {
+ SendLn("event=TestIterationEnd&passed=" +
+ FormatBool(unit_test.Passed()) + "&elapsed_time=" +
+ StreamableToString(unit_test.elapsed_time()) + "ms");
+ }
+
+ void OnTestCaseStart(const TestCase& test_case) {
+ SendLn(std::string("event=TestCaseStart&name=") + test_case.name());
+ }
+
+ void OnTestCaseEnd(const TestCase& test_case) {
+ SendLn("event=TestCaseEnd&passed=" + FormatBool(test_case.Passed())
+ + "&elapsed_time=" + StreamableToString(test_case.elapsed_time())
+ + "ms");
+ }
+
+ void OnTestStart(const TestInfo& test_info) {
+ SendLn(std::string("event=TestStart&name=") + test_info.name());
+ }
+
+ void OnTestEnd(const TestInfo& test_info) {
+ SendLn("event=TestEnd&passed=" +
+ FormatBool((test_info.result())->Passed()) +
+ "&elapsed_time=" +
+ StreamableToString((test_info.result())->elapsed_time()) + "ms");
+ }
+
+ void OnTestPartResult(const TestPartResult& test_part_result) {
+ const char* file_name = test_part_result.file_name();
+ if (file_name == NULL)
+ file_name = "";
+ SendLn("event=TestPartResult&file=" + UrlEncode(file_name) +
+ "&line=" + StreamableToString(test_part_result.line_number()) +
+ "&message=" + UrlEncode(test_part_result.message()));
+ }
+
+ private:
+ // Sends the given message and a newline to the socket.
+ void SendLn(const string& message) { socket_writer_->SendLn(message); }
+
+ // Called at the start of streaming to notify the receiver what
+ // protocol we are using.
+ void Start() { SendLn("gtest_streaming_protocol_version=1.0"); }
+
+ string FormatBool(bool value) { return value ? "1" : "0"; }
+
+ const scoped_ptr<AbstractSocketWriter> socket_writer_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamingListener);
+}; // class StreamingListener
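+
+// For reference, a plausible transcript of what StreamingListener writes
+// for a run with one passing test, based on the handlers above (the test
+// names and elapsed times are illustrative):
+//
+//   gtest_streaming_protocol_version=1.0
+//   event=TestProgramStart
+//   event=TestIterationStart&iteration=0
+//   event=TestCaseStart&name=FooTest
+//   event=TestStart&name=Bar
+//   event=TestEnd&passed=1&elapsed_time=0ms
+//   event=TestCaseEnd&passed=1&elapsed_time=0ms
+//   event=TestIterationEnd&passed=1&elapsed_time=1ms
+//   event=TestProgramEnd&passed=1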
+
+#endif // GTEST_CAN_STREAM_RESULTS_
+
+} // namespace internal
+} // namespace testing
+
+#endif // GTEST_SRC_GTEST_INTERNAL_INL_H_
+#undef GTEST_IMPLEMENTATION_
+
+#if GTEST_OS_WINDOWS
+# define vsnprintf _vsnprintf
+#endif // GTEST_OS_WINDOWS
+
+namespace testing {
+
+using internal::CountIf;
+using internal::ForEach;
+using internal::GetElementOr;
+using internal::Shuffle;
+
+// Constants.
+
+// A test whose test case name or test name matches this filter is
+// disabled and not run.
+static const char kDisableTestFilter[] = "DISABLED_*:*/DISABLED_*";
+
+// A test case whose name matches this filter is considered a death
+// test case and will be run before test cases whose name doesn't
+// match this filter.
+static const char kDeathTestCaseFilter[] = "*DeathTest:*DeathTest/*";
+
+// A test filter that matches everything.
+static const char kUniversalFilter[] = "*";
+
+// The default output file for XML output.
+static const char kDefaultOutputFile[] = "test_detail.xml";
+
+// The environment variable name for the test shard index.
+static const char kTestShardIndex[] = "GTEST_SHARD_INDEX";
+// The environment variable name for the total number of test shards.
+static const char kTestTotalShards[] = "GTEST_TOTAL_SHARDS";
+// The environment variable name for the test shard status file.
+static const char kTestShardStatusFile[] = "GTEST_SHARD_STATUS_FILE";
+
+namespace internal {
+
+// The text used in failure messages to indicate the start of the
+// stack trace.
+const char kStackTraceMarker[] = "\nStack trace:\n";
+
+// g_help_flag is true iff the --help flag or an equivalent form is
+// specified on the command line.
+bool g_help_flag = false;
+
+} // namespace internal
+
+static const char* GetDefaultFilter() {
+ return kUniversalFilter;
+}
+
+GTEST_DEFINE_bool_(
+ also_run_disabled_tests,
+ internal::BoolFromGTestEnv("also_run_disabled_tests", false),
+ "Run disabled tests too, in addition to the tests normally being run.");
+
+GTEST_DEFINE_bool_(
+ break_on_failure,
+ internal::BoolFromGTestEnv("break_on_failure", false),
+ "True iff a failed assertion should be a debugger break-point.");
+
+GTEST_DEFINE_bool_(
+ catch_exceptions,
+ internal::BoolFromGTestEnv("catch_exceptions", true),
+ "True iff " GTEST_NAME_
+ " should catch exceptions and treat them as test failures.");
+
+GTEST_DEFINE_string_(
+ color,
+ internal::StringFromGTestEnv("color", "auto"),
+ "Whether to use colors in the output. Valid values: yes, no, "
+ "and auto. 'auto' means to use colors if the output is "
+ "being sent to a terminal and the TERM environment variable "
+ "is set to a terminal type that supports colors.");
+
+GTEST_DEFINE_string_(
+ filter,
+ internal::StringFromGTestEnv("filter", GetDefaultFilter()),
+ "A colon-separated list of glob (not regex) patterns "
+ "for filtering the tests to run, optionally followed by a "
+ "'-' and a : separated list of negative patterns (tests to "
+ "exclude). A test is run if it matches one of the positive "
+ "patterns and does not match any of the negative patterns.");
+
+GTEST_DEFINE_bool_(list_tests, false,
+ "List all tests without running them.");
+
+GTEST_DEFINE_string_(
+ output,
+ internal::StringFromGTestEnv("output", ""),
+ "A format (currently must be \"xml\"), optionally followed "
+ "by a colon and an output file name or directory. A directory "
+ "is indicated by a trailing pathname separator. "
+ "Examples: \"xml:filename.xml\", \"xml::directoryname/\". "
+ "If a directory is specified, output files will be created "
+ "within that directory, with file-names based on the test "
+ "executable's name and, if necessary, made unique by adding "
+ "digits.");
+
+GTEST_DEFINE_bool_(
+ print_time,
+ internal::BoolFromGTestEnv("print_time", true),
+ "True iff " GTEST_NAME_
+ " should display elapsed time in text output.");
+
+GTEST_DEFINE_int32_(
+ random_seed,
+ internal::Int32FromGTestEnv("random_seed", 0),
+ "Random number seed to use when shuffling test orders. Must be in range "
+ "[1, 99999], or 0 to use a seed based on the current time.");
+
+GTEST_DEFINE_int32_(
+ repeat,
+ internal::Int32FromGTestEnv("repeat", 1),
+ "How many times to repeat each test. Specify a negative number "
+ "for repeating forever. Useful for shaking out flaky tests.");
+
+GTEST_DEFINE_bool_(
+ show_internal_stack_frames, false,
+ "True iff " GTEST_NAME_ " should include internal stack frames when "
+ "printing test failure stack traces.");
+
+GTEST_DEFINE_bool_(
+ shuffle,
+ internal::BoolFromGTestEnv("shuffle", false),
+ "True iff " GTEST_NAME_
+ " should randomize tests' order on every run.");
+
+GTEST_DEFINE_int32_(
+ stack_trace_depth,
+ internal::Int32FromGTestEnv("stack_trace_depth", kMaxStackTraceDepth),
+ "The maximum number of stack frames to print when an "
+ "assertion fails. The valid range is 0 through 100, inclusive.");
+
+GTEST_DEFINE_string_(
+ stream_result_to,
+ internal::StringFromGTestEnv("stream_result_to", ""),
+ "This flag specifies the host name and the port number on which to stream "
+ "test results. Example: \"localhost:555\". The flag is effective only on "
+ "Linux.");
+
+GTEST_DEFINE_bool_(
+ throw_on_failure,
+ internal::BoolFromGTestEnv("throw_on_failure", false),
+ "When this flag is specified, a failed assertion will throw an exception "
+ "if exceptions are enabled or exit the program with a non-zero code "
+ "otherwise.");
+
+namespace internal {
+
+// Generates a random number from [0, range), using a Linear
+// Congruential Generator (LCG). Crashes if 'range' is 0 or greater
+// than kMaxRange.
+UInt32 Random::Generate(UInt32 range) {
+ // These constants are the same as are used in glibc's rand(3).
+ state_ = (1103515245U*state_ + 12345U) % kMaxRange;
+
+ GTEST_CHECK_(range > 0)
+ << "Cannot generate a number in the range [0, 0).";
+ GTEST_CHECK_(range <= kMaxRange)
+ << "Generation of a number in [0, " << range << ") was requested, "
+ << "but this can only generate numbers in [0, " << kMaxRange << ").";
+
+ // Converting via modulus introduces a bit of downward bias, but
+ // it's simple, and a linear congruential generator isn't too good
+ // to begin with.
+ return state_ % range;
+}
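+
+// A worked example (assuming kMaxRange is 2^31, as Google Test declares;
+// treat the exact numbers as illustrative): starting from state 42,
+//   state = (1103515245 * 42 + 12345) mod 2^31 = 1250496027,
+// so Generate(10) returns 1250496027 % 10 == 7.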
+
+// GTestIsInitialized() returns true iff the user has initialized
+// Google Test. Useful for catching the user mistake of not initializing
+// Google Test before calling RUN_ALL_TESTS().
+//
+// A user must call testing::InitGoogleTest() to initialize Google
+// Test. g_init_gtest_count is set to the number of times
+// InitGoogleTest() has been called. We don't protect this variable
+// under a mutex as it is only accessed in the main thread.
+GTEST_API_ int g_init_gtest_count = 0;
+static bool GTestIsInitialized() { return g_init_gtest_count != 0; }
+
+// Iterates over a vector of TestCases, keeping a running sum of the
+// results of calling a given int-returning method on each.
+// Returns the sum.
+static int SumOverTestCaseList(const std::vector<TestCase*>& case_list,
+ int (TestCase::*method)() const) {
+ int sum = 0;
+ for (size_t i = 0; i < case_list.size(); i++) {
+ sum += (case_list[i]->*method)();
+ }
+ return sum;
+}
+
+// Returns true iff the test case passed.
+static bool TestCasePassed(const TestCase* test_case) {
+ return test_case->should_run() && test_case->Passed();
+}
+
+// Returns true iff the test case failed.
+static bool TestCaseFailed(const TestCase* test_case) {
+ return test_case->should_run() && test_case->Failed();
+}
+
+// Returns true iff test_case contains at least one test that should
+// run.
+static bool ShouldRunTestCase(const TestCase* test_case) {
+ return test_case->should_run();
+}
+
+// AssertHelper constructor.
+AssertHelper::AssertHelper(TestPartResult::Type type,
+ const char* file,
+ int line,
+ const char* message)
+ : data_(new AssertHelperData(type, file, line, message)) {
+}
+
+AssertHelper::~AssertHelper() {
+ delete data_;
+}
+
+// Message assignment, for assertion streaming support.
+void AssertHelper::operator=(const Message& message) const {
+ UnitTest::GetInstance()->
+ AddTestPartResult(data_->type, data_->file, data_->line,
+ AppendUserMessage(data_->message, message),
+ UnitTest::GetInstance()->impl()
+ ->CurrentOsStackTraceExceptTop(1)
+ // Skips the stack frame for this function itself.
+ ); // NOLINT
+}
+
+// Mutex for linked pointers.
+GTEST_API_ GTEST_DEFINE_STATIC_MUTEX_(g_linked_ptr_mutex);
+
+// The application's pathname, captured in InitGoogleTest().
+std::string g_executable_path;
+
+// Returns the current application's name, removing directory path if that
+// is present.
+FilePath GetCurrentExecutableName() {
+ FilePath result;
+
+#if GTEST_OS_WINDOWS
+ result.Set(FilePath(g_executable_path).RemoveExtension("exe"));
+#else
+ result.Set(FilePath(g_executable_path));
+#endif // GTEST_OS_WINDOWS
+
+ return result.RemoveDirectoryName();
+}
+
+// Functions for processing the gtest_output flag.
+
+// Returns the output format, or "" for normal printed output.
+std::string UnitTestOptions::GetOutputFormat() {
+ const char* const gtest_output_flag = GTEST_FLAG(output).c_str();
+ if (gtest_output_flag == NULL) return std::string("");
+
+ const char* const colon = strchr(gtest_output_flag, ':');
+ return (colon == NULL) ?
+ std::string(gtest_output_flag) :
+ std::string(gtest_output_flag, colon - gtest_output_flag);
+}
+
+// Returns the name of the requested output file, or the default if none
+// was explicitly specified.
+std::string UnitTestOptions::GetAbsolutePathToOutputFile() {
+ const char* const gtest_output_flag = GTEST_FLAG(output).c_str();
+ if (gtest_output_flag == NULL)
+ return "";
+
+ const char* const colon = strchr(gtest_output_flag, ':');
+ if (colon == NULL)
+ return internal::FilePath::ConcatPaths(
+ internal::FilePath(
+ UnitTest::GetInstance()->original_working_dir()),
+ internal::FilePath(kDefaultOutputFile)).string();
+
+ internal::FilePath output_name(colon + 1);
+ if (!output_name.IsAbsolutePath())
+ // TODO(wan@google.com): on Windows \some\path is not an absolute
+ // path (as its meaning depends on the current drive), yet the
+ // following logic for turning it into an absolute path is wrong.
+ // Fix it.
+ output_name = internal::FilePath::ConcatPaths(
+ internal::FilePath(UnitTest::GetInstance()->original_working_dir()),
+ internal::FilePath(colon + 1));
+
+ if (!output_name.IsDirectory())
+ return output_name.string();
+
+ internal::FilePath result(internal::FilePath::GenerateUniqueFileName(
+ output_name, internal::GetCurrentExecutableName(),
+ GetOutputFormat().c_str()));
+ return result.string();
+}
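+
+// Putting the two functions above together, a sketch of how the flag is
+// interpreted ("foo_test" stands in for the actual executable name):
+//
+//   --gtest_output=xml              -> <cwd>/test_detail.xml
+//   --gtest_output=xml:report.xml   -> <cwd>/report.xml
+//   --gtest_output=xml:reports/     -> reports/foo_test.xml, with digits
+//                                      appended if that name is taken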
+
+// Returns true iff the wildcard pattern matches the string. The
+// first ':' or '\0' character in pattern marks the end of it.
+//
+// This recursive algorithm isn't very efficient, but is clear and
+// works well enough for matching test names, which are short.
+bool UnitTestOptions::PatternMatchesString(const char *pattern,
+ const char *str) {
+ switch (*pattern) {
+ case '\0':
+ case ':': // Either ':' or '\0' marks the end of the pattern.
+ return *str == '\0';
+ case '?': // Matches any single character.
+ return *str != '\0' && PatternMatchesString(pattern + 1, str + 1);
+ case '*': // Matches any string (possibly empty) of characters.
+ return (*str != '\0' && PatternMatchesString(pattern, str + 1)) ||
+ PatternMatchesString(pattern + 1, str);
+ default: // Non-special character. Matches itself.
+ return *pattern == *str &&
+ PatternMatchesString(pattern + 1, str + 1);
+ }
+}
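+
+// A worked example (illustrative): "Foo*.Bar?" matches "FooTest.Barx".
+// "Foo" matches literally; '*' consumes "Test", trying each split in
+// turn; ".Bar" matches literally; '?' consumes the final 'x'. Note that
+// ':' also terminates a pattern, so "Foo*:Baz*" is seen by this function
+// as two separate patterns.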
+
+bool UnitTestOptions::MatchesFilter(
+ const std::string& name, const char* filter) {
+ const char *cur_pattern = filter;
+ for (;;) {
+ if (PatternMatchesString(cur_pattern, name.c_str())) {
+ return true;
+ }
+
+ // Finds the next pattern in the filter.
+ cur_pattern = strchr(cur_pattern, ':');
+
+ // Returns if no more pattern can be found.
+ if (cur_pattern == NULL) {
+ return false;
+ }
+
+ // Skips the pattern separator (the ':' character).
+ cur_pattern++;
+ }
+}
+
+// Returns true iff the user-specified filter matches the test case
+// name and the test name.
+bool UnitTestOptions::FilterMatchesTest(const std::string &test_case_name,
+ const std::string &test_name) {
+ const std::string& full_name = test_case_name + "." + test_name.c_str();
+
+ // Split --gtest_filter at '-', if there is one, to separate into
+ // positive filter and negative filter portions
+ const char* const p = GTEST_FLAG(filter).c_str();
+ const char* const dash = strchr(p, '-');
+ std::string positive;
+ std::string negative;
+ if (dash == NULL) {
+ positive = GTEST_FLAG(filter).c_str(); // Whole string is a positive filter
+ negative = "";
+ } else {
+ positive = std::string(p, dash); // Everything up to the dash
+ negative = std::string(dash + 1); // Everything after the dash
+ if (positive.empty()) {
+ // Treat '-test1' as the same as '*-test1'
+ positive = kUniversalFilter;
+ }
+ }
+
+ // A filter is a colon-separated list of patterns. It matches a
+ // test if any pattern in it matches the test.
+ return (MatchesFilter(full_name, positive.c_str()) &&
+ !MatchesFilter(full_name, negative.c_str()));
+}
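+
+// Illustrative filter strings (the test names are hypothetical):
+//
+//   "*"                      matches every test (the default).
+//   "FooTest.*"              matches all tests in test case FooTest.
+//   "*Null*:*Constructor*"   matches any test whose full name contains
+//                            "Null" or "Constructor".
+//   "-*DeathTest.*"          matches everything except death tests; an
+//                            empty positive part is treated as "*".
+//   "FooTest.*-FooTest.Bar"  matches every FooTest test except FooTest.Bar.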
+
+#if GTEST_HAS_SEH
+// Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the
+// given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise.
+// This function is useful as an __except condition.
+int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) {
+ // Google Test should handle a SEH exception if:
+ // 1. the user wants it to, AND
+ // 2. this is not a breakpoint exception, AND
+ // 3. this is not a C++ exception (VC++ implements them via SEH,
+ // apparently).
+ //
+ // SEH exception code for C++ exceptions.
+ // (see http://support.microsoft.com/kb/185294 for more information).
+ const DWORD kCxxExceptionCode = 0xe06d7363;
+
+ bool should_handle = true;
+
+ if (!GTEST_FLAG(catch_exceptions))
+ should_handle = false;
+ else if (exception_code == EXCEPTION_BREAKPOINT)
+ should_handle = false;
+ else if (exception_code == kCxxExceptionCode)
+ should_handle = false;
+
+ return should_handle ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH;
+}
+#endif // GTEST_HAS_SEH
+
+} // namespace internal
+
+// The c'tor sets this object as the test part result reporter used by
+// Google Test. The 'result' parameter specifies where to report the
+// results. Intercepts only failures from the current thread.
+ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
+ TestPartResultArray* result)
+ : intercept_mode_(INTERCEPT_ONLY_CURRENT_THREAD),
+ result_(result) {
+ Init();
+}
+
+// The c'tor sets this object as the test part result reporter used by
+// Google Test. The 'result' parameter specifies where to report the
+// results.
+ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
+ InterceptMode intercept_mode, TestPartResultArray* result)
+ : intercept_mode_(intercept_mode),
+ result_(result) {
+ Init();
+}
+
+void ScopedFakeTestPartResultReporter::Init() {
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ if (intercept_mode_ == INTERCEPT_ALL_THREADS) {
+ old_reporter_ = impl->GetGlobalTestPartResultReporter();
+ impl->SetGlobalTestPartResultReporter(this);
+ } else {
+ old_reporter_ = impl->GetTestPartResultReporterForCurrentThread();
+ impl->SetTestPartResultReporterForCurrentThread(this);
+ }
+}
+
+// The d'tor restores the test part result reporter used by Google Test
+// before.
+ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() {
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ if (intercept_mode_ == INTERCEPT_ALL_THREADS) {
+ impl->SetGlobalTestPartResultReporter(old_reporter_);
+ } else {
+ impl->SetTestPartResultReporterForCurrentThread(old_reporter_);
+ }
+}
+
+// Increments the test part result count and remembers the result.
+// This method is from the TestPartResultReporterInterface interface.
+void ScopedFakeTestPartResultReporter::ReportTestPartResult(
+ const TestPartResult& result) {
+ result_->Append(result);
+}
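+
+// A minimal usage sketch (illustrative): while the reporter is in scope,
+// failures on the current thread are appended to the array instead of
+// being reported to the running test.
+//
+//   TestPartResultArray results;
+//   {
+//     ScopedFakeTestPartResultReporter reporter(
+//         ScopedFakeTestPartResultReporter::INTERCEPT_ONLY_CURRENT_THREAD,
+//         &results);
+//     ADD_FAILURE() << "deliberate";  // captured, not reported
+//   }
+//   // results.size() == 1 here.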
+
+namespace internal {
+
+// Returns the type ID of ::testing::Test. We should always call this
+// instead of GetTypeId< ::testing::Test>() to get the type ID of
+// testing::Test. This is to work around a suspected linker bug when
+// using Google Test as a framework on Mac OS X. The bug causes
+// GetTypeId< ::testing::Test>() to return different values depending
+// on whether the call is from the Google Test framework itself or
+// from user test code. GetTestTypeId() is guaranteed to always
+// return the same value, as it always calls GetTypeId<>() from the
+// gtest.cc, which is within the Google Test framework.
+TypeId GetTestTypeId() {
+ return GetTypeId<Test>();
+}
+
+// The value of GetTestTypeId() as seen from within the Google Test
+// library. This is solely for testing GetTestTypeId().
+extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId();
+
+// This predicate-formatter checks that 'results' contains a test part
+// failure of the given type and that the failure message contains the
+// given substring.
+AssertionResult HasOneFailure(const char* /* results_expr */,
+ const char* /* type_expr */,
+ const char* /* substr_expr */,
+ const TestPartResultArray& results,
+ TestPartResult::Type type,
+ const string& substr) {
+ const std::string expected(type == TestPartResult::kFatalFailure ?
+ "1 fatal failure" :
+ "1 non-fatal failure");
+ Message msg;
+ if (results.size() != 1) {
+ msg << "Expected: " << expected << "\n"
+ << " Actual: " << results.size() << " failures";
+ for (int i = 0; i < results.size(); i++) {
+ msg << "\n" << results.GetTestPartResult(i);
+ }
+ return AssertionFailure() << msg;
+ }
+
+ const TestPartResult& r = results.GetTestPartResult(0);
+ if (r.type() != type) {
+ return AssertionFailure() << "Expected: " << expected << "\n"
+ << " Actual:\n"
+ << r;
+ }
+
+ if (strstr(r.message(), substr.c_str()) == NULL) {
+ return AssertionFailure() << "Expected: " << expected << " containing \""
+ << substr << "\"\n"
+ << " Actual:\n"
+ << r;
+ }
+
+ return AssertionSuccess();
+}
+
+// The constructor of SingleFailureChecker remembers where to look up
+// test part results, what type of failure we expect, and what
+// substring the failure message should contain.
+SingleFailureChecker::SingleFailureChecker(
+ const TestPartResultArray* results,
+ TestPartResult::Type type,
+ const string& substr)
+ : results_(results),
+ type_(type),
+ substr_(substr) {}
+
+// The destructor of SingleFailureChecker verifies that the given
+// TestPartResultArray contains exactly one failure that has the given
+// type and contains the given substring. If that's not the case, a
+// non-fatal failure will be generated.
+SingleFailureChecker::~SingleFailureChecker() {
+ EXPECT_PRED_FORMAT3(HasOneFailure, *results_, type_, substr_);
+}
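+
+// Together with ScopedFakeTestPartResultReporter, this is the machinery
+// behind the EXPECT_FATAL_FAILURE()/EXPECT_NONFATAL_FAILURE() macros: the
+// checker is constructed before the statement under test runs and performs
+// its verification from its destructor, after the statement has finished.
+// A sketch of the idea (the real macros add more plumbing):
+//
+//   TestPartResultArray results;
+//   {
+//     SingleFailureChecker checker(&results,
+//                                  TestPartResult::kNonFatalFailure,
+//                                  "deliberate");
+//     ScopedFakeTestPartResultReporter reporter(&results);
+//     ADD_FAILURE() << "deliberate";
+//   }  // ~SingleFailureChecker verifies exactly one matching failure.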
+
+DefaultGlobalTestPartResultReporter::DefaultGlobalTestPartResultReporter(
+ UnitTestImpl* unit_test) : unit_test_(unit_test) {}
+
+void DefaultGlobalTestPartResultReporter::ReportTestPartResult(
+ const TestPartResult& result) {
+ unit_test_->current_test_result()->AddTestPartResult(result);
+ unit_test_->listeners()->repeater()->OnTestPartResult(result);
+}
+
+DefaultPerThreadTestPartResultReporter::DefaultPerThreadTestPartResultReporter(
+ UnitTestImpl* unit_test) : unit_test_(unit_test) {}
+
+void DefaultPerThreadTestPartResultReporter::ReportTestPartResult(
+ const TestPartResult& result) {
+ unit_test_->GetGlobalTestPartResultReporter()->ReportTestPartResult(result);
+}
+
+// Returns the global test part result reporter.
+TestPartResultReporterInterface*
+UnitTestImpl::GetGlobalTestPartResultReporter() {
+ internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
+ return global_test_part_result_reporter_;
+}
+
+// Sets the global test part result reporter.
+void UnitTestImpl::SetGlobalTestPartResultReporter(
+ TestPartResultReporterInterface* reporter) {
+ internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
+ global_test_part_result_reporter_ = reporter;
+}
+
+// Returns the test part result reporter for the current thread.
+TestPartResultReporterInterface*
+UnitTestImpl::GetTestPartResultReporterForCurrentThread() {
+ return per_thread_test_part_result_reporter_.get();
+}
+
+// Sets the test part result reporter for the current thread.
+void UnitTestImpl::SetTestPartResultReporterForCurrentThread(
+ TestPartResultReporterInterface* reporter) {
+ per_thread_test_part_result_reporter_.set(reporter);
+}
+
+// Gets the number of successful test cases.
+int UnitTestImpl::successful_test_case_count() const {
+ return CountIf(test_cases_, TestCasePassed);
+}
+
+// Gets the number of failed test cases.
+int UnitTestImpl::failed_test_case_count() const {
+ return CountIf(test_cases_, TestCaseFailed);
+}
+
+// Gets the number of all test cases.
+int UnitTestImpl::total_test_case_count() const {
+ return static_cast<int>(test_cases_.size());
+}
+
+// Gets the number of all test cases that contain at least one test
+// that should run.
+int UnitTestImpl::test_case_to_run_count() const {
+ return CountIf(test_cases_, ShouldRunTestCase);
+}
+
+// Gets the number of successful tests.
+int UnitTestImpl::successful_test_count() const {
+ return SumOverTestCaseList(test_cases_, &TestCase::successful_test_count);
+}
+
+// Gets the number of failed tests.
+int UnitTestImpl::failed_test_count() const {
+ return SumOverTestCaseList(test_cases_, &TestCase::failed_test_count);
+}
+
+// Gets the number of disabled tests that will be reported in the XML report.
+int UnitTestImpl::reportable_disabled_test_count() const {
+ return SumOverTestCaseList(test_cases_,
+ &TestCase::reportable_disabled_test_count);
+}
+
+// Gets the number of disabled tests.
+int UnitTestImpl::disabled_test_count() const {
+ return SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count);
+}
+
+// Gets the number of tests to be printed in the XML report.
+int UnitTestImpl::reportable_test_count() const {
+ return SumOverTestCaseList(test_cases_, &TestCase::reportable_test_count);
+}
+
+// Gets the number of all tests.
+int UnitTestImpl::total_test_count() const {
+ return SumOverTestCaseList(test_cases_, &TestCase::total_test_count);
+}
+
+// Gets the number of tests that should run.
+int UnitTestImpl::test_to_run_count() const {
+ return SumOverTestCaseList(test_cases_, &TestCase::test_to_run_count);
+}
+
+// Returns the current OS stack trace as an std::string.
+//
+// The maximum number of stack frames to be included is specified by
+// the gtest_stack_trace_depth flag. The skip_count parameter
+// specifies the number of top frames to be skipped, which doesn't
+// count against the number of frames to be included.
+//
+// For example, if Foo() calls Bar(), which in turn calls
+// CurrentOsStackTraceExceptTop(1), Foo() will be included in the
+// trace but Bar() and CurrentOsStackTraceExceptTop() won't.
+std::string UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) {
+ (void)skip_count;
+ return "";
+}
+
+// Returns the current time in milliseconds.
+TimeInMillis GetTimeInMillis() {
+#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__)
+ // Difference between 1970-01-01 and 1601-01-01 in milliseconds.
+ // http://analogous.blogspot.com/2005/04/epoch.html
+ const TimeInMillis kJavaEpochToWinFileTimeDelta =
+ static_cast<TimeInMillis>(116444736UL) * 100000UL;
+ const DWORD kTenthMicrosInMilliSecond = 10000;
+
+ SYSTEMTIME now_systime;
+ FILETIME now_filetime;
+ ULARGE_INTEGER now_int64;
+ // TODO(kenton@google.com): Shouldn't this just use
+ // GetSystemTimeAsFileTime()?
+ GetSystemTime(&now_systime);
+ if (SystemTimeToFileTime(&now_systime, &now_filetime)) {
+ now_int64.LowPart = now_filetime.dwLowDateTime;
+ now_int64.HighPart = now_filetime.dwHighDateTime;
+ now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) -
+ kJavaEpochToWinFileTimeDelta;
+ return now_int64.QuadPart;
+ }
+ return 0;
+#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_
+ __timeb64 now;
+
+# ifdef _MSC_VER
+
+ // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996
+ // (deprecated function) there.
+ // TODO(kenton@google.com): Use GetTickCount()? Or use
+ // SystemTimeToFileTime()
+# pragma warning(push) // Saves the current warning state.
+# pragma warning(disable:4996) // Temporarily disables warning 4996.
+ _ftime64(&now);
+# pragma warning(pop) // Restores the warning state.
+# else
+
+ _ftime64(&now);
+
+# endif // _MSC_VER
+
+ return static_cast<TimeInMillis>(now.time) * 1000 + now.millitm;
+#elif GTEST_HAS_GETTIMEOFDAY_
+ struct timeval now;
+ gettimeofday(&now, NULL);
+ return static_cast<TimeInMillis>(now.tv_sec) * 1000 + now.tv_usec / 1000;
+#else
+# error "Don't know how to get the current time on your system."
+#endif
+}
+
+// Utilities
+
+// class String.
+
+#if GTEST_OS_WINDOWS_MOBILE
+// Creates a UTF-16 wide string from the given ANSI string, allocating
+// memory using new. The caller is responsible for deleting the return
+// value using delete[]. Returns the wide string, or NULL if the
+// input is NULL.
+LPCWSTR String::AnsiToUtf16(const char* ansi) {
+ if (!ansi) return NULL;
+ const int length = strlen(ansi);
+ const int unicode_length =
+ MultiByteToWideChar(CP_ACP, 0, ansi, length,
+ NULL, 0);
+ WCHAR* unicode = new WCHAR[unicode_length + 1];
+ MultiByteToWideChar(CP_ACP, 0, ansi, length,
+ unicode, unicode_length);
+ unicode[unicode_length] = 0;
+ return unicode;
+}
+
+// Creates an ANSI string from the given wide string, allocating
+// memory using new. The caller is responsible for deleting the return
+// value using delete[]. Returns the ANSI string, or NULL if the
+// input is NULL.
+const char* String::Utf16ToAnsi(LPCWSTR utf16_str) {
+ if (!utf16_str) return NULL;
+ const int ansi_length =
+ WideCharToMultiByte(CP_ACP, 0, utf16_str, -1,
+ NULL, 0, NULL, NULL);
+ char* ansi = new char[ansi_length + 1];
+ WideCharToMultiByte(CP_ACP, 0, utf16_str, -1,
+ ansi, ansi_length, NULL, NULL);
+ ansi[ansi_length] = 0;
+ return ansi;
+}
+
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+// Compares two C strings. Returns true iff they have the same content.
+//
+// Unlike strcmp(), this function can handle NULL argument(s). A NULL
+// C string is considered different to any non-NULL C string,
+// including the empty string.
+bool String::CStringEquals(const char * lhs, const char * rhs) {
+ if ( lhs == NULL ) return rhs == NULL;
+
+ if ( rhs == NULL ) return false;
+
+ return strcmp(lhs, rhs) == 0;
+}
+
+#if GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING
+
+// Converts an array of wide chars to a narrow string using the UTF-8
+// encoding, and streams the result to the given Message object.
+static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length,
+ Message* msg) {
+ for (size_t i = 0; i != length; ) { // NOLINT
+ if (wstr[i] != L'\0') {
+ *msg << WideStringToUtf8(wstr + i, static_cast<int>(length - i));
+ while (i != length && wstr[i] != L'\0')
+ i++;
+ } else {
+ *msg << '\0';
+ i++;
+ }
+ }
+}
+
+#endif // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING
+
+} // namespace internal
+
+// Constructs an empty Message.
+// We allocate the stringstream separately because otherwise each use of
+// ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's
+// stack frame, leading to huge stack frames in some cases; gcc does not
+// reuse the stack space.
+Message::Message() : ss_(new ::std::stringstream) {
+ // By default, we want there to be enough precision when printing
+ // a double to a Message.
+ *ss_ << std::setprecision(std::numeric_limits<double>::digits10 + 2);
+}
+
+// These two overloads allow streaming a wide C string to a Message
+// using the UTF-8 encoding.
+Message& Message::operator <<(const wchar_t* wide_c_str) {
+ return *this << internal::String::ShowWideCString(wide_c_str);
+}
+Message& Message::operator <<(wchar_t* wide_c_str) {
+ return *this << internal::String::ShowWideCString(wide_c_str);
+}
+
+#if GTEST_HAS_STD_WSTRING
+// Converts the given wide string to a narrow string using the UTF-8
+// encoding, and streams the result to this Message object.
+Message& Message::operator <<(const ::std::wstring& wstr) {
+ internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this);
+ return *this;
+}
+#endif // GTEST_HAS_STD_WSTRING
+
+#if GTEST_HAS_GLOBAL_WSTRING
+// Converts the given wide string to a narrow string using the UTF-8
+// encoding, and streams the result to this Message object.
+Message& Message::operator <<(const ::wstring& wstr) {
+ internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this);
+ return *this;
+}
+#endif // GTEST_HAS_GLOBAL_WSTRING
+
+// Gets the text streamed to this object so far as an std::string.
+// Each '\0' character in the buffer is replaced with "\\0".
+std::string Message::GetString() const {
+ return internal::StringStreamToString(ss_.get());
+}
+
+// AssertionResult constructors.
+// Used in EXPECT_TRUE/FALSE(assertion_result).
+AssertionResult::AssertionResult(const AssertionResult& other)
+ : success_(other.success_),
+ message_(other.message_.get() != NULL ?
+ new ::std::string(*other.message_) :
+ static_cast< ::std::string*>(NULL)) {
+}
+
+// Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
+AssertionResult AssertionResult::operator!() const {
+ AssertionResult negation(!success_);
+ if (message_.get() != NULL)
+ negation << *message_;
+ return negation;
+}
+
+// Makes a successful assertion result.
+AssertionResult AssertionSuccess() {
+ return AssertionResult(true);
+}
+
+// Makes a failed assertion result.
+AssertionResult AssertionFailure() {
+ return AssertionResult(false);
+}
+
+// Makes a failed assertion result with the given failure message.
+// Deprecated; use AssertionFailure() << message.
+AssertionResult AssertionFailure(const Message& message) {
+ return AssertionFailure() << message;
+}
+
+namespace internal {
+
+// Constructs and returns the message for an equality assertion
+// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
+//
+// The first four parameters are the expressions used in the assertion
+// and their values, as strings. For example, for ASSERT_EQ(foo, bar)
+// where foo is 5 and bar is 6, we have:
+//
+// expected_expression: "foo"
+// actual_expression: "bar"
+// expected_value: "5"
+// actual_value: "6"
+//
+// The ignoring_case parameter is true iff the assertion is a
+// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will
+// be inserted into the message.
+AssertionResult EqFailure(const char* expected_expression,
+ const char* actual_expression,
+ const std::string& expected_value,
+ const std::string& actual_value,
+ bool ignoring_case) {
+ Message msg;
+ msg << "Value of: " << actual_expression;
+ if (actual_value != actual_expression) {
+ msg << "\n Actual: " << actual_value;
+ }
+
+ msg << "\nExpected: " << expected_expression;
+ if (ignoring_case) {
+ msg << " (ignoring case)";
+ }
+ if (expected_value != expected_expression) {
+ msg << "\nWhich is: " << expected_value;
+ }
+
+ return AssertionFailure() << msg;
+}
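+
+// For example, ASSERT_EQ(foo, bar) with foo == 5 and bar == 6 produces a
+// failure message of the form:
+//
+//   Value of: bar
+//     Actual: 6
+//   Expected: foo
+//   Which is: 5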
+
+// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
+std::string GetBoolAssertionFailureMessage(
+ const AssertionResult& assertion_result,
+ const char* expression_text,
+ const char* actual_predicate_value,
+ const char* expected_predicate_value) {
+ const char* actual_message = assertion_result.message();
+ Message msg;
+ msg << "Value of: " << expression_text
+ << "\n Actual: " << actual_predicate_value;
+ if (actual_message[0] != '\0')
+ msg << " (" << actual_message << ")";
+ msg << "\nExpected: " << expected_predicate_value;
+ return msg.GetString();
+}
+
+// Helper function for implementing ASSERT_NEAR.
+AssertionResult DoubleNearPredFormat(const char* expr1,
+ const char* expr2,
+ const char* abs_error_expr,
+ double val1,
+ double val2,
+ double abs_error) {
+ const double diff = fabs(val1 - val2);
+ if (diff <= abs_error) return AssertionSuccess();
+
+ // TODO(wan): do not print the value of an expression if it's
+ // already a literal.
+ return AssertionFailure()
+ << "The difference between " << expr1 << " and " << expr2
+ << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n"
+ << expr1 << " evaluates to " << val1 << ",\n"
+ << expr2 << " evaluates to " << val2 << ", and\n"
+ << abs_error_expr << " evaluates to " << abs_error << ".";
+}
+
+
+// Helper template for implementing FloatLE() and DoubleLE().
+template <typename RawType>
+AssertionResult FloatingPointLE(const char* expr1,
+ const char* expr2,
+ RawType val1,
+ RawType val2) {
+ // Returns success if val1 is less than val2,
+ if (val1 < val2) {
+ return AssertionSuccess();
+ }
+
+ // or if val1 is almost equal to val2.
+ const FloatingPoint<RawType> lhs(val1), rhs(val2);
+ if (lhs.AlmostEquals(rhs)) {
+ return AssertionSuccess();
+ }
+
+ // Note that the above two checks will both fail if either val1 or
+ // val2 is NaN, as the IEEE floating-point standard requires that
+ // any predicate involving a NaN must return false.
+
+ ::std::stringstream val1_ss;
+ val1_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+ << val1;
+
+ ::std::stringstream val2_ss;
+ val2_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+ << val2;
+
+ return AssertionFailure()
+ << "Expected: (" << expr1 << ") <= (" << expr2 << ")\n"
+ << " Actual: " << StringStreamToString(&val1_ss) << " vs "
+ << StringStreamToString(&val2_ss);
+}
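+
+// Note: AlmostEquals() above measures the distance between the two values
+// in ULPs (units in the last place), so "almost equal" means within a
+// small fixed number of representable values of each other (4 ULPs in
+// Google Test), not within an absolute epsilon.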
+
+} // namespace internal
+
+// Asserts that val1 is less than, or almost equal to, val2. Fails
+// otherwise. In particular, it fails if either val1 or val2 is NaN.
+AssertionResult FloatLE(const char* expr1, const char* expr2,
+ float val1, float val2) {
+ return internal::FloatingPointLE<float>(expr1, expr2, val1, val2);
+}
+
+// Asserts that val1 is less than, or almost equal to, val2. Fails
+// otherwise. In particular, it fails if either val1 or val2 is NaN.
+AssertionResult DoubleLE(const char* expr1, const char* expr2,
+ double val1, double val2) {
+ return internal::FloatingPointLE<double>(expr1, expr2, val1, val2);
+}
+
+namespace internal {
+
+// The helper function for {ASSERT|EXPECT}_EQ with int or enum
+// arguments.
+AssertionResult CmpHelperEQ(const char* expected_expression,
+ const char* actual_expression,
+ BiggestInt expected,
+ BiggestInt actual) {
+ if (expected == actual) {
+ return AssertionSuccess();
+ }
+
+ return EqFailure(expected_expression,
+ actual_expression,
+ FormatForComparisonFailureMessage(expected, actual),
+ FormatForComparisonFailureMessage(actual, expected),
+ false);
+}
+
+// A macro for implementing the helper functions needed to implement
+// ASSERT_?? and EXPECT_?? with integer or enum arguments. It is here
+// just to avoid copy-and-paste of similar code.
+#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
+AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
+ BiggestInt val1, BiggestInt val2) {\
+ if (val1 op val2) {\
+ return AssertionSuccess();\
+ } else {\
+ return AssertionFailure() \
+ << "Expected: (" << expr1 << ") " #op " (" << expr2\
+ << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
+ << " vs " << FormatForComparisonFailureMessage(val2, val1);\
+ }\
+}
+
+// Implements the helper function for {ASSERT|EXPECT}_NE with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(NE, !=)
+// Implements the helper function for {ASSERT|EXPECT}_LE with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(LE, <=)
+// Implements the helper function for {ASSERT|EXPECT}_LT with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(LT, < )
+// Implements the helper function for {ASSERT|EXPECT}_GE with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(GE, >=)
+// Implements the helper function for {ASSERT|EXPECT}_GT with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(GT, > )
+
+#undef GTEST_IMPL_CMP_HELPER_
+
+// The helper function for {ASSERT|EXPECT}_STREQ.
+AssertionResult CmpHelperSTREQ(const char* expected_expression,
+ const char* actual_expression,
+ const char* expected,
+ const char* actual) {
+ if (String::CStringEquals(expected, actual)) {
+ return AssertionSuccess();
+ }
+
+ return EqFailure(expected_expression,
+ actual_expression,
+ PrintToString(expected),
+ PrintToString(actual),
+ false);
+}
+
+// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
+AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression,
+ const char* actual_expression,
+ const char* expected,
+ const char* actual) {
+ if (String::CaseInsensitiveCStringEquals(expected, actual)) {
+ return AssertionSuccess();
+ }
+
+ return EqFailure(expected_expression,
+ actual_expression,
+ PrintToString(expected),
+ PrintToString(actual),
+ true);
+}
+
+// The helper function for {ASSERT|EXPECT}_STRNE.
+AssertionResult CmpHelperSTRNE(const char* s1_expression,
+ const char* s2_expression,
+ const char* s1,
+ const char* s2) {
+ if (!String::CStringEquals(s1, s2)) {
+ return AssertionSuccess();
+ } else {
+ return AssertionFailure() << "Expected: (" << s1_expression << ") != ("
+ << s2_expression << "), actual: \""
+ << s1 << "\" vs \"" << s2 << "\"";
+ }
+}
+
+// The helper function for {ASSERT|EXPECT}_STRCASENE.
+AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
+ const char* s2_expression,
+ const char* s1,
+ const char* s2) {
+ if (!String::CaseInsensitiveCStringEquals(s1, s2)) {
+ return AssertionSuccess();
+ } else {
+ return AssertionFailure()
+ << "Expected: (" << s1_expression << ") != ("
+ << s2_expression << ") (ignoring case), actual: \""
+ << s1 << "\" vs \"" << s2 << "\"";
+ }
+}
+
+} // namespace internal
+
+namespace {
+
+// Helper functions for implementing IsSubString() and IsNotSubstring().
+
+// This group of overloaded functions returns true iff needle is a
+// substring of haystack. NULL is considered a substring of itself
+// only.
+
+bool IsSubstringPred(const char* needle, const char* haystack) {
+ if (needle == NULL || haystack == NULL)
+ return needle == haystack;
+
+ return strstr(haystack, needle) != NULL;
+}
+
+bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) {
+ if (needle == NULL || haystack == NULL)
+ return needle == haystack;
+
+ return wcsstr(haystack, needle) != NULL;
+}
+
+// StringType here can be either ::std::string or ::std::wstring.
+template <typename StringType>
+bool IsSubstringPred(const StringType& needle,
+ const StringType& haystack) {
+ return haystack.find(needle) != StringType::npos;
+}
+
+// This function implements either IsSubstring() or IsNotSubstring(),
+// depending on the value of the expected_to_be_substring parameter.
+// StringType here can be const char*, const wchar_t*, ::std::string,
+// or ::std::wstring.
+template <typename StringType>
+AssertionResult IsSubstringImpl(
+ bool expected_to_be_substring,
+ const char* needle_expr, const char* haystack_expr,
+ const StringType& needle, const StringType& haystack) {
+ if (IsSubstringPred(needle, haystack) == expected_to_be_substring)
+ return AssertionSuccess();
+
+ const bool is_wide_string = sizeof(needle[0]) > 1;
+ const char* const begin_string_quote = is_wide_string ? "L\"" : "\"";
+ return AssertionFailure()
+ << "Value of: " << needle_expr << "\n"
+ << " Actual: " << begin_string_quote << needle << "\"\n"
+ << "Expected: " << (expected_to_be_substring ? "" : "not ")
+ << "a substring of " << haystack_expr << "\n"
+ << "Which is: " << begin_string_quote << haystack << "\"";
+}
+
+} // namespace
+
+// IsSubstring() and IsNotSubstring() check whether needle is a
+// substring of haystack (NULL is considered a substring of itself
+// only), and return an appropriate error message when they fail.
+
+AssertionResult IsSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const char* needle, const char* haystack) {
+ return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const wchar_t* needle, const wchar_t* haystack) {
+ return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsNotSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const char* needle, const char* haystack) {
+ return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsNotSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const wchar_t* needle, const wchar_t* haystack) {
+ return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const ::std::string& needle, const ::std::string& haystack) {
+ return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsNotSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const ::std::string& needle, const ::std::string& haystack) {
+ return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
+}
+
+#if GTEST_HAS_STD_WSTRING
+AssertionResult IsSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const ::std::wstring& needle, const ::std::wstring& haystack) {
+ return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsNotSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const ::std::wstring& needle, const ::std::wstring& haystack) {
+ return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
+}
+#endif // GTEST_HAS_STD_WSTRING
+
+namespace internal {
+
+#if GTEST_OS_WINDOWS
+
+namespace {
+
+// Helper function for the IsHRESULT{Success,Failure} predicates.
+AssertionResult HRESULTFailureHelper(const char* expr,
+ const char* expected,
+ long hr) { // NOLINT
+# if GTEST_OS_WINDOWS_MOBILE
+
+ // Windows CE doesn't support FormatMessage.
+ const char error_text[] = "";
+
+# else
+
+ // Looks up the human-readable system message for the HRESULT code.
+ // Since we're not passing any params to FormatMessage, we don't want
+ // inserts expanded.
+ const DWORD kFlags = FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_IGNORE_INSERTS;
+ const DWORD kBufSize = 4096;
+ // Gets the system's human readable message string for this HRESULT.
+ char error_text[kBufSize] = { '\0' };
+ DWORD message_length = ::FormatMessageA(kFlags,
+ 0, // no source, we're asking system
+ hr, // the error
+ 0, // no line width restrictions
+ error_text, // output buffer
+ kBufSize, // buf size
+ NULL); // no arguments for inserts
+ // Trims trailing whitespace (FormatMessage leaves a trailing CR-LF).
+ for (; message_length && IsSpace(error_text[message_length - 1]);
+ --message_length) {
+ error_text[message_length - 1] = '\0';
+ }
+
+# endif // GTEST_OS_WINDOWS_MOBILE
+
+ const std::string error_hex("0x" + String::FormatHexInt(hr));
+ return ::testing::AssertionFailure()
+ << "Expected: " << expr << " " << expected << ".\n"
+ << " Actual: " << error_hex << " " << error_text << "\n";
+}
+
+} // namespace
+
+AssertionResult IsHRESULTSuccess(const char* expr, long hr) { // NOLINT
+ if (SUCCEEDED(hr)) {
+ return AssertionSuccess();
+ }
+ return HRESULTFailureHelper(expr, "succeeds", hr);
+}
+
+AssertionResult IsHRESULTFailure(const char* expr, long hr) { // NOLINT
+ if (FAILED(hr)) {
+ return AssertionSuccess();
+ }
+ return HRESULTFailureHelper(expr, "fails", hr);
+}
+
+#endif // GTEST_OS_WINDOWS
+
+// Utility functions for encoding Unicode text (wide strings) in
+// UTF-8.
+
+// A Unicode code-point can have up to 21 bits, and is encoded in UTF-8
+// like this:
+//
+// Code-point length Encoding
+// 0 - 7 bits 0xxxxxxx
+// 8 - 11 bits 110xxxxx 10xxxxxx
+// 12 - 16 bits 1110xxxx 10xxxxxx 10xxxxxx
+// 17 - 21 bits 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+// The maximum code-point a one-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint1 = (static_cast<UInt32>(1) << 7) - 1;
+
+// The maximum code-point a two-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint2 = (static_cast<UInt32>(1) << (5 + 6)) - 1;
+
+// The maximum code-point a three-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint3 = (static_cast<UInt32>(1) << (4 + 2*6)) - 1;
+
+// The maximum code-point a four-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint4 = (static_cast<UInt32>(1) << (3 + 3*6)) - 1;
+
+// Chops off the n lowest bits from a bit pattern. Returns the n
+// lowest bits. As a side effect, the original bit pattern will be
+// shifted to the right by n bits.
+inline UInt32 ChopLowBits(UInt32* bits, int n) {
+ const UInt32 low_bits = *bits & ((static_cast<UInt32>(1) << n) - 1);
+ *bits >>= n;
+ return low_bits;
+}
+
+// Converts a Unicode code point to a narrow string in UTF-8 encoding.
+// The code_point parameter is of type UInt32 because wchar_t may not be
+// wide enough to contain a code point.
+// If the code_point is not a valid Unicode code point
+// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
+// to "(Invalid Unicode 0xXXXXXXXX)".
+std::string CodePointToUtf8(UInt32 code_point) {
+ if (code_point > kMaxCodePoint4) {
+ return "(Invalid Unicode 0x" + String::FormatHexInt(code_point) + ")";
+ }
+
+ char str[5]; // Big enough for the largest valid code point.
+ if (code_point <= kMaxCodePoint1) {
+ str[1] = '\0';
+ str[0] = static_cast<char>(code_point); // 0xxxxxxx
+ } else if (code_point <= kMaxCodePoint2) {
+ str[2] = '\0';
+ str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
+ str[0] = static_cast<char>(0xC0 | code_point); // 110xxxxx
+ } else if (code_point <= kMaxCodePoint3) {
+ str[3] = '\0';
+ str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
+ str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
+ str[0] = static_cast<char>(0xE0 | code_point); // 1110xxxx
+ } else { // code_point <= kMaxCodePoint4
+ str[4] = '\0';
+ str[3] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
+ str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
+ str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx
+ str[0] = static_cast<char>(0xF0 | code_point); // 11110xxx
+ }
+ return str;
+}
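+
+// A worked example: U+20AC (the euro sign) is 0010 0000 1010 1100 in
+// binary, which needs the three-byte form. ChopLowBits() peels off the two
+// low six-bit groups (101100, then 000010), leaving 0010 for the lead
+// byte, so CodePointToUtf8(0x20AC) returns "\xE2\x82\xAC".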
+
+// The following two functions only make sense if the system
+// uses UTF-16 for wide string encoding. All supported systems
+// with a 16-bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16.
+
+// Determines if the arguments constitute a UTF-16 surrogate pair
+// and thus should be combined into a single Unicode code point
+// using CreateCodePointFromUtf16SurrogatePair.
+inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) {
+ return sizeof(wchar_t) == 2 &&
+ (first & 0xFC00) == 0xD800 && (second & 0xFC00) == 0xDC00;
+}
+
+// Creates a Unicode code point from a UTF-16 surrogate pair.
+inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first,
+ wchar_t second) {
+ const UInt32 mask = (1 << 10) - 1;
+ return (sizeof(wchar_t) == 2) ?
+ (((first & mask) << 10) | (second & mask)) + 0x10000 :
+ // This function should not be called when the condition is
+ // false, but we provide a sensible default in case it is.
+ static_cast<UInt32>(first);
+}
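+
+// Illustration (editor's sketch, not part of the original source): the
+// surrogate pair <0xD83D, 0xDE00> encodes U+1F600 on 16-bit-wchar_t
+// platforms:
+//
+//   CreateCodePointFromUtf16SurrogatePair(0xD83D, 0xDE00) == 0x1F600
+//
+// since (((0xD83D & 0x3FF) << 10) | (0xDE00 & 0x3FF)) + 0x10000 == 0x1F600.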
+
+// Converts a wide string to a narrow string in UTF-8 encoding.
+// The wide string is assumed to have the following encoding:
+// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
+// UTF-32 if sizeof(wchar_t) == 4 (on Linux)
+// Parameter str points to a null-terminated wide string.
+// Parameter num_chars may additionally limit the number
+// of wchar_t characters processed. -1 is used when the entire string
+// should be processed.
+// If the string contains code points that are not valid Unicode code points
+// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
+// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF-16 encoding
+// and contains invalid UTF-16 surrogate pairs, values in those pairs
+// will be encoded as individual Unicode characters from the Basic
+// Multilingual Plane.
+std::string WideStringToUtf8(const wchar_t* str, int num_chars) {
+ if (num_chars == -1)
+ num_chars = static_cast<int>(wcslen(str));
+
+ ::std::stringstream stream;
+ for (int i = 0; i < num_chars; ++i) {
+ UInt32 unicode_code_point;
+
+ if (str[i] == L'\0') {
+ break;
+ } else if (i + 1 < num_chars && IsUtf16SurrogatePair(str[i], str[i + 1])) {
+ unicode_code_point = CreateCodePointFromUtf16SurrogatePair(str[i],
+ str[i + 1]);
+ i++;
+ } else {
+ unicode_code_point = static_cast<UInt32>(str[i]);
+ }
+
+ stream << CodePointToUtf8(unicode_code_point);
+ }
+ return StringStreamToString(&stream);
+}
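+
+// Illustration (editor's sketch, not part of the original source):
+//
+//   WideStringToUtf8(L"a\x20AC", -1) == "a\xE2\x82\xAC"
+//
+// On platforms where wchar_t is 16 bits, a valid surrogate pair in the
+// input is first combined into one code point, so it is emitted as a
+// single four-byte UTF-8 sequence rather than two invalid sequences.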
+
+// Converts a wide C string to an std::string using the UTF-8 encoding.
+// NULL will be converted to "(null)".
+std::string String::ShowWideCString(const wchar_t * wide_c_str) {
+ if (wide_c_str == NULL) return "(null)";
+
+ return internal::WideStringToUtf8(wide_c_str, -1);
+}
+
+// Compares two wide C strings. Returns true iff they have the same
+// content.
+//
+// Unlike wcscmp(), this function can handle NULL argument(s). A NULL
+// C string is considered different to any non-NULL C string,
+// including the empty string.
+bool String::WideCStringEquals(const wchar_t * lhs, const wchar_t * rhs) {
+ if (lhs == NULL) return rhs == NULL;
+
+ if (rhs == NULL) return false;
+
+ return wcscmp(lhs, rhs) == 0;
+}
+
+// Helper function for *_STREQ on wide strings.
+AssertionResult CmpHelperSTREQ(const char* expected_expression,
+ const char* actual_expression,
+ const wchar_t* expected,
+ const wchar_t* actual) {
+ if (String::WideCStringEquals(expected, actual)) {
+ return AssertionSuccess();
+ }
+
+ return EqFailure(expected_expression,
+ actual_expression,
+ PrintToString(expected),
+ PrintToString(actual),
+ false);
+}
+
+// Helper function for *_STRNE on wide strings.
+AssertionResult CmpHelperSTRNE(const char* s1_expression,
+ const char* s2_expression,
+ const wchar_t* s1,
+ const wchar_t* s2) {
+ if (!String::WideCStringEquals(s1, s2)) {
+ return AssertionSuccess();
+ }
+
+ return AssertionFailure() << "Expected: (" << s1_expression << ") != ("
+ << s2_expression << "), actual: "
+ << PrintToString(s1)
+ << " vs " << PrintToString(s2);
+}
+
+// Compares two C strings, ignoring case. Returns true iff they have
+// the same content.
+//
+// Unlike strcasecmp(), this function can handle NULL argument(s). A
+// NULL C string is considered different to any non-NULL C string,
+// including the empty string.
+bool String::CaseInsensitiveCStringEquals(const char * lhs, const char * rhs) {
+ if (lhs == NULL)
+ return rhs == NULL;
+ if (rhs == NULL)
+ return false;
+ return posix::StrCaseCmp(lhs, rhs) == 0;
+}
+
+// Compares two wide C strings, ignoring case. Returns true iff they
+// have the same content.
+//
+// Unlike wcscasecmp(), this function can handle NULL argument(s).
+// A NULL C string is considered different to any non-NULL wide C string,
+// including the empty string.
+// NB: The implementations on different platforms differ slightly.
+// On Windows, this method uses _wcsicmp, which compares according to the
+// LC_CTYPE environment variable. On GNU platforms this method uses
+// wcscasecmp, which compares according to the LC_CTYPE category of the
+// current locale. On Mac OS X, it uses towlower, which also uses the
+// LC_CTYPE category of the current locale.
+bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
+ const wchar_t* rhs) {
+ if (lhs == NULL) return rhs == NULL;
+
+ if (rhs == NULL) return false;
+
+#if GTEST_OS_WINDOWS
+ return _wcsicmp(lhs, rhs) == 0;
+#elif GTEST_OS_LINUX && !GTEST_OS_LINUX_ANDROID
+ return wcscasecmp(lhs, rhs) == 0;
+#else
+ // Android, Mac OS X and Cygwin don't define wcscasecmp.
+ // Other unknown OSes may not define it either.
+ wint_t left, right;
+ do {
+ left = towlower(*lhs++);
+ right = towlower(*rhs++);
+ } while (left && left == right);
+ return left == right;
+#endif // OS selector
+}
+
+// Returns true iff str ends with the given suffix, ignoring case.
+// Any string is considered to end with an empty suffix.
+bool String::EndsWithCaseInsensitive(
+ const std::string& str, const std::string& suffix) {
+ const size_t str_len = str.length();
+ const size_t suffix_len = suffix.length();
+ return (str_len >= suffix_len) &&
+ CaseInsensitiveCStringEquals(str.c_str() + str_len - suffix_len,
+ suffix.c_str());
+}
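+
+// Illustration (editor's sketch, not part of the original source):
+//
+//   String::EndsWithCaseInsensitive("FooTest.XML", ".xml")  // true
+//   String::EndsWithCaseInsensitive("anything", "")         // true
+//   String::EndsWithCaseInsensitive("x", "xml")             // false: suffix is longer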
+
+// Formats an int value as "%02d".
+std::string String::FormatIntWidth2(int value) {
+ std::stringstream ss;
+ ss << std::setfill('0') << std::setw(2) << value;
+ return ss.str();
+}
+
+// Formats an int value as "%X".
+std::string String::FormatHexInt(int value) {
+ std::stringstream ss;
+ ss << std::hex << std::uppercase << value;
+ return ss.str();
+}
+
+// Formats a byte as "%02X".
+std::string String::FormatByte(unsigned char value) {
+ std::stringstream ss;
+ ss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase
+ << static_cast<unsigned int>(value);
+ return ss.str();
+}
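+
+// Illustration (editor's sketch, not part of the original source) of the
+// three formatters above:
+//
+//   String::FormatIntWidth2(7)  == "07"
+//   String::FormatHexInt(3930)  == "F5A"
+//   String::FormatByte('\n')    == "0A"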
+
+// Converts the buffer in a stringstream to an std::string, converting NUL
+// bytes to "\\0" along the way.
+std::string StringStreamToString(::std::stringstream* ss) {
+ const ::std::string& str = ss->str();
+ const char* const start = str.c_str();
+ const char* const end = start + str.length();
+
+ std::string result;
+ result.reserve(2 * (end - start));
+ for (const char* ch = start; ch != end; ++ch) {
+ if (*ch == '\0') {
+ result += "\\0"; // Replaces NUL with "\\0";
+ } else {
+ result += *ch;
+ }
+ }
+
+ return result;
+}
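+
+// Illustration (editor's sketch, not part of the original source): the
+// escaping keeps embedded NULs visible in test output:
+//
+//   ::std::stringstream ss;
+//   ss << 'a' << '\0' << 'b';
+//   StringStreamToString(&ss) == "a\\0b"  // i.e. the 4 characters a \ 0 b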
+
+// Appends the user-supplied message to the Google-Test-generated message.
+std::string AppendUserMessage(const std::string& gtest_msg,
+ const Message& user_msg) {
+ // Appends the user message if it's non-empty.
+ const std::string user_msg_string = user_msg.GetString();
+ if (user_msg_string.empty()) {
+ return gtest_msg;
+ }
+
+ return gtest_msg + "\n" + user_msg_string;
+}
+
+} // namespace internal
+
+// class TestResult
+
+// Creates an empty TestResult.
+TestResult::TestResult()
+ : death_test_count_(0),
+ elapsed_time_(0) {
+}
+
+// D'tor.
+TestResult::~TestResult() {
+}
+
+// Returns the i-th test part result among all the results. i can
+// range from 0 to total_part_count() - 1. If i is not in that range,
+// aborts the program.
+const TestPartResult& TestResult::GetTestPartResult(int i) const {
+ if (i < 0 || i >= total_part_count())
+ internal::posix::Abort();
+ return test_part_results_.at(i);
+}
+
+// Returns the i-th test property. i can range from 0 to
+// test_property_count() - 1. If i is not in that range, aborts the
+// program.
+const TestProperty& TestResult::GetTestProperty(int i) const {
+ if (i < 0 || i >= test_property_count())
+ internal::posix::Abort();
+ return test_properties_.at(i);
+}
+
+// Clears the test part results.
+void TestResult::ClearTestPartResults() {
+ test_part_results_.clear();
+}
+
+// Adds a test part result to the list.
+void TestResult::AddTestPartResult(const TestPartResult& test_part_result) {
+ test_part_results_.push_back(test_part_result);
+}
+
+// Adds a test property to the list. If a property with the same key as the
+// supplied property is already represented, the value of this test_property
+// replaces the old value for that key.
+void TestResult::RecordProperty(const std::string& xml_element,
+ const TestProperty& test_property) {
+ if (!ValidateTestProperty(xml_element, test_property)) {
+ return;
+ }
+ internal::MutexLock lock(&test_properites_mutex_);
+ const std::vector<TestProperty>::iterator property_with_matching_key =
+ std::find_if(test_properties_.begin(), test_properties_.end(),
+ internal::TestPropertyKeyIs(test_property.key()));
+ if (property_with_matching_key == test_properties_.end()) {
+ test_properties_.push_back(test_property);
+ return;
+ }
+ property_with_matching_key->SetValue(test_property.value());
+}
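+
+// Illustration (editor's sketch, not part of the original source): recording
+// a property twice under the same key keeps one entry with the newer value,
+// so the XML report would show bug="456" here:
+//
+//   result.RecordProperty("testcase", TestProperty("bug", "123"));
+//   result.RecordProperty("testcase", TestProperty("bug", "456"));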
+
+// The list of reserved attributes used in the <testsuites> element of XML
+// output.
+static const char* const kReservedTestSuitesAttributes[] = {
+ "disabled",
+ "errors",
+ "failures",
+ "name",
+ "random_seed",
+ "tests",
+ "time",
+ "timestamp"
+};
+
+// The list of reserved attributes used in the <testsuite> element of XML
+// output.
+static const char* const kReservedTestSuiteAttributes[] = {
+ "disabled",
+ "errors",
+ "failures",
+ "name",
+ "tests",
+ "time"
+};
+
+// The list of reserved attributes used in the <testcase> element of XML output.
+static const char* const kReservedTestCaseAttributes[] = {
+ "classname",
+ "name",
+ "status",
+ "time",
+ "type_param",
+ "value_param"
+};
+
+template <int kSize>
+std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
+ return std::vector<std::string>(array, array + kSize);
+}
+
+static std::vector<std::string> GetReservedAttributesForElement(
+ const std::string& xml_element) {
+ if (xml_element == "testsuites") {
+ return ArrayAsVector(kReservedTestSuitesAttributes);
+ } else if (xml_element == "testsuite") {
+ return ArrayAsVector(kReservedTestSuiteAttributes);
+ } else if (xml_element == "testcase") {
+ return ArrayAsVector(kReservedTestCaseAttributes);
+ } else {
+ GTEST_CHECK_(false) << "Unrecognized xml_element provided: " << xml_element;
+ }
+ // This code is unreachable but some compilers may not realize that.
+ return std::vector<std::string>();
+}
+
+static std::string FormatWordList(const std::vector<std::string>& words) {
+ Message word_list;
+ for (size_t i = 0; i < words.size(); ++i) {
+ if (i > 0) {
+ // Separate items with commas only when there are three or more
+ // words; a two-word list is joined with a plain space before "and".
+ word_list << (words.size() > 2 ? ", " : " ");
+ }
+ if (i == words.size() - 1 && words.size() > 1) {
+ word_list << "and ";
+ }
+ word_list << "'" << words[i] << "'";
+ }
+ return word_list.GetString();
+}
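+
+// Illustration (editor's sketch, not part of the original source):
+//
+//   FormatWordList({"a"})           == "'a'"
+//   FormatWordList({"a", "b"})      == "'a' and 'b'"
+//   FormatWordList({"a", "b", "c"}) == "'a', 'b', and 'c'"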
+
+bool ValidateTestPropertyName(const std::string& property_name,
+ const std::vector<std::string>& reserved_names) {
+ if (std::find(reserved_names.begin(), reserved_names.end(), property_name) !=
+ reserved_names.end()) {
+ ADD_FAILURE() << "Reserved key used in RecordProperty(): " << property_name
+ << " (" << FormatWordList(reserved_names)
+ << " are reserved by " << GTEST_NAME_ << ")";
+ return false;
+ }
+ return true;
+}
+
+// Adds a failure if the key is a reserved attribute of the element named
+// xml_element. Returns true if the property is valid.
+bool TestResult::ValidateTestProperty(const std::string& xml_element,
+ const TestProperty& test_property) {
+ return ValidateTestPropertyName(test_property.key(),
+ GetReservedAttributesForElement(xml_element));
+}
+
+// Clears the object.
+void TestResult::Clear() {
+ test_part_results_.clear();
+ test_properties_.clear();
+ death_test_count_ = 0;
+ elapsed_time_ = 0;
+}
+
+// Returns true iff the test failed.
+bool TestResult::Failed() const {
+ for (int i = 0; i < total_part_count(); ++i) {
+ if (GetTestPartResult(i).failed())
+ return true;
+ }
+ return false;
+}
+
+// Returns true iff the test part fatally failed.
+static bool TestPartFatallyFailed(const TestPartResult& result) {
+ return result.fatally_failed();
+}
+
+// Returns true iff the test fatally failed.
+bool TestResult::HasFatalFailure() const {
+ return CountIf(test_part_results_, TestPartFatallyFailed) > 0;
+}
+
+// Returns true iff the test part non-fatally failed.
+static bool TestPartNonfatallyFailed(const TestPartResult& result) {
+ return result.nonfatally_failed();
+}
+
+// Returns true iff the test has a non-fatal failure.
+bool TestResult::HasNonfatalFailure() const {
+ return CountIf(test_part_results_, TestPartNonfatallyFailed) > 0;
+}
+
+// Gets the number of all test parts. This is the sum of the number
+// of successful test parts and the number of failed test parts.
+int TestResult::total_part_count() const {
+ return static_cast<int>(test_part_results_.size());
+}
+
+// Returns the number of the test properties.
+int TestResult::test_property_count() const {
+ return static_cast<int>(test_properties_.size());
+}
+
+// class Test
+
+// Creates a Test object.
+
+// The c'tor saves the values of all Google Test flags.
+Test::Test()
+ : gtest_flag_saver_(new internal::GTestFlagSaver) {
+}
+
+// The d'tor restores the values of all Google Test flags.
+Test::~Test() {
+ delete gtest_flag_saver_;
+}
+
+// Sets up the test fixture.
+//
+// A sub-class may override this.
+void Test::SetUp() {
+}
+
+// Tears down the test fixture.
+//
+// A sub-class may override this.
+void Test::TearDown() {
+}
+
+// Allows user supplied key value pairs to be recorded for later output.
+void Test::RecordProperty(const std::string& key, const std::string& value) {
+ UnitTest::GetInstance()->RecordProperty(key, value);
+}
+
+// Allows user supplied key value pairs to be recorded for later output.
+void Test::RecordProperty(const std::string& key, int value) {
+ Message value_message;
+ value_message << value;
+ RecordProperty(key, value_message.GetString().c_str());
+}
+
+namespace internal {
+
+void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
+ const std::string& message) {
+ // This function is a friend of UnitTest and as such has access to
+ // AddTestPartResult.
+ UnitTest::GetInstance()->AddTestPartResult(
+ result_type,
+ NULL, // No info about the source file where the exception occurred.
+ -1, // We have no info on which line caused the exception.
+ message,
+ ""); // No stack trace, either.
+}
+
+} // namespace internal
+
+// Google Test requires all tests in the same test case to use the same test
+// fixture class. This function checks if the current test has the
+// same fixture class as the first test in the current test case. If
+// yes, it returns true; otherwise it generates a Google Test failure and
+// returns false.
+bool Test::HasSameFixtureClass() {
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ const TestCase* const test_case = impl->current_test_case();
+
+ // Info about the first test in the current test case.
+ const TestInfo* const first_test_info = test_case->test_info_list()[0];
+ const internal::TypeId first_fixture_id = first_test_info->fixture_class_id_;
+ const char* const first_test_name = first_test_info->name();
+
+ // Info about the current test.
+ const TestInfo* const this_test_info = impl->current_test_info();
+ const internal::TypeId this_fixture_id = this_test_info->fixture_class_id_;
+ const char* const this_test_name = this_test_info->name();
+
+ if (this_fixture_id != first_fixture_id) {
+ // Is the first test defined using TEST?
+ const bool first_is_TEST = first_fixture_id == internal::GetTestTypeId();
+ // Is this test defined using TEST?
+ const bool this_is_TEST = this_fixture_id == internal::GetTestTypeId();
+
+ if (first_is_TEST || this_is_TEST) {
+ // The user mixed TEST and TEST_F in this test case - we'll tell
+ // him/her how to fix it.
+
+ // Gets the name of the TEST and the name of the TEST_F. Note
+ // that first_is_TEST and this_is_TEST cannot both be true, as
+ // the fixture IDs are different for the two tests.
+ const char* const TEST_name =
+ first_is_TEST ? first_test_name : this_test_name;
+ const char* const TEST_F_name =
+ first_is_TEST ? this_test_name : first_test_name;
+
+ ADD_FAILURE()
+ << "All tests in the same test case must use the same test fixture\n"
+ << "class, so mixing TEST_F and TEST in the same test case is\n"
+ << "illegal. In test case " << this_test_info->test_case_name()
+ << ",\n"
+ << "test " << TEST_F_name << " is defined using TEST_F but\n"
+ << "test " << TEST_name << " is defined using TEST. You probably\n"
+ << "want to change the TEST to TEST_F or move it to another test\n"
+ << "case.";
+ } else {
+ // The user defined two fixture classes with the same name in
+ // two namespaces - we'll tell him/her how to fix it.
+ ADD_FAILURE()
+ << "All tests in the same test case must use the same test fixture\n"
+ << "class. However, in test case "
+ << this_test_info->test_case_name() << ",\n"
+ << "you defined test " << first_test_name
+ << " and test " << this_test_name << "\n"
+ << "using two different test fixture classes. This can happen if\n"
+ << "the two classes are from different namespaces or translation\n"
+ << "units and have the same name. You should probably rename one\n"
+ << "of the classes to put the tests into different test cases.";
+ }
+ return false;
+ }
+
+ return true;
+}
+
+#if GTEST_HAS_SEH
+
+// Adds an "exception thrown" fatal failure to the current test. This
+// function returns its result via an output parameter pointer because VC++
+// prohibits creation of objects with destructors on stack in functions
+// using __try (see error C2712).
+static std::string* FormatSehExceptionMessage(DWORD exception_code,
+ const char* location) {
+ Message message;
+ message << "SEH exception with code 0x" << std::setbase(16) <<
+ exception_code << std::setbase(10) << " thrown in " << location << ".";
+
+ return new std::string(message.GetString());
+}
+
+#endif // GTEST_HAS_SEH
+
+namespace internal {
+
+#if GTEST_HAS_EXCEPTIONS
+
+// Adds an "exception thrown" fatal failure to the current test.
+static std::string FormatCxxExceptionMessage(const char* description,
+ const char* location) {
+ Message message;
+ if (description != NULL) {
+ message << "C++ exception with description \"" << description << "\"";
+ } else {
+ message << "Unknown C++ exception";
+ }
+ message << " thrown in " << location << ".";
+
+ return message.GetString();
+}
+
+static std::string PrintTestPartResultToString(
+ const TestPartResult& test_part_result);
+
+GoogleTestFailureException::GoogleTestFailureException(
+ const TestPartResult& failure)
+ : ::std::runtime_error(PrintTestPartResultToString(failure).c_str()) {}
+
+#endif // GTEST_HAS_EXCEPTIONS
+
+// We put these helper functions in the internal namespace as IBM's xlC
+// compiler rejects the code if they are declared static.
+
+// Runs the given method and handles SEH exceptions it throws, when
+// SEH is supported; returns the 0-value for type Result in case of an
+// SEH exception. (Microsoft compilers cannot handle SEH and C++
+// exceptions in the same function. Therefore, we provide a separate
+// wrapper function for handling SEH exceptions.)
+template <class T, typename Result>
+Result HandleSehExceptionsInMethodIfSupported(
+ T* object, Result (T::*method)(), const char* location) {
+#if GTEST_HAS_SEH
+ __try {
+ return (object->*method)();
+ } __except (internal::UnitTestOptions::GTestShouldProcessSEH( // NOLINT
+ GetExceptionCode())) {
+ // We create the exception message on the heap because VC++ prohibits
+ // creation of objects with destructors on stack in functions using __try
+ // (see error C2712).
+ std::string* exception_message = FormatSehExceptionMessage(
+ GetExceptionCode(), location);
+ internal::ReportFailureInUnknownLocation(TestPartResult::kFatalFailure,
+ *exception_message);
+ delete exception_message;
+ return static_cast<Result>(0);
+ }
+#else
+ (void)location;
+ return (object->*method)();
+#endif // GTEST_HAS_SEH
+}
+
+// Runs the given method and catches and reports C++ and/or SEH-style
+// exceptions, if they are supported; returns the 0-value for type
+// Result in case of an SEH exception.
+template <class T, typename Result>
+Result HandleExceptionsInMethodIfSupported(
+ T* object, Result (T::*method)(), const char* location) {
+ // NOTE: The user code can affect the way in which Google Test handles
+ // exceptions by setting GTEST_FLAG(catch_exceptions), but only before
+ // RUN_ALL_TESTS() starts. It is technically possible to check the flag
+ // after the exception is caught and either report or re-throw the
+ // exception based on the flag's value:
+ //
+ // try {
+ // // Perform the test method.
+ // } catch (...) {
+ // if (GTEST_FLAG(catch_exceptions))
+ // // Report the exception as failure.
+ // else
+ // throw; // Re-throws the original exception.
+ // }
+ //
+ // However, the purpose of this flag is to allow the program to drop into
+ // the debugger when the exception is thrown. On most platforms, once the
+ // control enters the catch block, the exception origin information is
+ // lost and the debugger will stop the program at the point of the
+ // re-throw in this function -- instead of at the point of the original
+ // throw statement in the code under test. For this reason, we perform
+ // the check early, sacrificing the ability to affect Google Test's
+ // exception handling in the method where the exception is thrown.
+ if (internal::GetUnitTestImpl()->catch_exceptions()) {
+#if GTEST_HAS_EXCEPTIONS
+ try {
+ return HandleSehExceptionsInMethodIfSupported(object, method, location);
+ } catch (const internal::GoogleTestFailureException&) { // NOLINT
+ // This exception type can only be thrown by a failed Google
+ // Test assertion with the intention of letting another testing
+ // framework catch it. Therefore we just re-throw it.
+ throw;
+ } catch (const std::exception& e) { // NOLINT
+ internal::ReportFailureInUnknownLocation(
+ TestPartResult::kFatalFailure,
+ FormatCxxExceptionMessage(e.what(), location));
+ } catch (...) { // NOLINT
+ internal::ReportFailureInUnknownLocation(
+ TestPartResult::kFatalFailure,
+ FormatCxxExceptionMessage(NULL, location));
+ }
+ return static_cast<Result>(0);
+#else
+ return HandleSehExceptionsInMethodIfSupported(object, method, location);
+#endif // GTEST_HAS_EXCEPTIONS
+ } else {
+ return (object->*method)();
+ }
+}
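+
+// Illustration (editor's sketch, not part of the original source): callers
+// pass an object, a pointer to member, and a human-readable location, e.g.
+//
+//   internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()");
+//
+// so an exception escaping SetUp() is reported as a fatal failure
+// "... thrown in SetUp()." instead of propagating out of the test runner.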
+
+} // namespace internal
+
+// Runs the test and updates the test result.
+void Test::Run() {
+ if (!HasSameFixtureClass()) return;
+
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+ internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()");
+ // We will run the test only if SetUp() was successful.
+ if (!HasFatalFailure()) {
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+ internal::HandleExceptionsInMethodIfSupported(
+ this, &Test::TestBody, "the test body");
+ }
+
+ // However, we want to clean up as much as possible. Hence we will
+ // always call TearDown(), even if SetUp() or the test body has
+ // failed.
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+ internal::HandleExceptionsInMethodIfSupported(
+ this, &Test::TearDown, "TearDown()");
+}
+
+// Returns true iff the current test has a fatal failure.
+bool Test::HasFatalFailure() {
+ return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure();
+}
+
+// Returns true iff the current test has a non-fatal failure.
+bool Test::HasNonfatalFailure() {
+ return internal::GetUnitTestImpl()->current_test_result()->
+ HasNonfatalFailure();
+}
+
+// class TestInfo
+
+// Constructs a TestInfo object. It assumes ownership of the test factory
+// object.
+TestInfo::TestInfo(const std::string& a_test_case_name,
+ const std::string& a_name,
+ const char* a_type_param,
+ const char* a_value_param,
+ internal::TypeId fixture_class_id,
+ internal::TestFactoryBase* factory)
+ : test_case_name_(a_test_case_name),
+ name_(a_name),
+ type_param_(a_type_param ? new std::string(a_type_param) : NULL),
+ value_param_(a_value_param ? new std::string(a_value_param) : NULL),
+ fixture_class_id_(fixture_class_id),
+ should_run_(false),
+ is_disabled_(false),
+ matches_filter_(false),
+ factory_(factory),
+ result_() {}
+
+// Destructs a TestInfo object.
+TestInfo::~TestInfo() { delete factory_; }
+
+namespace internal {
+
+// Creates a new TestInfo object and registers it with Google Test;
+// returns the created object.
+//
+// Arguments:
+//
+// test_case_name: name of the test case
+// name: name of the test
+// type_param: the name of the test's type parameter, or NULL if
+// this is not a typed or a type-parameterized test.
+// value_param: text representation of the test's value parameter,
+// or NULL if this is not a value-parameterized test.
+// fixture_class_id: ID of the test fixture class
+// set_up_tc: pointer to the function that sets up the test case
+// tear_down_tc: pointer to the function that tears down the test case
+// factory: pointer to the factory that creates a test object.
+// The newly created TestInfo instance will assume
+// ownership of the factory object.
+TestInfo* MakeAndRegisterTestInfo(
+ const char* test_case_name,
+ const char* name,
+ const char* type_param,
+ const char* value_param,
+ TypeId fixture_class_id,
+ SetUpTestCaseFunc set_up_tc,
+ TearDownTestCaseFunc tear_down_tc,
+ TestFactoryBase* factory) {
+ TestInfo* const test_info =
+ new TestInfo(test_case_name, name, type_param, value_param,
+ fixture_class_id, factory);
+ GetUnitTestImpl()->AddTestInfo(set_up_tc, tear_down_tc, test_info);
+ return test_info;
+}
+
+#if GTEST_HAS_PARAM_TEST
+void ReportInvalidTestCaseType(const char* test_case_name,
+ const char* file, int line) {
+ Message errors;
+ errors
+ << "Attempted redefinition of test case " << test_case_name << ".\n"
+ << "All tests in the same test case must use the same test fixture\n"
+ << "class. However, in test case " << test_case_name << ", you tried\n"
+ << "to define a test using a fixture class different from the one\n"
+ << "used earlier. This can happen if the two fixture classes are\n"
+ << "from different namespaces and have the same name. You should\n"
+ << "probably rename one of the classes to put the tests into different\n"
+ << "test cases.";
+
+ fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(),
+ errors.GetString().c_str());
+}
+#endif // GTEST_HAS_PARAM_TEST
+
+} // namespace internal
+
+namespace {
+
+// A predicate that checks the test name of a TestInfo against a known
+// value.
+//
+// This is used for implementation of the TestCase class only. We put
+// it in the anonymous namespace to prevent polluting the outer
+// namespace.
+//
+// TestNameIs is copyable.
+
+// Commenting out this class since it's not used and therefore produces warnings.
+// class TestNameIs {
+// public:
+// // Constructor.
+// //
+// // TestNameIs has NO default constructor.
+// explicit TestNameIs(const char* name)
+// : name_(name) {}
+//
+// // Returns true iff the test name of test_info matches name_.
+// bool operator()(const TestInfo * test_info) const {
+// return test_info && test_info->name() == name_;
+// }
+//
+// private:
+// std::string name_;
+//};
+
+} // namespace
+
+namespace internal {
+
+// This method expands all parameterized tests registered with macros TEST_P
+// and INSTANTIATE_TEST_CASE_P into regular tests and registers those.
+// This will be done just once during the program runtime.
+void UnitTestImpl::RegisterParameterizedTests() {
+#if GTEST_HAS_PARAM_TEST
+ if (!parameterized_tests_registered_) {
+ parameterized_test_registry_.RegisterTests();
+ parameterized_tests_registered_ = true;
+ }
+#endif
+}
+
+} // namespace internal
+
+// Creates the test object, runs it, records its result, and then
+// deletes it.
+void TestInfo::Run() {
+ if (!should_run_) return;
+
+ // Tells UnitTest where to store test result.
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ impl->set_current_test_info(this);
+
+ TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+
+ // Notifies the unit test event listeners that a test is about to start.
+ repeater->OnTestStart(*this);
+
+ const TimeInMillis start = internal::GetTimeInMillis();
+
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+
+ // Creates the test object.
+ Test* const test = internal::HandleExceptionsInMethodIfSupported(
+ factory_, &internal::TestFactoryBase::CreateTest,
+ "the test fixture's constructor");
+
+ // Runs the test only if the test object was created and its
+ // constructor didn't generate a fatal failure.
+ if ((test != NULL) && !Test::HasFatalFailure()) {
+ // This doesn't throw as all user code that can throw is wrapped in
+ // exception handling code.
+ test->Run();
+ }
+
+ // Deletes the test object.
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+ internal::HandleExceptionsInMethodIfSupported(
+ test, &Test::DeleteSelf_, "the test fixture's destructor");
+
+ result_.set_elapsed_time(internal::GetTimeInMillis() - start);
+
+ // Notifies the unit test event listener that a test has just finished.
+ repeater->OnTestEnd(*this);
+
+ // Tells UnitTest to stop associating assertion results to this
+ // test.
+ impl->set_current_test_info(NULL);
+}
+
+// class TestCase
+
+// Gets the number of successful tests in this test case.
+int TestCase::successful_test_count() const {
+ return CountIf(test_info_list_, TestPassed);
+}
+
+// Gets the number of failed tests in this test case.
+int TestCase::failed_test_count() const {
+ return CountIf(test_info_list_, TestFailed);
+}
+
+// Gets the number of disabled tests that will be reported in the XML report.
+int TestCase::reportable_disabled_test_count() const {
+ return CountIf(test_info_list_, TestReportableDisabled);
+}
+
+// Gets the number of disabled tests in this test case.
+int TestCase::disabled_test_count() const {
+ return CountIf(test_info_list_, TestDisabled);
+}
+
+// Gets the number of tests to be printed in the XML report.
+int TestCase::reportable_test_count() const {
+ return CountIf(test_info_list_, TestReportable);
+}
+
+// Get the number of tests in this test case that should run.
+int TestCase::test_to_run_count() const {
+ return CountIf(test_info_list_, ShouldRunTest);
+}
+
+// Gets the number of all tests.
+int TestCase::total_test_count() const {
+ return static_cast<int>(test_info_list_.size());
+}
+
+// Creates a TestCase with the given name.
+//
+// Arguments:
+//
+// name: name of the test case
+// a_type_param: the name of the test case's type parameter, or NULL if
+// this is not a typed or a type-parameterized test case.
+// set_up_tc: pointer to the function that sets up the test case
+// tear_down_tc: pointer to the function that tears down the test case
+TestCase::TestCase(const char* a_name, const char* a_type_param,
+ Test::SetUpTestCaseFunc set_up_tc,
+ Test::TearDownTestCaseFunc tear_down_tc)
+ : name_(a_name),
+ type_param_(a_type_param ? new std::string(a_type_param) : NULL),
+ set_up_tc_(set_up_tc),
+ tear_down_tc_(tear_down_tc),
+ should_run_(false),
+ elapsed_time_(0) {
+}
+
+// Destructor of TestCase.
+TestCase::~TestCase() {
+ // Deletes every Test in the collection.
+ ForEach(test_info_list_, internal::Delete<TestInfo>);
+}
+
+// Returns the i-th test among all the tests. i can range from 0 to
+// total_test_count() - 1. If i is not in that range, returns NULL.
+const TestInfo* TestCase::GetTestInfo(int i) const {
+ const int index = GetElementOr(test_indices_, i, -1);
+ return index < 0 ? NULL : test_info_list_[index];
+}
+
+// Returns the i-th test among all the tests. i can range from 0 to
+// total_test_count() - 1. If i is not in that range, returns NULL.
+TestInfo* TestCase::GetMutableTestInfo(int i) {
+ const int index = GetElementOr(test_indices_, i, -1);
+ return index < 0 ? NULL : test_info_list_[index];
+}
+
+// Adds a test to this test case. Will delete the test upon
+// destruction of the TestCase object.
+void TestCase::AddTestInfo(TestInfo * test_info) {
+ test_info_list_.push_back(test_info);
+ test_indices_.push_back(static_cast<int>(test_indices_.size()));
+}
+
+// Runs every test in this TestCase.
+void TestCase::Run() {
+ if (!should_run_) return;
+
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ impl->set_current_test_case(this);
+
+ TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+
+ repeater->OnTestCaseStart(*this);
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+ internal::HandleExceptionsInMethodIfSupported(
+ this, &TestCase::RunSetUpTestCase, "SetUpTestCase()");
+
+ const internal::TimeInMillis start = internal::GetTimeInMillis();
+ for (int i = 0; i < total_test_count(); i++) {
+ GetMutableTestInfo(i)->Run();
+ }
+ elapsed_time_ = internal::GetTimeInMillis() - start;
+
+ impl->os_stack_trace_getter()->UponLeavingGTest();
+ internal::HandleExceptionsInMethodIfSupported(
+ this, &TestCase::RunTearDownTestCase, "TearDownTestCase()");
+
+ repeater->OnTestCaseEnd(*this);
+ impl->set_current_test_case(NULL);
+}
+
+// Clears the results of all tests in this test case.
+void TestCase::ClearResult() {
+ ad_hoc_test_result_.Clear();
+ ForEach(test_info_list_, TestInfo::ClearTestResult);
+}
+
+// Shuffles the tests in this test case.
+void TestCase::ShuffleTests(internal::Random* random) {
+ Shuffle(random, &test_indices_);
+}
+
+// Restores the test order to before the first shuffle.
+void TestCase::UnshuffleTests() {
+ for (size_t i = 0; i < test_indices_.size(); i++) {
+ test_indices_[i] = static_cast<int>(i);
+ }
+}
+
+// Formats a countable noun. Depending on its quantity, either the
+// singular form or the plural form is used. e.g.
+//
+// FormatCountableNoun(1, "formula", "formuli") returns "1 formula".
+// FormatCountableNoun(5, "book", "books") returns "5 books".
+static std::string FormatCountableNoun(int count,
+ const char * singular_form,
+ const char * plural_form) {
+ return internal::StreamableToString(count) + " " +
+ (count == 1 ? singular_form : plural_form);
+}
+
+// Formats the count of tests.
+static std::string FormatTestCount(int test_count) {
+ return FormatCountableNoun(test_count, "test", "tests");
+}
+
+// Formats the count of test cases.
+static std::string FormatTestCaseCount(int test_case_count) {
+ return FormatCountableNoun(test_case_count, "test case", "test cases");
+}
+
+// Converts a TestPartResult::Type enum to human-friendly string
+// representation. Both kNonFatalFailure and kFatalFailure are translated
+// to "Failure", as the user usually doesn't care about the difference
+// between the two when viewing the test result.
+static const char * TestPartResultTypeToString(TestPartResult::Type type) {
+ switch (type) {
+ case TestPartResult::kSuccess:
+ return "Success";
+
+ case TestPartResult::kNonFatalFailure:
+ case TestPartResult::kFatalFailure:
+#ifdef _MSC_VER
+ return "error: ";
+#else
+ return "Failure\n";
+#endif
+ default:
+ return "Unknown result type";
+ }
+}
+
+namespace internal {
+
+// Prints a TestPartResult to an std::string.
+static std::string PrintTestPartResultToString(
+ const TestPartResult& test_part_result) {
+ return (Message()
+ << internal::FormatFileLocation(test_part_result.file_name(),
+ test_part_result.line_number())
+ << " " << TestPartResultTypeToString(test_part_result.type())
+ << test_part_result.message()).GetString();
+}
+
+// Prints a TestPartResult.
+static void PrintTestPartResult(const TestPartResult& test_part_result) {
+ const std::string& result =
+ PrintTestPartResultToString(test_part_result);
+ printf("%s\n", result.c_str());
+ fflush(stdout);
+ // If the test program runs in Visual Studio or a debugger, the
+ // following statements add the test part result message to the Output
+ // window such that the user can double-click on it to jump to the
+ // corresponding source code location; otherwise they do nothing.
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
+ // We don't call OutputDebugString*() on Windows Mobile, as printing
+ // to stdout is done by OutputDebugString() there already - we don't
+ // want the same message printed twice.
+ ::OutputDebugStringA(result.c_str());
+ ::OutputDebugStringA("\n");
+#endif
+}
+
+// class PrettyUnitTestResultPrinter
+
+enum GTestColor {
+ COLOR_DEFAULT,
+ COLOR_RED,
+ COLOR_GREEN,
+ COLOR_YELLOW
+};
+
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
+
+// Returns the character attribute for the given color.
+WORD GetColorAttribute(GTestColor color) {
+ switch (color) {
+ case COLOR_RED: return FOREGROUND_RED;
+ case COLOR_GREEN: return FOREGROUND_GREEN;
+ case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
+ default: return 0;
+ }
+}
+
+#else
+
+// Returns the ANSI color code for the given color. COLOR_DEFAULT is
+// an invalid input.
+const char* GetAnsiColorCode(GTestColor color) {
+ switch (color) {
+ case COLOR_RED: return "1";
+ case COLOR_GREEN: return "2";
+ case COLOR_YELLOW: return "3";
+ default: return NULL;
+ }
+}
+
+#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
+
+// Returns true iff Google Test should use colors in the output.
+bool ShouldUseColor(bool stdout_is_tty) {
+ const char* const gtest_color = GTEST_FLAG(color).c_str();
+
+ if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) {
+#if GTEST_OS_WINDOWS
+ // On Windows the TERM variable is usually not set, but the
+ // console there does support colors.
+ return stdout_is_tty;
+#else
+ // On non-Windows platforms, we rely on the TERM variable.
+ const char* const term = posix::GetEnv("TERM");
+ const bool term_supports_color =
+ String::CStringEquals(term, "xterm") ||
+ String::CStringEquals(term, "xterm-color") ||
+ String::CStringEquals(term, "xterm-256color") ||
+ String::CStringEquals(term, "screen") ||
+ String::CStringEquals(term, "screen-256color") ||
+ String::CStringEquals(term, "linux") ||
+ String::CStringEquals(term, "cygwin");
+ return stdout_is_tty && term_supports_color;
+#endif // GTEST_OS_WINDOWS
+ }
+
+ return String::CaseInsensitiveCStringEquals(gtest_color, "yes") ||
+ String::CaseInsensitiveCStringEquals(gtest_color, "true") ||
+ String::CaseInsensitiveCStringEquals(gtest_color, "t") ||
+ String::CStringEquals(gtest_color, "1");
+ // We take "yes", "true", "t", and "1" as meaning "yes". If the
+ // value is neither one of these nor "auto", we treat it as "no" to
+ // be conservative.
+}
+
+// Helpers for printing colored strings to stdout. Note that on Windows, we
+// cannot simply emit special characters and have the terminal change colors.
+// This routine must actually emit the characters rather than return a string
+// that would be colored when printed, as can be done on Linux.
+void ColoredPrintf(GTestColor color, const char* fmt, ...) {
+ va_list args;
+ va_start(args, fmt);
+
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS || GTEST_OS_IOS
+ const bool use_color = false;
+#else
+ // The '!= 0' comparison is necessary to satisfy MSVC 7.1.
+ static const bool in_color_mode =
+ ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0);
+ const bool use_color = in_color_mode && (color != COLOR_DEFAULT);
+#endif // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS || GTEST_OS_IOS
+
+ if (!use_color) {
+ vprintf(fmt, args);
+ va_end(args);
+ return;
+ }
+
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
+ const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
+
+ // Gets the current text color.
+ CONSOLE_SCREEN_BUFFER_INFO buffer_info;
+ GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
+ const WORD old_color_attrs = buffer_info.wAttributes;
+
+ // We need to flush the stream buffers into the console before each
+ // SetConsoleTextAttribute call lest it affect the text that is already
+ // printed but has not yet reached the console.
+ fflush(stdout);
+ SetConsoleTextAttribute(stdout_handle,
+ GetColorAttribute(color) | FOREGROUND_INTENSITY);
+ vprintf(fmt, args);
+
+ fflush(stdout);
+ // Restores the text color.
+ SetConsoleTextAttribute(stdout_handle, old_color_attrs);
+#else
+ printf("\033[0;3%sm", GetAnsiColorCode(color));
+ vprintf(fmt, args);
+ printf("\033[m"); // Resets the terminal to default.
+#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
+ va_end(args);
+}
+
+// Text printed in Google Test's text output and --gtest_list_tests
+// output to label the type parameter and value parameter for a test.
+static const char kTypeParamLabel[] = "TypeParam";
+static const char kValueParamLabel[] = "GetParam()";
+
+void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
+ const char* const type_param = test_info.type_param();
+ const char* const value_param = test_info.value_param();
+
+ if (type_param != NULL || value_param != NULL) {
+ printf(", where ");
+ if (type_param != NULL) {
+ printf("%s = %s", kTypeParamLabel, type_param);
+ if (value_param != NULL)
+ printf(" and ");
+ }
+ if (value_param != NULL) {
+ printf("%s = %s", kValueParamLabel, value_param);
+ }
+ }
+}
+
+// This class implements the TestEventListener interface.
+//
+// Class PrettyUnitTestResultPrinter is copyable.
+class PrettyUnitTestResultPrinter : public TestEventListener {
+ public:
+ PrettyUnitTestResultPrinter() {}
+ static void PrintTestName(const char * test_case, const char * test) {
+ printf("%s.%s", test_case, test);
+ }
+
+ // The following methods override what's in the TestEventListener class.
+ virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
+ virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
+ virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
+ virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
+ virtual void OnTestCaseStart(const TestCase& test_case);
+ virtual void OnTestStart(const TestInfo& test_info);
+ virtual void OnTestPartResult(const TestPartResult& result);
+ virtual void OnTestEnd(const TestInfo& test_info);
+ virtual void OnTestCaseEnd(const TestCase& test_case);
+ virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
+ virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
+ virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
+ virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}
+
+ private:
+ static void PrintFailedTests(const UnitTest& unit_test);
+};
+
+// Fired before each iteration of tests starts.
+void PrettyUnitTestResultPrinter::OnTestIterationStart(
+ const UnitTest& unit_test, int iteration) {
+ if (GTEST_FLAG(repeat) != 1)
+ printf("\nRepeating all tests (iteration %d) . . .\n\n", iteration + 1);
+
+ const char* const filter = GTEST_FLAG(filter).c_str();
+
+ // Prints the filter if it's not *. This reminds the user that some
+ // tests may be skipped.
+ if (!String::CStringEquals(filter, kUniversalFilter)) {
+ ColoredPrintf(COLOR_YELLOW,
+ "Note: %s filter = %s\n", GTEST_NAME_, filter);
+ }
+
+ if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) {
+ const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
+ ColoredPrintf(COLOR_YELLOW,
+ "Note: This is test shard %d of %s.\n",
+ static_cast<int>(shard_index) + 1,
+ internal::posix::GetEnv(kTestTotalShards));
+ }
+
+ if (GTEST_FLAG(shuffle)) {
+ ColoredPrintf(COLOR_YELLOW,
+ "Note: Randomizing tests' orders with a seed of %d .\n",
+ unit_test.random_seed());
+ }
+
+ ColoredPrintf(COLOR_GREEN, "[==========] ");
+ printf("Running %s from %s.\n",
+ FormatTestCount(unit_test.test_to_run_count()).c_str(),
+ FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str());
+ fflush(stdout);
+}
+
+void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart(
+ const UnitTest& /*unit_test*/) {
+ ColoredPrintf(COLOR_GREEN, "[----------] ");
+ printf("Global test environment set-up.\n");
+ fflush(stdout);
+}
+
+void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) {
+ const std::string counts =
+ FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
+ ColoredPrintf(COLOR_GREEN, "[----------] ");
+ printf("%s from %s", counts.c_str(), test_case.name());
+ if (test_case.type_param() == NULL) {
+ printf("\n");
+ } else {
+ printf(", where %s = %s\n", kTypeParamLabel, test_case.type_param());
+ }
+ fflush(stdout);
+}
+
+void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) {
+ ColoredPrintf(COLOR_GREEN, "[ RUN ] ");
+ PrintTestName(test_info.test_case_name(), test_info.name());
+ printf("\n");
+ fflush(stdout);
+}
+
+// Called after an assertion failure.
+void PrettyUnitTestResultPrinter::OnTestPartResult(
+ const TestPartResult& result) {
+ // If the test part succeeded, we don't need to do anything.
+ if (result.type() == TestPartResult::kSuccess)
+ return;
+
+ // Print failure message from the assertion (e.g. expected this and got that).
+ PrintTestPartResult(result);
+ fflush(stdout);
+}
+
+void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
+ if (test_info.result()->Passed()) {
+ ColoredPrintf(COLOR_GREEN, "[ OK ] ");
+ } else {
+ ColoredPrintf(COLOR_RED, "[ FAILED ] ");
+ }
+ PrintTestName(test_info.test_case_name(), test_info.name());
+ if (test_info.result()->Failed())
+ PrintFullTestCommentIfPresent(test_info);
+
+ if (GTEST_FLAG(print_time)) {
+ printf(" (%s ms)\n", internal::StreamableToString(
+ test_info.result()->elapsed_time()).c_str());
+ } else {
+ printf("\n");
+ }
+ fflush(stdout);
+}
+
+void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) {
+ if (!GTEST_FLAG(print_time)) return;
+
+ const std::string counts =
+ FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
+ ColoredPrintf(COLOR_GREEN, "[----------] ");
+ printf("%s from %s (%s ms total)\n\n",
+ counts.c_str(), test_case.name(),
+ internal::StreamableToString(test_case.elapsed_time()).c_str());
+ fflush(stdout);
+}
+
+void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart(
+ const UnitTest& /*unit_test*/) {
+ ColoredPrintf(COLOR_GREEN, "[----------] ");
+ printf("Global test environment tear-down\n");
+ fflush(stdout);
+}
+
+// Internal helper for printing the list of failed tests.
+void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) {
+ const int failed_test_count = unit_test.failed_test_count();
+ if (failed_test_count == 0) {
+ return;
+ }
+
+ for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
+ const TestCase& test_case = *unit_test.GetTestCase(i);
+ if (!test_case.should_run() || (test_case.failed_test_count() == 0)) {
+ continue;
+ }
+ for (int j = 0; j < test_case.total_test_count(); ++j) {
+ const TestInfo& test_info = *test_case.GetTestInfo(j);
+ if (!test_info.should_run() || test_info.result()->Passed()) {
+ continue;
+ }
+ ColoredPrintf(COLOR_RED, "[ FAILED ] ");
+ printf("%s.%s", test_case.name(), test_info.name());
+ PrintFullTestCommentIfPresent(test_info);
+ printf("\n");
+ }
+ }
+}
+
+void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
+ int /*iteration*/) {
+ ColoredPrintf(COLOR_GREEN, "[==========] ");
+ printf("%s from %s ran.",
+ FormatTestCount(unit_test.test_to_run_count()).c_str(),
+ FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str());
+ if (GTEST_FLAG(print_time)) {
+ printf(" (%s ms total)",
+ internal::StreamableToString(unit_test.elapsed_time()).c_str());
+ }
+ printf("\n");
+ ColoredPrintf(COLOR_GREEN, "[ PASSED ] ");
+ printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str());
+
+ int num_failures = unit_test.failed_test_count();
+ if (!unit_test.Passed()) {
+ const int failed_test_count = unit_test.failed_test_count();
+ ColoredPrintf(COLOR_RED, "[ FAILED ] ");
+ printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str());
+ PrintFailedTests(unit_test);
+ printf("\n%2d FAILED %s\n", num_failures,
+ num_failures == 1 ? "TEST" : "TESTS");
+ }
+
+ int num_disabled = unit_test.reportable_disabled_test_count();
+ if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) {
+ if (!num_failures) {
+ printf("\n"); // Add a spacer if no FAILURE banner is displayed.
+ }
+ ColoredPrintf(COLOR_YELLOW,
+ " YOU HAVE %d DISABLED %s\n\n",
+ num_disabled,
+ num_disabled == 1 ? "TEST" : "TESTS");
+ }
+ // Ensure that Google Test output is printed before, e.g., heapchecker output.
+ fflush(stdout);
+}
+
+// End PrettyUnitTestResultPrinter
+
+// class TestEventRepeater
+//
+// This class forwards events to other event listeners.
+class TestEventRepeater : public TestEventListener {
+ public:
+ TestEventRepeater() : forwarding_enabled_(true) {}
+ virtual ~TestEventRepeater();
+ void Append(TestEventListener *listener);
+ TestEventListener* Release(TestEventListener* listener);
+
+ // Controls whether events will be forwarded to listeners_. Set to false
+ // in death test child processes.
+ bool forwarding_enabled() const { return forwarding_enabled_; }
+ void set_forwarding_enabled(bool enable) { forwarding_enabled_ = enable; }
+
+ virtual void OnTestProgramStart(const UnitTest& unit_test);
+ virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
+ virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
+ virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test);
+ virtual void OnTestCaseStart(const TestCase& test_case);
+ virtual void OnTestStart(const TestInfo& test_info);
+ virtual void OnTestPartResult(const TestPartResult& result);
+ virtual void OnTestEnd(const TestInfo& test_info);
+ virtual void OnTestCaseEnd(const TestCase& test_case);
+ virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
+ virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test);
+ virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
+ virtual void OnTestProgramEnd(const UnitTest& unit_test);
+
+ private:
+ // Controls whether events will be forwarded to listeners_. Set to false
+ // in death test child processes.
+ bool forwarding_enabled_;
+ // The list of listeners that receive events.
+ std::vector<TestEventListener*> listeners_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventRepeater);
+};
+
+TestEventRepeater::~TestEventRepeater() {
+ ForEach(listeners_, Delete<TestEventListener>);
+}
+
+void TestEventRepeater::Append(TestEventListener *listener) {
+ listeners_.push_back(listener);
+}
+
+// TODO(vladl@google.com): Factor the search functionality into Vector::Find.
+TestEventListener* TestEventRepeater::Release(TestEventListener *listener) {
+ for (size_t i = 0; i < listeners_.size(); ++i) {
+ if (listeners_[i] == listener) {
+ listeners_.erase(listeners_.begin() + i);
+ return listener;
+ }
+ }
+
+ return NULL;
+}
+
+// Since most methods are very similar, use macros to reduce boilerplate.
+// This defines a member that forwards the call to all listeners.
+#define GTEST_REPEATER_METHOD_(Name, Type) \
+void TestEventRepeater::Name(const Type& parameter) { \
+ if (forwarding_enabled_) { \
+ for (size_t i = 0; i < listeners_.size(); i++) { \
+ listeners_[i]->Name(parameter); \
+ } \
+ } \
+}
+// This defines a member that forwards the call to all listeners in reverse
+// order.
+#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \
+void TestEventRepeater::Name(const Type& parameter) { \
+ if (forwarding_enabled_) { \
+ for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) { \
+ listeners_[i]->Name(parameter); \
+ } \
+ } \
+}
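+
+// Illustration (editor's sketch, not part of the original source):
+// GTEST_REPEATER_METHOD_(OnTestStart, TestInfo) below expands to
+//
+//   void TestEventRepeater::OnTestStart(const TestInfo& parameter) {
+//     if (forwarding_enabled_) {
+//       for (size_t i = 0; i < listeners_.size(); i++) {
+//         listeners_[i]->OnTestStart(parameter);
+//       }
+//     }
+//   }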
+
+GTEST_REPEATER_METHOD_(OnTestProgramStart, UnitTest)
+GTEST_REPEATER_METHOD_(OnEnvironmentsSetUpStart, UnitTest)
+GTEST_REPEATER_METHOD_(OnTestCaseStart, TestCase)
+GTEST_REPEATER_METHOD_(OnTestStart, TestInfo)
+GTEST_REPEATER_METHOD_(OnTestPartResult, TestPartResult)
+GTEST_REPEATER_METHOD_(OnEnvironmentsTearDownStart, UnitTest)
+GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsSetUpEnd, UnitTest)
+GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsTearDownEnd, UnitTest)
+GTEST_REVERSE_REPEATER_METHOD_(OnTestEnd, TestInfo)
+GTEST_REVERSE_REPEATER_METHOD_(OnTestCaseEnd, TestCase)
+GTEST_REVERSE_REPEATER_METHOD_(OnTestProgramEnd, UnitTest)
+
+#undef GTEST_REPEATER_METHOD_
+#undef GTEST_REVERSE_REPEATER_METHOD_
+
+void TestEventRepeater::OnTestIterationStart(const UnitTest& unit_test,
+ int iteration) {
+ if (forwarding_enabled_) {
+ for (size_t i = 0; i < listeners_.size(); i++) {
+ listeners_[i]->OnTestIterationStart(unit_test, iteration);
+ }
+ }
+}
+
+void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test,
+ int iteration) {
+ if (forwarding_enabled_) {
+ for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) {
+ listeners_[i]->OnTestIterationEnd(unit_test, iteration);
+ }
+ }
+}
+
+// End TestEventRepeater
+
+// This class generates an XML output file.
+class XmlUnitTestResultPrinter : public EmptyTestEventListener {
+ public:
+ explicit XmlUnitTestResultPrinter(const char* output_file);
+
+ virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
+
+ private:
+ // Is c a whitespace character that is normalized to a space character
+ // when it appears in an XML attribute value?
+ static bool IsNormalizableWhitespace(char c) {
+ return c == 0x9 || c == 0xA || c == 0xD;
+ }
+
+ // May c appear in a well-formed XML document?
+ static bool IsValidXmlCharacter(char c) {
+ return IsNormalizableWhitespace(c) || c >= 0x20;
+ }
+
+ // Returns an XML-escaped copy of the input string str. If
+ // is_attribute is true, the text is meant to appear as an attribute
+ // value, and normalizable whitespace is preserved by replacing it
+ // with character references.
+ static std::string EscapeXml(const std::string& str, bool is_attribute);
+
+ // Returns the given string with all characters invalid in XML removed.
+ static std::string RemoveInvalidXmlCharacters(const std::string& str);
+
+ // Convenience wrapper around EscapeXml when str is an attribute value.
+ static std::string EscapeXmlAttribute(const std::string& str) {
+ return EscapeXml(str, true);
+ }
+
+ // Convenience wrapper around EscapeXml when str is not an attribute value.
+ static std::string EscapeXmlText(const char* str) {
+ return EscapeXml(str, false);
+ }
+
+ // Verifies that the given attribute belongs to the given element and
+ // streams the attribute as XML.
+ static void OutputXmlAttribute(std::ostream* stream,
+ const std::string& element_name,
+ const std::string& name,
+ const std::string& value);
+
+ // Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
+ static void OutputXmlCDataSection(::std::ostream* stream, const char* data);
+
+ // Streams an XML representation of a TestInfo object.
+ static void OutputXmlTestInfo(::std::ostream* stream,
+ const char* test_case_name,
+ const TestInfo& test_info);
+
+ // Prints an XML representation of a TestCase object
+ static void PrintXmlTestCase(::std::ostream* stream,
+ const TestCase& test_case);
+
+ // Prints an XML summary of unit_test to output stream out.
+ static void PrintXmlUnitTest(::std::ostream* stream,
+ const UnitTest& unit_test);
+
+ // Produces a string representing the test properties in a result as space
+ // delimited XML attributes based on the property key="value" pairs.
+ // When the std::string is not empty, it includes a space at the beginning,
+ // to delimit this attribute from prior attributes.
+ static std::string TestPropertiesAsXmlAttributes(const TestResult& result);
+
+ // The output file.
+ const std::string output_file_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(XmlUnitTestResultPrinter);
+};
+
+// Creates a new XmlUnitTestResultPrinter.
+XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file)
+ : output_file_(output_file) {
+ // std::string::c_str() never returns NULL, so emptiness is the real check.
+ if (output_file_.empty()) {
+ fprintf(stderr, "XML output file may not be empty\n");
+ fflush(stderr);
+ exit(EXIT_FAILURE);
+ }
+}
+
+// Called after the unit test ends.
+void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
+ int /*iteration*/) {
+ FILE* xmlout = NULL;
+ FilePath output_file(output_file_);
+ FilePath output_dir(output_file.RemoveFileName());
+
+ if (output_dir.CreateDirectoriesRecursively()) {
+ xmlout = posix::FOpen(output_file_.c_str(), "w");
+ }
+ if (xmlout == NULL) {
+ // TODO(wan): report the reason of the failure.
+ //
+ // We don't do it for now as:
+ //
+ // 1. There is no urgent need for it.
+ // 2. It's a bit involved to make the errno variable thread-safe on
+ // all three operating systems (Linux, Windows, and Mac OS).
+ // 3. To interpret the meaning of errno in a thread-safe way,
+ // we need the strerror_r() function, which is not available on
+ // Windows.
+ fprintf(stderr,
+ "Unable to open file \"%s\"\n",
+ output_file_.c_str());
+ fflush(stderr);
+ exit(EXIT_FAILURE);
+ }
+ std::stringstream stream;
+ PrintXmlUnitTest(&stream, unit_test);
+ fprintf(xmlout, "%s", StringStreamToString(&stream).c_str());
+ fclose(xmlout);
+}
+
+// Returns an XML-escaped copy of the input string str. If is_attribute
+// is true, the text is meant to appear as an attribute value, and
+// normalizable whitespace is preserved by replacing it with character
+// references.
+//
+// Invalid XML characters in str, if any, are stripped from the output.
+// It is expected that most, if not all, of the text processed by this
+// module will consist of ordinary English text.
+// If this module is ever modified to produce version 1.1 XML output,
+// most invalid characters can be retained using character references.
+// TODO(wan): It might be nice to have a minimally invasive, human-readable
+// escaping scheme for invalid characters, rather than dropping them.
+std::string XmlUnitTestResultPrinter::EscapeXml(
+ const std::string& str, bool is_attribute) {
+ Message m;
+
+ for (size_t i = 0; i < str.size(); ++i) {
+ const char ch = str[i];
+ switch (ch) {
+ case '<':
+ m << "&lt;";
+ break;
+ case '>':
+ m << "&gt;";
+ break;
+ case '&':
+ m << "&amp;";
+ break;
+ case '\'':
+ if (is_attribute)
+ m << "&apos;";
+ else
+ m << '\'';
+ break;
+ case '"':
+ if (is_attribute)
+ m << "&quot;";
+ else
+ m << '"';
+ break;
+ default:
+ if (IsValidXmlCharacter(ch)) {
+ if (is_attribute && IsNormalizableWhitespace(ch))
+ m << "&#x" << String::FormatByte(static_cast<unsigned char>(ch))
+ << ";";
+ else
+ m << ch;
+ }
+ break;
+ }
+ }
+
+ return m.GetString();
+}
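+
+// For example, EscapeXml("a<b&c", false) yields "a&lt;b&amp;c". With
+// is_attribute set, a '\n' becomes the character reference "&#x0A;"
+// (assuming String::FormatByte renders a byte as two uppercase hex digits).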
+
+// Returns the given string with all characters invalid in XML removed.
+// Currently invalid characters are dropped from the string. An
+// alternative is to replace them with certain characters such as . or ?.
+std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(
+ const std::string& str) {
+ std::string output;
+ output.reserve(str.size());
+ for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
+ if (IsValidXmlCharacter(*it))
+ output.push_back(*it);
+
+ return output;
+}
+
+// The following routines generate an XML representation of a UnitTest
+// object.
+//
+// This is how Google Test concepts map to the DTD:
+//
+// <testsuites name="AllTests"> <-- corresponds to a UnitTest object
+// <testsuite name="testcase-name"> <-- corresponds to a TestCase object
+// <testcase name="test-name"> <-- corresponds to a TestInfo object
+// <failure message="...">...</failure>
+// <failure message="...">...</failure>
+// <failure message="...">...</failure>
+// <-- individual assertion failures
+// </testcase>
+// </testsuite>
+// </testsuites>
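+//
+// For illustration only (all values hypothetical), a run with a single
+// failing test might emit:
+//
+//   <?xml version="1.0" encoding="UTF-8"?>
+//   <testsuites tests="1" failures="1" disabled="0" errors="0"
+//               timestamp="2014-01-01T00:00:00" time="0.005"
+//               name="AllTests">
+//     <testsuite name="FooTest" tests="1" failures="1" disabled="0"
+//                errors="0" time="0.005">
+//       <testcase name="Bar" status="run" time="0.005" classname="FooTest">
+//         <failure message="..." type=""><![CDATA[...]]></failure>
+//       </testcase>
+//     </testsuite>
+//   </testsuites>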
+
+// Formats the given time in milliseconds as seconds.
+std::string FormatTimeInMillisAsSeconds(TimeInMillis ms) {
+ ::std::stringstream ss;
+ ss << ms/1000.0;
+ return ss.str();
+}
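+
+// For example, FormatTimeInMillisAsSeconds(2500) yields "2.5" and
+// FormatTimeInMillisAsSeconds(1000) yields "1".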
+
+// Converts the given epoch time in milliseconds to a date string in the ISO
+// 8601 format, without the timezone information.
+std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms) {
+ // Using non-reentrant version as localtime_r is not portable.
+ time_t seconds = static_cast<time_t>(ms / 1000);
+#ifdef _MSC_VER
+# pragma warning(push) // Saves the current warning state.
+# pragma warning(disable:4996) // Temporarily disables warning 4996
+ // (function or variable may be unsafe).
+ const struct tm* const time_struct = localtime(&seconds); // NOLINT
+# pragma warning(pop) // Restores the warning state again.
+#else
+ const struct tm* const time_struct = localtime(&seconds); // NOLINT
+#endif
+ if (time_struct == NULL)
+ return ""; // Invalid ms value
+
+ // YYYY-MM-DDThh:mm:ss
+ return StreamableToString(time_struct->tm_year + 1900) + "-" +
+ String::FormatIntWidth2(time_struct->tm_mon + 1) + "-" +
+ String::FormatIntWidth2(time_struct->tm_mday) + "T" +
+ String::FormatIntWidth2(time_struct->tm_hour) + ":" +
+ String::FormatIntWidth2(time_struct->tm_min) + ":" +
+ String::FormatIntWidth2(time_struct->tm_sec);
+}
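+
+// For example, FormatEpochTimeInMillisAsIso8601(0) yields
+// "1970-01-01T00:00:00" when the local time zone is UTC; other time zones
+// shift the result accordingly, since localtime() is used above.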
+
+// Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
+void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream* stream,
+ const char* data) {
+ const char* segment = data;
+ *stream << "<![CDATA[";
+ for (;;) {
+ const char* const next_segment = strstr(segment, "]]>");
+ if (next_segment != NULL) {
+ stream->write(
+ segment, static_cast<std::streamsize>(next_segment - segment));
+ *stream << "]]>]]&gt;<![CDATA[";
+ segment = next_segment + strlen("]]>");
+ } else {
+ *stream << segment;
+ break;
+ }
+ }
+ *stream << "]]>";
+}
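+
+// For example, the data "a]]>b" is emitted as
+// "<![CDATA[a]]>]]&gt;<![CDATA[b]]>", splitting the forbidden "]]>"
+// sequence across two CDATA sections.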
+
+void XmlUnitTestResultPrinter::OutputXmlAttribute(
+ std::ostream* stream,
+ const std::string& element_name,
+ const std::string& name,
+ const std::string& value) {
+ const std::vector<std::string>& allowed_names =
+ GetReservedAttributesForElement(element_name);
+
+ GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
+ allowed_names.end())
+ << "Attribute " << name << " is not allowed for element <" << element_name
+ << ">.";
+
+ *stream << " " << name << "=\"" << EscapeXmlAttribute(value) << "\"";
+}
+
+// Prints an XML representation of a TestInfo object.
+// TODO(wan): There is also value in printing properties with the plain printer.
+void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream,
+ const char* test_case_name,
+ const TestInfo& test_info) {
+ const TestResult& result = *test_info.result();
+ const std::string kTestcase = "testcase";
+
+ *stream << " <testcase";
+ OutputXmlAttribute(stream, kTestcase, "name", test_info.name());
+
+ if (test_info.value_param() != NULL) {
+ OutputXmlAttribute(stream, kTestcase, "value_param",
+ test_info.value_param());
+ }
+ if (test_info.type_param() != NULL) {
+ OutputXmlAttribute(stream, kTestcase, "type_param", test_info.type_param());
+ }
+
+ OutputXmlAttribute(stream, kTestcase, "status",
+ test_info.should_run() ? "run" : "notrun");
+ OutputXmlAttribute(stream, kTestcase, "time",
+ FormatTimeInMillisAsSeconds(result.elapsed_time()));
+ OutputXmlAttribute(stream, kTestcase, "classname", test_case_name);
+ *stream << TestPropertiesAsXmlAttributes(result);
+
+ int failures = 0;
+ for (int i = 0; i < result.total_part_count(); ++i) {
+ const TestPartResult& part = result.GetTestPartResult(i);
+ if (part.failed()) {
+ if (++failures == 1) {
+ *stream << ">\n";
+ }
+ const string location = internal::FormatCompilerIndependentFileLocation(
+ part.file_name(), part.line_number());
+ const string summary = location + "\n" + part.summary();
+ *stream << " <failure message=\""
+ << EscapeXmlAttribute(summary.c_str())
+ << "\" type=\"\">";
+ const string detail = location + "\n" + part.message();
+ OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
+ *stream << "</failure>\n";
+ }
+ }
+
+ if (failures == 0)
+ *stream << " />\n";
+ else
+ *stream << " </testcase>\n";
+}
+
+// Prints an XML representation of a TestCase object.
+void XmlUnitTestResultPrinter::PrintXmlTestCase(std::ostream* stream,
+ const TestCase& test_case) {
+ const std::string kTestsuite = "testsuite";
+ *stream << " <" << kTestsuite;
+ OutputXmlAttribute(stream, kTestsuite, "name", test_case.name());
+ OutputXmlAttribute(stream, kTestsuite, "tests",
+ StreamableToString(test_case.reportable_test_count()));
+ OutputXmlAttribute(stream, kTestsuite, "failures",
+ StreamableToString(test_case.failed_test_count()));
+ OutputXmlAttribute(
+ stream, kTestsuite, "disabled",
+ StreamableToString(test_case.reportable_disabled_test_count()));
+ OutputXmlAttribute(stream, kTestsuite, "errors", "0");
+ OutputXmlAttribute(stream, kTestsuite, "time",
+ FormatTimeInMillisAsSeconds(test_case.elapsed_time()));
+ *stream << TestPropertiesAsXmlAttributes(test_case.ad_hoc_test_result())
+ << ">\n";
+
+ for (int i = 0; i < test_case.total_test_count(); ++i) {
+ if (test_case.GetTestInfo(i)->is_reportable())
+ OutputXmlTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i));
+ }
+ *stream << " </" << kTestsuite << ">\n";
+}
+
+// Prints an XML summary of unit_test to output stream out.
+void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream,
+ const UnitTest& unit_test) {
+ const std::string kTestsuites = "testsuites";
+
+ *stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
+ *stream << "<" << kTestsuites;
+
+ OutputXmlAttribute(stream, kTestsuites, "tests",
+ StreamableToString(unit_test.reportable_test_count()));
+ OutputXmlAttribute(stream, kTestsuites, "failures",
+ StreamableToString(unit_test.failed_test_count()));
+ OutputXmlAttribute(
+ stream, kTestsuites, "disabled",
+ StreamableToString(unit_test.reportable_disabled_test_count()));
+ OutputXmlAttribute(stream, kTestsuites, "errors", "0");
+ OutputXmlAttribute(
+ stream, kTestsuites, "timestamp",
+ FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp()));
+ OutputXmlAttribute(stream, kTestsuites, "time",
+ FormatTimeInMillisAsSeconds(unit_test.elapsed_time()));
+
+ if (GTEST_FLAG(shuffle)) {
+ OutputXmlAttribute(stream, kTestsuites, "random_seed",
+ StreamableToString(unit_test.random_seed()));
+ }
+
+ *stream << TestPropertiesAsXmlAttributes(unit_test.ad_hoc_test_result());
+
+ OutputXmlAttribute(stream, kTestsuites, "name", "AllTests");
+ *stream << ">\n";
+
+ for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
+ if (unit_test.GetTestCase(i)->reportable_test_count() > 0)
+ PrintXmlTestCase(stream, *unit_test.GetTestCase(i));
+ }
+ *stream << "</" << kTestsuites << ">\n";
+}
+
+// Produces a string representing the test properties in a result as
+// space-delimited XML attributes based on the property key="value" pairs.
+std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes(
+ const TestResult& result) {
+ Message attributes;
+ for (int i = 0; i < result.test_property_count(); ++i) {
+ const TestProperty& property = result.GetTestProperty(i);
+ attributes << " " << property.key() << "="
+ << "\"" << EscapeXmlAttribute(property.value()) << "\"";
+ }
+ return attributes.GetString();
+}
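+
+// For example, a result holding the single property ("cpus", "4")
+// contributes ' cpus="4"', whose leading space separates it from any
+// preceding attribute.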
+
+// End XmlUnitTestResultPrinter
+
+#if GTEST_CAN_STREAM_RESULTS_
+
+// Checks if str contains '=', '&', '%' or '\n' characters. If yes,
+// replaces them by "%xx" where xx is their hexadecimal value. For
+// example, replaces "=" with "%3D". This algorithm is O(strlen(str))
+// in both time and space -- important as the input str may contain an
+// arbitrarily long test failure message and stack trace.
+string StreamingListener::UrlEncode(const char* str) {
+ string result;
+ result.reserve(strlen(str) + 1);
+ for (char ch = *str; ch != '\0'; ch = *++str) {
+ switch (ch) {
+ case '%':
+ case '=':
+ case '&':
+ case '\n':
+ result.append("%" + String::FormatByte(static_cast<unsigned char>(ch)));
+ break;
+ default:
+ result.push_back(ch);
+ break;
+ }
+ }
+ return result;
+}
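+
+// For example, UrlEncode("a=b&c") yields "a%3Db%26c"; all other
+// characters pass through unchanged.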
+
+void StreamingListener::SocketWriter::MakeConnection() {
+ GTEST_CHECK_(sockfd_ == -1)
+ << "MakeConnection() can't be called when there is already a connection.";
+
+ addrinfo hints;
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC; // To allow both IPv4 and IPv6 addresses.
+ hints.ai_socktype = SOCK_STREAM;
+ addrinfo* servinfo = NULL;
+
+ // Use the getaddrinfo() to get a linked list of IP addresses for
+ // the given host name.
+ const int error_num = getaddrinfo(
+ host_name_.c_str(), port_num_.c_str(), &hints, &servinfo);
+ if (error_num != 0) {
+ GTEST_LOG_(WARNING) << "stream_result_to: getaddrinfo() failed: "
+ << gai_strerror(error_num);
+ }
+
+ // Loop through all the results and connect to the first we can.
+ for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != NULL;
+ cur_addr = cur_addr->ai_next) {
+ sockfd_ = socket(
+ cur_addr->ai_family, cur_addr->ai_socktype, cur_addr->ai_protocol);
+ if (sockfd_ != -1) {
+ // Connect the client socket to the server socket.
+ if (connect(sockfd_, cur_addr->ai_addr, cur_addr->ai_addrlen) == -1) {
+ close(sockfd_);
+ sockfd_ = -1;
+ }
+ }
+ }
+
+ freeaddrinfo(servinfo); // all done with this structure
+
+ if (sockfd_ == -1) {
+ GTEST_LOG_(WARNING) << "stream_result_to: failed to connect to "
+ << host_name_ << ":" << port_num_;
+ }
+}
+
+// End of class StreamingListener
+#endif  // GTEST_CAN_STREAM_RESULTS_
+
+// Class ScopedTrace
+
+// Pushes the given source file location and message onto a per-thread
+// trace stack maintained by Google Test.
+ScopedTrace::ScopedTrace(const char* file, int line, const Message& message)
+ GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) {
+ TraceInfo trace;
+ trace.file = file;
+ trace.line = line;
+ trace.message = message.GetString();
+
+ UnitTest::GetInstance()->PushGTestTrace(trace);
+}
+
+// Pops the info pushed by the c'tor.
+ScopedTrace::~ScopedTrace()
+ GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) {
+ UnitTest::GetInstance()->PopGTestTrace();
+}
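+
+// For example, SCOPED_TRACE("calling Foo") inside a test constructs a
+// ScopedTrace, so any assertion that fails while it is in scope has
+// "calling Foo" (with the file/line location) appended to its message.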
+
+
+// class OsStackTraceGetter
+
+// Returns the current OS stack trace as an std::string. Parameters:
+//
+// max_depth - the maximum number of stack frames to be included
+// in the trace.
+// skip_count - the number of top frames to be skipped; doesn't count
+// against max_depth.
+//
+string OsStackTraceGetter::CurrentStackTrace(int /* max_depth */,
+ int /* skip_count */)
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+ return "";
+}
+
+void OsStackTraceGetter::UponLeavingGTest()
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+}
+
+const char* const
+OsStackTraceGetter::kElidedFramesMarker =
+ "... " GTEST_NAME_ " internal frames ...";
+
+// A helper class that creates the premature-exit file in its
+// constructor and deletes the file in its destructor.
+class ScopedPrematureExitFile {
+ public:
+ explicit ScopedPrematureExitFile(const char* premature_exit_filepath)
+ : premature_exit_filepath_(premature_exit_filepath) {
+ // If a path to the premature-exit file is specified...
+ if (premature_exit_filepath != NULL && *premature_exit_filepath != '\0') {
+ // create the file with a single "0" character in it. I/O
+ // errors are ignored as there's nothing better we can do and we
+ // don't want to fail the test because of this.
+ FILE* pfile = posix::FOpen(premature_exit_filepath, "w");
+ fwrite("0", 1, 1, pfile);
+ fclose(pfile);
+ }
+ }
+
+ ~ScopedPrematureExitFile() {
+ if (premature_exit_filepath_ != NULL && *premature_exit_filepath_ != '\0') {
+ remove(premature_exit_filepath_);
+ }
+ }
+
+ private:
+ const char* const premature_exit_filepath_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedPrematureExitFile);
+};
+
+} // namespace internal
+
+// class TestEventListeners
+
+TestEventListeners::TestEventListeners()
+ : repeater_(new internal::TestEventRepeater()),
+ default_result_printer_(NULL),
+ default_xml_generator_(NULL) {
+}
+
+TestEventListeners::~TestEventListeners() { delete repeater_; }
+
+// Adds a listener to the end of the list. Google Test takes the ownership
+// of the listener (i.e. it will delete the listener when the test program
+// finishes).
+void TestEventListeners::Append(TestEventListener* listener) {
+ repeater_->Append(listener);
+}
+
+// Removes the given event listener from the list and returns it. It then
+// becomes the caller's responsibility to delete the listener. Returns
+// NULL if the listener is not found in the list.
+TestEventListener* TestEventListeners::Release(TestEventListener* listener) {
+ if (listener == default_result_printer_)
+ default_result_printer_ = NULL;
+ else if (listener == default_xml_generator_)
+ default_xml_generator_ = NULL;
+ return repeater_->Release(listener);
+}
+
+// Returns repeater that broadcasts the TestEventListener events to all
+// subscribers.
+TestEventListener* TestEventListeners::repeater() { return repeater_; }
+
+// Sets the default_result_printer attribute to the provided listener.
+// The listener is also added to the listener list and previous
+// default_result_printer is removed from it and deleted. The listener can
+// also be NULL in which case it will not be added to the list. Does
+// nothing if the previous and the current listener objects are the same.
+void TestEventListeners::SetDefaultResultPrinter(TestEventListener* listener) {
+ if (default_result_printer_ != listener) {
+ // It is an error to pass this method a listener that is already in the
+ // list.
+ delete Release(default_result_printer_);
+ default_result_printer_ = listener;
+ if (listener != NULL)
+ Append(listener);
+ }
+}
+
+// Sets the default_xml_generator attribute to the provided listener. The
+// listener is also added to the listener list and previous
+// default_xml_generator is removed from it and deleted. The listener can
+// also be NULL in which case it will not be added to the list. Does
+// nothing if the previous and the current listener objects are the same.
+void TestEventListeners::SetDefaultXmlGenerator(TestEventListener* listener) {
+ if (default_xml_generator_ != listener) {
+ // It is an error to pass this method a listener that is already in the
+ // list.
+ delete Release(default_xml_generator_);
+ default_xml_generator_ = listener;
+ if (listener != NULL)
+ Append(listener);
+ }
+}
+
+// Controls whether events will be forwarded by the repeater to the
+// listeners in the list.
+bool TestEventListeners::EventForwardingEnabled() const {
+ return repeater_->forwarding_enabled();
+}
+
+void TestEventListeners::SuppressEventForwarding() {
+ repeater_->set_forwarding_enabled(false);
+}
+
+// class UnitTest
+
+// Gets the singleton UnitTest object. The first time this method is
+// called, a UnitTest object is constructed and returned. Consecutive
+// calls will return the same object.
+//
+// We don't protect this under mutex_ as a user is not supposed to
+// call this before main() starts, from which point on the return
+// value will never change.
+UnitTest* UnitTest::GetInstance() {
+ // When compiled with MSVC 7.1 in optimized mode, destroying the
+ // UnitTest object upon exiting the program messes up the exit code,
+ // causing successful tests to appear failed. We have to use a
+ // different implementation in this case to bypass the compiler bug.
+ // This implementation makes the compiler happy, at the cost of
+ // leaking the UnitTest object.
+
+ // CodeGear C++Builder insists on a public destructor for the
+ // default implementation. Use this implementation to keep good OO
+ // design with a private destructor.
+
+#if (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__)
+ static UnitTest* const instance = new UnitTest;
+ return instance;
+#else
+ static UnitTest instance;
+ return &instance;
+#endif // (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__)
+}
+
+// Gets the number of successful test cases.
+int UnitTest::successful_test_case_count() const {
+ return impl()->successful_test_case_count();
+}
+
+// Gets the number of failed test cases.
+int UnitTest::failed_test_case_count() const {
+ return impl()->failed_test_case_count();
+}
+
+// Gets the number of all test cases.
+int UnitTest::total_test_case_count() const {
+ return impl()->total_test_case_count();
+}
+
+// Gets the number of all test cases that contain at least one test
+// that should run.
+int UnitTest::test_case_to_run_count() const {
+ return impl()->test_case_to_run_count();
+}
+
+// Gets the number of successful tests.
+int UnitTest::successful_test_count() const {
+ return impl()->successful_test_count();
+}
+
+// Gets the number of failed tests.
+int UnitTest::failed_test_count() const { return impl()->failed_test_count(); }
+
+// Gets the number of disabled tests that will be reported in the XML report.
+int UnitTest::reportable_disabled_test_count() const {
+ return impl()->reportable_disabled_test_count();
+}
+
+// Gets the number of disabled tests.
+int UnitTest::disabled_test_count() const {
+ return impl()->disabled_test_count();
+}
+
+// Gets the number of tests to be printed in the XML report.
+int UnitTest::reportable_test_count() const {
+ return impl()->reportable_test_count();
+}
+
+// Gets the number of all tests.
+int UnitTest::total_test_count() const { return impl()->total_test_count(); }
+
+// Gets the number of tests that should run.
+int UnitTest::test_to_run_count() const { return impl()->test_to_run_count(); }
+
+// Gets the time of the test program start, in ms from the start of the
+// UNIX epoch.
+internal::TimeInMillis UnitTest::start_timestamp() const {
+ return impl()->start_timestamp();
+}
+
+// Gets the elapsed time, in milliseconds.
+internal::TimeInMillis UnitTest::elapsed_time() const {
+ return impl()->elapsed_time();
+}
+
+// Returns true iff the unit test passed (i.e. all test cases passed).
+bool UnitTest::Passed() const { return impl()->Passed(); }
+
+// Returns true iff the unit test failed (i.e. some test case failed
+// or something outside of all tests failed).
+bool UnitTest::Failed() const { return impl()->Failed(); }
+
+// Gets the i-th test case among all the test cases. i can range from 0 to
+// total_test_case_count() - 1. If i is not in that range, returns NULL.
+const TestCase* UnitTest::GetTestCase(int i) const {
+ return impl()->GetTestCase(i);
+}
+
+// Returns the TestResult containing information on test failures and
+// properties logged outside of individual test cases.
+const TestResult& UnitTest::ad_hoc_test_result() const {
+ return *impl()->ad_hoc_test_result();
+}
+
+// Gets the i-th test case among all the test cases. i can range from 0 to
+// total_test_case_count() - 1. If i is not in that range, returns NULL.
+TestCase* UnitTest::GetMutableTestCase(int i) {
+ return impl()->GetMutableTestCase(i);
+}
+
+// Returns the list of event listeners that can be used to track events
+// inside Google Test.
+TestEventListeners& UnitTest::listeners() {
+ return *impl()->listeners();
+}
+
+// Registers and returns a global test environment. When a test
+// program is run, all global test environments will be set-up in the
+// order they were registered. After all tests in the program have
+// finished, all global test environments will be torn-down in the
+// *reverse* order they were registered.
+//
+// The UnitTest object takes ownership of the given environment.
+//
+// We don't protect this under mutex_, as we only support calling it
+// from the main thread.
+Environment* UnitTest::AddEnvironment(Environment* env) {
+ if (env == NULL) {
+ return NULL;
+ }
+
+ impl_->environments().push_back(env);
+ return env;
+}
+
+// Adds a TestPartResult to the current TestResult object. All Google Test
+// assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call
+// this to report their results. The user code should use the
+// assertion macros instead of calling this directly.
+void UnitTest::AddTestPartResult(
+ TestPartResult::Type result_type,
+ const char* file_name,
+ int line_number,
+ const std::string& message,
+ const std::string& os_stack_trace) GTEST_LOCK_EXCLUDED_(mutex_) {
+ Message msg;
+ msg << message;
+
+ internal::MutexLock lock(&mutex_);
+ if (impl_->gtest_trace_stack().size() > 0) {
+ msg << "\n" << GTEST_NAME_ << " trace:";
+
+ for (int i = static_cast<int>(impl_->gtest_trace_stack().size());
+ i > 0; --i) {
+ const internal::TraceInfo& trace = impl_->gtest_trace_stack()[i - 1];
+ msg << "\n" << internal::FormatFileLocation(trace.file, trace.line)
+ << " " << trace.message;
+ }
+ }
+
+ if (os_stack_trace.c_str() != NULL && !os_stack_trace.empty()) {
+ msg << internal::kStackTraceMarker << os_stack_trace;
+ }
+
+ const TestPartResult result =
+ TestPartResult(result_type, file_name, line_number,
+ msg.GetString().c_str());
+ impl_->GetTestPartResultReporterForCurrentThread()->
+ ReportTestPartResult(result);
+
+ if (result_type != TestPartResult::kSuccess) {
+ // gtest_break_on_failure takes precedence over
+ // gtest_throw_on_failure. This allows a user to set the latter
+ // in the code (perhaps in order to use Google Test assertions
+ // with another testing framework) and specify the former on the
+ // command line for debugging.
+ if (GTEST_FLAG(break_on_failure)) {
+#if GTEST_OS_WINDOWS
+ // Using DebugBreak on Windows allows gtest to still break into a debugger
+ // when a failure happens and both the --gtest_break_on_failure and
+ // the --gtest_catch_exceptions flags are specified.
+ DebugBreak();
+#else
+ // Dereference NULL through a volatile pointer to prevent the compiler
+ // from removing. We use this rather than abort() or __builtin_trap() for
+ // portability: Symbian doesn't implement abort() well, and some debuggers
+ // don't correctly trap abort().
+ *static_cast<volatile int*>(NULL) = 1;
+#endif // GTEST_OS_WINDOWS
+ } else if (GTEST_FLAG(throw_on_failure)) {
+#if GTEST_HAS_EXCEPTIONS
+ throw internal::GoogleTestFailureException(result);
+#else
+ // We cannot call abort() as it generates a pop-up in debug mode
+ // that cannot be suppressed in VC 7.1 or below.
+ exit(1);
+#endif
+ }
+ }
+}
+
+// Adds a TestProperty to the current TestResult object when invoked from
+// inside a test, to the current TestCase's ad_hoc_test_result_ when invoked
+// from SetUpTestCase or TearDownTestCase, or to the global property set
+// when invoked elsewhere. If the result already contains a property with
+// the same key, the value will be updated.
+void UnitTest::RecordProperty(const std::string& key,
+ const std::string& value) {
+ impl_->RecordProperty(TestProperty(key, value));
+}
+
+// Runs all tests in this UnitTest object and prints the result.
+// Returns 0 if successful, or 1 otherwise.
+//
+// We don't protect this under mutex_, as we only support calling it
+// from the main thread.
+int UnitTest::Run() {
+ const bool in_death_test_child_process =
+ internal::GTEST_FLAG(internal_run_death_test).length() > 0;
+
+ // Google Test implements this protocol for catching that a test
+ // program exits before returning control to Google Test:
+ //
+ // 1. Upon start, Google Test creates a file whose absolute path
+ // is specified by the environment variable
+ // TEST_PREMATURE_EXIT_FILE.
+ // 2. When Google Test has finished its work, it deletes the file.
+ //
+ // This allows a test runner to set TEST_PREMATURE_EXIT_FILE before
+ // running a Google-Test-based test program and check the existence
+ // of the file at the end of the test execution to see if it has
+ // exited prematurely.
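+ //
+ // A test runner might use the protocol roughly as follows (sketch; the
+ // file name is arbitrary):
+ //
+ //   TEST_PREMATURE_EXIT_FILE=/tmp/premature_exit ./my_test
+ //   test -f /tmp/premature_exit && echo "test exited prematurely"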
+
+ // If we are in the child process of a death test, don't
+ // create/delete the premature exit file, as doing so is unnecessary
+ // and will confuse the parent process. Otherwise, create/delete
+ // the file upon entering/leaving this function. If the program
+ // somehow exits before this function has a chance to return, the
+ // premature-exit file will be left undeleted, causing a test runner
+ // that understands the premature-exit-file protocol to report the
+ // test as having failed.
+ const internal::ScopedPrematureExitFile premature_exit_file(
+ in_death_test_child_process ?
+ NULL : internal::posix::GetEnv("TEST_PREMATURE_EXIT_FILE"));
+
+ // Captures the value of GTEST_FLAG(catch_exceptions). This value will be
+ // used for the duration of the program.
+ impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions));
+
+#if GTEST_HAS_SEH
+ // Either the user wants Google Test to catch exceptions thrown by the
+ // tests or this is executing in the context of death test child
+ // process. In either case the user does not want to see pop-up dialogs
+ // about crashes - they are expected.
+ if (impl()->catch_exceptions() || in_death_test_child_process) {
+# if !GTEST_OS_WINDOWS_MOBILE
+ // SetErrorMode doesn't exist on CE.
+ SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT |
+ SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX);
+# endif // !GTEST_OS_WINDOWS_MOBILE
+
+# if (defined(_MSC_VER) || GTEST_OS_WINDOWS_MINGW) && !GTEST_OS_WINDOWS_MOBILE
+ // Death test children can be terminated with abort(). On Windows,
+ // abort() can show a dialog with a warning message. This forces the
+ // abort message to go to stderr instead.
+ _set_error_mode(_OUT_TO_STDERR);
+# endif
+
+# if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE
+ // In the debug version, Visual Studio pops up a separate dialog
+ // offering a choice to debug the aborted program. We need to suppress
+ // this dialog or it will pop up for every EXPECT/ASSERT_DEATH statement
+ // executed. Google Test will notify the user of any unexpected
+ // failure via stderr.
+ //
+ // VC++ doesn't define _set_abort_behavior() prior to version 8.0.
+ // Users of prior VC versions shall suffer the agony and pain of
+ // clicking through the countless debug dialogs.
+ // TODO(vladl@google.com): find a way to suppress the abort dialog in
+ // debug mode when compiled with VC 7.1 or lower.
+ if (!GTEST_FLAG(break_on_failure))
+ _set_abort_behavior(
+ 0x0, // Clear the following flags:
+ _WRITE_ABORT_MSG | _CALL_REPORTFAULT); // pop-up window, core dump.
+# endif
+ }
+#endif // GTEST_HAS_SEH
+
+ return internal::HandleExceptionsInMethodIfSupported(
+ impl(),
+ &internal::UnitTestImpl::RunAllTests,
+ "auxiliary test code (environments or event listeners)") ? 0 : 1;
+}
+
+// Returns the working directory when the first TEST() or TEST_F() was
+// executed.
+const char* UnitTest::original_working_dir() const {
+ return impl_->original_working_dir_.c_str();
+}
+
+// Returns the TestCase object for the test that's currently running,
+// or NULL if no test is running.
+const TestCase* UnitTest::current_test_case() const
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+ internal::MutexLock lock(&mutex_);
+ return impl_->current_test_case();
+}
+
+// Returns the TestInfo object for the test that's currently running,
+// or NULL if no test is running.
+const TestInfo* UnitTest::current_test_info() const
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+ internal::MutexLock lock(&mutex_);
+ return impl_->current_test_info();
+}
+
+// Returns the random seed used at the start of the current test run.
+int UnitTest::random_seed() const { return impl_->random_seed(); }
+
+#if GTEST_HAS_PARAM_TEST
+// Returns ParameterizedTestCaseRegistry object used to keep track of
+// value-parameterized tests and instantiate and register them.
+internal::ParameterizedTestCaseRegistry&
+ UnitTest::parameterized_test_registry()
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+ return impl_->parameterized_test_registry();
+}
+#endif // GTEST_HAS_PARAM_TEST
+
+// Creates an empty UnitTest.
+UnitTest::UnitTest() {
+ impl_ = new internal::UnitTestImpl(this);
+}
+
+// Destructor of UnitTest.
+UnitTest::~UnitTest() {
+ delete impl_;
+}
+
+// Pushes a trace defined by SCOPED_TRACE() on to the per-thread
+// Google Test trace stack.
+void UnitTest::PushGTestTrace(const internal::TraceInfo& trace)
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+ internal::MutexLock lock(&mutex_);
+ impl_->gtest_trace_stack().push_back(trace);
+}
+
+// Pops a trace from the per-thread Google Test trace stack.
+void UnitTest::PopGTestTrace()
+ GTEST_LOCK_EXCLUDED_(mutex_) {
+ internal::MutexLock lock(&mutex_);
+ impl_->gtest_trace_stack().pop_back();
+}
+
+namespace internal {
+
+UnitTestImpl::UnitTestImpl(UnitTest* parent)
+ : parent_(parent),
+#ifdef _MSC_VER
+# pragma warning(push) // Saves the current warning state.
+# pragma warning(disable:4355) // Temporarily disables warning 4355
+ // (using this in initializer).
+ default_global_test_part_result_reporter_(this),
+ default_per_thread_test_part_result_reporter_(this),
+# pragma warning(pop) // Restores the warning state again.
+#else
+ default_global_test_part_result_reporter_(this),
+ default_per_thread_test_part_result_reporter_(this),
+#endif // _MSC_VER
+ global_test_part_result_repoter_(
+ &default_global_test_part_result_reporter_),
+ per_thread_test_part_result_reporter_(
+ &default_per_thread_test_part_result_reporter_),
+#if GTEST_HAS_PARAM_TEST
+ parameterized_test_registry_(),
+ parameterized_tests_registered_(false),
+#endif // GTEST_HAS_PARAM_TEST
+ last_death_test_case_(-1),
+ current_test_case_(NULL),
+ current_test_info_(NULL),
+ ad_hoc_test_result_(),
+ os_stack_trace_getter_(NULL),
+ post_flag_parse_init_performed_(false),
+ random_seed_(0), // Will be overridden by the flag before first use.
+ random_(0), // Will be reseeded before first use.
+ start_timestamp_(0),
+ elapsed_time_(0),
+#if GTEST_HAS_DEATH_TEST
+ death_test_factory_(new DefaultDeathTestFactory),
+#endif
+ // Will be overridden by the flag before first use.
+ catch_exceptions_(false) {
+ listeners()->SetDefaultResultPrinter(new PrettyUnitTestResultPrinter);
+}
+
+UnitTestImpl::~UnitTestImpl() {
+ // Deletes every TestCase.
+ ForEach(test_cases_, internal::Delete<TestCase>);
+
+ // Deletes every Environment.
+ ForEach(environments_, internal::Delete<Environment>);
+
+ delete os_stack_trace_getter_;
+}
+
+// Adds a TestProperty to the current TestResult object when invoked in the
+// context of a test, to the current test case's ad_hoc_test_result when
+// invoked from SetUpTestCase/TearDownTestCase, or to the global property set
+// otherwise. If the result already contains a property with the same key,
+// the value will be updated.
+void UnitTestImpl::RecordProperty(const TestProperty& test_property) {
+ std::string xml_element;
+ TestResult* test_result; // TestResult appropriate for property recording.
+
+ if (current_test_info_ != NULL) {
+ xml_element = "testcase";
+ test_result = &(current_test_info_->result_);
+ } else if (current_test_case_ != NULL) {
+ xml_element = "testsuite";
+ test_result = &(current_test_case_->ad_hoc_test_result_);
+ } else {
+ xml_element = "testsuites";
+ test_result = &ad_hoc_test_result_;
+ }
+ test_result->RecordProperty(xml_element, test_property);
+}
+
+#if GTEST_HAS_DEATH_TEST
+// Disables event forwarding if the control is currently in a death test
+// subprocess. Must not be called before InitGoogleTest.
+void UnitTestImpl::SuppressTestEventsIfInSubprocess() {
+ if (internal_run_death_test_flag_.get() != NULL)
+ listeners()->SuppressEventForwarding();
+}
+#endif // GTEST_HAS_DEATH_TEST
+
+// Initializes event listeners performing XML output as specified by
+// UnitTestOptions. Must not be called before InitGoogleTest.
+void UnitTestImpl::ConfigureXmlOutput() {
+ const std::string& output_format = UnitTestOptions::GetOutputFormat();
+ if (output_format == "xml") {
+ listeners()->SetDefaultXmlGenerator(new XmlUnitTestResultPrinter(
+ UnitTestOptions::GetAbsolutePathToOutputFile().c_str()));
+ } else if (output_format != "") {
+ printf("WARNING: unrecognized output format \"%s\" ignored.\n",
+ output_format.c_str());
+ fflush(stdout);
+ }
+}
+
+#if GTEST_CAN_STREAM_RESULTS_
+// Initializes event listeners for streaming test results in string form.
+// Must not be called before InitGoogleTest.
+void UnitTestImpl::ConfigureStreamingOutput() {
+ const std::string& target = GTEST_FLAG(stream_result_to);
+ if (!target.empty()) {
+ const size_t pos = target.find(':');
+ if (pos != std::string::npos) {
+ listeners()->Append(new StreamingListener(target.substr(0, pos),
+ target.substr(pos+1)));
+ } else {
+ printf("WARNING: unrecognized streaming target \"%s\" ignored.\n",
+ target.c_str());
+ fflush(stdout);
+ }
+ }
+}
+#endif // GTEST_CAN_STREAM_RESULTS_
+
+// Performs initialization dependent upon flag values obtained in
+// ParseGoogleTestFlagsOnly. It is called from InitGoogleTest after the call
+// to ParseGoogleTestFlagsOnly. If a user neglects to call InitGoogleTest,
+// this function is also called from RunAllTests. Since this function can be
+// called more than once, it has to be idempotent.
+void UnitTestImpl::PostFlagParsingInit() {
+ // Ensures that this function does not execute more than once.
+ if (!post_flag_parse_init_performed_) {
+ post_flag_parse_init_performed_ = true;
+
+#if GTEST_HAS_DEATH_TEST
+ InitDeathTestSubprocessControlInfo();
+ SuppressTestEventsIfInSubprocess();
+#endif // GTEST_HAS_DEATH_TEST
+
+ // Registers parameterized tests. This makes parameterized tests
+ // available to the UnitTest reflection API without running
+ // RUN_ALL_TESTS.
+ RegisterParameterizedTests();
+
+ // Configures listeners for XML output. This makes it possible for users
+ // to shut down the default XML output before invoking RUN_ALL_TESTS.
+ ConfigureXmlOutput();
+
+#if GTEST_CAN_STREAM_RESULTS_
+ // Configures listeners for streaming test results to the specified server.
+ ConfigureStreamingOutput();
+#endif // GTEST_CAN_STREAM_RESULTS_
+ }
+}
+
+// A predicate that checks the name of a TestCase against a known
+// value.
+//
+// This is used for implementation of the UnitTest class only. We put
+// it in the anonymous namespace to prevent polluting the outer
+// namespace.
+//
+// TestCaseNameIs is copyable.
+class TestCaseNameIs {
+ public:
+ // Constructor.
+ explicit TestCaseNameIs(const std::string& name)
+ : name_(name) {}
+
+ // Returns true iff the name of test_case matches name_.
+ bool operator()(const TestCase* test_case) const {
+ return test_case != NULL && strcmp(test_case->name(), name_.c_str()) == 0;
+ }
+
+ private:
+ std::string name_;
+};
+
+// Finds and returns a TestCase with the given name. If one doesn't
+// exist, creates one and returns it. It's the CALLER'S
+// RESPONSIBILITY to ensure that this function is only called WHEN THE
+// TESTS ARE NOT SHUFFLED.
+//
+// Arguments:
+//
+// test_case_name: name of the test case
+// type_param: the name of the test case's type parameter, or NULL if
+// this is not a typed or a type-parameterized test case.
+// set_up_tc: pointer to the function that sets up the test case
+// tear_down_tc: pointer to the function that tears down the test case
+TestCase* UnitTestImpl::GetTestCase(const char* test_case_name,
+ const char* type_param,
+ Test::SetUpTestCaseFunc set_up_tc,
+ Test::TearDownTestCaseFunc tear_down_tc) {
+ // Can we find a TestCase with the given name?
+ const std::vector<TestCase*>::const_iterator test_case =
+ std::find_if(test_cases_.begin(), test_cases_.end(),
+ TestCaseNameIs(test_case_name));
+
+ if (test_case != test_cases_.end())
+ return *test_case;
+
+ // No. Let's create one.
+ TestCase* const new_test_case =
+ new TestCase(test_case_name, type_param, set_up_tc, tear_down_tc);
+
+ // Is this a death test case?
+ if (internal::UnitTestOptions::MatchesFilter(test_case_name,
+ kDeathTestCaseFilter)) {
+ // Yes. Inserts the test case after the last death test case
+ // defined so far. This only works when the test cases haven't
+ // been shuffled. Otherwise we may end up running a death test
+ // after a non-death test.
+ ++last_death_test_case_;
+ test_cases_.insert(test_cases_.begin() + last_death_test_case_,
+ new_test_case);
+ } else {
+ // No. Appends to the end of the list.
+ test_cases_.push_back(new_test_case);
+ }
+
+ test_case_indices_.push_back(static_cast<int>(test_case_indices_.size()));
+ return new_test_case;
+}
+
+// Helpers for setting up / tearing down the given environment. They
+// are for use in the ForEach() function.
+static void SetUpEnvironment(Environment* env) { env->SetUp(); }
+static void TearDownEnvironment(Environment* env) { env->TearDown(); }
+
+// Runs all tests in this UnitTest object, prints the result, and
+// returns true if all tests are successful. If any exception is
+// thrown during a test, the test is considered to be failed, but the
+// rest of the tests will still be run.
+//
+// When parameterized tests are enabled, it expands and registers
+// parameterized tests first in RegisterParameterizedTests().
+// All other functions called from RunAllTests() may safely assume that
+// parameterized tests are ready to be counted and run.
+bool UnitTestImpl::RunAllTests() {
+ // Makes sure InitGoogleTest() was called.
+ if (!GTestIsInitialized()) {
+ printf("%s",
+ "\nThis test program did NOT call ::testing::InitGoogleTest "
+ "before calling RUN_ALL_TESTS(). Please fix it.\n");
+ return false;
+ }
+
+ // Do not run any test if the --help flag was specified.
+ if (g_help_flag)
+ return true;
+
+ // Repeats the call to the post-flag parsing initialization in case the
+ // user didn't call InitGoogleTest.
+ PostFlagParsingInit();
+
+ // Even if sharding is not on, test runners may want to use the
+ // GTEST_SHARD_STATUS_FILE to query whether the test supports the sharding
+ // protocol.
+ internal::WriteToShardStatusFileIfNeeded();
+
+ // True iff we are in a subprocess for running a thread-safe-style
+ // death test.
+ bool in_subprocess_for_death_test = false;
+
+#if GTEST_HAS_DEATH_TEST
+ in_subprocess_for_death_test = (internal_run_death_test_flag_.get() != NULL);
+#endif // GTEST_HAS_DEATH_TEST
+
+ const bool should_shard = ShouldShard(kTestTotalShards, kTestShardIndex,
+ in_subprocess_for_death_test);
+
+ // Compares the full test names with the filter to decide which
+ // tests to run.
+ const bool has_tests_to_run = FilterTests(should_shard
+ ? HONOR_SHARDING_PROTOCOL
+ : IGNORE_SHARDING_PROTOCOL) > 0;
+
+ // Lists the tests and exits if the --gtest_list_tests flag was specified.
+ if (GTEST_FLAG(list_tests)) {
+ // This must be called *after* FilterTests() has been called.
+ ListTestsMatchingFilter();
+ return true;
+ }
+
+ random_seed_ = GTEST_FLAG(shuffle) ?
+ GetRandomSeedFromFlag(GTEST_FLAG(random_seed)) : 0;
+
+ // True iff at least one test has failed.
+ bool failed = false;
+
+ TestEventListener* repeater = listeners()->repeater();
+
+ start_timestamp_ = GetTimeInMillis();
+ repeater->OnTestProgramStart(*parent_);
+
+ // How many times to repeat the tests? We don't want to repeat them
+ // when we are inside the subprocess of a death test.
+ const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat);
+ // Repeats forever if the repeat count is negative.
+ const bool forever = repeat < 0;
+ for (int i = 0; forever || i != repeat; i++) {
+ // We want to preserve failures generated by ad-hoc test
+ // assertions executed before RUN_ALL_TESTS().
+ ClearNonAdHocTestResult();
+
+ const TimeInMillis start = GetTimeInMillis();
+
+ // Shuffles test cases and tests if requested.
+ if (has_tests_to_run && GTEST_FLAG(shuffle)) {
+ random()->Reseed(random_seed_);
+ // This should be done before calling OnTestIterationStart(),
+ // such that a test event listener can see the actual test order
+ // in the event.
+ ShuffleTests();
+ }
+
+ // Tells the unit test event listeners that the tests are about to start.
+ repeater->OnTestIterationStart(*parent_, i);
+
+ // Runs each test case if there is at least one test to run.
+ if (has_tests_to_run) {
+ // Sets up all environments beforehand.
+ repeater->OnEnvironmentsSetUpStart(*parent_);
+ ForEach(environments_, SetUpEnvironment);
+ repeater->OnEnvironmentsSetUpEnd(*parent_);
+
+ // Runs the tests only if there was no fatal failure during global
+ // set-up.
+ if (!Test::HasFatalFailure()) {
+ for (int test_index = 0; test_index < total_test_case_count();
+ test_index++) {
+ GetMutableTestCase(test_index)->Run();
+ }
+ }
+
+ // Tears down all environments in reverse order afterwards.
+ repeater->OnEnvironmentsTearDownStart(*parent_);
+ std::for_each(environments_.rbegin(), environments_.rend(),
+ TearDownEnvironment);
+ repeater->OnEnvironmentsTearDownEnd(*parent_);
+ }
+
+ elapsed_time_ = GetTimeInMillis() - start;
+
+ // Tells the unit test event listener that the tests have just finished.
+ repeater->OnTestIterationEnd(*parent_, i);
+
+ // Gets the result and clears it.
+ if (!Passed()) {
+ failed = true;
+ }
+
+ // Restores the original test order after the iteration. This
+ // allows the user to quickly repro a failure that happens in the
+ // N-th iteration without repeating the first (N - 1) iterations.
+ // This is not enclosed in "if (GTEST_FLAG(shuffle)) { ... }", in
+ // case the user somehow changes the value of the flag somewhere
+ // (it's always safe to unshuffle the tests).
+ UnshuffleTests();
+
+ if (GTEST_FLAG(shuffle)) {
+ // Picks a new random seed for each iteration.
+ random_seed_ = GetNextRandomSeed(random_seed_);
+ }
+ }
+
+ repeater->OnTestProgramEnd(*parent_);
+
+ return !failed;
+}
+
+// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
+// if the variable is present. If a file already exists at this location, this
+// function will write over it. If the variable is present, but the file cannot
+// be created, prints an error and exits.
+void WriteToShardStatusFileIfNeeded() {
+ const char* const test_shard_file = posix::GetEnv(kTestShardStatusFile);
+ if (test_shard_file != NULL) {
+ FILE* const file = posix::FOpen(test_shard_file, "w");
+ if (file == NULL) {
+ ColoredPrintf(COLOR_RED,
+ "Could not write to the test shard status file \"%s\" "
+ "specified by the %s environment variable.\n",
+ test_shard_file, kTestShardStatusFile);
+ fflush(stdout);
+ exit(EXIT_FAILURE);
+ }
+ fclose(file);
+ }
+}
+
+// Checks whether sharding is enabled by examining the relevant
+// environment variable values. If the variables are present,
+// but inconsistent (i.e., shard_index >= total_shards), prints
+// an error and exits. If in_subprocess_for_death_test, sharding is
+// disabled because it must only be applied to the original test
+// process. Otherwise, we could filter out death tests we intended to execute.
+bool ShouldShard(const char* total_shards_env,
+ const char* shard_index_env,
+ bool in_subprocess_for_death_test) {
+ if (in_subprocess_for_death_test) {
+ return false;
+ }
+
+ const Int32 total_shards = Int32FromEnvOrDie(total_shards_env, -1);
+ const Int32 shard_index = Int32FromEnvOrDie(shard_index_env, -1);
+
+ if (total_shards == -1 && shard_index == -1) {
+ return false;
+ } else if (total_shards == -1 && shard_index != -1) {
+ const Message msg = Message()
+ << "Invalid environment variables: you have "
+ << kTestShardIndex << " = " << shard_index
+ << ", but have left " << kTestTotalShards << " unset.\n";
+ ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+ fflush(stdout);
+ exit(EXIT_FAILURE);
+ } else if (total_shards != -1 && shard_index == -1) {
+ const Message msg = Message()
+ << "Invalid environment variables: you have "
+ << kTestTotalShards << " = " << total_shards
+ << ", but have left " << kTestShardIndex << " unset.\n";
+ ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+ fflush(stdout);
+ exit(EXIT_FAILURE);
+ } else if (shard_index < 0 || shard_index >= total_shards) {
+ const Message msg = Message()
+ << "Invalid environment variables: we require 0 <= "
+ << kTestShardIndex << " < " << kTestTotalShards
+ << ", but you have " << kTestShardIndex << "=" << shard_index
+ << ", " << kTestTotalShards << "=" << total_shards << ".\n";
+ ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+ fflush(stdout);
+ exit(EXIT_FAILURE);
+ }
+
+ return total_shards > 1;
+}
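+
+// For example, assuming the usual GTEST_TOTAL_SHARDS/GTEST_SHARD_INDEX
+// variable names, a runner could launch shard 0 of 3 with:
+//
+//   GTEST_TOTAL_SHARDS=3 GTEST_SHARD_INDEX=0 ./my_test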
+
+// Parses the environment variable var as an Int32. If it is unset,
+// returns default_val. If it is not an Int32, prints an error
+// and aborts.
+Int32 Int32FromEnvOrDie(const char* var, Int32 default_val) {
+ const char* str_val = posix::GetEnv(var);
+ if (str_val == NULL) {
+ return default_val;
+ }
+
+ Int32 result;
+ if (!ParseInt32(Message() << "The value of environment variable " << var,
+ str_val, &result)) {
+ exit(EXIT_FAILURE);
+ }
+ return result;
+}
+
+// Given the total number of shards, the shard index, and the test id,
+// returns true iff the test should be run on this shard. The test id is
+// some arbitrary but unique non-negative integer assigned to each test
+// method. Assumes that 0 <= shard_index < total_shards.
+bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) {
+ return (test_id % total_shards) == shard_index;
+}
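+
+// For example, with total_shards = 3 and shard_index = 1, the tests with
+// ids 1, 4, 7, ... are selected, so consecutive test ids are spread
+// round-robin across the shards.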
+
+// Compares the name of each test with the user-specified filter to
+// decide whether the test should be run, then records the result in
+// each TestCase and TestInfo object.
+// If shard_tests == true, further filters tests based on sharding
+// variables in the environment - see
+// http://code.google.com/p/googletest/wiki/GoogleTestAdvancedGuide.
+// Returns the number of tests that should run.
+int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
+ const Int32 total_shards = shard_tests == HONOR_SHARDING_PROTOCOL ?
+ Int32FromEnvOrDie(kTestTotalShards, -1) : -1;
+ const Int32 shard_index = shard_tests == HONOR_SHARDING_PROTOCOL ?
+ Int32FromEnvOrDie(kTestShardIndex, -1) : -1;
+
+ // num_runnable_tests is the number of tests that will run across
+ // all shards (i.e., that match the filter and are not disabled).
+ // num_selected_tests is the number of tests to be run on
+ // this shard.
+ int num_runnable_tests = 0;
+ int num_selected_tests = 0;
+ for (size_t i = 0; i < test_cases_.size(); i++) {
+ TestCase* const test_case = test_cases_[i];
+ const std::string &test_case_name = test_case->name();
+ test_case->set_should_run(false);
+
+ for (size_t j = 0; j < test_case->test_info_list().size(); j++) {
+ TestInfo* const test_info = test_case->test_info_list()[j];
+ const std::string test_name(test_info->name());
+ // A test is disabled if test case name or test name matches
+ // kDisableTestFilter.
+ const bool is_disabled =
+ internal::UnitTestOptions::MatchesFilter(test_case_name,
+ kDisableTestFilter) ||
+ internal::UnitTestOptions::MatchesFilter(test_name,
+ kDisableTestFilter);
+ test_info->is_disabled_ = is_disabled;
+
+ const bool matches_filter =
+ internal::UnitTestOptions::FilterMatchesTest(test_case_name,
+ test_name);
+ test_info->matches_filter_ = matches_filter;
+
+ const bool is_runnable =
+ (GTEST_FLAG(also_run_disabled_tests) || !is_disabled) &&
+ matches_filter;
+
+ const bool is_selected = is_runnable &&
+ (shard_tests == IGNORE_SHARDING_PROTOCOL ||
+ ShouldRunTestOnShard(total_shards, shard_index,
+ num_runnable_tests));
+
+ num_runnable_tests += is_runnable;
+ num_selected_tests += is_selected;
+
+ test_info->should_run_ = is_selected;
+ test_case->set_should_run(test_case->should_run() || is_selected);
+ }
+ }
+ return num_selected_tests;
+}
+
+// Prints the given C-string on a single line by replacing all '\n'
+// characters with string "\\n". If the output takes more than
+// max_length characters, only prints the first max_length characters
+// and "...".
+static void PrintOnOneLine(const char* str, int max_length) {
+ if (str != NULL) {
+ for (int i = 0; *str != '\0'; ++str) {
+ if (i >= max_length) {
+ printf("...");
+ break;
+ }
+ if (*str == '\n') {
+ printf("\\n");
+ i += 2;
+ } else {
+ printf("%c", *str);
+ ++i;
+ }
+ }
+ }
+}
+
+// Prints the names of the tests matching the user-specified filter flag.
+void UnitTestImpl::ListTestsMatchingFilter() {
+ // Print at most this many characters for each type/value parameter.
+ const int kMaxParamLength = 250;
+
+ for (size_t i = 0; i < test_cases_.size(); i++) {
+ const TestCase* const test_case = test_cases_[i];
+ bool printed_test_case_name = false;
+
+ for (size_t j = 0; j < test_case->test_info_list().size(); j++) {
+ const TestInfo* const test_info =
+ test_case->test_info_list()[j];
+ if (test_info->matches_filter_) {
+ if (!printed_test_case_name) {
+ printed_test_case_name = true;
+ printf("%s.", test_case->name());
+ if (test_case->type_param() != NULL) {
+ printf(" # %s = ", kTypeParamLabel);
+ // We print the type parameter on a single line to make
+ // the output easy to parse by a program.
+ PrintOnOneLine(test_case->type_param(), kMaxParamLength);
+ }
+ printf("\n");
+ }
+ printf(" %s", test_info->name());
+ if (test_info->value_param() != NULL) {
+ printf(" # %s = ", kValueParamLabel);
+ // We print the value parameter on a single line to make the
+ // output easy to parse by a program.
+ PrintOnOneLine(test_info->value_param(), kMaxParamLength);
+ }
+ printf("\n");
+ }
+ }
+ }
+ fflush(stdout);
+}
+
+// Sets the OS stack trace getter.
+//
+// Does nothing if the input and the current OS stack trace getter are
+// the same; otherwise, deletes the old getter and makes the input the
+// current getter.
+void UnitTestImpl::set_os_stack_trace_getter(
+ OsStackTraceGetterInterface* getter) {
+ if (os_stack_trace_getter_ != getter) {
+ delete os_stack_trace_getter_;
+ os_stack_trace_getter_ = getter;
+ }
+}
+
+// Returns the current OS stack trace getter if it is not NULL;
+// otherwise, creates an OsStackTraceGetter, makes it the current
+// getter, and returns it.
+OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() {
+ if (os_stack_trace_getter_ == NULL) {
+ os_stack_trace_getter_ = new OsStackTraceGetter;
+ }
+
+ return os_stack_trace_getter_;
+}
+
+// Returns the TestResult for the test that's currently running, or
+// the TestResult for the ad hoc test if no test is running.
+TestResult* UnitTestImpl::current_test_result() {
+ return current_test_info_ ?
+ &(current_test_info_->result_) : &ad_hoc_test_result_;
+}
+
+// Shuffles all test cases, and the tests within each test case,
+// making sure that death tests are still run first.
+void UnitTestImpl::ShuffleTests() {
+ // Shuffles the death test cases.
+ ShuffleRange(random(), 0, last_death_test_case_ + 1, &test_case_indices_);
+
+ // Shuffles the non-death test cases.
+ ShuffleRange(random(), last_death_test_case_ + 1,
+ static_cast<int>(test_cases_.size()), &test_case_indices_);
+
+ // Shuffles the tests inside each test case.
+ for (size_t i = 0; i < test_cases_.size(); i++) {
+ test_cases_[i]->ShuffleTests(random());
+ }
+}
+
+// Restores the test cases and tests to their order before the first shuffle.
+void UnitTestImpl::UnshuffleTests() {
+ for (size_t i = 0; i < test_cases_.size(); i++) {
+ // Unshuffles the tests in each test case.
+ test_cases_[i]->UnshuffleTests();
+ // Resets the index of each test case.
+ test_case_indices_[i] = static_cast<int>(i);
+ }
+}
+
+// Returns the current OS stack trace as an std::string.
+//
+// The maximum number of stack frames to be included is specified by
+// the gtest_stack_trace_depth flag. The skip_count parameter
+// specifies the number of top frames to be skipped, which doesn't
+// count against the number of frames to be included.
+//
+// For example, if Foo() calls Bar(), which in turn calls
+// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
+// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
+std::string GetCurrentOsStackTraceExceptTop(UnitTest* /*unit_test*/,
+ int skip_count) {
+ // We pass skip_count + 1 to skip this wrapper function in addition
+ // to what the user really wants to skip.
+ return GetUnitTestImpl()->CurrentOsStackTraceExceptTop(skip_count + 1);
+}
+
+// Used by the GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_ macro to
+// suppress unreachable code warnings.
+namespace {
+class ClassUniqueToAlwaysTrue {};
+}
+
+bool IsTrue(bool condition) { return condition; }
+
+bool AlwaysTrue() {
+#if GTEST_HAS_EXCEPTIONS
+ // This condition is always false so AlwaysTrue() never actually throws,
+ // but it makes the compiler think that it may throw.
+ if (IsTrue(false))
+ throw ClassUniqueToAlwaysTrue();
+#endif // GTEST_HAS_EXCEPTIONS
+ return true;
+}
+
+// If *pstr starts with the given prefix, modifies *pstr to be right
+// past the prefix and returns true; otherwise leaves *pstr unchanged
+// and returns false. None of pstr, *pstr, and prefix can be NULL.
+bool SkipPrefix(const char* prefix, const char** pstr) {
+ const size_t prefix_len = strlen(prefix);
+ if (strncmp(*pstr, prefix, prefix_len) == 0) {
+ *pstr += prefix_len;
+ return true;
+ }
+ return false;
+}
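+
+// For example, given const char* s = "--gtest_list_tests",
+// SkipPrefix("--", &s) returns true and advances s to "gtest_list_tests".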
+
+// Parses a string as a command line flag. The string should have
+// the format "--flag=value". When def_optional is true, the "=value"
+// part can be omitted.
+//
+// Returns the value of the flag, or NULL if the parsing failed.
+const char* ParseFlagValue(const char* str,
+ const char* flag,
+ bool def_optional) {
+ // str and flag must not be NULL.
+ if (str == NULL || flag == NULL) return NULL;
+
+ // The flag must start with "--" followed by GTEST_FLAG_PREFIX_.
+ const std::string flag_str = std::string("--") + GTEST_FLAG_PREFIX_ + flag;
+ const size_t flag_len = flag_str.length();
+ if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL;
+
+ // Skips the flag name.
+ const char* flag_end = str + flag_len;
+
+ // When def_optional is true, it's OK to not have a "=value" part.
+ if (def_optional && (flag_end[0] == '\0')) {
+ return flag_end;
+ }
+
+ // If def_optional is true and there are more characters after the
+ // flag name, or if def_optional is false, there must be a '=' after
+ // the flag name.
+ if (flag_end[0] != '=') return NULL;
+
+ // Returns the string after "=".
+ return flag_end + 1;
+}
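+
+// For example, assuming GTEST_FLAG_PREFIX_ is "gtest_",
+// ParseFlagValue("--gtest_repeat=10", "repeat", false) returns a pointer
+// to "10", while ParseFlagValue("--repeat=10", "repeat", false) returns
+// NULL because the prefix is missing.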
+
+// Parses a string for a bool flag, in the form of either
+// "--flag=value" or "--flag".
+//
+// In the former case, the value is taken as true as long as it does
+// not start with '0', 'f', or 'F'.
+//
+// In the latter case, the value is taken as true.
+//
+// On success, stores the value of the flag in *value, and returns
+// true. On failure, returns false without changing *value.
+bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
+ // Gets the value of the flag as a string.
+ const char* const value_str = ParseFlagValue(str, flag, true);
+
+ // Aborts if the parsing failed.
+ if (value_str == NULL) return false;
+
+ // Converts the string value to a bool.
+ *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F');
+ return true;
+}
+
+// Parses a string for an Int32 flag, in the form of
+// "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true. On failure, returns false without changing *value.
+bool ParseInt32Flag(const char* str, const char* flag, Int32* value) {
+ // Gets the value of the flag as a string.
+ const char* const value_str = ParseFlagValue(str, flag, false);
+
+ // Aborts if the parsing failed.
+ if (value_str == NULL) return false;
+
+ // Sets *value to the value of the flag.
+ return ParseInt32(Message() << "The value of flag --" << flag,
+ value_str, value);
+}
+
+// Parses a string for a string flag, in the form of
+// "--flag=value".
+//
+// On success, stores the value of the flag in *value, and returns
+// true. On failure, returns false without changing *value.
+bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
+ // Gets the value of the flag as a string.
+ const char* const value_str = ParseFlagValue(str, flag, false);
+
+ // Aborts if the parsing failed.
+ if (value_str == NULL) return false;
+
+ // Sets *value to the value of the flag.
+ *value = value_str;
+ return true;
+}
+
+// Determines whether a string has a prefix that Google Test uses for its
+// flags, i.e., starts with GTEST_FLAG_PREFIX_ or GTEST_FLAG_PREFIX_DASH_.
+// If Google Test detects that a command line flag has its prefix but is not
+// recognized, it will print its help message. Flags starting with
+// GTEST_INTERNAL_PREFIX_ followed by "internal_" are considered Google Test
+// internal flags and do not trigger the help message.
+static bool HasGoogleTestFlagPrefix(const char* str) {
+ return (SkipPrefix("--", &str) ||
+ SkipPrefix("-", &str) ||
+ SkipPrefix("/", &str)) &&
+ !SkipPrefix(GTEST_FLAG_PREFIX_ "internal_", &str) &&
+ (SkipPrefix(GTEST_FLAG_PREFIX_, &str) ||
+ SkipPrefix(GTEST_FLAG_PREFIX_DASH_, &str));
+}
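+
+// For example, with the default "gtest_" prefix (illustrative only):
+//
+//   HasGoogleTestFlagPrefix("--gtest_foo"); // true (unrecognized flag)
+//   HasGoogleTestFlagPrefix("/gtest_color=yes"); // true
+//   HasGoogleTestFlagPrefix("--gtest_internal_foo"); // false (internal)
+//   HasGoogleTestFlagPrefix("--other_flag"); // false (not a gtest flag)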
+
+// Prints a string containing code-encoded text. The following escape
+// sequences can be used in the string to control the text color:
+//
+// @@ prints a single '@' character.
+// @R changes the color to red.
+// @G changes the color to green.
+// @Y changes the color to yellow.
+// @D changes to the default terminal text color.
+//
+// TODO(wan@google.com): Write tests for this once we add stdout
+// capturing to Google Test.
+static void PrintColorEncoded(const char* str) {
+ GTestColor color = COLOR_DEFAULT; // The current color.
+
+ // Conceptually, we split the string into segments divided by escape
+ // sequences. Then we print one segment at a time. At the end of
+ // each iteration, the str pointer advances to the beginning of the
+ // next segment.
+ for (;;) {
+ const char* p = strchr(str, '@');
+ if (p == NULL) {
+ ColoredPrintf(color, "%s", str);
+ return;
+ }
+
+ ColoredPrintf(color, "%s", std::string(str, p).c_str());
+
+ const char ch = p[1];
+ str = p + 2;
+ if (ch == '@') {
+ ColoredPrintf(color, "@");
+ } else if (ch == 'D') {
+ color = COLOR_DEFAULT;
+ } else if (ch == 'R') {
+ color = COLOR_RED;
+ } else if (ch == 'G') {
+ color = COLOR_GREEN;
+ } else if (ch == 'Y') {
+ color = COLOR_YELLOW;
+ } else {
+ --str;
+ }
+ }
+}
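+
+// For example (illustrative only), the call
+//
+//   PrintColorEncoded("@RError:@D something @Gpassed@D at 100%@@.\n");
+//
+// prints "Error:" in red and "passed" in green, prints the remaining
+// segments in the default color, and collapses "@@" to a literal '@'.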
+
+static const char kColorEncodedHelpMessage[] =
+"This program contains tests written using " GTEST_NAME_ ". You can use the\n"
+"following command line flags to control its behavior:\n"
+"\n"
+"Test Selection:\n"
+" @G--" GTEST_FLAG_PREFIX_ "list_tests@D\n"
+" List the names of all tests instead of running them. The name of\n"
+" TEST(Foo, Bar) is \"Foo.Bar\".\n"
+" @G--" GTEST_FLAG_PREFIX_ "filter=@YPOSTIVE_PATTERNS"
+ "[@G-@YNEGATIVE_PATTERNS]@D\n"
+" Run only the tests whose name matches one of the positive patterns but\n"
+" none of the negative patterns. '?' matches any single character; '*'\n"
+" matches any substring; ':' separates two patterns.\n"
+" @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n"
+" Run all disabled tests too.\n"
+"\n"
+"Test Execution:\n"
+" @G--" GTEST_FLAG_PREFIX_ "repeat=@Y[COUNT]@D\n"
+" Run the tests repeatedly; use a negative count to repeat forever.\n"
+" @G--" GTEST_FLAG_PREFIX_ "shuffle@D\n"
+" Randomize tests' orders on every iteration.\n"
+" @G--" GTEST_FLAG_PREFIX_ "random_seed=@Y[NUMBER]@D\n"
+" Random number seed to use for shuffling test orders (between 1 and\n"
+" 99999, or 0 to use a seed based on the current time).\n"
+"\n"
+"Test Output:\n"
+" @G--" GTEST_FLAG_PREFIX_ "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n"
+" Enable/disable colored output. The default is @Gauto@D.\n"
+" -@G-" GTEST_FLAG_PREFIX_ "print_time=0@D\n"
+" Don't print the elapsed time of each test.\n"
+" @G--" GTEST_FLAG_PREFIX_ "output=xml@Y[@G:@YDIRECTORY_PATH@G"
+ GTEST_PATH_SEP_ "@Y|@G:@YFILE_PATH]@D\n"
+" Generate an XML report in the given directory or with the given file\n"
+" name. @YFILE_PATH@D defaults to @Gtest_details.xml@D.\n"
+#if GTEST_CAN_STREAM_RESULTS_
+" @G--" GTEST_FLAG_PREFIX_ "stream_result_to=@YHOST@G:@YPORT@D\n"
+" Stream test results to the given server.\n"
+#endif // GTEST_CAN_STREAM_RESULTS_
+"\n"
+"Assertion Behavior:\n"
+#if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
+" @G--" GTEST_FLAG_PREFIX_ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n"
+" Set the default death test style.\n"
+#endif // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
+" @G--" GTEST_FLAG_PREFIX_ "break_on_failure@D\n"
+" Turn assertion failures into debugger break-points.\n"
+" @G--" GTEST_FLAG_PREFIX_ "throw_on_failure@D\n"
+" Turn assertion failures into C++ exceptions.\n"
+" @G--" GTEST_FLAG_PREFIX_ "catch_exceptions=0@D\n"
+" Do not report exceptions as test failures. Instead, allow them\n"
+" to crash the program or throw a pop-up (on Windows).\n"
+"\n"
+"Except for @G--" GTEST_FLAG_PREFIX_ "list_tests@D, you can alternatively set "
+ "the corresponding\n"
+"environment variable of a flag (all letters in upper-case). For example, to\n"
+"disable colored text output, you can either specify @G--" GTEST_FLAG_PREFIX_
+ "color=no@D or set\n"
+"the @G" GTEST_FLAG_PREFIX_UPPER_ "COLOR@D environment variable to @Gno@D.\n"
+"\n"
+"For more information, please read the " GTEST_NAME_ " documentation at\n"
+"@G" GTEST_PROJECT_URL_ "@D. If you find a bug in " GTEST_NAME_ "\n"
+"(not one in your own code or tests), please report it to\n"
+"@G<" GTEST_DEV_EMAIL_ ">@D.\n";
+
+// Parses the command line for Google Test flags, without initializing
+// other parts of Google Test. The type parameter CharType can be
+// instantiated to either char or wchar_t.
+template <typename CharType>
+void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
+ for (int i = 1; i < *argc; i++) {
+ const std::string arg_string = StreamableToString(argv[i]);
+ const char* const arg = arg_string.c_str();
+
+ using internal::ParseBoolFlag;
+ using internal::ParseInt32Flag;
+ using internal::ParseStringFlag;
+
+ // Do we see a Google Test flag?
+ if (ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag,
+ &GTEST_FLAG(also_run_disabled_tests)) ||
+ ParseBoolFlag(arg, kBreakOnFailureFlag,
+ &GTEST_FLAG(break_on_failure)) ||
+ ParseBoolFlag(arg, kCatchExceptionsFlag,
+ &GTEST_FLAG(catch_exceptions)) ||
+ ParseStringFlag(arg, kColorFlag, &GTEST_FLAG(color)) ||
+ ParseStringFlag(arg, kDeathTestStyleFlag,
+ &GTEST_FLAG(death_test_style)) ||
+ ParseBoolFlag(arg, kDeathTestUseFork,
+ &GTEST_FLAG(death_test_use_fork)) ||
+ ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) ||
+ ParseStringFlag(arg, kInternalRunDeathTestFlag,
+ &GTEST_FLAG(internal_run_death_test)) ||
+ ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
+ ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
+ ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
+ ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
+ ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
+ ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||
+ ParseInt32Flag(arg, kStackTraceDepthFlag,
+ &GTEST_FLAG(stack_trace_depth)) ||
+ ParseStringFlag(arg, kStreamResultToFlag,
+ &GTEST_FLAG(stream_result_to)) ||
+ ParseBoolFlag(arg, kThrowOnFailureFlag,
+ &GTEST_FLAG(throw_on_failure))
+ ) {
+ // Yes. Shift the remainder of the argv list left by one. Note
+ // that argv has (*argc + 1) elements, the last one always being
+ // NULL. The following loop moves the trailing NULL element as
+ // well.
+ for (int j = i; j != *argc; j++) {
+ argv[j] = argv[j + 1];
+ }
+
+ // Decrements the argument count.
+ (*argc)--;
+
+ // We also need to decrement the iterator as we just removed
+ // an element.
+ i--;
+ } else if (arg_string == "--help" || arg_string == "-h" ||
+ arg_string == "-?" || arg_string == "/?" ||
+ HasGoogleTestFlagPrefix(arg)) {
+ // Both the help flag and unrecognized Google Test flags (excluding
+ // internal ones) trigger the help display.
+ g_help_flag = true;
+ }
+ }
+
+ if (g_help_flag) {
+ // We print the help here instead of in RUN_ALL_TESTS(), as the
+ // latter may not be called at all if the user is using Google
+ // Test with another testing framework.
+ PrintColorEncoded(kColorEncodedHelpMessage);
+ }
+}
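+
+// For example (illustrative only), given
+//
+//   argv == { "prog", "--gtest_repeat=2", "--foo", NULL } and *argc == 3,
+//
+// the loop above recognizes and removes "--gtest_repeat=2", leaving
+//
+//   argv == { "prog", "--foo", NULL } and *argc == 2;
+//
+// the unrecognized "--foo" is left for the client program to handle.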
+
+// Parses the command line for Google Test flags, without initializing
+// other parts of Google Test.
+void ParseGoogleTestFlagsOnly(int* argc, char** argv) {
+ ParseGoogleTestFlagsOnlyImpl(argc, argv);
+}
+void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) {
+ ParseGoogleTestFlagsOnlyImpl(argc, argv);
+}
+
+// The internal implementation of InitGoogleTest().
+//
+// The type parameter CharType can be instantiated to either char or
+// wchar_t.
+template <typename CharType>
+void InitGoogleTestImpl(int* argc, CharType** argv) {
+ g_init_gtest_count++;
+
+ // We don't want to run the initialization code twice.
+ if (g_init_gtest_count != 1) return;
+
+ if (*argc <= 0) return;
+
+ internal::g_executable_path = internal::StreamableToString(argv[0]);
+
+#if GTEST_HAS_DEATH_TEST
+
+ g_argvs.clear();
+ for (int i = 0; i != *argc; i++) {
+ g_argvs.push_back(StreamableToString(argv[i]));
+ }
+
+#endif // GTEST_HAS_DEATH_TEST
+
+ ParseGoogleTestFlagsOnly(argc, argv);
+ GetUnitTestImpl()->PostFlagParsingInit();
+}
+
+} // namespace internal
+
+// Initializes Google Test. This must be called before calling
+// RUN_ALL_TESTS(). In particular, it parses a command line for the
+// flags that Google Test recognizes. Whenever a Google Test flag is
+// seen, it is removed from argv, and *argc is decremented.
+//
+// No value is returned. Instead, the Google Test flag variables are
+// updated.
+//
+// Calling the function for the second time has no user-visible effect.
+void InitGoogleTest(int* argc, char** argv) {
+ internal::InitGoogleTestImpl(argc, argv);
+}
+
+// This overloaded version can be used in Windows programs compiled in
+// UNICODE mode.
+void InitGoogleTest(int* argc, wchar_t** argv) {
+ internal::InitGoogleTestImpl(argc, argv);
+}
+
+} // namespace testing
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan), vladl@google.com (Vlad Losev)
+//
+// This file implements death tests.
+
+
+#if GTEST_HAS_DEATH_TEST
+
+# if GTEST_OS_MAC
+# include <crt_externs.h>
+# endif // GTEST_OS_MAC
+
+# include <errno.h>
+# include <fcntl.h>
+# include <limits.h>
+
+# if GTEST_OS_LINUX
+# include <signal.h>
+# endif // GTEST_OS_LINUX
+
+# include <stdarg.h>
+
+# if GTEST_OS_WINDOWS
+# include <windows.h>
+# else
+# include <sys/mman.h>
+# include <sys/wait.h>
+# endif // GTEST_OS_WINDOWS
+
+# if GTEST_OS_QNX
+# include <spawn.h>
+# endif // GTEST_OS_QNX
+
+#endif // GTEST_HAS_DEATH_TEST
+
+
+// Indicates that this translation unit is part of Google Test's
+// implementation. It must come before gtest-internal-inl.h is
+// included, or there will be a compiler error. This trick is to
+// prevent a user from accidentally including gtest-internal-inl.h in
+// his code.
+#define GTEST_IMPLEMENTATION_ 1
+#undef GTEST_IMPLEMENTATION_
+
+namespace testing {
+
+// Constants.
+
+// The default death test style.
+static const char kDefaultDeathTestStyle[] = "fast";
+
+GTEST_DEFINE_string_(
+ death_test_style,
+ internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle),
+ "Indicates how to run a death test in a forked child process: "
+ "\"threadsafe\" (child process re-executes the test binary "
+ "from the beginning, running only the specific death test) or "
+ "\"fast\" (child process runs the death test immediately "
+ "after forking).");
+
+GTEST_DEFINE_bool_(
+ death_test_use_fork,
+ internal::BoolFromGTestEnv("death_test_use_fork", false),
+ "Instructs to use fork()/_exit() instead of clone() in death tests. "
+ "Ignored and always uses fork() on POSIX systems where clone() is not "
+ "implemented. Useful when running under valgrind or similar tools if "
+ "those do not support clone(). Valgrind 3.3.1 will just fail if "
+ "it sees an unsupported combination of clone() flags. "
+ "It is not recommended to use this flag w/o valgrind though it will "
+ "work in 99% of the cases. Once valgrind is fixed, this flag will "
+ "most likely be removed.");
+
+namespace internal {
+GTEST_DEFINE_string_(
+ internal_run_death_test, "",
+ "Indicates the file, line number, temporal index of "
+ "the single death test to run, and a file descriptor to "
+ "which a success code may be sent, all separated by "
+ "the '|' characters. This flag is specified if and only if the current "
+ "process is a sub-process launched for running a thread-safe "
+ "death test. FOR INTERNAL USE ONLY.");
+} // namespace internal
+
+#if GTEST_HAS_DEATH_TEST
+
+namespace internal {
+
+// Valid only for fast death tests. Indicates the code is running in the
+// child process of a fast style death test.
+static bool g_in_fast_death_test_child = false;
+
+// Returns a Boolean value indicating whether the caller is currently
+// executing in the context of the death test child process. Tools such as
+// Valgrind heap checkers may need this to modify their behavior in death
+// tests. IMPORTANT: This is an internal utility. Using it may break the
+// implementation of death tests. User code MUST NOT use it.
+bool InDeathTestChild() {
+# if GTEST_OS_WINDOWS
+
+ // On Windows, death tests are thread-safe regardless of the value of the
+ // death_test_style flag.
+ return !GTEST_FLAG(internal_run_death_test).empty();
+
+# else
+
+ if (GTEST_FLAG(death_test_style) == "threadsafe")
+ return !GTEST_FLAG(internal_run_death_test).empty();
+ else
+ return g_in_fast_death_test_child;
+# endif // GTEST_OS_WINDOWS
+}
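+
+// Illustrative sketch of the intended use by such a tool (hypothetical
+// code, since user tests must not call this):
+//
+//   if (testing::internal::InDeathTestChild()) {
+//     // e.g., skip leak reporting in the child that is expected to die.
+//   }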
+
+} // namespace internal
+
+// ExitedWithCode constructor.
+ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) {
+}
+
+// ExitedWithCode function-call operator.
+bool ExitedWithCode::operator()(int exit_status) const {
+# if GTEST_OS_WINDOWS
+
+ return exit_status == exit_code_;
+
+# else
+
+ return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == exit_code_;
+
+# endif // GTEST_OS_WINDOWS
+}
+
+# if !GTEST_OS_WINDOWS
+// KilledBySignal constructor.
+KilledBySignal::KilledBySignal(int signum) : signum_(signum) {
+}
+
+// KilledBySignal function-call operator.
+bool KilledBySignal::operator()(int exit_status) const {
+ return WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum_;
+}
+# endif // !GTEST_OS_WINDOWS
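+
+// Both predicates are typically passed to EXPECT_EXIT. An illustrative
+// sketch (the tested statements are hypothetical):
+//
+//   EXPECT_EXIT(_exit(1), testing::ExitedWithCode(1), "");
+//   EXPECT_EXIT(raise(SIGKILL), testing::KilledBySignal(SIGKILL), "");
+//
+// (KilledBySignal is only available on non-Windows platforms.)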
+
+namespace internal {
+
+// Utilities needed for death tests.
+
+// Generates a textual description of a given exit code, in the format
+// specified by wait(2).
+static std::string ExitSummary(int exit_code) {
+ Message m;
+
+# if GTEST_OS_WINDOWS
+
+ m << "Exited with exit status " << exit_code;
+
+# else
+
+ if (WIFEXITED(exit_code)) {
+ m << "Exited with exit status " << WEXITSTATUS(exit_code);
+ } else if (WIFSIGNALED(exit_code)) {
+ m << "Terminated by signal " << WTERMSIG(exit_code);
+ }
+# ifdef WCOREDUMP
+ if (WCOREDUMP(exit_code)) {
+ m << " (core dumped)";
+ }
+# endif
+# endif // GTEST_OS_WINDOWS
+
+ return m.GetString();
+}
+
+// Returns true if exit_status describes a process that was terminated
+// by a signal, or exited normally with a nonzero exit code.
+bool ExitedUnsuccessfully(int exit_status) {
+ return !ExitedWithCode(0)(exit_status);
+}
+
+# if !GTEST_OS_WINDOWS
+// Generates a textual failure message when a death test finds more than
+// one thread running, or cannot determine the number of threads, prior
+// to executing the given statement. It is the responsibility of the
+// caller not to pass a thread_count of 1.
+static std::string DeathTestThreadWarning(size_t thread_count) {
+ Message msg;
+ msg << "Death tests use fork(), which is unsafe particularly"
+ << " in a threaded context. For this test, " << GTEST_NAME_ << " ";
+ if (thread_count == 0)
+ msg << "couldn't detect the number of threads.";
+ else
+ msg << "detected " << thread_count << " threads.";
+ return msg.GetString();
+}
+# endif // !GTEST_OS_WINDOWS
+
+// Flag characters for reporting a death test that did not die.
+static const char kDeathTestLived = 'L';
+static const char kDeathTestReturned = 'R';
+static const char kDeathTestThrew = 'T';
+static const char kDeathTestInternalError = 'I';
+
+// An enumeration describing all of the possible ways that a death test can
+// conclude. DIED means that the process died while executing the test
+// code; LIVED means that process lived beyond the end of the test code;
+// RETURNED means that the test statement attempted to execute a return
+// statement, which is not allowed; THREW means that the test statement
+// returned control by throwing an exception. IN_PROGRESS means the test
+// has not yet concluded.
+// TODO(vladl@google.com): Unify names and possibly values for
+// AbortReason, DeathTestOutcome, and flag characters above.
+enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW };
+
+// Routine for aborting the program which is safe to call from an
+// exec-style death test child process, in which case the error
+// message is propagated back to the parent process. Otherwise, the
+// message is simply printed to stderr. In either case, the program
+// then exits with status 1.
+void DeathTestAbort(const std::string& message) {
+ // On a POSIX system, this function may be called from a threadsafe-style
+ // death test child process, which operates on a very small stack. Use
+ // the heap for any additional non-minuscule memory requirements.
+ const InternalRunDeathTestFlag* const flag =
+ GetUnitTestImpl()->internal_run_death_test_flag();
+ if (flag != NULL) {
+ FILE* parent = posix::FDOpen(flag->write_fd(), "w");
+ fputc(kDeathTestInternalError, parent);
+ fprintf(parent, "%s", message.c_str());
+ fflush(parent);
+ _exit(1);
+ } else {
+ fprintf(stderr, "%s", message.c_str());
+ fflush(stderr);
+ posix::Abort();
+ }
+}
+
+// A replacement for CHECK that calls DeathTestAbort if the assertion
+// fails.
+# define GTEST_DEATH_TEST_CHECK_(expression) \
+ do { \
+ if (!::testing::internal::IsTrue(expression)) { \
+ DeathTestAbort( \
+ ::std::string("CHECK failed: File ") + __FILE__ + ", line " \
+ + ::testing::internal::StreamableToString(__LINE__) + ": " \
+ + #expression); \
+ } \
+ } while (::testing::internal::AlwaysFalse())
+
+// This macro is similar to GTEST_DEATH_TEST_CHECK_, but it is meant for
+// evaluating any system call that fulfills two conditions: it must return
+// -1 on failure, and set errno to EINTR when it is interrupted and
+// should be tried again. The macro expands to a loop that repeatedly
+// evaluates the expression as long as it evaluates to -1 and sets
+// errno to EINTR. If the expression evaluates to -1 but errno is
+// something other than EINTR, DeathTestAbort is called.
+# define GTEST_DEATH_TEST_CHECK_SYSCALL_(expression) \
+ do { \
+ int gtest_retval; \
+ do { \
+ gtest_retval = (expression); \
+ } while (gtest_retval == -1 && errno == EINTR); \
+ if (gtest_retval == -1) { \
+ DeathTestAbort( \
+ ::std::string("CHECK failed: File ") + __FILE__ + ", line " \
+ + ::testing::internal::StreamableToString(__LINE__) + ": " \
+ + #expression + " != -1"); \
+ } \
+ } while (::testing::internal::AlwaysFalse())
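+
+// For example (illustrative only; fd is a hypothetical descriptor),
+//
+//   GTEST_DEATH_TEST_CHECK_SYSCALL_(close(fd));
+//
+// retries close(fd) while it returns -1 with errno == EINTR, and calls
+// DeathTestAbort with a file/line message if it fails for any other
+// reason.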
+
+// Returns the message describing the last system error in errno.
+std::string GetLastErrnoDescription() {
+ return errno == 0 ? "" : posix::StrError(errno);
+}
+
+// This is called from a death test parent process to read a failure
+// message from the death test child process and log it with the FATAL
+// severity. On Windows, the message is read from a pipe handle. On other
+// platforms, it is read from a file descriptor.
+static void FailFromInternalError(int fd) {
+ Message error;
+ char buffer[256];
+ int num_read;
+
+ do {
+ while ((num_read = posix::Read(fd, buffer, 255)) > 0) {
+ buffer[num_read] = '\0';
+ error << buffer;
+ }
+ } while (num_read == -1 && errno == EINTR);
+
+ if (num_read == 0) {
+ GTEST_LOG_(FATAL) << error.GetString();
+ } else {
+ const int last_error = errno;
+ GTEST_LOG_(FATAL) << "Error while reading death test internal: "
+ << GetLastErrnoDescription() << " [" << last_error << "]";
+ }
+}
+
+// Death test constructor. Increments the running death test count
+// for the current test.
+DeathTest::DeathTest() {
+ TestInfo* const info = GetUnitTestImpl()->current_test_info();
+ if (info == NULL) {
+ DeathTestAbort("Cannot run a death test outside of a TEST or "
+ "TEST_F construct");
+ }
+}
+
+// Creates and returns a death test by dispatching to the current
+// death test factory.
+bool DeathTest::Create(const char* statement, const RE* regex,
+ const char* file, int line, DeathTest** test) {
+ return GetUnitTestImpl()->death_test_factory()->Create(
+ statement, regex, file, line, test);
+}
+
+const char* DeathTest::LastMessage() {
+ return last_death_test_message_.c_str();
+}
+
+void DeathTest::set_last_death_test_message(const std::string& message) {
+ last_death_test_message_ = message;
+}
+
+std::string DeathTest::last_death_test_message_;
+
+// Provides cross platform implementation for some death functionality.
+class DeathTestImpl : public DeathTest {
+ protected:
+ DeathTestImpl(const char* a_statement, const RE* a_regex)
+ : statement_(a_statement),
+ regex_(a_regex),
+ spawned_(false),
+ status_(-1),
+ outcome_(IN_PROGRESS),
+ read_fd_(-1),
+ write_fd_(-1) {}
+
+ // read_fd_ is expected to be closed and cleared by a derived class.
+ ~DeathTestImpl() { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); }
+
+ void Abort(AbortReason reason);
+ virtual bool Passed(bool status_ok);
+
+ const char* statement() const { return statement_; }
+ const RE* regex() const { return regex_; }
+ bool spawned() const { return spawned_; }
+ void set_spawned(bool is_spawned) { spawned_ = is_spawned; }
+ int status() const { return status_; }
+ void set_status(int a_status) { status_ = a_status; }
+ DeathTestOutcome outcome() const { return outcome_; }
+ void set_outcome(DeathTestOutcome an_outcome) { outcome_ = an_outcome; }
+ int read_fd() const { return read_fd_; }
+ void set_read_fd(int fd) { read_fd_ = fd; }
+ int write_fd() const { return write_fd_; }
+ void set_write_fd(int fd) { write_fd_ = fd; }
+
+ // Called in the parent process only. Reads the result code of the death
+ // test child process via a pipe, interprets it to set the outcome_
+ // member, and closes read_fd_. Outputs diagnostics and terminates in
+ // case of unexpected codes.
+ void ReadAndInterpretStatusByte();
+
+ private:
+ // The textual content of the code this object is testing. This class
+ // doesn't own this string and should not attempt to delete it.
+ const char* const statement_;
+ // The regular expression which test output must match. DeathTestImpl
+ // doesn't own this object and should not attempt to delete it.
+ const RE* const regex_;
+ // True if the death test child process has been successfully spawned.
+ bool spawned_;
+ // The exit status of the child process.
+ int status_;
+ // How the death test concluded.
+ DeathTestOutcome outcome_;
+ // Descriptor to the read end of the pipe to the child process. It is
+ // always -1 in the child process. The child keeps its write end of the
+ // pipe in write_fd_.
+ int read_fd_;
+ // Descriptor to the child's write end of the pipe to the parent process.
+ // It is always -1 in the parent process. The parent keeps its end of the
+ // pipe in read_fd_.
+ int write_fd_;
+};
+
+// Called in the parent process only. Reads the result code of the death
+// test child process via a pipe, interprets it to set the outcome_
+// member, and closes read_fd_. Outputs diagnostics and terminates in
+// case of unexpected codes.
+void DeathTestImpl::ReadAndInterpretStatusByte() {
+ char flag;
+ int bytes_read;
+
+ // The read() here blocks until data is available (signifying the
+ // failure of the death test) or until the pipe is closed (signifying
+ // its success), so it's okay to call this in the parent before
+ // the child process has exited.
+ do {
+ bytes_read = posix::Read(read_fd(), &flag, 1);
+ } while (bytes_read == -1 && errno == EINTR);
+
+ if (bytes_read == 0) {
+ set_outcome(DIED);
+ } else if (bytes_read == 1) {
+ switch (flag) {
+ case kDeathTestReturned:
+ set_outcome(RETURNED);
+ break;
+ case kDeathTestThrew:
+ set_outcome(THREW);
+ break;
+ case kDeathTestLived:
+ set_outcome(LIVED);
+ break;
+ case kDeathTestInternalError:
+ FailFromInternalError(read_fd()); // Does not return.
+ break;
+ default:
+ GTEST_LOG_(FATAL) << "Death test child process reported "
+ << "unexpected status byte ("
+ << static_cast<unsigned int>(flag) << ")";
+ }
+ } else {
+ GTEST_LOG_(FATAL) << "Read from death test child process failed: "
+ << GetLastErrnoDescription();
+ }
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Close(read_fd()));
+ set_read_fd(-1);
+}
+
+// Signals that the death test code, which should have exited, didn't.
+// Should be called only in a death test child process.
+// Writes a status byte to the child's status file descriptor, then
+// calls _exit(1).
+void DeathTestImpl::Abort(AbortReason reason) {
+ // The parent process considers the death test to be a failure if
+ // it finds any data in our pipe. So, here we write a single flag byte
+ // to the pipe, then exit.
+ const char status_ch =
+ reason == TEST_DID_NOT_DIE ? kDeathTestLived :
+ reason == TEST_THREW_EXCEPTION ? kDeathTestThrew : kDeathTestReturned;
+
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Write(write_fd(), &status_ch, 1));
+ // We are leaking the descriptor here because on some platforms (i.e.,
+ // when built as Windows DLL), destructors of global objects will still
+ // run after calling _exit(). On such systems, write_fd_ will be
+ // indirectly closed from the destructor of UnitTestImpl, causing double
+ // close if it is also closed here. On debug configurations, double close
+ // may assert. As there are no in-process buffers to flush here, we are
+ // relying on the OS to close the descriptor after the process terminates
+ // when the destructors are not run.
+ _exit(1); // Exits without running normal exit hooks (we were supposed to crash).
+}
+
+// Returns an indented copy of stderr output for a death test.
+// This makes distinguishing death test output lines from regular log lines
+// much easier.
+static ::std::string FormatDeathTestOutput(const ::std::string& output) {
+ ::std::string ret;
+ for (size_t at = 0; ; ) {
+ const size_t line_end = output.find('\n', at);
+ ret += "[ DEATH ] ";
+ if (line_end == ::std::string::npos) {
+ ret += output.substr(at);
+ break;
+ }
+ ret += output.substr(at, line_end + 1 - at);
+ at = line_end + 1;
+ }
+ return ret;
+}
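+
+// For example (illustrative only), the two-line input
+//
+//   "expected crash\ndetails"
+//
+// is returned as
+//
+//   "[ DEATH ] expected crash\n[ DEATH ] details"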
+
+// Assesses the success or failure of a death test, using both private
+// members which have previously been set, and one argument:
+//
+// Private data members:
+// outcome: An enumeration describing how the death test
+// concluded: DIED, LIVED, THREW, or RETURNED. The death test
+// fails in the latter three cases.
+// status: The exit status of the child process. On *nix, it is in the
+// format specified by wait(2). On Windows, this is the
+// value supplied to the ExitProcess() API or a numeric code
+// of the exception that terminated the program.
+// regex: A regular expression object to be applied to
+// the test's captured standard error output; the death test
+// fails if it does not match.
+//
+// Argument:
+// status_ok: true if exit_status is acceptable in the context of
+// this particular death test, which fails if it is false
+//
+// Returns true iff all of the above conditions are met. Otherwise, the
+// first failing condition, in the order given above, is the one that is
+// reported. Also sets the last death test message string.
+bool DeathTestImpl::Passed(bool status_ok) {
+ if (!spawned())
+ return false;
+
+ const std::string error_message = GetCapturedStderr();
+
+ bool success = false;
+ Message buffer;
+
+ buffer << "Death test: " << statement() << "\n";
+ switch (outcome()) {
+ case LIVED:
+ buffer << " Result: failed to die.\n"
+ << " Error msg:\n" << FormatDeathTestOutput(error_message);
+ break;
+ case THREW:
+ buffer << " Result: threw an exception.\n"
+ << " Error msg:\n" << FormatDeathTestOutput(error_message);
+ break;
+ case RETURNED:
+ buffer << " Result: illegal return in test statement.\n"
+ << " Error msg:\n" << FormatDeathTestOutput(error_message);
+ break;
+ case DIED:
+ if (status_ok) {
+ const bool matched = RE::PartialMatch(error_message.c_str(), *regex());
+ if (matched) {
+ success = true;
+ } else {
+ buffer << " Result: died but not with expected error.\n"
+ << " Expected: " << regex()->pattern() << "\n"
+ << "Actual msg:\n" << FormatDeathTestOutput(error_message);
+ }
+ } else {
+ buffer << " Result: died but not with expected exit code:\n"
+ << " " << ExitSummary(status()) << "\n"
+ << "Actual msg:\n" << FormatDeathTestOutput(error_message);
+ }
+ break;
+ case IN_PROGRESS:
+ default:
+ GTEST_LOG_(FATAL)
+ << "DeathTest::Passed somehow called before conclusion of test";
+ }
+
+ DeathTest::set_last_death_test_message(buffer.GetString());
+ return success;
+}
+
+# if GTEST_OS_WINDOWS
+// WindowsDeathTest implements death tests on Windows. Due to the
+// specifics of starting new processes on Windows, death tests there are
+// always threadsafe, and Google Test considers the
+// --gtest_death_test_style=fast setting to be equivalent to
+// --gtest_death_test_style=threadsafe there.
+//
+// A few implementation notes: Like the Linux version, the Windows
+// implementation uses pipes for child-to-parent communication. But due to
+// the specifics of pipes on Windows, some extra steps are required:
+//
+// 1. The parent creates a communication pipe and stores handles to both
+// ends of it.
+// 2. The parent starts the child and provides it with the information
+// necessary to acquire the handle to the write end of the pipe.
+// 3. The child acquires the write end of the pipe and signals the parent
+// using a Windows event.
+// 4. Now the parent can release its write end of the pipe. (If it did so
+// before step 3, the handle's reference count would drop to 0 and the
+// pipe would be destroyed, preventing the child from acquiring it.)
+// The parent must release its end, or read operations on the read
+// end of the pipe will not return when the child terminates.
+// 5. The parent reads the child's output (outcome code and any possible
+// error messages) from the pipe and the child's stderr, and then
+// determines whether to fail the test.
+//
+// Note: to distinguish Win32 API calls from the local method and function
+// calls, the former are explicitly resolved in the global namespace.
+//
+class WindowsDeathTest : public DeathTestImpl {
+ public:
+ WindowsDeathTest(const char* a_statement,
+ const RE* a_regex,
+ const char* file,
+ int line)
+ : DeathTestImpl(a_statement, a_regex), file_(file), line_(line) {}
+
+ // All of these virtual functions are inherited from DeathTest.
+ virtual int Wait();
+ virtual TestRole AssumeRole();
+
+ private:
+ // The name of the file in which the death test is located.
+ const char* const file_;
+ // The line number on which the death test is located.
+ const int line_;
+ // Handle to the write end of the pipe to the child process.
+ AutoHandle write_handle_;
+ // Child process handle.
+ AutoHandle child_handle_;
+ // Event the child process uses to signal the parent that it has
+ // acquired the handle to the write end of the pipe. After seeing this
+ // event the parent can release its own handles to make sure its
+ // ReadFile() calls return when the child terminates.
+ AutoHandle event_handle_;
+};
+
+// Waits for the child in a death test to exit, returning its exit
+// status, or 0 if no child process exists. As a side effect, sets the
+// outcome data member.
+int WindowsDeathTest::Wait() {
+ if (!spawned())
+ return 0;
+
+ // Wait until the child either signals that it has acquired the write end
+ // of the pipe or it dies.
+ const HANDLE wait_handles[2] = { child_handle_.Get(), event_handle_.Get() };
+ switch (::WaitForMultipleObjects(2,
+ wait_handles,
+ FALSE, // Waits for any of the handles.
+ INFINITE)) {
+ case WAIT_OBJECT_0:
+ case WAIT_OBJECT_0 + 1:
+ break;
+ default:
+ GTEST_DEATH_TEST_CHECK_(false); // Should not get here.
+ }
+
+ // The child has acquired the write end of the pipe or exited.
+ // We release the handle on our side and continue.
+ write_handle_.Reset();
+ event_handle_.Reset();
+
+ ReadAndInterpretStatusByte();
+
+ // Waits for the child process to exit if it hasn't already. This
+ // returns immediately if the child has already exited, regardless of
+ // whether previous calls to WaitForMultipleObjects synchronized on this
+ // handle or not.
+ GTEST_DEATH_TEST_CHECK_(
+ WAIT_OBJECT_0 == ::WaitForSingleObject(child_handle_.Get(),
+ INFINITE));
+ DWORD status_code;
+ GTEST_DEATH_TEST_CHECK_(
+ ::GetExitCodeProcess(child_handle_.Get(), &status_code) != FALSE);
+ child_handle_.Reset();
+ set_status(static_cast<int>(status_code));
+ return status();
+}
+
+// The AssumeRole process for a Windows death test. It creates a child
+// process with the same executable as the current process to run the
+// death test. The child process is given the --gtest_filter and
+// --gtest_internal_run_death_test flags such that it knows to run the
+// current death test only.
+DeathTest::TestRole WindowsDeathTest::AssumeRole() {
+ const UnitTestImpl* const impl = GetUnitTestImpl();
+ const InternalRunDeathTestFlag* const flag =
+ impl->internal_run_death_test_flag();
+ const TestInfo* const info = impl->current_test_info();
+ const int death_test_index = info->result()->death_test_count();
+
+ if (flag != NULL) {
+ // ParseInternalRunDeathTestFlag() has performed all the necessary
+ // processing.
+ set_write_fd(flag->write_fd());
+ return EXECUTE_TEST;
+ }
+
+ // WindowsDeathTest uses an anonymous pipe to communicate results of
+ // a death test.
+ SECURITY_ATTRIBUTES handles_are_inheritable = {
+ sizeof(SECURITY_ATTRIBUTES), NULL, TRUE };
+ HANDLE read_handle, write_handle;
+ GTEST_DEATH_TEST_CHECK_(
+ ::CreatePipe(&read_handle, &write_handle, &handles_are_inheritable,
+ 0) // Default buffer size.
+ != FALSE);
+ set_read_fd(::_open_osfhandle(reinterpret_cast<intptr_t>(read_handle),
+ O_RDONLY));
+ write_handle_.Reset(write_handle);
+ event_handle_.Reset(::CreateEvent(
+ &handles_are_inheritable,
+ TRUE, // The event is manual-reset: it stays signaled until reset.
+ FALSE, // The initial state is non-signaled.
+ NULL)); // The event is unnamed.
+ GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != NULL);
+ const std::string filter_flag =
+ std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" +
+ info->test_case_name() + "." + info->name();
+ const std::string internal_flag =
+ std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag +
+ "=" + file_ + "|" + StreamableToString(line_) + "|" +
+ StreamableToString(death_test_index) + "|" +
+ StreamableToString(static_cast<unsigned int>(::GetCurrentProcessId())) +
+ // size_t has the same width as pointers on both 32-bit and 64-bit
+ // Windows platforms.
+ // See http://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx.
+ "|" + StreamableToString(reinterpret_cast<size_t>(write_handle)) +
+ "|" + StreamableToString(reinterpret_cast<size_t>(event_handle_.Get()));
+
+ char executable_path[_MAX_PATH + 1]; // NOLINT
+ GTEST_DEATH_TEST_CHECK_(
+ _MAX_PATH + 1 != ::GetModuleFileNameA(NULL,
+ executable_path,
+ _MAX_PATH));
+
+ std::string command_line =
+ std::string(::GetCommandLineA()) + " " + filter_flag + " \"" +
+ internal_flag + "\"";
+
+ DeathTest::set_last_death_test_message("");
+
+ CaptureStderr();
+ // Flush the log buffers since the log streams are shared with the child.
+ FlushInfoLog();
+
+ // The child process will share the standard handles with the parent.
+ STARTUPINFOA startup_info;
+ memset(&startup_info, 0, sizeof(STARTUPINFO));
+ startup_info.dwFlags = STARTF_USESTDHANDLES;
+ startup_info.hStdInput = ::GetStdHandle(STD_INPUT_HANDLE);
+ startup_info.hStdOutput = ::GetStdHandle(STD_OUTPUT_HANDLE);
+ startup_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE);
+
+ PROCESS_INFORMATION process_info;
+ GTEST_DEATH_TEST_CHECK_(::CreateProcessA(
+ executable_path,
+ const_cast<char*>(command_line.c_str()),
+ NULL, // Returned process handle is not inheritable.
+ NULL, // Returned thread handle is not inheritable.
+ TRUE, // Child inherits all inheritable handles (for write_handle_).
+ 0x0, // Default creation flags.
+ NULL, // Inherit the parent's environment.
+ UnitTest::GetInstance()->original_working_dir(),
+ &startup_info,
+ &process_info) != FALSE);
+ child_handle_.Reset(process_info.hProcess);
+ ::CloseHandle(process_info.hThread);
+ set_spawned(true);
+ return OVERSEE_TEST;
+}
+# else // We are not on Windows.
+
+// ForkingDeathTest provides implementations for most of the abstract
+// methods of the DeathTest interface. Only the AssumeRole method is
+// left undefined.
+class ForkingDeathTest : public DeathTestImpl {
+ public:
+ ForkingDeathTest(const char* statement, const RE* regex);
+
+ // All of these virtual functions are inherited from DeathTest.
+ virtual int Wait();
+
+ protected:
+ void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; }
+
+ private:
+ // PID of child process during death test; 0 in the child process itself.
+ pid_t child_pid_;
+};
+
+// Constructs a ForkingDeathTest.
+ForkingDeathTest::ForkingDeathTest(const char* a_statement, const RE* a_regex)
+ : DeathTestImpl(a_statement, a_regex),
+ child_pid_(-1) {}
+
+// Waits for the child in a death test to exit, returning its exit
+// status, or 0 if no child process exists. As a side effect, sets the
+// outcome data member.
+int ForkingDeathTest::Wait() {
+ if (!spawned())
+ return 0;
+
+ ReadAndInterpretStatusByte();
+
+ int status_value;
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(waitpid(child_pid_, &status_value, 0));
+ set_status(status_value);
+ return status_value;
+}
+
+// A concrete death test class that forks, then immediately runs the test
+// in the child process.
+class NoExecDeathTest : public ForkingDeathTest {
+ public:
+ NoExecDeathTest(const char* a_statement, const RE* a_regex) :
+ ForkingDeathTest(a_statement, a_regex) { }
+ virtual TestRole AssumeRole();
+};
+
+// The AssumeRole process for a fork-and-run death test. It implements a
+// straightforward fork, with a simple pipe to transmit the status byte.
+DeathTest::TestRole NoExecDeathTest::AssumeRole() {
+ const size_t thread_count = GetThreadCount();
+ if (thread_count != 1) {
+ GTEST_LOG_(WARNING) << DeathTestThreadWarning(thread_count);
+ }
+
+ int pipe_fd[2];
+ GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1);
+
+ DeathTest::set_last_death_test_message("");
+ CaptureStderr();
+ // When we fork the process below, the log file buffers are copied, but the
+ // file descriptors are shared. We flush all log files here so that closing
+ // the file descriptors in the child process doesn't throw off the
+ // synchronization between descriptors and buffers in the parent process.
+ // This is as close to the fork as possible to avoid a race condition in case
+ // there are multiple threads running before the death test, and another
+ // thread writes to the log file.
+ FlushInfoLog();
+
+ const pid_t child_pid = fork();
+ GTEST_DEATH_TEST_CHECK_(child_pid != -1);
+ set_child_pid(child_pid);
+ if (child_pid == 0) {
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[0]));
+ set_write_fd(pipe_fd[1]);
+ // Redirects all logging to stderr in the child process to prevent
+ // concurrent writes to the log files. We capture stderr in the parent
+ // process and append the child process' output to a log.
+ LogToStderr();
+ // Event forwarding to the listeners of the event listener API must be
+ // shut down in death test subprocesses.
+ GetUnitTestImpl()->listeners()->SuppressEventForwarding();
+ g_in_fast_death_test_child = true;
+ return EXECUTE_TEST;
+ } else {
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1]));
+ set_read_fd(pipe_fd[0]);
+ set_spawned(true);
+ return OVERSEE_TEST;
+ }
+}
+
+// A concrete death test class that forks and re-executes the main
+// program from the beginning, with command-line flags set that cause
+// only this specific death test to be run.
+class ExecDeathTest : public ForkingDeathTest {
+ public:
+ ExecDeathTest(const char* a_statement, const RE* a_regex,
+ const char* file, int line) :
+ ForkingDeathTest(a_statement, a_regex), file_(file), line_(line) { }
+ virtual TestRole AssumeRole();
+ private:
+ static ::std::vector<testing::internal::string>
+ GetArgvsForDeathTestChildProcess() {
+ ::std::vector<testing::internal::string> args = GetInjectableArgvs();
+ return args;
+ }
+ // The name of the file in which the death test is located.
+ const char* const file_;
+ // The line number on which the death test is located.
+ const int line_;
+};
+
+// Utility class for accumulating command-line arguments.
+class Arguments {
+ public:
+ Arguments() {
+ args_.push_back(NULL);
+ }
+
+ ~Arguments() {
+ for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
+ ++i) {
+ free(*i);
+ }
+ }
+ void AddArgument(const char* argument) {
+ args_.insert(args_.end() - 1, posix::StrDup(argument));
+ }
+
+ template <typename Str>
+ void AddArguments(const ::std::vector<Str>& arguments) {
+ for (typename ::std::vector<Str>::const_iterator i = arguments.begin();
+ i != arguments.end();
+ ++i) {
+ args_.insert(args_.end() - 1, posix::StrDup(i->c_str()));
+ }
+ }
+ char* const* Argv() {
+ return &args_[0];
+ }
+
+ private:
+ std::vector<char*> args_;
+};
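+
+// Illustrative usage sketch (the argument values are hypothetical):
+//
+//   Arguments args;
+//   args.AddArgument("/path/to/test_binary");
+//   args.AddArgument("--gtest_filter=FooTest.Bar");
+//   execve(args.Argv()[0], args.Argv(), GetEnviron());
+//
+// The NULL pushed in the constructor keeps Argv() in the NULL-terminated
+// form that the exec family of functions expects.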
+
+// A struct that encompasses the arguments to the child process of a
+// threadsafe-style death test process.
+struct ExecDeathTestArgs {
+ char* const* argv; // Command-line arguments for the child's call to exec
+ int close_fd; // File descriptor to close; the read end of a pipe
+};
+
+# if GTEST_OS_MAC
+inline char** GetEnviron() {
+ // When Google Test is built as a framework on MacOS X, the environ variable
+ // is unavailable. Apple's documentation (man environ) recommends using
+ // _NSGetEnviron() instead.
+ return *_NSGetEnviron();
+}
+# else
+// Some POSIX platforms expect you to declare environ. extern "C" makes
+// it reside in the global namespace.
+extern "C" char** environ;
+inline char** GetEnviron() { return environ; }
+# endif // GTEST_OS_MAC
+
+# if !GTEST_OS_QNX
+// The main function for a threadsafe-style death test child process.
+// This function is called in a clone()-ed process and thus must avoid
+// any potentially unsafe operations like malloc or libc functions.
+static int ExecDeathTestChildMain(void* child_arg) {
+ ExecDeathTestArgs* const args = static_cast<ExecDeathTestArgs*>(child_arg);
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(close(args->close_fd));
+
+ // We need to execute the test program in the same environment where
+ // it was originally invoked. Therefore we change to the original
+ // working directory first.
+ const char* const original_dir =
+ UnitTest::GetInstance()->original_working_dir();
+ // We can safely call chdir() as it's a direct system call.
+ if (chdir(original_dir) != 0) {
+ DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " +
+ GetLastErrnoDescription());
+ return EXIT_FAILURE;
+ }
+
+ // We can safely call execve() as it's a direct system call. We
+ // cannot use execvp() as it's a libc function and thus potentially
+ // unsafe. Since execve() doesn't search the PATH, the user must
+ // invoke the test program via a valid path that contains at least
+ // one path separator.
+ execve(args->argv[0], args->argv, GetEnviron());
+ DeathTestAbort(std::string("execve(") + args->argv[0] + ", ...) in " +
+ original_dir + " failed: " +
+ GetLastErrnoDescription());
+ return EXIT_FAILURE;
+}
+# endif // !GTEST_OS_QNX
+
+// Two utility routines that together determine the direction the stack
+// grows.
+// This could be accomplished more elegantly by a single recursive
+// function, but we want to guard against the unlikely possibility of
+// a smart compiler optimizing the recursion away.
+//
+// GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining
+// StackLowerThanAddress into StackGrowsDown, which would then not give
+// the correct answer.
+void StackLowerThanAddress(const void* ptr, bool* result) GTEST_NO_INLINE_;
+void StackLowerThanAddress(const void* ptr, bool* result) {
+ int dummy;
+ *result = (&dummy < ptr);
+}
+
+bool StackGrowsDown() {
+ int dummy;
+ bool result;
+ StackLowerThanAddress(&dummy, &result);
+ return result;
+}
+
+// Spawns a child process with the same executable as the current process in
+// a thread-safe manner and instructs it to run the death test. The
+// implementation uses fork(2) + exec. On systems where clone(2) is
+// available, it is used instead, being slightly more thread-safe. On QNX,
+// fork supports only single-threaded environments, so this function uses
+// spawn(2) there instead. The function dies with an error message if
+// anything goes wrong.
+static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) {
+ ExecDeathTestArgs args = { argv, close_fd };
+ pid_t child_pid = -1;
+
+# if GTEST_OS_QNX
+ // Obtains the current directory and sets it to be closed in the child
+ // process.
+ const int cwd_fd = open(".", O_RDONLY);
+ GTEST_DEATH_TEST_CHECK_(cwd_fd != -1);
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(cwd_fd, F_SETFD, FD_CLOEXEC));
+ // We need to execute the test program in the same environment where
+ // it was originally invoked. Therefore we change to the original
+ // working directory first.
+ const char* const original_dir =
+ UnitTest::GetInstance()->original_working_dir();
+ // We can safely call chdir() as it's a direct system call.
+ if (chdir(original_dir) != 0) {
+ DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " +
+ GetLastErrnoDescription());
+ return EXIT_FAILURE;
+ }
+
+ int fd_flags;
+ // Set close_fd to be closed after spawn.
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(fd_flags = fcntl(close_fd, F_GETFD));
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(close_fd, F_SETFD,
+ fd_flags | FD_CLOEXEC));
+ struct inheritance inherit = {0};
+ // spawn is a system call.
+ child_pid = spawn(args.argv[0], 0, NULL, &inherit, args.argv, GetEnviron());
+ // Restores the current working directory.
+ GTEST_DEATH_TEST_CHECK_(fchdir(cwd_fd) != -1);
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(close(cwd_fd));
+
+# else // GTEST_OS_QNX
+# if GTEST_OS_LINUX
+ // When a SIGPROF signal is received while fork() or clone() are executing,
+ // the process may hang. To avoid this, we ignore SIGPROF here and re-enable
+ // it after the call to fork()/clone() is complete.
+ struct sigaction saved_sigprof_action;
+ struct sigaction ignore_sigprof_action;
+ memset(&ignore_sigprof_action, 0, sizeof(ignore_sigprof_action));
+ sigemptyset(&ignore_sigprof_action.sa_mask);
+ ignore_sigprof_action.sa_handler = SIG_IGN;
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(sigaction(
+ SIGPROF, &ignore_sigprof_action, &saved_sigprof_action));
+# endif // GTEST_OS_LINUX
+
+# if GTEST_HAS_CLONE
+ const bool use_fork = GTEST_FLAG(death_test_use_fork);
+
+ if (!use_fork) {
+ static const bool stack_grows_down = StackGrowsDown();
+ const size_t stack_size = getpagesize();
+ // MAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead.
+ void* const stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED);
+
+ // Maximum stack alignment in bytes: For a downward-growing stack, this
+ // amount is subtracted from size of the stack space to get an address
+ // that is within the stack space and is aligned on all systems we care
+ // about. As far as I know there is no ABI with stack alignment greater
+ // than 64. We assume stack and stack_size already have alignment of
+ // kMaxStackAlignment.
+ const size_t kMaxStackAlignment = 64;
+ void* const stack_top =
+ static_cast<char*>(stack) +
+ (stack_grows_down ? stack_size - kMaxStackAlignment : 0);
+ GTEST_DEATH_TEST_CHECK_(stack_size > kMaxStackAlignment &&
+ reinterpret_cast<intptr_t>(stack_top) % kMaxStackAlignment == 0);
+
+ child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args);
+
+ GTEST_DEATH_TEST_CHECK_(munmap(stack, stack_size) != -1);
+ }
+# else
+ const bool use_fork = true;
+# endif // GTEST_HAS_CLONE
+
+ if (use_fork && (child_pid = fork()) == 0) {
+ ExecDeathTestChildMain(&args);
+ _exit(0);
+ }
+# endif // GTEST_OS_QNX
+# if GTEST_OS_LINUX
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(
+ sigaction(SIGPROF, &saved_sigprof_action, NULL));
+# endif // GTEST_OS_LINUX
+
+ GTEST_DEATH_TEST_CHECK_(child_pid != -1);
+ return child_pid;
+}
+
+// The AssumeRole process for a fork-and-exec death test. It re-executes the
+// main program from the beginning, setting the --gtest_filter
+// and --gtest_internal_run_death_test flags to cause only the current
+// death test to be re-run.
+DeathTest::TestRole ExecDeathTest::AssumeRole() {
+ const UnitTestImpl* const impl = GetUnitTestImpl();
+ const InternalRunDeathTestFlag* const flag =
+ impl->internal_run_death_test_flag();
+ const TestInfo* const info = impl->current_test_info();
+ const int death_test_index = info->result()->death_test_count();
+
+ if (flag != NULL) {
+ set_write_fd(flag->write_fd());
+ return EXECUTE_TEST;
+ }
+
+ int pipe_fd[2];
+ GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1);
+ // Clear the close-on-exec flag on the write end of the pipe, lest
+ // it be closed when the child process does an exec:
+ GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1);
+
+ const std::string filter_flag =
+ std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "="
+ + info->test_case_name() + "." + info->name();
+ const std::string internal_flag =
+ std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "="
+ + file_ + "|" + StreamableToString(line_) + "|"
+ + StreamableToString(death_test_index) + "|"
+ + StreamableToString(pipe_fd[1]);
+ Arguments args;
+ args.AddArguments(GetArgvsForDeathTestChildProcess());
+ args.AddArgument(filter_flag.c_str());
+ args.AddArgument(internal_flag.c_str());
+
+ DeathTest::set_last_death_test_message("");
+
+ CaptureStderr();
+ // See the comment in NoExecDeathTest::AssumeRole for why the next line
+ // is necessary.
+ FlushInfoLog();
+
+ const pid_t child_pid = ExecDeathTestSpawnChild(args.Argv(), pipe_fd[0]);
+ GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1]));
+ set_child_pid(child_pid);
+ set_read_fd(pipe_fd[0]);
+ set_spawned(true);
+ return OVERSEE_TEST;
+}
+
+# endif // !GTEST_OS_WINDOWS
+
+// Creates a concrete DeathTest-derived class that depends on the
+// --gtest_death_test_style flag, and sets the pointer pointed to
+// by the "test" argument to its address. If the test should be
+// skipped, sets that pointer to NULL. Returns true, unless the
+// flag is set to an invalid value.
+bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex,
+ const char* file, int line,
+ DeathTest** test) {
+ UnitTestImpl* const impl = GetUnitTestImpl();
+ const InternalRunDeathTestFlag* const flag =
+ impl->internal_run_death_test_flag();
+ const int death_test_index = impl->current_test_info()
+ ->increment_death_test_count();
+
+ if (flag != NULL) {
+ if (death_test_index > flag->index()) {
+ DeathTest::set_last_death_test_message(
+ "Death test count (" + StreamableToString(death_test_index)
+ + ") somehow exceeded expected maximum ("
+ + StreamableToString(flag->index()) + ")");
+ return false;
+ }
+
+ if (!(flag->file() == file && flag->line() == line &&
+ flag->index() == death_test_index)) {
+ *test = NULL;
+ return true;
+ }
+ }
+
+# if GTEST_OS_WINDOWS
+
+ if (GTEST_FLAG(death_test_style) == "threadsafe" ||
+ GTEST_FLAG(death_test_style) == "fast") {
+ *test = new WindowsDeathTest(statement, regex, file, line);
+ }
+
+# else
+
+ if (GTEST_FLAG(death_test_style) == "threadsafe") {
+ *test = new ExecDeathTest(statement, regex, file, line);
+ } else if (GTEST_FLAG(death_test_style) == "fast") {
+ *test = new NoExecDeathTest(statement, regex);
+ }
+
+# endif // GTEST_OS_WINDOWS
+
+ else { // NOLINT - this is more readable than unbalanced brackets inside #if.
+ DeathTest::set_last_death_test_message(
+ "Unknown death test style \"" + GTEST_FLAG(death_test_style)
+ + "\" encountered");
+ return false;
+ }
+
+ return true;
+}
+
+// Splits a given string on a given delimiter, populating a given
+// vector with the fields. GTEST_HAS_DEATH_TEST implies that we have
+// ::std::string, so we can use it here.
+static void SplitString(const ::std::string& str, char delimiter,
+ ::std::vector< ::std::string>* dest) {
+ ::std::vector< ::std::string> parsed;
+ ::std::string::size_type pos = 0;
+ while (::testing::internal::AlwaysTrue()) {
+ const ::std::string::size_type colon = str.find(delimiter, pos);
+ if (colon == ::std::string::npos) {
+ parsed.push_back(str.substr(pos));
+ break;
+ } else {
+ parsed.push_back(str.substr(pos, colon - pos));
+ pos = colon + 1;
+ }
+ }
+ dest->swap(parsed);
+}
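+
+// For example (illustrative only):
+//
+//   ::std::vector< ::std::string> fields;
+//   SplitString("foo_test.cc|1234|1|6", '|', &fields);
+//   // fields == { "foo_test.cc", "1234", "1", "6" }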
+
+# if GTEST_OS_WINDOWS
+// Recreates the pipe and event handles from the provided parameters,
+// signals the event, and returns a file descriptor wrapped around the pipe
+// handle. This function is called in the child process only.
+int GetStatusFileDescriptor(unsigned int parent_process_id,
+ size_t write_handle_as_size_t,
+ size_t event_handle_as_size_t) {
+ AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE,
+ FALSE, // Non-inheritable.
+ parent_process_id));
+ if (parent_process_handle.Get() == INVALID_HANDLE_VALUE) {
+ DeathTestAbort("Unable to open parent process " +
+ StreamableToString(parent_process_id));
+ }
+
+ // TODO(vladl@google.com): Replace the following check with a
+ // compile-time assertion when available.
+ GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t));
+
+ const HANDLE write_handle =
+ reinterpret_cast<HANDLE>(write_handle_as_size_t);
+ HANDLE dup_write_handle;
+
+ // The newly initialized handle is accessible only in the parent
+ // process. To obtain one accessible within the child, we need to use
+ // DuplicateHandle.
+ if (!::DuplicateHandle(parent_process_handle.Get(), write_handle,
+ ::GetCurrentProcess(), &dup_write_handle,
+ 0x0, // Requested privileges ignored since
+ // DUPLICATE_SAME_ACCESS is used.
+ FALSE, // Request non-inheritable handle.
+ DUPLICATE_SAME_ACCESS)) {
+ DeathTestAbort("Unable to duplicate the pipe handle " +
+ StreamableToString(write_handle_as_size_t) +
+ " from the parent process " +
+ StreamableToString(parent_process_id));
+ }
+
+ const HANDLE event_handle = reinterpret_cast<HANDLE>(event_handle_as_size_t);
+ HANDLE dup_event_handle;
+
+ if (!::DuplicateHandle(parent_process_handle.Get(), event_handle,
+ ::GetCurrentProcess(), &dup_event_handle,
+ 0x0,
+ FALSE,
+ DUPLICATE_SAME_ACCESS)) {
+ DeathTestAbort("Unable to duplicate the event handle " +
+ StreamableToString(event_handle_as_size_t) +
+ " from the parent process " +
+ StreamableToString(parent_process_id));
+ }
+
+ const int write_fd =
+ ::_open_osfhandle(reinterpret_cast<intptr_t>(dup_write_handle), O_APPEND);
+ if (write_fd == -1) {
+ DeathTestAbort("Unable to convert pipe handle " +
+ StreamableToString(write_handle_as_size_t) +
+ " to a file descriptor");
+ }
+
+ // Signals the parent that the write end of the pipe has been acquired
+ // so the parent can release its own write end.
+ ::SetEvent(dup_event_handle);
+
+ return write_fd;
+}
+# endif // GTEST_OS_WINDOWS
+
+// Returns a newly created InternalRunDeathTestFlag object with fields
+// initialized from the GTEST_FLAG(internal_run_death_test) flag if
+// the flag is specified; otherwise returns NULL.
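+//
+// The flag value is a '|'-separated list of fields (format inferred from
+// the parsing below): "file|line|index|write_fd" on POSIX, or
+// "file|line|index|parent_process_id|write_handle|event_handle" on
+// Windows.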
+InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() {
+ if (GTEST_FLAG(internal_run_death_test) == "") return NULL;
+
+ // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we
+ // can use it here.
+ int line = -1;
+ int index = -1;
+ ::std::vector< ::std::string> fields;
+ SplitString(GTEST_FLAG(internal_run_death_test).c_str(), '|', &fields);
+ int write_fd = -1;
+
+# if GTEST_OS_WINDOWS
+
+ unsigned int parent_process_id = 0;
+ size_t write_handle_as_size_t = 0;
+ size_t event_handle_as_size_t = 0;
+
+ if (fields.size() != 6
+ || !ParseNaturalNumber(fields[1], &line)
+ || !ParseNaturalNumber(fields[2], &index)
+ || !ParseNaturalNumber(fields[3], &parent_process_id)
+ || !ParseNaturalNumber(fields[4], &write_handle_as_size_t)
+ || !ParseNaturalNumber(fields[5], &event_handle_as_size_t)) {
+ DeathTestAbort("Bad --gtest_internal_run_death_test flag: " +
+ GTEST_FLAG(internal_run_death_test));
+ }
+ write_fd = GetStatusFileDescriptor(parent_process_id,
+ write_handle_as_size_t,
+ event_handle_as_size_t);
+# else
+
+ if (fields.size() != 4
+ || !ParseNaturalNumber(fields[1], &line)
+ || !ParseNaturalNumber(fields[2], &index)
+ || !ParseNaturalNumber(fields[3], &write_fd)) {
+ DeathTestAbort("Bad --gtest_internal_run_death_test flag: "
+ + GTEST_FLAG(internal_run_death_test));
+ }
+
+# endif // GTEST_OS_WINDOWS
+
+ return new InternalRunDeathTestFlag(fields[0], line, index, write_fd);
+}
+
+} // namespace internal
+
+#endif // GTEST_HAS_DEATH_TEST
+
+} // namespace testing
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: keith.ray@gmail.com (Keith Ray)
+
+
+#include <stdlib.h>
+
+#if GTEST_OS_WINDOWS_MOBILE
+# include <windows.h>
+#elif GTEST_OS_WINDOWS
+# include <direct.h>
+# include <io.h>
+#elif GTEST_OS_SYMBIAN
+// Symbian OpenC has PATH_MAX in sys/syslimits.h
+# include <sys/syslimits.h>
+#else
+# include <limits.h>
+# include <climits> // Some Linux distributions define PATH_MAX here.
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+#if GTEST_OS_WINDOWS
+# define GTEST_PATH_MAX_ _MAX_PATH
+#elif defined(PATH_MAX)
+# define GTEST_PATH_MAX_ PATH_MAX
+#elif defined(_XOPEN_PATH_MAX)
+# define GTEST_PATH_MAX_ _XOPEN_PATH_MAX
+#else
+# define GTEST_PATH_MAX_ _POSIX_PATH_MAX
+#endif // GTEST_OS_WINDOWS
+
+
+namespace testing {
+namespace internal {
+
+#if GTEST_OS_WINDOWS
+// On Windows, '\\' is the standard path separator, but many tools and the
+// Windows API also accept '/' as an alternate path separator. Unless otherwise
+// noted, a file path can contain either kind of path separators, or a mixture
+// of them.
+const char kPathSeparator = '\\';
+const char kAlternatePathSeparator = '/';
+//const char kPathSeparatorString[] = "\\";
+const char kAlternatePathSeparatorString[] = "/";
+# if GTEST_OS_WINDOWS_MOBILE
+// Windows CE doesn't have a current directory. You should not use
+// the current directory in tests on Windows CE, but this at least
+// provides a reasonable fallback.
+const char kCurrentDirectoryString[] = "\\";
+// Windows CE doesn't define INVALID_FILE_ATTRIBUTES
+const DWORD kInvalidFileAttributes = 0xffffffff;
+# else
+const char kCurrentDirectoryString[] = ".\\";
+# endif // GTEST_OS_WINDOWS_MOBILE
+#else
+const char kPathSeparator = '/';
+//const char kPathSeparatorString[] = "/";
+const char kCurrentDirectoryString[] = "./";
+#endif // GTEST_OS_WINDOWS
+
+// Returns whether the given character is a valid path separator.
+static bool IsPathSeparator(char c) {
+#if GTEST_HAS_ALT_PATH_SEP_
+ return (c == kPathSeparator) || (c == kAlternatePathSeparator);
+#else
+ return c == kPathSeparator;
+#endif
+}
+
+// Returns the current working directory, or "" if unsuccessful.
+FilePath FilePath::GetCurrentDir() {
+#if GTEST_OS_WINDOWS_MOBILE
+ // Windows CE doesn't have a current directory, so we just return
+ // something reasonable.
+ return FilePath(kCurrentDirectoryString);
+#elif GTEST_OS_WINDOWS
+ char cwd[GTEST_PATH_MAX_ + 1] = { '\0' };
+ return FilePath(_getcwd(cwd, sizeof(cwd)) == NULL ? "" : cwd);
+#else
+ char cwd[GTEST_PATH_MAX_ + 1] = { '\0' };
+ return FilePath(getcwd(cwd, sizeof(cwd)) == NULL ? "" : cwd);
+#endif // GTEST_OS_WINDOWS_MOBILE
+}
+
+// Returns a copy of the FilePath with the case-insensitive extension removed.
+// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
+// FilePath("dir/file"). If a case-insensitive extension is not
+// found, returns a copy of the original FilePath.
+FilePath FilePath::RemoveExtension(const char* extension) const {
+ const std::string dot_extension = std::string(".") + extension;
+ if (String::EndsWithCaseInsensitive(pathname_, dot_extension)) {
+ return FilePath(pathname_.substr(
+ 0, pathname_.length() - dot_extension.length()));
+ }
+ return *this;
+}
+
+// Returns a pointer to the last occurrence of a valid path separator in
+// the FilePath. On Windows, for example, both '/' and '\' are valid path
+// separators. Returns NULL if no path separator was found.
+const char* FilePath::FindLastPathSeparator() const {
+ const char* const last_sep = strrchr(c_str(), kPathSeparator);
+#if GTEST_HAS_ALT_PATH_SEP_
+ const char* const last_alt_sep = strrchr(c_str(), kAlternatePathSeparator);
+ // Comparing two pointers of which only one is NULL is undefined.
+ if (last_alt_sep != NULL &&
+ (last_sep == NULL || last_alt_sep > last_sep)) {
+ return last_alt_sep;
+ }
+#endif
+ return last_sep;
+}
+
+// Returns a copy of the FilePath with the directory part removed.
+// Example: FilePath("path/to/file").RemoveDirectoryName() returns
+// FilePath("file"). If there is no directory part ("just_a_file"), it returns
+// the FilePath unmodified. If there is no file part ("just_a_dir/") it
+// returns an empty FilePath ("").
+// On Windows platform, '\' is the path separator, otherwise it is '/'.
+FilePath FilePath::RemoveDirectoryName() const {
+ const char* const last_sep = FindLastPathSeparator();
+ return last_sep ? FilePath(last_sep + 1) : *this;
+}
+
+// RemoveFileName returns the directory path with the filename removed.
+// Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
+// If the FilePath contains no separator, like "a_file", RemoveFileName
+// returns FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does
+// not have a file, like "just/a/dir/", it returns the FilePath unmodified.
+// On Windows platform, '\' is the path separator, otherwise it is '/'.
+FilePath FilePath::RemoveFileName() const {
+ const char* const last_sep = FindLastPathSeparator();
+ std::string dir;
+ if (last_sep) {
+ dir = std::string(c_str(), last_sep + 1 - c_str());
+ } else {
+ dir = kCurrentDirectoryString;
+ }
+ return FilePath(dir);
+}
+
+// Helper functions for naming files in a directory for xml output.
+
+// Given directory = "dir", base_name = "test", number = 0,
+// extension = "xml", returns "dir/test.xml". If number is greater
+// than zero (e.g., 12), returns "dir/test_12.xml".
+// On Windows platform, uses \ as the separator rather than /.
+FilePath FilePath::MakeFileName(const FilePath& directory,
+ const FilePath& base_name,
+ int number,
+ const char* extension) {
+ std::string file;
+ if (number == 0) {
+ file = base_name.string() + "." + extension;
+ } else {
+ file = base_name.string() + "_" + StreamableToString(number)
+ + "." + extension;
+ }
+ return ConcatPaths(directory, FilePath(file));
+}
+
+// Given directory = "dir", relative_path = "test.xml", returns "dir/test.xml".
+// On Windows, uses \ as the separator rather than /.
+FilePath FilePath::ConcatPaths(const FilePath& directory,
+ const FilePath& relative_path) {
+ if (directory.IsEmpty())
+ return relative_path;
+ const FilePath dir(directory.RemoveTrailingPathSeparator());
+ return FilePath(dir.string() + kPathSeparator + relative_path.string());
+}
+
+// Returns true if pathname describes something findable in the file-system,
+// either a file, directory, or whatever.
+bool FilePath::FileOrDirectoryExists() const {
+#if GTEST_OS_WINDOWS_MOBILE
+ LPCWSTR unicode = String::AnsiToUtf16(pathname_.c_str());
+ const DWORD attributes = GetFileAttributes(unicode);
+ delete [] unicode;
+ return attributes != kInvalidFileAttributes;
+#else
+ posix::StatStruct file_stat;
+ return posix::Stat(pathname_.c_str(), &file_stat) == 0;
+#endif // GTEST_OS_WINDOWS_MOBILE
+}
+
+// Returns true if pathname describes a directory in the file-system
+// that exists.
+bool FilePath::DirectoryExists() const {
+ bool result = false;
+#if GTEST_OS_WINDOWS
+ // Don't strip off trailing separator if path is a root directory on
+ // Windows (like "C:\\").
+ const FilePath& path(IsRootDirectory() ? *this :
+ RemoveTrailingPathSeparator());
+#else
+ const FilePath& path(*this);
+#endif
+
+#if GTEST_OS_WINDOWS_MOBILE
+ LPCWSTR unicode = String::AnsiToUtf16(path.c_str());
+ const DWORD attributes = GetFileAttributes(unicode);
+ delete [] unicode;
+ if ((attributes != kInvalidFileAttributes) &&
+ (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
+ result = true;
+ }
+#else
+ posix::StatStruct file_stat;
+ result = posix::Stat(path.c_str(), &file_stat) == 0 &&
+ posix::IsDir(file_stat);
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+ return result;
+}
+
+// Returns true if pathname describes a root directory. (Windows has one
+// root directory per disk drive.)
+bool FilePath::IsRootDirectory() const {
+#if GTEST_OS_WINDOWS
+ // TODO(wan@google.com): on Windows a network share like
+ // \\server\share can be a root directory, although it cannot be the
+ // current directory. Handle this properly.
+ return pathname_.length() == 3 && IsAbsolutePath();
+#else
+ return pathname_.length() == 1 && IsPathSeparator(pathname_.c_str()[0]);
+#endif
+}
+
+// Returns true if pathname describes an absolute path.
+bool FilePath::IsAbsolutePath() const {
+ const char* const name = pathname_.c_str();
+#if GTEST_OS_WINDOWS
+ return pathname_.length() >= 3 &&
+ ((name[0] >= 'a' && name[0] <= 'z') ||
+ (name[0] >= 'A' && name[0] <= 'Z')) &&
+ name[1] == ':' &&
+ IsPathSeparator(name[2]);
+#else
+ return IsPathSeparator(name[0]);
+#endif
+}
+
+// Returns a pathname for a file that does not currently exist. The pathname
+// will be directory/base_name.extension or
+// directory/base_name_<number>.extension if directory/base_name.extension
+// already exists. The number will be incremented until a pathname is found
+// that does not already exist.
+// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
+// There could be a race condition if two or more processes are calling this
+// function at the same time -- they could both pick the same filename.
+FilePath FilePath::GenerateUniqueFileName(const FilePath& directory,
+ const FilePath& base_name,
+ const char* extension) {
+ FilePath full_pathname;
+ int number = 0;
+ do {
+ full_pathname.Set(MakeFileName(directory, base_name, number++, extension));
+ } while (full_pathname.FileOrDirectoryExists());
+ return full_pathname;
+}
+
+// Returns true if FilePath ends with a path separator, which indicates that
+// it is intended to represent a directory. Returns false otherwise.
+// This does NOT check that a directory (or file) actually exists.
+bool FilePath::IsDirectory() const {
+ return !pathname_.empty() &&
+ IsPathSeparator(pathname_.c_str()[pathname_.length() - 1]);
+}
+
+// Create directories so that path exists. Returns true if successful or if
+// the directories already exist; returns false if unable to create directories
+// for any reason.
+bool FilePath::CreateDirectoriesRecursively() const {
+ if (!this->IsDirectory()) {
+ return false;
+ }
+
+ if (pathname_.length() == 0 || this->DirectoryExists()) {
+ return true;
+ }
+
+ const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName());
+ return parent.CreateDirectoriesRecursively() && this->CreateFolder();
+}
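+
+// For example, CreateDirectoriesRecursively() on FilePath("a/b/c/") recurses
+// to create "a/" and then "a/b/" before creating "a/b/c/" itself; a path
+// without a trailing separator is rejected immediately by IsDirectory().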
+
+// Create the directory so that path exists. Returns true if successful or
+// if the directory already exists; returns false if unable to create the
+// directory for any reason, including if the parent directory does not
+// exist. Not named "CreateDirectory" because that's a macro on Windows.
+bool FilePath::CreateFolder() const {
+#if GTEST_OS_WINDOWS_MOBILE
+ FilePath removed_sep(this->RemoveTrailingPathSeparator());
+ LPCWSTR unicode = String::AnsiToUtf16(removed_sep.c_str());
+ int result = CreateDirectory(unicode, NULL) ? 0 : -1;
+ delete [] unicode;
+#elif GTEST_OS_WINDOWS
+ int result = _mkdir(pathname_.c_str());
+#else
+ int result = mkdir(pathname_.c_str(), 0777);
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+ if (result == -1) {
+ return this->DirectoryExists(); // An error is OK if the directory exists.
+ }
+ return true; // No error.
+}
+
+// If input name has a trailing separator character, remove it and return the
+// name, otherwise return the name string unmodified.
+// On Windows platform, uses \ as the separator, other platforms use /.
+FilePath FilePath::RemoveTrailingPathSeparator() const {
+ return IsDirectory()
+ ? FilePath(pathname_.substr(0, pathname_.length() - 1))
+ : *this;
+}
+
+// Removes any redundant separators that might be in the pathname.
+// For example, "bar///foo" becomes "bar/foo". Does not eliminate other
+// redundancies that might be in a pathname involving "." or "..".
+// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share).
+void FilePath::Normalize() {
+ if (pathname_.c_str() == NULL) {
+ pathname_ = "";
+ return;
+ }
+ const char* src = pathname_.c_str();
+ char* const dest = new char[pathname_.length() + 1];
+ char* dest_ptr = dest;
+ memset(dest_ptr, 0, pathname_.length() + 1);
+
+ while (*src != '\0') {
+ *dest_ptr = *src;
+ if (!IsPathSeparator(*src)) {
+ src++;
+ } else {
+#if GTEST_HAS_ALT_PATH_SEP_
+ if (*dest_ptr == kAlternatePathSeparator) {
+ *dest_ptr = kPathSeparator;
+ }
+#endif
+ while (IsPathSeparator(*src))
+ src++;
+ }
+ dest_ptr++;
+ }
+ *dest_ptr = '\0';
+ pathname_ = dest;
+ delete[] dest;
+}
+
+} // namespace internal
+} // namespace testing
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#if GTEST_OS_WINDOWS_MOBILE
+# include <windows.h> // For TerminateProcess()
+#elif GTEST_OS_WINDOWS
+# include <io.h>
+# include <sys/stat.h>
+#else
+# include <unistd.h>
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+#if GTEST_OS_MAC
+# include <mach/mach_init.h>
+# include <mach/task.h>
+# include <mach/vm_map.h>
+#endif // GTEST_OS_MAC
+
+#if GTEST_OS_QNX
+# include <devctl.h>
+# include <sys/procfs.h>
+#endif // GTEST_OS_QNX
+
+
+// Indicates that this translation unit is part of Google Test's
+// implementation. It must come before gtest-internal-inl.h is
+// included, or there will be a compiler error. This trick is to
+// prevent a user from accidentally including gtest-internal-inl.h in
+// his code.
+#define GTEST_IMPLEMENTATION_ 1
+#undef GTEST_IMPLEMENTATION_
+
+namespace testing {
+namespace internal {
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+// MSVC and C++Builder do not provide a definition of STDERR_FILENO.
+const int kStdOutFileno = 1;
+const int kStdErrFileno = 2;
+#else
+const int kStdOutFileno = STDOUT_FILENO;
+const int kStdErrFileno = STDERR_FILENO;
+#endif // _MSC_VER
+
+#if GTEST_OS_MAC
+
+// Returns the number of threads running in the process, or 0 to indicate that
+// we cannot detect it.
+size_t GetThreadCount() {
+ const task_t task = mach_task_self();
+ mach_msg_type_number_t thread_count;
+ thread_act_array_t thread_list;
+ const kern_return_t status = task_threads(task, &thread_list, &thread_count);
+ if (status == KERN_SUCCESS) {
+ // task_threads allocates resources in thread_list and we need to free them
+ // to avoid leaks.
+ vm_deallocate(task,
+ reinterpret_cast<vm_address_t>(thread_list),
+ sizeof(thread_t) * thread_count);
+ return static_cast<size_t>(thread_count);
+ } else {
+ return 0;
+ }
+}
+
+#elif GTEST_OS_QNX
+
+// Returns the number of threads running in the process, or 0 to indicate that
+// we cannot detect it.
+size_t GetThreadCount() {
+ const int fd = open("/proc/self/as", O_RDONLY);
+ if (fd < 0) {
+ return 0;
+ }
+ procfs_info process_info;
+ const int status =
+ devctl(fd, DCMD_PROC_INFO, &process_info, sizeof(process_info), NULL);
+ close(fd);
+ if (status == EOK) {
+ return static_cast<size_t>(process_info.num_threads);
+ } else {
+ return 0;
+ }
+}
+
+#else
+
+size_t GetThreadCount() {
+ // There's no portable way to detect the number of threads, so we just
+ // return 0 to indicate that we cannot detect it.
+ return 0;
+}
+
+#endif // GTEST_OS_MAC
+
+#if GTEST_USES_POSIX_RE
+
+// Implements RE. Currently only needed for death tests.
+
+RE::~RE() {
+ if (is_valid_) {
+ // regfree'ing an invalid regex might crash because the content
+ // of the regex is undefined. Since the two regexes are essentially
+ // the same, one cannot be valid (or invalid) without the other
+ // being so too.
+ regfree(&partial_regex_);
+ regfree(&full_regex_);
+ }
+ free(const_cast<char*>(pattern_));
+}
+
+// Returns true iff regular expression re matches the entire str.
+bool RE::FullMatch(const char* str, const RE& re) {
+ if (!re.is_valid_) return false;
+
+ regmatch_t match;
+ return regexec(&re.full_regex_, str, 1, &match, 0) == 0;
+}
+
+// Returns true iff regular expression re matches a substring of str
+// (including str itself).
+bool RE::PartialMatch(const char* str, const RE& re) {
+ if (!re.is_valid_) return false;
+
+ regmatch_t match;
+ return regexec(&re.partial_regex_, str, 1, &match, 0) == 0;
+}
+
+// Initializes an RE from its string representation.
+void RE::Init(const char* regex) {
+ pattern_ = posix::StrDup(regex);
+
+ // Reserves enough bytes to hold the regular expression used for a
+ // full match.
+ const size_t full_regex_len = strlen(regex) + 10;
+ char* const full_pattern = new char[full_regex_len];
+
+ snprintf(full_pattern, full_regex_len, "^(%s)$", regex);
+ is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0;
+ // We want to call regcomp(&partial_regex_, ...) even if the
+ // previous expression returns false. Otherwise partial_regex_ may
+ // not be properly initialized and may cause trouble when it's
+ // freed.
+ //
+ // Some implementations of POSIX regex (e.g. on at least some
+ // versions of Cygwin) don't accept the empty string as a valid
+ // regex. We change it to an equivalent form "()" to be safe.
+ if (is_valid_) {
+ const char* const partial_regex = (*regex == '\0') ? "()" : regex;
+ is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0;
+ }
+ EXPECT_TRUE(is_valid_)
+ << "Regular expression \"" << regex
+ << "\" is not a valid POSIX Extended regular expression.";
+
+ delete[] full_pattern;
+}
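+
+// A short illustration (RE is internal to Google Test; this only shows the
+// full- vs. partial-match semantics implemented above):
+//
+//   const RE re("a.c");
+//   RE::FullMatch("abc", re);      // true: "^(a.c)$" matches all of "abc".
+//   RE::FullMatch("xabcy", re);    // false: the whole string must match.
+//   RE::PartialMatch("xabcy", re); // true: "a.c" matches the substring "abc".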
+
+#elif GTEST_USES_SIMPLE_RE
+
+// Returns true iff ch appears anywhere in str (excluding the
+// terminating '\0' character).
+bool IsInSet(char ch, const char* str) {
+ return ch != '\0' && strchr(str, ch) != NULL;
+}
+
+// Returns true iff ch belongs to the given classification. Unlike
+// similar functions in <ctype.h>, these aren't affected by the
+// current locale.
+bool IsAsciiDigit(char ch) { return '0' <= ch && ch <= '9'; }
+bool IsAsciiPunct(char ch) {
+ return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~");
+}
+bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); }
+bool IsAsciiWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); }
+bool IsAsciiWordChar(char ch) {
+ return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') ||
+ ('0' <= ch && ch <= '9') || ch == '_';
+}
+
+// Returns true iff "\\c" is a supported escape sequence.
+bool IsValidEscape(char c) {
+ return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW"));
+}
+
+// Returns true iff the given atom (specified by escaped and pattern)
+// matches ch. The result is undefined if the atom is invalid.
+bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
+ if (escaped) { // "\\p" where p is pattern_char.
+ switch (pattern_char) {
+ case 'd': return IsAsciiDigit(ch);
+ case 'D': return !IsAsciiDigit(ch);
+ case 'f': return ch == '\f';
+ case 'n': return ch == '\n';
+ case 'r': return ch == '\r';
+ case 's': return IsAsciiWhiteSpace(ch);
+ case 'S': return !IsAsciiWhiteSpace(ch);
+ case 't': return ch == '\t';
+ case 'v': return ch == '\v';
+ case 'w': return IsAsciiWordChar(ch);
+ case 'W': return !IsAsciiWordChar(ch);
+ }
+ return IsAsciiPunct(pattern_char) && pattern_char == ch;
+ }
+
+ return (pattern_char == '.' && ch != '\n') || pattern_char == ch;
+}
+
+// Helper function used by ValidateRegex() to format error messages.
+std::string FormatRegexSyntaxError(const char* regex, int index) {
+ return (Message() << "Syntax error at index " << index
+ << " in simple regular expression \"" << regex << "\": ").GetString();
+}
+
+// Generates non-fatal failures and returns false if regex is invalid;
+// otherwise returns true.
+bool ValidateRegex(const char* regex) {
+ if (regex == NULL) {
+ // TODO(wan@google.com): fix the source file location in the
+ // assertion failures to match where the regex is used in user
+ // code.
+ ADD_FAILURE() << "NULL is not a valid simple regular expression.";
+ return false;
+ }
+
+ bool is_valid = true;
+
+ // True iff ?, *, or + can follow the previous atom.
+ bool prev_repeatable = false;
+ for (int i = 0; regex[i]; i++) {
+ if (regex[i] == '\\') { // An escape sequence
+ i++;
+ if (regex[i] == '\0') {
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
+ << "'\\' cannot appear at the end.";
+ return false;
+ }
+
+ if (!IsValidEscape(regex[i])) {
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
+ << "invalid escape sequence \"\\" << regex[i] << "\".";
+ is_valid = false;
+ }
+ prev_repeatable = true;
+ } else { // Not an escape sequence.
+ const char ch = regex[i];
+
+ if (ch == '^' && i > 0) {
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
+ << "'^' can only appear at the beginning.";
+ is_valid = false;
+ } else if (ch == '$' && regex[i + 1] != '\0') {
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
+ << "'$' can only appear at the end.";
+ is_valid = false;
+ } else if (IsInSet(ch, "()[]{}|")) {
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
+ << "'" << ch << "' is unsupported.";
+ is_valid = false;
+ } else if (IsRepeat(ch) && !prev_repeatable) {
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
+ << "'" << ch << "' can only follow a repeatable token.";
+ is_valid = false;
+ }
+
+ prev_repeatable = !IsInSet(ch, "^$?*+");
+ }
+ }
+
+ return is_valid;
+}
+
+// Matches a repeated regex atom followed by a valid simple regular
+// expression. The regex atom is defined as c if escaped is false,
+// or \c otherwise. repeat is the repetition meta character (?, *,
+// or +). The behavior is undefined if str contains too many
+// characters to be indexable by size_t, in which case the test will
+// probably time out anyway. We are fine with this limitation as
+// std::string has it too.
+bool MatchRepetitionAndRegexAtHead(
+ bool escaped, char c, char repeat, const char* regex,
+ const char* str) {
+ const size_t min_count = (repeat == '+') ? 1 : 0;
+ const size_t max_count = (repeat == '?') ? 1 :
+ static_cast<size_t>(-1) - 1;
+ // We cannot call numeric_limits::max() as it conflicts with the
+ // max() macro on Windows.
+
+ for (size_t i = 0; i <= max_count; ++i) {
+ // We know that the atom matches each of the first i characters in str.
+ if (i >= min_count && MatchRegexAtHead(regex, str + i)) {
+ // We have enough matches at the head, and the tail matches too.
+ // Since we only care about *whether* the pattern matches str
+ // (as opposed to *how* it matches), there is no need to find a
+ // greedy match.
+ return true;
+ }
+ if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i]))
+ return false;
+ }
+ return false;
+}
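+
+// For example (under the simple-RE semantics above), matching the repeated
+// atom 'a' with repeat '*' and tail regex "b" against "aaab" tries
+// i = 0, 1, 2, ... and succeeds at i == 3, where the tail "b" matches the
+// remaining input.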
+
+// Returns true iff regex matches a prefix of str. regex must be a
+// valid simple regular expression and not start with "^", or the
+// result is undefined.
+bool MatchRegexAtHead(const char* regex, const char* str) {
+ if (*regex == '\0') // An empty regex matches a prefix of anything.
+ return true;
+
+ // "$" only matches the end of a string. Note that regex being
+ // valid guarantees that there's nothing after "$" in it.
+ if (*regex == '$')
+ return *str == '\0';
+
+ // Is the first thing in regex an escape sequence?
+ const bool escaped = *regex == '\\';
+ if (escaped)
+ ++regex;
+ if (IsRepeat(regex[1])) {
+ // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so
+ // here's an indirect recursion. It terminates as the regex gets
+ // shorter in each recursion.
+ return MatchRepetitionAndRegexAtHead(
+ escaped, regex[0], regex[1], regex + 2, str);
+ } else {
+ // regex isn't empty, isn't "$", and doesn't start with a
+ // repetition. We match the first atom of regex with the first
+ // character of str and recurse.
+ return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) &&
+ MatchRegexAtHead(regex + 1, str + 1);
+ }
+}
+
+// Returns true iff regex matches any substring of str. regex must be
+// a valid simple regular expression, or the result is undefined.
+//
+// The algorithm is recursive, but the recursion depth doesn't exceed
+// the regex length, so we won't need to worry about running out of
+// stack space normally. In rare cases the time complexity can be
+// exponential with respect to the regex length + the string length,
+// but usually it's much faster (often close to linear).
+bool MatchRegexAnywhere(const char* regex, const char* str) {
+ if (regex == NULL || str == NULL)
+ return false;
+
+ if (*regex == '^')
+ return MatchRegexAtHead(regex + 1, str);
+
+ // A successful match can be anywhere in str.
+ do {
+ if (MatchRegexAtHead(regex, str))
+ return true;
+ } while (*str++ != '\0');
+ return false;
+}
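+
+// Illustrative calls (consistent with the code above):
+//
+//   MatchRegexAnywhere("a.c", "xxabcxx");  // true: matches "abc".
+//   MatchRegexAnywhere("^a.c", "xxabc");   // false: anchored at the head.
+//   MatchRegexAnywhere("a.c$", "xxabc");   // true: "$" matches the end.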
+
+// Implements the RE class.
+
+RE::~RE() {
+ free(const_cast<char*>(pattern_));
+ free(const_cast<char*>(full_pattern_));
+}
+
+// Returns true iff regular expression re matches the entire str.
+bool RE::FullMatch(const char* str, const RE& re) {
+ return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str);
+}
+
+// Returns true iff regular expression re matches a substring of str
+// (including str itself).
+bool RE::PartialMatch(const char* str, const RE& re) {
+ return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str);
+}
+
+// Initializes an RE from its string representation.
+void RE::Init(const char* regex) {
+ pattern_ = full_pattern_ = NULL;
+ if (regex != NULL) {
+ pattern_ = posix::StrDup(regex);
+ }
+
+ is_valid_ = ValidateRegex(regex);
+ if (!is_valid_) {
+ // No need to calculate the full pattern when the regex is invalid.
+ return;
+ }
+
+ const size_t len = strlen(regex);
+ // Reserves enough bytes to hold the regular expression used for a
+ // full match: we need space to prepend a '^', append a '$', and
+ // terminate the string with '\0'.
+ char* buffer = static_cast<char*>(malloc(len + 3));
+ full_pattern_ = buffer;
+
+ if (*regex != '^')
+ *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'.
+
+ // We don't use snprintf or strncpy, as they trigger a warning when
+ // compiled with VC++ 8.0.
+ memcpy(buffer, regex, len);
+ buffer += len;
+
+ if (len == 0 || regex[len - 1] != '$')
+ *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'.
+
+ *buffer = '\0';
+}
+
+#endif // GTEST_USES_POSIX_RE
+
+const char kUnknownFile[] = "unknown file";
+
+// Formats a source file path and a line number as they would appear
+// in an error message from the compiler used to compile this code.
+GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
+ const std::string file_name(file == NULL ? kUnknownFile : file);
+
+ if (line < 0) {
+ return file_name + ":";
+ }
+#ifdef _MSC_VER
+ return file_name + "(" + StreamableToString(line) + "):";
+#else
+ return file_name + ":" + StreamableToString(line) + ":";
+#endif // _MSC_VER
+}
+
+// Formats a file location for compiler-independent XML output.
+// Although this function is not platform dependent, we put it next to
+// FormatFileLocation in order to contrast the two functions.
+// Note that FormatCompilerIndependentFileLocation() does NOT append colon
+// to the file location it produces, unlike FormatFileLocation().
+GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(
+ const char* file, int line) {
+ const std::string file_name(file == NULL ? kUnknownFile : file);
+
+ if (line < 0)
+ return file_name;
+ else
+ return file_name + ":" + StreamableToString(line);
+}
+
+
+GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line)
+ : severity_(severity) {
+ const char* const marker =
+ severity == GTEST_INFO ? "[ INFO ]" :
+ severity == GTEST_WARNING ? "[WARNING]" :
+ severity == GTEST_ERROR ? "[ ERROR ]" : "[ FATAL ]";
+ GetStream() << ::std::endl << marker << " "
+ << FormatFileLocation(file, line).c_str() << ": ";
+}
+
+// Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
+GTestLog::~GTestLog() {
+ GetStream() << ::std::endl;
+ if (severity_ == GTEST_FATAL) {
+ fflush(stderr);
+ posix::Abort();
+ }
+}
+// Disable Microsoft deprecation warnings for POSIX functions called from
+// this class (creat, dup, dup2, and close)
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable: 4996)
+#endif // _MSC_VER
+
+#if GTEST_HAS_STREAM_REDIRECTION
+
+// Object that captures an output stream (stdout/stderr).
+class CapturedStream {
+ public:
+ // The ctor redirects the stream to a temporary file.
+ explicit CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) {
+# if GTEST_OS_WINDOWS
+ char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT
+ char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT
+
+ ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path);
+ const UINT success = ::GetTempFileNameA(temp_dir_path,
+ "gtest_redir",
+ 0, // Generate unique file name.
+ temp_file_path);
+ GTEST_CHECK_(success != 0)
+ << "Unable to create a temporary file in " << temp_dir_path;
+ const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE);
+ GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file "
+ << temp_file_path;
+ filename_ = temp_file_path;
+# else
+ // There's no guarantee that a test has write access to the current
+ // directory, so we create the temporary file in the /tmp directory
+ // instead. We use /tmp on most systems, and /sdcard on Android.
+ // That's because Android doesn't have /tmp.
+# if GTEST_OS_LINUX_ANDROID
+ // Note: Android applications are expected to call the framework's
+ // Context.getExternalStorageDirectory() method through JNI to get
+ // the location of the world-writable SD Card directory. However,
+ // this requires a Context handle, which cannot be retrieved
+ // globally from native code. Doing so also precludes running the
+ // code as part of a regular standalone executable, which doesn't
+ // run in a Dalvik process (e.g. when running it through 'adb shell').
+ //
+ // The location /sdcard is directly accessible from native code
+ // and is the only location (unofficially) supported by the Android
+ // team. It's generally a symlink to the real SD Card mount point
+ // which can be /mnt/sdcard, /mnt/sdcard0, /system/media/sdcard, or
+ // other OEM-customized locations. Never rely on these, and always
+ // use /sdcard.
+ char name_template[] = "/sdcard/gtest_captured_stream.XXXXXX";
+# else
+ char name_template[] = "/tmp/captured_stream.XXXXXX";
+# endif // GTEST_OS_LINUX_ANDROID
+ const int captured_fd = mkstemp(name_template);
+ filename_ = name_template;
+# endif // GTEST_OS_WINDOWS
+ fflush(NULL);
+ dup2(captured_fd, fd_);
+ close(captured_fd);
+ }
+
+ ~CapturedStream() {
+ remove(filename_.c_str());
+ }
+
+ std::string GetCapturedString() {
+ if (uncaptured_fd_ != -1) {
+ // Restores the original stream.
+ fflush(NULL);
+ dup2(uncaptured_fd_, fd_);
+ close(uncaptured_fd_);
+ uncaptured_fd_ = -1;
+ }
+
+ FILE* const file = posix::FOpen(filename_.c_str(), "r");
+ const std::string content = ReadEntireFile(file);
+ posix::FClose(file);
+ return content;
+ }
+
+ private:
+ // Reads the entire content of a file as an std::string.
+ static std::string ReadEntireFile(FILE* file);
+
+ // Returns the size (in bytes) of a file.
+ static size_t GetFileSize(FILE* file);
+
+ const int fd_; // A stream to capture.
+ int uncaptured_fd_;
+ // Name of the temporary file holding the captured stream's output.
+ ::std::string filename_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream);
+};
+
+// Returns the size (in bytes) of a file.
+size_t CapturedStream::GetFileSize(FILE* file) {
+ fseek(file, 0, SEEK_END);
+ return static_cast<size_t>(ftell(file));
+}
+
+// Reads the entire content of a file as a string.
+std::string CapturedStream::ReadEntireFile(FILE* file) {
+ const size_t file_size = GetFileSize(file);
+ char* const buffer = new char[file_size];
+
+ size_t bytes_last_read = 0; // # of bytes read in the last fread()
+ size_t bytes_read = 0; // # of bytes read so far
+
+ fseek(file, 0, SEEK_SET);
+
+ // Keeps reading the file until we cannot read further or the
+ // pre-determined file size is reached.
+ do {
+ bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file);
+ bytes_read += bytes_last_read;
+ } while (bytes_last_read > 0 && bytes_read < file_size);
+
+ const std::string content(buffer, bytes_read);
+ delete[] buffer;
+
+ return content;
+}
+
+# ifdef _MSC_VER
+# pragma warning(pop)
+# endif // _MSC_VER
+
+static CapturedStream* g_captured_stderr = NULL;
+static CapturedStream* g_captured_stdout = NULL;
+
+// Starts capturing an output stream (stdout/stderr).
+void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
+ if (*stream != NULL) {
+ GTEST_LOG_(FATAL) << "Only one " << stream_name
+ << " capturer can exist at a time.";
+ }
+ *stream = new CapturedStream(fd);
+}
+
+// Stops capturing the output stream and returns the captured string.
+std::string GetCapturedStream(CapturedStream** captured_stream) {
+ const std::string content = (*captured_stream)->GetCapturedString();
+
+ delete *captured_stream;
+ *captured_stream = NULL;
+
+ return content;
+}
+
+// Starts capturing stdout.
+void CaptureStdout() {
+ CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout);
+}
+
+// Starts capturing stderr.
+void CaptureStderr() {
+ CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr);
+}
+
+// Stops capturing stdout and returns the captured string.
+std::string GetCapturedStdout() {
+ return GetCapturedStream(&g_captured_stdout);
+}
+
+// Stops capturing stderr and returns the captured string.
+std::string GetCapturedStderr() {
+ return GetCapturedStream(&g_captured_stderr);
+}
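+
+// A minimal usage sketch (using only the helpers defined above):
+//
+//   CaptureStdout();
+//   printf("hello\n");
+//   const std::string out = GetCapturedStdout();  // out == "hello\n"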
+
+#endif // GTEST_HAS_STREAM_REDIRECTION
+
+#if GTEST_HAS_DEATH_TEST
+
+// A copy of all command line arguments. Set by InitGoogleTest().
+::std::vector<testing::internal::string> g_argvs;
+
+static const ::std::vector<testing::internal::string>* g_injected_test_argvs =
+ NULL; // Owned.
+
+void SetInjectableArgvs(const ::std::vector<testing::internal::string>* argvs) {
+ if (g_injected_test_argvs != argvs)
+ delete g_injected_test_argvs;
+ g_injected_test_argvs = argvs;
+}
+
+const ::std::vector<testing::internal::string>& GetInjectableArgvs() {
+ if (g_injected_test_argvs != NULL) {
+ return *g_injected_test_argvs;
+ }
+ return g_argvs;
+}
+#endif // GTEST_HAS_DEATH_TEST
+
+#if GTEST_OS_WINDOWS_MOBILE
+namespace posix {
+void Abort() {
+ DebugBreak();
+ TerminateProcess(GetCurrentProcess(), 1);
+}
+} // namespace posix
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+// Returns the name of the environment variable corresponding to the
+// given flag. For example, FlagToEnvVar("foo") will return
+// "GTEST_FOO" in the open-source version.
+static std::string FlagToEnvVar(const char* flag) {
+ const std::string full_flag =
+ (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();
+
+ Message env_var;
+ for (size_t i = 0; i != full_flag.length(); i++) {
+ env_var << ToUpper(full_flag.c_str()[i]);
+ }
+
+ return env_var.GetString();
+}
+
+// Parses 'str' for a 32-bit signed integer. If successful, writes
+// the result to *value and returns true; otherwise leaves *value
+// unchanged and returns false.
+bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
+ // Parses the environment variable as a decimal integer.
+ char* end = NULL;
+ const long long_value = strtol(str, &end, 10); // NOLINT
+
+ // Has strtol() consumed all characters in the string?
+ if (*end != '\0') {
+ // No - an invalid character was encountered.
+ Message msg;
+ msg << "WARNING: " << src_text
+ << " is expected to be a 32-bit integer, but actually"
+ << " has value \"" << str << "\".\n";
+ printf("%s", msg.GetString().c_str());
+ fflush(stdout);
+ return false;
+ }
+
+ // Is the parsed value in the range of an Int32?
+ const Int32 result = static_cast<Int32>(long_value);
+ if (long_value == LONG_MAX || long_value == LONG_MIN ||
+ // The parsed value overflows as a long. (strtol() returns
+ // LONG_MAX or LONG_MIN when the input overflows.)
+ result != long_value
+ // The parsed value overflows as an Int32.
+ ) {
+ Message msg;
+ msg << "WARNING: " << src_text
+ << " is expected to be a 32-bit integer, but actually"
+ << " has value " << str << ", which overflows.\n";
+ printf("%s", msg.GetString().c_str());
+ fflush(stdout);
+ return false;
+ }
+
+ *value = result;
+ return true;
+}
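+
+// For example, ParseInt32(Message() << "flag foo", "123", &v) sets v to 123
+// and returns true, while "12x3" (trailing garbage) or a value that
+// overflows 32 bits leaves v unchanged, prints a warning, and returns false.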
+
+// Reads and returns the Boolean environment variable corresponding to
+// the given flag; if it's not set, returns default_value.
+//
+// The value is considered true iff it's not "0".
+bool BoolFromGTestEnv(const char* flag, bool default_value) {
+ const std::string env_var = FlagToEnvVar(flag);
+ const char* const string_value = posix::GetEnv(env_var.c_str());
+ return string_value == NULL ?
+ default_value : strcmp(string_value, "0") != 0;
+}
+
+// Reads and returns a 32-bit integer stored in the environment
+// variable corresponding to the given flag; if it isn't set or
+// doesn't represent a valid 32-bit integer, returns default_value.
+Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
+ const std::string env_var = FlagToEnvVar(flag);
+ const char* const string_value = posix::GetEnv(env_var.c_str());
+ if (string_value == NULL) {
+ // The environment variable is not set.
+ return default_value;
+ }
+
+ Int32 result = default_value;
+ if (!ParseInt32(Message() << "Environment variable " << env_var,
+ string_value, &result)) {
+ printf("The default value %s is used.\n",
+ (Message() << default_value).GetString().c_str());
+ fflush(stdout);
+ return default_value;
+ }
+
+ return result;
+}
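+
+// For instance, with the open-source flag prefix "gtest_",
+// Int32FromGTestEnv("foo", 0) reads GTEST_FOO: "GTEST_FOO=42" yields 42,
+// while an unset or malformed variable yields the default 0.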
+
+// Reads and returns the string environment variable corresponding to
+// the given flag; if it's not set, returns default_value.
+const char* StringFromGTestEnv(const char* flag, const char* default_value) {
+ const std::string env_var = FlagToEnvVar(flag);
+ const char* const value = posix::GetEnv(env_var.c_str());
+ return value == NULL ? default_value : value;
+}
+
+} // namespace internal
+} // namespace testing
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+// Google Test - The Google C++ Testing Framework
+//
+// This file implements a universal value printer that can print a
+// value of any type T:
+//
+// void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
+//
+// It uses the << operator when possible, and prints the bytes in the
+// object otherwise. A user can override its behavior for a class
+// type Foo by defining either operator<<(::std::ostream&, const Foo&)
+// or void PrintTo(const Foo&, ::std::ostream*) in the namespace that
+// defines Foo.
+
+#include <ctype.h>
+#include <stdio.h>
+#include <ostream> // NOLINT
+#include <string>
+
+namespace testing {
+
+namespace {
+
+using ::std::ostream;
+
+// Prints a segment of bytes in the given object.
+void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start,
+ size_t count, ostream* os) {
+ char text[5] = "";
+ for (size_t i = 0; i != count; i++) {
+ const size_t j = start + i;
+ if (i != 0) {
+ // Organizes the bytes into groups of 2 for easy parsing by humans.
+ if ((j % 2) == 0)
+ *os << ' ';
+ else
+ *os << '-';
+ }
+ GTEST_SNPRINTF_(text, sizeof(text), "%02X", obj_bytes[j]);
+ *os << text;
+ }
+}
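+
+// For example, with start == 0 and bytes {0x12, 0x34, 0xAB, 0xCD}, the
+// loop above prints "12-34 AB-CD": bytes are paired with '-' inside a
+// pair and pairs are separated by spaces.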
+
+// Prints the bytes in the given value to the given ostream.
+void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count,
+ ostream* os) {
+ // Tells the user how big the object is.
+ *os << count << "-byte object <";
+
+ const size_t kThreshold = 132;
+ const size_t kChunkSize = 64;
+ // If the object size is bigger than kThreshold, we'll have to omit
+ // some details by printing only the first and the last kChunkSize
+ // bytes.
+ // TODO(wan): let the user control the threshold using a flag.
+ if (count < kThreshold) {
+ PrintByteSegmentInObjectTo(obj_bytes, 0, count, os);
+ } else {
+ PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os);
+ *os << " ... ";
+ // Rounds up to 2-byte boundary.
+ const size_t resume_pos = (count - kChunkSize + 1)/2*2;
+ PrintByteSegmentInObjectTo(obj_bytes, resume_pos, count - resume_pos, os);
+ }
+ *os << ">";
+}
+
+} // namespace
+
+namespace internal2 {
+
+// Delegates to PrintBytesInObjectToImpl() to print the bytes in the
+// given object. The delegation simplifies the implementation, which
+// uses the << operator and thus is more easily done outside of the
+// ::testing::internal namespace, which contains a << operator that
+// sometimes conflicts with the one in STL.
+void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count,
+ ostream* os) {
+ PrintBytesInObjectToImpl(obj_bytes, count, os);
+}
+
+} // namespace internal2
+
+namespace internal {
+
+// Depending on the value of a char (or wchar_t), we print it in one
+// of three formats:
+// - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
+// - as a hexadecimal escape sequence (e.g. '\x7F'), or
+// - as a special escape sequence (e.g. '\r', '\n').
+enum CharFormat {
+ kAsIs,
+ kHexEscape,
+ kSpecialEscape
+};
+
+// Returns true if c is a printable ASCII character. We test the
+// value of c directly instead of calling isprint(), which is buggy on
+// Windows Mobile.
+inline bool IsPrintableAscii(wchar_t c) {
+ return 0x20 <= c && c <= 0x7E;
+}
+
+// Prints a wide or narrow char c as a character literal without the
+// quotes, escaping it when necessary; returns how c was formatted.
+// The template argument UnsignedChar is the unsigned version of Char,
+// which is the type of c.
+template <typename UnsignedChar, typename Char>
+static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
+ switch (static_cast<wchar_t>(c)) {
+ case L'\0':
+ *os << "\\0";
+ break;
+ case L'\'':
+ *os << "\\'";
+ break;
+ case L'\\':
+ *os << "\\\\";
+ break;
+ case L'\a':
+ *os << "\\a";
+ break;
+ case L'\b':
+ *os << "\\b";
+ break;
+ case L'\f':
+ *os << "\\f";
+ break;
+ case L'\n':
+ *os << "\\n";
+ break;
+ case L'\r':
+ *os << "\\r";
+ break;
+ case L'\t':
+ *os << "\\t";
+ break;
+ case L'\v':
+ *os << "\\v";
+ break;
+ default:
+ if (IsPrintableAscii(c)) {
+ *os << static_cast<char>(c);
+ return kAsIs;
+ } else {
+ *os << "\\x" + String::FormatHexInt(static_cast<UnsignedChar>(c));
+ return kHexEscape;
+ }
+ }
+ return kSpecialEscape;
+}
+
+// Prints a wchar_t c as if it's part of a string literal, escaping it when
+// necessary; returns how c was formatted.
+static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) {
+ switch (c) {
+ case L'\'':
+ *os << "'";
+ return kAsIs;
+ case L'"':
+ *os << "\\\"";
+ return kSpecialEscape;
+ default:
+ return PrintAsCharLiteralTo<wchar_t>(c, os);
+ }
+}
+
+// Prints a char c as if it's part of a string literal, escaping it when
+// necessary; returns how c was formatted.
+static CharFormat PrintAsStringLiteralTo(char c, ostream* os) {
+ return PrintAsStringLiteralTo(
+ static_cast<wchar_t>(static_cast<unsigned char>(c)), os);
+}
+
+// Prints a wide or narrow character c and its code. '\0' is printed
+// as "'\\0'", other unprintable characters are also properly escaped
+// using the standard C++ escape sequence. The template argument
+// UnsignedChar is the unsigned version of Char, which is the type of c.
+template <typename UnsignedChar, typename Char>
+void PrintCharAndCodeTo(Char c, ostream* os) {
+ // First, print c as a literal in the most readable form we can find.
+ *os << ((sizeof(c) > 1) ? "L'" : "'");
+ const CharFormat format = PrintAsCharLiteralTo<UnsignedChar>(c, os);
+ *os << "'";
+
+ // To aid user debugging, we also print c's code in decimal, unless
+ // it's 0 (in which case c was printed as '\\0', making the code
+ // obvious).
+ if (c == 0)
+ return;
+ *os << " (" << static_cast<int>(c);
+
+ // For more convenience, we print c's code again in hexadecimal,
+ // unless c was already printed in the form '\x##' or the code is in
+ // [1, 9].
+ if (format == kHexEscape || (1 <= c && c <= 9)) {
+ // Do nothing.
+ } else {
+ *os << ", 0x" << String::FormatHexInt(static_cast<UnsignedChar>(c));
+ }
+ *os << ")";
+}
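+
+// For example (following the logic above), PrintCharAndCodeTo<unsigned char>
+// writes 'a' as "'a' (97, 0x61)", writes '\0' as just "'\0'", and writes an
+// unprintable byte such as '\xD3' as "'\xD3' (211)" with no second hex form.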
+
+void PrintTo(unsigned char c, ::std::ostream* os) {
+ PrintCharAndCodeTo<unsigned char>(c, os);
+}
+void PrintTo(signed char c, ::std::ostream* os) {
+ PrintCharAndCodeTo<unsigned char>(c, os);
+}
+
+// Prints a wchar_t as a symbol if it is printable, or escaped otherwise,
+// and also prints its numeric code. L'\0' is printed as "L'\\0'".
+void PrintTo(wchar_t wc, ostream* os) {
+ PrintCharAndCodeTo<wchar_t>(wc, os);
+}
+
+// Prints the given array of characters to the ostream. CharType must be either
+// char or wchar_t.
+// The array starts at begin, the length is len, it may include '\0' characters
+// and may not be NUL-terminated.
+template <typename CharType>
+static void PrintCharsAsStringTo(
+ const CharType* begin, size_t len, ostream* os) {
+ const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\"";
+ *os << kQuoteBegin;
+ bool is_previous_hex = false;
+ for (size_t index = 0; index < len; ++index) {
+ const CharType cur = begin[index];
+ if (is_previous_hex && IsXDigit(cur)) {
+ // Previous character is of '\x..' form and this character can be
+ // interpreted as another hexadecimal digit in its number. Break string to
+ // disambiguate.
+ *os << "\" " << kQuoteBegin;
+ }
+ is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape;
+ }
+ *os << "\"";
+}
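+
+// For example, the two-element array {'\x7F', '1'} is printed as
+// "\x7F" "1" rather than "\x7F1", so the hex escape cannot absorb the
+// digit that follows it.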
+
+// Prints a (const) char/wchar_t array of 'len' elements, starting at address
+// 'begin'. CharType must be either char or wchar_t.
+template <typename CharType>
+static void UniversalPrintCharArray(
+ const CharType* begin, size_t len, ostream* os) {
+ // The code
+ // const char kFoo[] = "foo";
+ // generates an array of 4, not 3, elements, with the last one being '\0'.
+ //
+ // Therefore when printing a char array, we don't print the last element if
+ // it's '\0', such that the output matches the string literal as it's
+ // written in the source code.
+ if (len > 0 && begin[len - 1] == '\0') {
+ PrintCharsAsStringTo(begin, len - 1, os);
+ return;
+ }
+
+ // If, however, the last element in the array is not '\0', e.g.
+ // const char kFoo[] = { 'f', 'o', 'o' };
+ // we must print the entire array. We also print a message to indicate
+ // that the array is not NUL-terminated.
+ PrintCharsAsStringTo(begin, len, os);
+ *os << " (no terminating NUL)";
+}
+
+// Prints a (const) char array of 'len' elements, starting at address 'begin'.
+void UniversalPrintArray(const char* begin, size_t len, ostream* os) {
+ UniversalPrintCharArray(begin, len, os);
+}
+
+// Prints a (const) wchar_t array of 'len' elements, starting at address
+// 'begin'.
+void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) {
+ UniversalPrintCharArray(begin, len, os);
+}
+
+// Prints the given C string to the ostream.
+void PrintTo(const char* s, ostream* os) {
+ if (s == NULL) {
+ *os << "NULL";
+ } else {
+ *os << ImplicitCast_<const void*>(s) << " pointing to ";
+ PrintCharsAsStringTo(s, strlen(s), os);
+ }
+}
+
+// The MSVC compiler can be configured to define wchar_t as a typedef
+// of unsigned short. Defining an overload for const wchar_t* in that case
+// would cause pointers to unsigned shorts to be printed as wide strings,
+// possibly accessing more memory than intended and causing invalid
+// memory accesses. MSVC defines the _NATIVE_WCHAR_T_DEFINED symbol when
+// wchar_t is implemented as a native type.
+#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
+// Prints the given wide C string to the ostream.
+void PrintTo(const wchar_t* s, ostream* os) {
+ if (s == NULL) {
+ *os << "NULL";
+ } else {
+ *os << ImplicitCast_<const void*>(s) << " pointing to ";
+ PrintCharsAsStringTo(s, wcslen(s), os);
+ }
+}
+#endif // wchar_t is native
+
+// Prints a ::string object.
+#if GTEST_HAS_GLOBAL_STRING
+void PrintStringTo(const ::string& s, ostream* os) {
+ PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif // GTEST_HAS_GLOBAL_STRING
+
+void PrintStringTo(const ::std::string& s, ostream* os) {
+ PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+
+// Prints a ::wstring object.
+#if GTEST_HAS_GLOBAL_WSTRING
+void PrintWideStringTo(const ::wstring& s, ostream* os) {
+ PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif // GTEST_HAS_GLOBAL_WSTRING
+
+#if GTEST_HAS_STD_WSTRING
+void PrintWideStringTo(const ::std::wstring& s, ostream* os) {
+ PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif // GTEST_HAS_STD_WSTRING
+
+} // namespace internal
+
+} // namespace testing
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: mheule@google.com (Markus Heule)
+//
+// The Google C++ Testing Framework (Google Test)
+
+
+// Indicates that this translation unit is part of Google Test's
+// implementation. It must come before gtest-internal-inl.h is
+// included, or there will be a compiler error. This trick is to
+// prevent a user from accidentally including gtest-internal-inl.h in
+// their own code.
+#define GTEST_IMPLEMENTATION_ 1
+#undef GTEST_IMPLEMENTATION_
+
+namespace testing {
+
+using internal::GetUnitTestImpl;
+
+// Gets the summary of the failure message by omitting the stack trace
+// in it.
+std::string TestPartResult::ExtractSummary(const char* message) {
+ const char* const stack_trace = strstr(message, internal::kStackTraceMarker);
+ return stack_trace == NULL ? message :
+ std::string(message, stack_trace);
+}
+
+// Prints a TestPartResult object.
+std::ostream& operator<<(std::ostream& os, const TestPartResult& result) {
+ return os
+ << result.file_name() << ":" << result.line_number() << ": "
+ << (result.type() == TestPartResult::kSuccess ? "Success" :
+ result.type() == TestPartResult::kFatalFailure ? "Fatal failure" :
+ "Non-fatal failure") << ":\n"
+ << result.message() << std::endl;
+}
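+// (Illustrative output only: a non-fatal failure recorded at line 42 of a
+// hypothetical foo_test.cc would stream as
+// "foo_test.cc:42: Non-fatal failure:" followed by the message.)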
+
+// Appends a TestPartResult to the array.
+void TestPartResultArray::Append(const TestPartResult& result) {
+ array_.push_back(result);
+}
+
+// Returns the TestPartResult at the given index (0-based).
+const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const {
+ if (index < 0 || index >= size()) {
+ printf("\nInvalid index (%d) into TestPartResultArray.\n", index);
+ internal::posix::Abort();
+ }
+
+ return array_[index];
+}
+
+// Returns the number of TestPartResult objects in the array.
+int TestPartResultArray::size() const {
+ return static_cast<int>(array_.size());
+}
+
+namespace internal {
+
+HasNewFatalFailureHelper::HasNewFatalFailureHelper()
+ : has_new_fatal_failure_(false),
+ original_reporter_(GetUnitTestImpl()->
+ GetTestPartResultReporterForCurrentThread()) {
+ GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this);
+}
+
+HasNewFatalFailureHelper::~HasNewFatalFailureHelper() {
+ GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(
+ original_reporter_);
+}
+
+void HasNewFatalFailureHelper::ReportTestPartResult(
+ const TestPartResult& result) {
+ if (result.fatally_failed())
+ has_new_fatal_failure_ = true;
+ original_reporter_->ReportTestPartResult(result);
+}
+
+} // namespace internal
+
+} // namespace testing
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+
+namespace testing {
+namespace internal {
+
+#if GTEST_HAS_TYPED_TEST_P
+
+// Skips to the first non-space char in str. Returns an empty string if str
+// contains only whitespace characters.
+static const char* SkipSpaces(const char* str) {
+ while (IsSpace(*str))
+ str++;
+ return str;
+}
+
+// Verifies that registered_tests match the test names in
+// defined_test_names_; returns registered_tests if successful, or
+// aborts the program otherwise.
+const char* TypedTestCasePState::VerifyRegisteredTestNames(
+ const char* file, int line, const char* registered_tests) {
+ typedef ::std::set<const char*>::const_iterator DefinedTestIter;
+ registered_ = true;
+
+ // Skip initial whitespace in registered_tests since some
+// preprocessors prefix stringized literals with whitespace.
+ registered_tests = SkipSpaces(registered_tests);
+
+ Message errors;
+ ::std::set<std::string> tests;
+ for (const char* names = registered_tests; names != NULL;
+ names = SkipComma(names)) {
+ const std::string name = GetPrefixUntilComma(names);
+ if (tests.count(name) != 0) {
+ errors << "Test " << name << " is listed more than once.\n";
+ continue;
+ }
+
+ bool found = false;
+ for (DefinedTestIter it = defined_test_names_.begin();
+ it != defined_test_names_.end();
+ ++it) {
+ if (name == *it) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) {
+ tests.insert(name);
+ } else {
+ errors << "No test named " << name
+ << " can be found in this test case.\n";
+ }
+ }
+
+ for (DefinedTestIter it = defined_test_names_.begin();
+ it != defined_test_names_.end();
+ ++it) {
+ if (tests.count(*it) == 0) {
+ errors << "You forgot to list test " << *it << ".\n";
+ }
+ }
+
+ const std::string& errors_str = errors.GetString();
+ if (errors_str != "") {
+ fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(),
+ errors_str.c_str());
+ fflush(stderr);
+ posix::Abort();
+ }
+
+ return registered_tests;
+}
+
+#endif // GTEST_HAS_TYPED_TEST_P
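+// (Illustrative note: the registered_tests string verified above is the
+// stringized test list that REGISTER_TYPED_TEST_CASE_P(CaseName, TestA,
+// TestB) passes in, so a misspelled or missing name in that macro is what
+// triggers the errors reported here.)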
+
+} // namespace internal
+} // namespace testing
diff --git a/lib/kokkos/tpls/gtest/gtest/gtest-test-part.h b/lib/kokkos/tpls/gtest/gtest/gtest-test-part.h
new file mode 120000
index 000000000..48d39090f
--- /dev/null
+++ b/lib/kokkos/tpls/gtest/gtest/gtest-test-part.h
@@ -0,0 +1 @@
+gtest.h
\ No newline at end of file
diff --git a/lib/kokkos/tpls/gtest/gtest/gtest.h b/lib/kokkos/tpls/gtest/gtest/gtest.h
new file mode 100644
index 000000000..c74d098fa
--- /dev/null
+++ b/lib/kokkos/tpls/gtest/gtest/gtest.h
@@ -0,0 +1,20065 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file defines the public API for Google Test. It should be
+// included by any test program that uses Google Test.
+//
+// IMPORTANT NOTE: Due to limitations of the C++ language, we have to
+// leave some internal implementation details in this header file.
+// They are clearly marked by comments like this:
+//
+// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+//
+// Such code is NOT meant to be used by a user directly, and is subject
+// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
+// program!
+//
+// Acknowledgment: Google Test borrowed the idea of automatic test
+// registration from Barthelemy Dagenais' (barthelemy@prologique.com)
+// easyUnit framework.
+
+#ifdef __GNUC__
+#pragma GCC system_header
+#endif
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
+#define GTEST_INCLUDE_GTEST_GTEST_H_
+
+#include <limits>
+#include <ostream>
+#include <vector>
+
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file declares functions and macros used internally by
+// Google Test. They are subject to change without notice.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
+
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: wan@google.com (Zhanyong Wan)
+//
+// Low-level types and utilities for porting Google Test to various
+// platforms. They are subject to change without notice. DO NOT USE
+// THEM IN USER CODE.
+//
+// This file is fundamental to Google Test. All other Google Test source
+// files are expected to #include this. Therefore, it cannot #include
+// any other Google Test header.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
+
+// The user can define the following macros in the build script to
+// control Google Test's behavior. If the user doesn't define a macro
+// in this list, Google Test will define it.
+//
+// GTEST_HAS_CLONE - Define it to 1/0 to indicate that clone(2)
+// is/isn't available.
+// GTEST_HAS_EXCEPTIONS - Define it to 1/0 to indicate that exceptions
+// are enabled.
+// GTEST_HAS_GLOBAL_STRING - Define it to 1/0 to indicate that ::string
+// is/isn't available (some systems define
+// ::string, which is different to std::string).
+// GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::wstring
+// is/isn't available (some systems define
+// ::wstring, which is different to std::wstring).
+// GTEST_HAS_POSIX_RE - Define it to 1/0 to indicate that POSIX regular
+// expressions are/aren't available.
+// GTEST_HAS_PTHREAD - Define it to 1/0 to indicate that <pthread.h>
+// is/isn't available.
+// GTEST_HAS_RTTI - Define it to 1/0 to indicate that RTTI is/isn't
+// enabled.
+// GTEST_HAS_STD_WSTRING - Define it to 1/0 to indicate that
+// std::wstring does/doesn't work (Google Test can
+// be used where std::wstring is unavailable).
+// GTEST_HAS_TR1_TUPLE - Define it to 1/0 to indicate tr1::tuple
+// is/isn't available.
+// GTEST_HAS_SEH - Define it to 1/0 to indicate whether the
+// compiler supports Microsoft's "Structured
+// Exception Handling".
+// GTEST_HAS_STREAM_REDIRECTION
+// - Define it to 1/0 to indicate whether the
+// platform supports I/O stream redirection using
+// dup() and dup2().
+// GTEST_USE_OWN_TR1_TUPLE - Define it to 1/0 to indicate whether Google
+// Test's own tr1 tuple implementation should be
+// used. Unused when the user sets
+// GTEST_HAS_TR1_TUPLE to 0.
+// GTEST_LANG_CXX11 - Define it to 1/0 to indicate that Google Test
+// is building in C++11/C++98 mode.
+// GTEST_LINKED_AS_SHARED_LIBRARY
+// - Define to 1 when compiling tests that use
+// Google Test as a shared library (known as
+// DLL on Windows).
+// GTEST_CREATE_SHARED_LIBRARY
+// - Define to 1 when compiling Google Test itself
+// as a shared library.
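+//
+// For instance, these can be set on the compiler command line (a
+// hypothetical invocation, shown only as a sketch):
+//
+//   g++ -DGTEST_HAS_PTHREAD=0 -DGTEST_HAS_RTTI=0 -c foo_test.cc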
+
+// This header defines the following utilities:
+//
+// Macros indicating the current platform (defined to 1 if compiled on
+// the given platform; otherwise undefined):
+// GTEST_OS_AIX - IBM AIX
+// GTEST_OS_CYGWIN - Cygwin
+// GTEST_OS_HPUX - HP-UX
+// GTEST_OS_LINUX - Linux
+// GTEST_OS_LINUX_ANDROID - Google Android
+// GTEST_OS_MAC - Mac OS X
+// GTEST_OS_IOS - iOS
+// GTEST_OS_IOS_SIMULATOR - iOS simulator
+// GTEST_OS_NACL - Google Native Client (NaCl)
+// GTEST_OS_OPENBSD - OpenBSD
+// GTEST_OS_QNX - QNX
+// GTEST_OS_SOLARIS - Sun Solaris
+// GTEST_OS_SYMBIAN - Symbian
+// GTEST_OS_WINDOWS - Windows (Desktop, MinGW, or Mobile)
+// GTEST_OS_WINDOWS_DESKTOP - Windows Desktop
+// GTEST_OS_WINDOWS_MINGW - MinGW
+// GTEST_OS_WINDOWS_MOBILE - Windows Mobile
+// GTEST_OS_ZOS - z/OS
+//
+// Among the platforms, Cygwin, Linux, Mac OS X, and Windows have the
+// most stable support. Since core members of the Google Test project
+// don't have access to other platforms, support for them may be less
+// stable. If you notice any problems on your platform, please notify
+// googletestframework@googlegroups.com (patches for fixing them are
+// even more welcome!).
+//
+// Note that it is possible that none of the GTEST_OS_* macros are defined.
+//
+// Macros indicating available Google Test features (defined to 1 if
+// the corresponding feature is supported; otherwise undefined):
+// GTEST_HAS_COMBINE - the Combine() function (for value-parameterized
+// tests)
+// GTEST_HAS_DEATH_TEST - death tests
+// GTEST_HAS_PARAM_TEST - value-parameterized tests
+// GTEST_HAS_TYPED_TEST - typed tests
+// GTEST_HAS_TYPED_TEST_P - type-parameterized tests
+// GTEST_USES_POSIX_RE - enhanced POSIX regex is used. Do not confuse with
+// GTEST_HAS_POSIX_RE (see above) which users can
+// define themselves.
+// GTEST_USES_SIMPLE_RE - our own simple regex is used;
+// the above two are mutually exclusive.
+// GTEST_CAN_COMPARE_NULL - accepts untyped NULL in EXPECT_EQ().
+//
+// Macros for basic C++ coding:
+// GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning.
+// GTEST_ATTRIBUTE_UNUSED_ - declares that a class' instances or a
+// variable don't have to be used.
+// GTEST_DISALLOW_ASSIGN_ - disables operator=.
+// GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=.
+// GTEST_MUST_USE_RESULT_ - declares that a function's result must be used.
+//
+// Synchronization:
+// Mutex, MutexLock, ThreadLocal, GetThreadCount()
+// - synchronization primitives.
+// GTEST_IS_THREADSAFE - defined to 1 to indicate that the above
+// synchronization primitives have real implementations
+// and Google Test is thread-safe; or 0 otherwise.
+//
+// Template meta programming:
+// is_pointer - as in TR1; needed on Symbian and IBM XL C/C++ only.
+// IteratorTraits - partial implementation of std::iterator_traits, which
+// is not available in libCstd when compiled with Sun C++.
+//
+// Smart pointers:
+// scoped_ptr - as in TR2.
+//
+// Regular expressions:
+// RE - a simple regular expression class using the POSIX
+// Extended Regular Expression syntax on UNIX-like
+// platforms, or a reduced regular expression syntax on
+// other platforms, including Windows.
+//
+// Logging:
+// GTEST_LOG_() - logs messages at the specified severity level.
+// LogToStderr() - directs all log messages to stderr.
+// FlushInfoLog() - flushes informational log messages.
+//
+// Stdout and stderr capturing:
+// CaptureStdout() - starts capturing stdout.
+// GetCapturedStdout() - stops capturing stdout and returns the captured
+// string.
+// CaptureStderr() - starts capturing stderr.
+// GetCapturedStderr() - stops capturing stderr and returns the captured
+// string.
+//
+// Integer types:
+// TypeWithSize - maps an integer to an int type.
+// Int32, UInt32, Int64, UInt64, TimeInMillis
+// - integers of known sizes.
+// BiggestInt - the biggest signed integer type.
+//
+// Command-line utilities:
+// GTEST_FLAG() - references a flag.
+// GTEST_DECLARE_*() - declares a flag.
+// GTEST_DEFINE_*() - defines a flag.
+// GetInjectableArgvs() - returns the command line as a vector of strings.
+//
+// Environment variable utilities:
+// GetEnv() - gets the value of an environment variable.
+// BoolFromGTestEnv() - parses a bool environment variable.
+// Int32FromGTestEnv() - parses an Int32 environment variable.
+// StringFromGTestEnv() - parses a string environment variable.
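+//
+// (Illustrative examples: GTEST_FLAG(output) names the flag variable
+// behind --gtest_output, and BoolFromGTestEnv("print_time", true) reads
+// the GTEST_PRINT_TIME environment variable, defaulting to true.)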
+
+#include <ctype.h> // for isspace, etc
+#include <stddef.h> // for ptrdiff_t
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#ifndef _WIN32_WCE
+# include <sys/types.h>
+# include <sys/stat.h>
+#endif // !_WIN32_WCE
+
+#if defined __APPLE__
+# include <AvailabilityMacros.h>
+# include <TargetConditionals.h>
+#endif
+
+#include <iostream> // NOLINT
+#include <sstream> // NOLINT
+#include <string> // NOLINT
+
+#define GTEST_DEV_EMAIL_ "googletestframework@@googlegroups.com"
+#define GTEST_FLAG_PREFIX_ "gtest_"
+#define GTEST_FLAG_PREFIX_DASH_ "gtest-"
+#define GTEST_FLAG_PREFIX_UPPER_ "GTEST_"
+#define GTEST_NAME_ "Google Test"
+#define GTEST_PROJECT_URL_ "http://code.google.com/p/googletest/"
+
+// Determines the version of gcc that is used to compile this.
+#ifdef __GNUC__
+// 40302 means version 4.3.2.
+# define GTEST_GCC_VER_ \
+ (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
+#endif // __GNUC__
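+
+// For example, gcc 4.7.2 yields GTEST_GCC_VER_ = 4*10000 + 7*100 + 2 = 40702.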
+
+// Determines the platform on which Google Test is compiled.
+#ifdef __CYGWIN__
+# define GTEST_OS_CYGWIN 1
+#elif defined __SYMBIAN32__
+# define GTEST_OS_SYMBIAN 1
+#elif defined _WIN32
+# define GTEST_OS_WINDOWS 1
+# ifdef _WIN32_WCE
+# define GTEST_OS_WINDOWS_MOBILE 1
+# elif defined(__MINGW__) || defined(__MINGW32__)
+# define GTEST_OS_WINDOWS_MINGW 1
+# else
+# define GTEST_OS_WINDOWS_DESKTOP 1
+# endif // _WIN32_WCE
+#elif defined __APPLE__
+# define GTEST_OS_MAC 1
+# if TARGET_OS_IPHONE
+# define GTEST_OS_IOS 1
+# if TARGET_IPHONE_SIMULATOR
+# define GTEST_OS_IOS_SIMULATOR 1
+# endif
+# endif
+#elif defined __linux__
+# define GTEST_OS_LINUX 1
+# if defined __ANDROID__
+# define GTEST_OS_LINUX_ANDROID 1
+# endif
+#elif defined __MVS__
+# define GTEST_OS_ZOS 1
+#elif defined(__sun) && defined(__SVR4)
+# define GTEST_OS_SOLARIS 1
+#elif defined(_AIX)
+# define GTEST_OS_AIX 1
+#elif defined(__hpux)
+# define GTEST_OS_HPUX 1
+#elif defined __native_client__
+# define GTEST_OS_NACL 1
+#elif defined __OpenBSD__
+# define GTEST_OS_OPENBSD 1
+#elif defined __QNX__
+# define GTEST_OS_QNX 1
+#endif // __CYGWIN__
+
+#ifndef GTEST_LANG_CXX11
+// gcc and clang define __GXX_EXPERIMENTAL_CXX0X__ when
+// -std={c,gnu}++{0x,11} is passed. The C++11 standard specifies a
+// value for __cplusplus, and recent versions of clang, gcc, and
+// probably other compilers set that too in C++11 mode.
+# if __GXX_EXPERIMENTAL_CXX0X__ || __cplusplus >= 201103L
+// Compiling in at least C++11 mode.
+# define GTEST_LANG_CXX11 1
+# else
+# define GTEST_LANG_CXX11 0
+# endif
+#endif
+
+// Brings in definitions for functions used in the testing::internal::posix
+// namespace (read, write, close, chdir, isatty, stat). We do not currently
+// use them on Windows Mobile.
+#if !GTEST_OS_WINDOWS
+// This assumes that non-Windows OSes provide unistd.h. For OSes where this
+// is not the case, we need to include headers that provide the functions
+// mentioned above.
+# include <unistd.h>
+# include <strings.h>
+#elif !GTEST_OS_WINDOWS_MOBILE
+# include <direct.h>
+# include <io.h>
+#endif
+
+#if GTEST_OS_LINUX_ANDROID
+// Used to define __ANDROID_API__ matching the target NDK API level.
+# include <android/api-level.h> // NOLINT
+#endif
+
+// Defines GTEST_HAS_POSIX_RE to 1 iff Google Test can use POSIX regular
+// expressions.
+#ifndef GTEST_HAS_POSIX_RE
+# if GTEST_OS_LINUX_ANDROID
+// On Android, <regex.h> is only available starting with Gingerbread.
+# define GTEST_HAS_POSIX_RE (__ANDROID_API__ >= 9)
+# else
+# define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS)
+# endif
+#endif
+
+#if GTEST_HAS_POSIX_RE
+
+// On some platforms, <regex.h> needs someone to define size_t, and
+// won't compile otherwise. We can #include it here as we already
+// included <stdlib.h>, which is guaranteed to define size_t through
+// <stddef.h>.
+# include <regex.h> // NOLINT
+
+# define GTEST_USES_POSIX_RE 1
+
+#elif GTEST_OS_WINDOWS
+
+// <regex.h> is not available on Windows. Use our own simple regex
+// implementation instead.
+# define GTEST_USES_SIMPLE_RE 1
+
+#else
+
+// <regex.h> may not be available on this platform. Use our own
+// simple regex implementation instead.
+# define GTEST_USES_SIMPLE_RE 1
+
+#endif // GTEST_HAS_POSIX_RE
+
+#ifndef GTEST_HAS_EXCEPTIONS
+// The user didn't tell us whether exceptions are enabled, so we need
+// to figure it out.
+# if defined(_MSC_VER) || defined(__BORLANDC__)
+// MSVC's and C++Builder's implementations of the STL use the _HAS_EXCEPTIONS
+// macro to enable exceptions, so we'll do the same.
+// Assumes that exceptions are enabled by default.
+# ifndef _HAS_EXCEPTIONS
+# define _HAS_EXCEPTIONS 1
+# endif // _HAS_EXCEPTIONS
+# define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS
+# elif defined(__GNUC__) && __EXCEPTIONS
+// gcc defines __EXCEPTIONS to 1 iff exceptions are enabled.
+# define GTEST_HAS_EXCEPTIONS 1
+# elif defined(__SUNPRO_CC)
+// Sun Pro CC supports exceptions. However, there is no compile-time way of
+// detecting whether they are enabled or not. Therefore, we assume that
+// they are enabled unless the user tells us otherwise.
+# define GTEST_HAS_EXCEPTIONS 1
+# elif defined(__IBMCPP__) && __EXCEPTIONS
+// xlC defines __EXCEPTIONS to 1 iff exceptions are enabled.
+# define GTEST_HAS_EXCEPTIONS 1
+# elif defined(__HP_aCC)
+// Exception handling is in effect by default in the HP aCC compiler. It has
+// to be turned off with the +noeh compiler option if desired.
+# define GTEST_HAS_EXCEPTIONS 1
+# else
+// For other compilers, we assume exceptions are disabled to be
+// conservative.
+# define GTEST_HAS_EXCEPTIONS 0
+# endif // defined(_MSC_VER) || defined(__BORLANDC__)
+#endif // GTEST_HAS_EXCEPTIONS
+
+#if !defined(GTEST_HAS_STD_STRING)
+// Even though we don't use this macro any longer, we keep it in case
+// some clients still depend on it.
+# define GTEST_HAS_STD_STRING 1
+#elif !GTEST_HAS_STD_STRING
+// The user told us that ::std::string isn't available.
+# error "Google Test cannot be used where ::std::string isn't available."
+#endif // !defined(GTEST_HAS_STD_STRING)
+
+#ifndef GTEST_HAS_GLOBAL_STRING
+// The user didn't tell us whether ::string is available, so we need
+// to figure it out.
+
+# define GTEST_HAS_GLOBAL_STRING 0
+
+#endif // GTEST_HAS_GLOBAL_STRING
+
+#ifndef GTEST_HAS_STD_WSTRING
+// The user didn't tell us whether ::std::wstring is available, so we need
+// to figure it out.
+// TODO(wan@google.com): use autoconf to detect whether ::std::wstring
+// is available.
+
+// Cygwin 1.7 and below don't support ::std::wstring.
+// Solaris' libc++ doesn't support it either. Android has
+// no support for it at least up to Froyo (2.2).
+# define GTEST_HAS_STD_WSTRING \
+ (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS))
+
+#endif // GTEST_HAS_STD_WSTRING
+
+#ifndef GTEST_HAS_GLOBAL_WSTRING
+// The user didn't tell us whether ::wstring is available, so we need
+// to figure it out.
+# define GTEST_HAS_GLOBAL_WSTRING \
+ (GTEST_HAS_STD_WSTRING && GTEST_HAS_GLOBAL_STRING)
+#endif // GTEST_HAS_GLOBAL_WSTRING
+
+// Determines whether RTTI is available.
+#ifndef GTEST_HAS_RTTI
+// The user didn't tell us whether RTTI is enabled, so we need to
+// figure it out.
+
+# ifdef _MSC_VER
+
+# ifdef _CPPRTTI // MSVC defines this macro iff RTTI is enabled.
+# define GTEST_HAS_RTTI 1
+# else
+# define GTEST_HAS_RTTI 0
+# endif
+
+// Starting with version 4.3.2, gcc defines __GXX_RTTI iff RTTI is enabled.
+# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40302)
+
+# ifdef __GXX_RTTI
+// When building against STLport with the Android NDK and with
+// -frtti -fno-exceptions, the build fails at link time with undefined
+// references to __cxa_bad_typeid. Not sure whether this is an STL or a
+// toolchain bug, so disable RTTI when detected.
+# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) && \
+ !defined(__EXCEPTIONS)
+# define GTEST_HAS_RTTI 0
+# else
+# define GTEST_HAS_RTTI 1
+# endif // GTEST_OS_LINUX_ANDROID && _STLPORT_MAJOR && !__EXCEPTIONS
+# else
+# define GTEST_HAS_RTTI 0
+# endif // __GXX_RTTI
+
+// Clang defines __GXX_RTTI starting with version 3.0, but its manual recommends
+// using has_feature instead. has_feature(cxx_rtti) is supported since 2.7, the
+// first version with C++ support.
+# elif defined(__clang__)
+
+# define GTEST_HAS_RTTI __has_feature(cxx_rtti)
+
+// Starting with version 9.0 IBM Visual Age defines __RTTI_ALL__ to 1 if
+// both the typeid and dynamic_cast features are present.
+# elif defined(__IBMCPP__) && (__IBMCPP__ >= 900)
+
+# ifdef __RTTI_ALL__
+# define GTEST_HAS_RTTI 1
+# else
+# define GTEST_HAS_RTTI 0
+# endif
+
+# else
+
+// For all other compilers, we assume RTTI is enabled.
+# define GTEST_HAS_RTTI 1
+
+# endif // _MSC_VER
+
+#endif // GTEST_HAS_RTTI
+
+// It's this header's responsibility to #include <typeinfo> when RTTI
+// is enabled.
+#if GTEST_HAS_RTTI
+# include <typeinfo>
+#endif
+
+// Determines whether Google Test can use the pthreads library.
+#ifndef GTEST_HAS_PTHREAD
+// The user didn't tell us explicitly, so we assume pthreads support is
+// available on Linux and Mac.
+//
+// To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0
+// to your compiler flags.
+# define GTEST_HAS_PTHREAD (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX \
+ || GTEST_OS_QNX)
+#endif // GTEST_HAS_PTHREAD
+
+#if GTEST_HAS_PTHREAD
+// gtest-port.h guarantees to #include <pthread.h> when GTEST_HAS_PTHREAD is
+// true.
+# include <pthread.h> // NOLINT
+
+// For timespec and nanosleep, used below.
+# include <time.h> // NOLINT
+#endif
+
+// Determines whether Google Test can use tr1/tuple. You can define
+// this macro to 0 to prevent Google Test from using tuple (any
+// feature depending on tuple will be disabled in this mode).
+#ifndef GTEST_HAS_TR1_TUPLE
+# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR)
+// STLport, provided with the Android NDK, has neither <tr1/tuple> nor <tuple>.
+# define GTEST_HAS_TR1_TUPLE 0
+# else
+// The user didn't tell us not to do it, so we assume it's OK.
+# define GTEST_HAS_TR1_TUPLE 1
+# endif
+#endif // GTEST_HAS_TR1_TUPLE
+
+// Determines whether Google Test's own tr1 tuple implementation
+// should be used.
+#ifndef GTEST_USE_OWN_TR1_TUPLE
+// The user didn't tell us, so we need to figure it out.
+
+// We use our own TR1 tuple if we aren't sure the user has an
+// implementation of it already. At this time, libstdc++ 4.0.0+ and
+// MSVC 2010 are the only mainstream standard libraries that come
+// with a TR1 tuple implementation. NVIDIA's CUDA NVCC compiler
+// pretends to be GCC by defining __GNUC__ and friends, but cannot
+// compile GCC's tuple implementation. MSVC 2008 (9.0) provides TR1
+// tuple in a 323 MB Feature Pack download, which we cannot assume the
+// user has. QNX's QCC compiler is a modified GCC but it doesn't
+// support TR1 tuple. libc++ only provides std::tuple, in C++11 mode,
+// and it can be used with some compilers that define __GNUC__.
+# if (defined(__GNUC__) && !defined(__CUDACC__) && (GTEST_GCC_VER_ >= 40000) \
+ && !GTEST_OS_QNX && !defined(_LIBCPP_VERSION)) || _MSC_VER >= 1600
+# define GTEST_ENV_HAS_TR1_TUPLE_ 1
+# endif
+
+// C++11 specifies that <tuple> provides std::tuple. Use that if gtest is used
+// in C++11 mode and libstdc++ isn't very old (binaries targeting OS X 10.6
+// can build with clang but need to use gcc 4.2's libstdc++).
+# if GTEST_LANG_CXX11 && (!defined(__GLIBCXX__) || __GLIBCXX__ > 20110325)
+# define GTEST_ENV_HAS_STD_TUPLE_ 1
+# endif
+
+# if GTEST_ENV_HAS_TR1_TUPLE_ || GTEST_ENV_HAS_STD_TUPLE_
+# define GTEST_USE_OWN_TR1_TUPLE 0
+# else
+# define GTEST_USE_OWN_TR1_TUPLE 1
+# endif
+
+#endif // GTEST_USE_OWN_TR1_TUPLE
+
+// To avoid conditional compilation everywhere, we make it
+// gtest-port.h's responsibility to #include the header implementing
+// tr1/tuple.
+#if GTEST_HAS_TR1_TUPLE
+
+# if GTEST_USE_OWN_TR1_TUPLE
+// This file was GENERATED by command:
+// pump.py gtest-tuple.h.pump
+// DO NOT EDIT BY HAND!!!
+
+// Copyright 2009 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+// Implements a subset of TR1 tuple needed by Google Test and Google Mock.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
+
+#include <utility> // For ::std::pair.
+
+// The compiler used in Symbian has a bug that prevents us from declaring the
+// tuple template as a friend (it complains that tuple is redefined). This
+// hack bypasses the bug by declaring the members that should otherwise be
+// private as public.
+// Sun Studio versions < 12 also have the above bug.
+#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590)
+# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public:
+#else
+# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \
+ template <GTEST_10_TYPENAMES_(U)> friend class tuple; \
+ private:
+#endif
+
+// GTEST_n_TUPLE_(T) is the type of an n-tuple.
+#define GTEST_0_TUPLE_(T) tuple<>
+#define GTEST_1_TUPLE_(T) tuple<T##0, void, void, void, void, void, void, \
+ void, void, void>
+#define GTEST_2_TUPLE_(T) tuple<T##0, T##1, void, void, void, void, void, \
+ void, void, void>
+#define GTEST_3_TUPLE_(T) tuple<T##0, T##1, T##2, void, void, void, void, \
+ void, void, void>
+#define GTEST_4_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, void, void, void, \
+ void, void, void>
+#define GTEST_5_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, void, void, \
+ void, void, void>
+#define GTEST_6_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, void, \
+ void, void, void>
+#define GTEST_7_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
+ void, void, void>
+#define GTEST_8_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
+ T##7, void, void>
+#define GTEST_9_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
+ T##7, T##8, void>
+#define GTEST_10_TUPLE_(T) tuple<T##0, T##1, T##2, T##3, T##4, T##5, T##6, \
+ T##7, T##8, T##9>
+
+// GTEST_n_TYPENAMES_(T) declares a list of n typenames.
+#define GTEST_0_TYPENAMES_(T)
+#define GTEST_1_TYPENAMES_(T) typename T##0
+#define GTEST_2_TYPENAMES_(T) typename T##0, typename T##1
+#define GTEST_3_TYPENAMES_(T) typename T##0, typename T##1, typename T##2
+#define GTEST_4_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+ typename T##3
+#define GTEST_5_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+ typename T##3, typename T##4
+#define GTEST_6_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+ typename T##3, typename T##4, typename T##5
+#define GTEST_7_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+ typename T##3, typename T##4, typename T##5, typename T##6
+#define GTEST_8_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+ typename T##3, typename T##4, typename T##5, typename T##6, typename T##7
+#define GTEST_9_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+ typename T##3, typename T##4, typename T##5, typename T##6, \
+ typename T##7, typename T##8
+#define GTEST_10_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \
+ typename T##3, typename T##4, typename T##5, typename T##6, \
+ typename T##7, typename T##8, typename T##9
+
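+// As an illustration, GTEST_2_TUPLE_(T) expands to
+// tuple<T0, T1, void, void, void, void, void, void, void, void>, and
+// GTEST_2_TYPENAMES_(T) expands to "typename T0, typename T1".
+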
+// In theory, defining stuff in the ::std namespace is undefined
+// behavior. We can do this as we are playing the role of a standard
+// library vendor.
+namespace std {
+namespace tr1 {
+
+template <typename T0 = void, typename T1 = void, typename T2 = void,
+ typename T3 = void, typename T4 = void, typename T5 = void,
+ typename T6 = void, typename T7 = void, typename T8 = void,
+ typename T9 = void>
+class tuple;
+
+// Anything in namespace gtest_internal is Google Test's INTERNAL
+// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code.
+namespace gtest_internal {
+
+// ByRef<T>::type is T if T is a reference; otherwise it's const T&.
+template <typename T>
+struct ByRef { typedef const T& type; }; // NOLINT
+template <typename T>
+struct ByRef<T&> { typedef T& type; }; // NOLINT
+
+// A handy wrapper for ByRef.
+#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef<T>::type
+
+// AddRef<T>::type is T if T is a reference; otherwise it's T&. This
+// is the same as tr1::add_reference<T>::type.
+template <typename T>
+struct AddRef { typedef T& type; }; // NOLINT
+template <typename T>
+struct AddRef<T&> { typedef T& type; }; // NOLINT
+
+// A handy wrapper for AddRef.
+#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef<T>::type
+
+// A helper for implementing get<k>().
+template <int k> class Get;
+
+// A helper for implementing tuple_element<k, T>. kIndexValid is true
+// iff k < the number of fields in tuple type T.
+template <bool kIndexValid, int kIndex, class Tuple>
+struct TupleElement;
+
+template <GTEST_10_TYPENAMES_(T)>
+struct TupleElement<true, 0, GTEST_10_TUPLE_(T) > {
+ typedef T0 type;
+};
+
+template <GTEST_10_TYPENAMES_(T)>
+struct TupleElement<true, 1, GTEST_10_TUPLE_(T) > {
+ typedef T1 type;
+};
+
+template <GTEST_10_TYPENAMES_(T)>
+struct TupleElement<true, 2, GTEST_10_TUPLE_(T) > {
+ typedef T2 type;
+};
+
+template <GTEST_10_TYPENAMES_(T)>
+struct TupleElement<true, 3, GTEST_10_TUPLE_(T) > {
+ typedef T3 type;
+};
+
+template <GTEST_10_TYPENAMES_(T)>
+struct TupleElement<true, 4, GTEST_10_TUPLE_(T) > {
+ typedef T4 type;
+};
+
+template <GTEST_10_TYPENAMES_(T)>
+struct TupleElement<true, 5, GTEST_10_TUPLE_(T) > {
+ typedef T5 type;
+};
+
+template <GTEST_10_TYPENAMES_(T)>
+struct TupleElement<true, 6, GTEST_10_TUPLE_(T) > {
+ typedef T6 type;
+};
+
+template <GTEST_10_TYPENAMES_(T)>
+struct TupleElement<true, 7, GTEST_10_TUPLE_(T) > {
+ typedef T7 type;
+};
+
+template <GTEST_10_TYPENAMES_(T)>
+struct TupleElement<true, 8, GTEST_10_TUPLE_(T) > {
+ typedef T8 type;
+};
+
+template <GTEST_10_TYPENAMES_(T)>
+struct TupleElement<true, 9, GTEST_10_TUPLE_(T) > {
+ typedef T9 type;
+};
+
+} // namespace gtest_internal
+
+template <>
+class tuple<> {
+ public:
+ tuple() {}
+ tuple(const tuple& /* t */) {}
+ tuple& operator=(const tuple& /* t */) { return *this; }
+};
+
+template <GTEST_1_TYPENAMES_(T)>
+class GTEST_1_TUPLE_(T) {
+ public:
+ template <int k> friend class gtest_internal::Get;
+
+ tuple() : f0_() {}
+
+ explicit tuple(GTEST_BY_REF_(T0) f0) : f0_(f0) {}
+
+ tuple(const tuple& t) : f0_(t.f0_) {}
+
+ template <GTEST_1_TYPENAMES_(U)>
+ tuple(const GTEST_1_TUPLE_(U)& t) : f0_(t.f0_) {}
+
+ tuple& operator=(const tuple& t) { return CopyFrom(t); }
+
+ template <GTEST_1_TYPENAMES_(U)>
+ tuple& operator=(const GTEST_1_TUPLE_(U)& t) {
+ return CopyFrom(t);
+ }
+
+ GTEST_DECLARE_TUPLE_AS_FRIEND_
+
+ template <GTEST_1_TYPENAMES_(U)>
+ tuple& CopyFrom(const GTEST_1_TUPLE_(U)& t) {
+ f0_ = t.f0_;
+ return *this;
+ }
+
+ T0 f0_;
+};
+
+template <GTEST_2_TYPENAMES_(T)>
+class GTEST_2_TUPLE_(T) {
+ public:
+ template <int k> friend class gtest_internal::Get;
+
+ tuple() : f0_(), f1_() {}
+
+ explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1) : f0_(f0),
+ f1_(f1) {}
+
+ tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_) {}
+
+ template <GTEST_2_TYPENAMES_(U)>
+ tuple(const GTEST_2_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_) {}
+ template <typename U0, typename U1>
+ tuple(const ::std::pair<U0, U1>& p) : f0_(p.first), f1_(p.second) {}
+
+ tuple& operator=(const tuple& t) { return CopyFrom(t); }
+
+ template <GTEST_2_TYPENAMES_(U)>
+ tuple& operator=(const GTEST_2_TUPLE_(U)& t) {
+ return CopyFrom(t);
+ }
+ template <typename U0, typename U1>
+ tuple& operator=(const ::std::pair<U0, U1>& p) {
+ f0_ = p.first;
+ f1_ = p.second;
+ return *this;
+ }
+
+ GTEST_DECLARE_TUPLE_AS_FRIEND_
+
+ template <GTEST_2_TYPENAMES_(U)>
+ tuple& CopyFrom(const GTEST_2_TUPLE_(U)& t) {
+ f0_ = t.f0_;
+ f1_ = t.f1_;
+ return *this;
+ }
+
+ T0 f0_;
+ T1 f1_;
+};
+
+template <GTEST_3_TYPENAMES_(T)>
+class GTEST_3_TUPLE_(T) {
+ public:
+ template <int k> friend class gtest_internal::Get;
+
+ tuple() : f0_(), f1_(), f2_() {}
+
+ explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
+ GTEST_BY_REF_(T2) f2) : f0_(f0), f1_(f1), f2_(f2) {}
+
+ tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {}
+
+ template <GTEST_3_TYPENAMES_(U)>
+ tuple(const GTEST_3_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {}
+
+ tuple& operator=(const tuple& t) { return CopyFrom(t); }
+
+ template <GTEST_3_TYPENAMES_(U)>
+ tuple& operator=(const GTEST_3_TUPLE_(U)& t) {
+ return CopyFrom(t);
+ }
+
+ GTEST_DECLARE_TUPLE_AS_FRIEND_
+
+ template <GTEST_3_TYPENAMES_(U)>
+ tuple& CopyFrom(const GTEST_3_TUPLE_(U)& t) {
+ f0_ = t.f0_;
+ f1_ = t.f1_;
+ f2_ = t.f2_;
+ return *this;
+ }
+
+ T0 f0_;
+ T1 f1_;
+ T2 f2_;
+};
+
+template <GTEST_4_TYPENAMES_(T)>
+class GTEST_4_TUPLE_(T) {
+ public:
+ template <int k> friend class gtest_internal::Get;
+
+ tuple() : f0_(), f1_(), f2_(), f3_() {}
+
+ explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
+ GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3) : f0_(f0), f1_(f1), f2_(f2),
+ f3_(f3) {}
+
+ tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_) {}
+
+ template <GTEST_4_TYPENAMES_(U)>
+ tuple(const GTEST_4_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
+ f3_(t.f3_) {}
+
+ tuple& operator=(const tuple& t) { return CopyFrom(t); }
+
+ template <GTEST_4_TYPENAMES_(U)>
+ tuple& operator=(const GTEST_4_TUPLE_(U)& t) {
+ return CopyFrom(t);
+ }
+
+ GTEST_DECLARE_TUPLE_AS_FRIEND_
+
+ template <GTEST_4_TYPENAMES_(U)>
+ tuple& CopyFrom(const GTEST_4_TUPLE_(U)& t) {
+ f0_ = t.f0_;
+ f1_ = t.f1_;
+ f2_ = t.f2_;
+ f3_ = t.f3_;
+ return *this;
+ }
+
+ T0 f0_;
+ T1 f1_;
+ T2 f2_;
+ T3 f3_;
+};
+
+template <GTEST_5_TYPENAMES_(T)>
+class GTEST_5_TUPLE_(T) {
+ public:
+ template <int k> friend class gtest_internal::Get;
+
+ tuple() : f0_(), f1_(), f2_(), f3_(), f4_() {}
+
+ explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
+ GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3,
+ GTEST_BY_REF_(T4) f4) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4) {}
+
+ tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
+ f4_(t.f4_) {}
+
+ template <GTEST_5_TYPENAMES_(U)>
+ tuple(const GTEST_5_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
+ f3_(t.f3_), f4_(t.f4_) {}
+
+ tuple& operator=(const tuple& t) { return CopyFrom(t); }
+
+ template <GTEST_5_TYPENAMES_(U)>
+ tuple& operator=(const GTEST_5_TUPLE_(U)& t) {
+ return CopyFrom(t);
+ }
+
+ GTEST_DECLARE_TUPLE_AS_FRIEND_
+
+ template <GTEST_5_TYPENAMES_(U)>
+ tuple& CopyFrom(const GTEST_5_TUPLE_(U)& t) {
+ f0_ = t.f0_;
+ f1_ = t.f1_;
+ f2_ = t.f2_;
+ f3_ = t.f3_;
+ f4_ = t.f4_;
+ return *this;
+ }
+
+ T0 f0_;
+ T1 f1_;
+ T2 f2_;
+ T3 f3_;
+ T4 f4_;
+};
+
+template <GTEST_6_TYPENAMES_(T)>
+class GTEST_6_TUPLE_(T) {
+ public:
+ template <int k> friend class gtest_internal::Get;
+
+ tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_() {}
+
+ explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
+ GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
+ GTEST_BY_REF_(T5) f5) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
+ f5_(f5) {}
+
+ tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
+ f4_(t.f4_), f5_(t.f5_) {}
+
+ template <GTEST_6_TYPENAMES_(U)>
+ tuple(const GTEST_6_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
+ f3_(t.f3_), f4_(t.f4_), f5_(t.f5_) {}
+
+ tuple& operator=(const tuple& t) { return CopyFrom(t); }
+
+ template <GTEST_6_TYPENAMES_(U)>
+ tuple& operator=(const GTEST_6_TUPLE_(U)& t) {
+ return CopyFrom(t);
+ }
+
+ GTEST_DECLARE_TUPLE_AS_FRIEND_
+
+ template <GTEST_6_TYPENAMES_(U)>
+ tuple& CopyFrom(const GTEST_6_TUPLE_(U)& t) {
+ f0_ = t.f0_;
+ f1_ = t.f1_;
+ f2_ = t.f2_;
+ f3_ = t.f3_;
+ f4_ = t.f4_;
+ f5_ = t.f5_;
+ return *this;
+ }
+
+ T0 f0_;
+ T1 f1_;
+ T2 f2_;
+ T3 f3_;
+ T4 f4_;
+ T5 f5_;
+};
+
+template <GTEST_7_TYPENAMES_(T)>
+class GTEST_7_TUPLE_(T) {
+ public:
+ template <int k> friend class gtest_internal::Get;
+
+ tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_() {}
+
+ explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
+ GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
+ GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6) : f0_(f0), f1_(f1), f2_(f2),
+ f3_(f3), f4_(f4), f5_(f5), f6_(f6) {}
+
+ tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
+ f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {}
+
+ template <GTEST_7_TYPENAMES_(U)>
+ tuple(const GTEST_7_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
+ f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {}
+
+ tuple& operator=(const tuple& t) { return CopyFrom(t); }
+
+ template <GTEST_7_TYPENAMES_(U)>
+ tuple& operator=(const GTEST_7_TUPLE_(U)& t) {
+ return CopyFrom(t);
+ }
+
+ GTEST_DECLARE_TUPLE_AS_FRIEND_
+
+ template <GTEST_7_TYPENAMES_(U)>
+ tuple& CopyFrom(const GTEST_7_TUPLE_(U)& t) {
+ f0_ = t.f0_;
+ f1_ = t.f1_;
+ f2_ = t.f2_;
+ f3_ = t.f3_;
+ f4_ = t.f4_;
+ f5_ = t.f5_;
+ f6_ = t.f6_;
+ return *this;
+ }
+
+ T0 f0_;
+ T1 f1_;
+ T2 f2_;
+ T3 f3_;
+ T4 f4_;
+ T5 f5_;
+ T6 f6_;
+};
+
+template <GTEST_8_TYPENAMES_(T)>
+class GTEST_8_TUPLE_(T) {
+ public:
+ template <int k> friend class gtest_internal::Get;
+
+ tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_() {}
+
+ explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
+ GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
+ GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6,
+ GTEST_BY_REF_(T7) f7) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
+ f5_(f5), f6_(f6), f7_(f7) {}
+
+ tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
+ f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {}
+
+ template <GTEST_8_TYPENAMES_(U)>
+ tuple(const GTEST_8_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
+ f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {}
+
+ tuple& operator=(const tuple& t) { return CopyFrom(t); }
+
+ template <GTEST_8_TYPENAMES_(U)>
+ tuple& operator=(const GTEST_8_TUPLE_(U)& t) {
+ return CopyFrom(t);
+ }
+
+ GTEST_DECLARE_TUPLE_AS_FRIEND_
+
+ template <GTEST_8_TYPENAMES_(U)>
+ tuple& CopyFrom(const GTEST_8_TUPLE_(U)& t) {
+ f0_ = t.f0_;
+ f1_ = t.f1_;
+ f2_ = t.f2_;
+ f3_ = t.f3_;
+ f4_ = t.f4_;
+ f5_ = t.f5_;
+ f6_ = t.f6_;
+ f7_ = t.f7_;
+ return *this;
+ }
+
+ T0 f0_;
+ T1 f1_;
+ T2 f2_;
+ T3 f3_;
+ T4 f4_;
+ T5 f5_;
+ T6 f6_;
+ T7 f7_;
+};
+
+template <GTEST_9_TYPENAMES_(T)>
+class GTEST_9_TUPLE_(T) {
+ public:
+ template <int k> friend class gtest_internal::Get;
+
+ tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_() {}
+
+ explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
+ GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
+ GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7,
+ GTEST_BY_REF_(T8) f8) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4),
+ f5_(f5), f6_(f6), f7_(f7), f8_(f8) {}
+
+ tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
+ f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {}
+
+ template <GTEST_9_TYPENAMES_(U)>
+ tuple(const GTEST_9_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
+ f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {}
+
+ tuple& operator=(const tuple& t) { return CopyFrom(t); }
+
+ template <GTEST_9_TYPENAMES_(U)>
+ tuple& operator=(const GTEST_9_TUPLE_(U)& t) {
+ return CopyFrom(t);
+ }
+
+ GTEST_DECLARE_TUPLE_AS_FRIEND_
+
+ template <GTEST_9_TYPENAMES_(U)>
+ tuple& CopyFrom(const GTEST_9_TUPLE_(U)& t) {
+ f0_ = t.f0_;
+ f1_ = t.f1_;
+ f2_ = t.f2_;
+ f3_ = t.f3_;
+ f4_ = t.f4_;
+ f5_ = t.f5_;
+ f6_ = t.f6_;
+ f7_ = t.f7_;
+ f8_ = t.f8_;
+ return *this;
+ }
+
+ T0 f0_;
+ T1 f1_;
+ T2 f2_;
+ T3 f3_;
+ T4 f4_;
+ T5 f5_;
+ T6 f6_;
+ T7 f7_;
+ T8 f8_;
+};
+
+template <GTEST_10_TYPENAMES_(T)>
+class tuple {
+ public:
+ template <int k> friend class gtest_internal::Get;
+
+ tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_(),
+ f9_() {}
+
+ explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1,
+ GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4,
+ GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7,
+ GTEST_BY_REF_(T8) f8, GTEST_BY_REF_(T9) f9) : f0_(f0), f1_(f1), f2_(f2),
+ f3_(f3), f4_(f4), f5_(f5), f6_(f6), f7_(f7), f8_(f8), f9_(f9) {}
+
+ tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_),
+ f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), f9_(t.f9_) {}
+
+ template <GTEST_10_TYPENAMES_(U)>
+ tuple(const GTEST_10_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_),
+ f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_),
+ f9_(t.f9_) {}
+
+ tuple& operator=(const tuple& t) { return CopyFrom(t); }
+
+ template <GTEST_10_TYPENAMES_(U)>
+ tuple& operator=(const GTEST_10_TUPLE_(U)& t) {
+ return CopyFrom(t);
+ }
+
+ GTEST_DECLARE_TUPLE_AS_FRIEND_
+
+ template <GTEST_10_TYPENAMES_(U)>
+ tuple& CopyFrom(const GTEST_10_TUPLE_(U)& t) {
+ f0_ = t.f0_;
+ f1_ = t.f1_;
+ f2_ = t.f2_;
+ f3_ = t.f3_;
+ f4_ = t.f4_;
+ f5_ = t.f5_;
+ f6_ = t.f6_;
+ f7_ = t.f7_;
+ f8_ = t.f8_;
+ f9_ = t.f9_;
+ return *this;
+ }
+
+ T0 f0_;
+ T1 f1_;
+ T2 f2_;
+ T3 f3_;
+ T4 f4_;
+ T5 f5_;
+ T6 f6_;
+ T7 f7_;
+ T8 f8_;
+ T9 f9_;
+};
+
+// 6.1.3.2 Tuple creation functions.
+
+// Known limitations: we don't support passing an
+// std::tr1::reference_wrapper<T> to make_tuple(). And we don't
+// implement tie().
+
+inline tuple<> make_tuple() { return tuple<>(); }
+
+template <GTEST_1_TYPENAMES_(T)>
+inline GTEST_1_TUPLE_(T) make_tuple(const T0& f0) {
+ return GTEST_1_TUPLE_(T)(f0);
+}
+
+template <GTEST_2_TYPENAMES_(T)>
+inline GTEST_2_TUPLE_(T) make_tuple(const T0& f0, const T1& f1) {
+ return GTEST_2_TUPLE_(T)(f0, f1);
+}
+
+template <GTEST_3_TYPENAMES_(T)>
+inline GTEST_3_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2) {
+ return GTEST_3_TUPLE_(T)(f0, f1, f2);
+}
+
+template <GTEST_4_TYPENAMES_(T)>
+inline GTEST_4_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
+ const T3& f3) {
+ return GTEST_4_TUPLE_(T)(f0, f1, f2, f3);
+}
+
+template <GTEST_5_TYPENAMES_(T)>
+inline GTEST_5_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
+ const T3& f3, const T4& f4) {
+ return GTEST_5_TUPLE_(T)(f0, f1, f2, f3, f4);
+}
+
+template <GTEST_6_TYPENAMES_(T)>
+inline GTEST_6_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
+ const T3& f3, const T4& f4, const T5& f5) {
+ return GTEST_6_TUPLE_(T)(f0, f1, f2, f3, f4, f5);
+}
+
+template <GTEST_7_TYPENAMES_(T)>
+inline GTEST_7_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
+ const T3& f3, const T4& f4, const T5& f5, const T6& f6) {
+ return GTEST_7_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6);
+}
+
+template <GTEST_8_TYPENAMES_(T)>
+inline GTEST_8_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
+ const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7) {
+ return GTEST_8_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7);
+}
+
+template <GTEST_9_TYPENAMES_(T)>
+inline GTEST_9_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
+ const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7,
+ const T8& f8) {
+ return GTEST_9_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8);
+}
+
+template <GTEST_10_TYPENAMES_(T)>
+inline GTEST_10_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2,
+ const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7,
+ const T8& f8, const T9& f9) {
+ return GTEST_10_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
+}
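+
+// As a usage sketch, make_tuple(42, 'a') deduces T0 = int and T1 = char,
+// returning a 2-tuple (padded with void type parameters) whose fields are
+// 42 and 'a'.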
+
+// 6.1.3.3 Tuple helper classes.
+
+template <typename Tuple> struct tuple_size;
+
+template <GTEST_0_TYPENAMES_(T)>
+struct tuple_size<GTEST_0_TUPLE_(T) > {
+ static const int value = 0;
+};
+
+template <GTEST_1_TYPENAMES_(T)>
+struct tuple_size<GTEST_1_TUPLE_(T) > {
+ static const int value = 1;
+};
+
+template <GTEST_2_TYPENAMES_(T)>
+struct tuple_size<GTEST_2_TUPLE_(T) > {
+ static const int value = 2;
+};
+
+template <GTEST_3_TYPENAMES_(T)>
+struct tuple_size<GTEST_3_TUPLE_(T) > {
+ static const int value = 3;
+};
+
+template <GTEST_4_TYPENAMES_(T)>
+struct tuple_size<GTEST_4_TUPLE_(T) > {
+ static const int value = 4;
+};
+
+template <GTEST_5_TYPENAMES_(T)>
+struct tuple_size<GTEST_5_TUPLE_(T) > {
+ static const int value = 5;
+};
+
+template <GTEST_6_TYPENAMES_(T)>
+struct tuple_size<GTEST_6_TUPLE_(T) > {
+ static const int value = 6;
+};
+
+template <GTEST_7_TYPENAMES_(T)>
+struct tuple_size<GTEST_7_TUPLE_(T) > {
+ static const int value = 7;
+};
+
+template <GTEST_8_TYPENAMES_(T)>
+struct tuple_size<GTEST_8_TUPLE_(T) > {
+ static const int value = 8;
+};
+
+template <GTEST_9_TYPENAMES_(T)>
+struct tuple_size<GTEST_9_TUPLE_(T) > {
+ static const int value = 9;
+};
+
+template <GTEST_10_TYPENAMES_(T)>
+struct tuple_size<GTEST_10_TUPLE_(T) > {
+ static const int value = 10;
+};
+
+template <int k, class Tuple>
+struct tuple_element {
+ typedef typename gtest_internal::TupleElement<
+ k < (tuple_size<Tuple>::value), k, Tuple>::type type;
+};
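+
+// For example, tuple_element<1, tuple<int, char> >::type is char. The
+// kIndexValid check above makes an out-of-range index a compile error
+// (the primary TupleElement template is never defined) instead of
+// silently selecting a field.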
+
+#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element<k, Tuple >::type
+
+// 6.1.3.4 Element access.
+
+namespace gtest_internal {
+
+template <>
+class Get<0> {
+ public:
+ template <class Tuple>
+ static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple))
+ Field(Tuple& t) { return t.f0_; } // NOLINT
+
+ template <class Tuple>
+ static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple))
+ ConstField(const Tuple& t) { return t.f0_; }
+};
+
+template <>
+class Get<1> {
+ public:
+ template <class Tuple>
+ static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple))
+ Field(Tuple& t) { return t.f1_; } // NOLINT
+
+ template <class Tuple>
+ static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple))
+ ConstField(const Tuple& t) { return t.f1_; }
+};
+
+template <>
+class Get<2> {
+ public:
+ template <class Tuple>
+ static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple))
+ Field(Tuple& t) { return t.f2_; } // NOLINT
+
+ template <class Tuple>
+ static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple))
+ ConstField(const Tuple& t) { return t.f2_; }
+};
+
+template <>
+class Get<3> {
+ public:
+ template <class Tuple>
+ static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple))
+ Field(Tuple& t) { return t.f3_; } // NOLINT
+
+ template <class Tuple>
+ static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple))
+ ConstField(const Tuple& t) { return t.f3_; }
+};
+
+template <>
+class Get<4> {
+ public:
+ template <class Tuple>
+ static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple))
+ Field(Tuple& t) { return t.f4_; } // NOLINT
+
+ template <class Tuple>
+ static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple))
+ ConstField(const Tuple& t) { return t.f4_; }
+};
+
+template <>
+class Get<5> {
+ public:
+ template <class Tuple>
+ static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple))
+ Field(Tuple& t) { return t.f5_; } // NOLINT
+
+ template <class Tuple>
+ static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple))
+ ConstField(const Tuple& t) { return t.f5_; }
+};
+
+template <>
+class Get<6> {
+ public:
+ template <class Tuple>
+ static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple))
+ Field(Tuple& t) { return t.f6_; } // NOLINT
+
+ template <class Tuple>
+ static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple))
+ ConstField(const Tuple& t) { return t.f6_; }
+};
+
+template <>
+class Get<7> {
+ public:
+ template <class Tuple>
+ static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple))
+ Field(Tuple& t) { return t.f7_; } // NOLINT
+
+ template <class Tuple>
+ static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple))
+ ConstField(const Tuple& t) { return t.f7_; }
+};
+
+template <>
+class Get<8> {
+ public:
+ template <class Tuple>
+ static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple))
+ Field(Tuple& t) { return t.f8_; } // NOLINT
+
+ template <class Tuple>
+ static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple))
+ ConstField(const Tuple& t) { return t.f8_; }
+};
+
+template <>
+class Get<9> {
+ public:
+ template <class Tuple>
+ static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple))
+ Field(Tuple& t) { return t.f9_; } // NOLINT
+
+ template <class Tuple>
+ static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple))
+ ConstField(const Tuple& t) { return t.f9_; }
+};
+
+} // namespace gtest_internal
+
+template <int k, GTEST_10_TYPENAMES_(T)>
+GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T)))
+get(GTEST_10_TUPLE_(T)& t) {
+ return gtest_internal::Get<k>::Field(t);
+}
+
+template <int k, GTEST_10_TYPENAMES_(T)>
+GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T)))
+get(const GTEST_10_TUPLE_(T)& t) {
+ return gtest_internal::Get<k>::ConstField(t);
+}
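+
+// Usage sketch (editorial, hypothetical tuple t): get<1>(t) dispatches to
+// gtest_internal::Get<1>::Field(t), which returns t.f1_ by reference;
+// calling get<1>() on a const tuple goes through ConstField and yields a
+// const reference instead.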
+
+// 6.1.3.5 Relational operators
+
+// We only implement == and !=, as we don't have a need for the rest yet.
+
+namespace gtest_internal {
+
+// SameSizeTuplePrefixComparator<k, k>::Eq(t1, t2) returns true if the
+// first k fields of t1 equal the first k fields of t2.
+// SameSizeTuplePrefixComparator(k1, k2) would be a compiler error if
+// k1 != k2.
+template <int kSize1, int kSize2>
+struct SameSizeTuplePrefixComparator;
+
+template <>
+struct SameSizeTuplePrefixComparator<0, 0> {
+ template <class Tuple1, class Tuple2>
+ static bool Eq(const Tuple1& /* t1 */, const Tuple2& /* t2 */) {
+ return true;
+ }
+};
+
+template <int k>
+struct SameSizeTuplePrefixComparator<k, k> {
+ template <class Tuple1, class Tuple2>
+ static bool Eq(const Tuple1& t1, const Tuple2& t2) {
+ return SameSizeTuplePrefixComparator<k - 1, k - 1>::Eq(t1, t2) &&
+ ::std::tr1::get<k - 1>(t1) == ::std::tr1::get<k - 1>(t2);
+ }
+};
+
+} // namespace gtest_internal
+
+template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
+inline bool operator==(const GTEST_10_TUPLE_(T)& t,
+ const GTEST_10_TUPLE_(U)& u) {
+ return gtest_internal::SameSizeTuplePrefixComparator<
+ tuple_size<GTEST_10_TUPLE_(T) >::value,
+ tuple_size<GTEST_10_TUPLE_(U) >::value>::Eq(t, u);
+}
+
+template <GTEST_10_TYPENAMES_(T), GTEST_10_TYPENAMES_(U)>
+inline bool operator!=(const GTEST_10_TUPLE_(T)& t,
+ const GTEST_10_TUPLE_(U)& u) { return !(t == u); }
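+
+// Illustration (editorial sketch): the operator== above unrolls at compile
+// time into a field-by-field conjunction via SameSizeTuplePrefixComparator.
+// For two hypothetical 2-tuples a and b it is effectively
+//
+//   get<0>(a) == get<0>(b) && get<1>(a) == get<1>(b)
+//
+// Comparing tuples of different sizes selects the unimplemented primary
+// template and fails to compile, as intended.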
+
+// 6.1.4 Pairs.
+// Unimplemented.
+
+} // namespace tr1
+} // namespace std
+
+#undef GTEST_0_TUPLE_
+#undef GTEST_1_TUPLE_
+#undef GTEST_2_TUPLE_
+#undef GTEST_3_TUPLE_
+#undef GTEST_4_TUPLE_
+#undef GTEST_5_TUPLE_
+#undef GTEST_6_TUPLE_
+#undef GTEST_7_TUPLE_
+#undef GTEST_8_TUPLE_
+#undef GTEST_9_TUPLE_
+#undef GTEST_10_TUPLE_
+
+#undef GTEST_0_TYPENAMES_
+#undef GTEST_1_TYPENAMES_
+#undef GTEST_2_TYPENAMES_
+#undef GTEST_3_TYPENAMES_
+#undef GTEST_4_TYPENAMES_
+#undef GTEST_5_TYPENAMES_
+#undef GTEST_6_TYPENAMES_
+#undef GTEST_7_TYPENAMES_
+#undef GTEST_8_TYPENAMES_
+#undef GTEST_9_TYPENAMES_
+#undef GTEST_10_TYPENAMES_
+
+#undef GTEST_DECLARE_TUPLE_AS_FRIEND_
+#undef GTEST_BY_REF_
+#undef GTEST_ADD_REF_
+#undef GTEST_TUPLE_ELEMENT_
+
+#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_
+# elif GTEST_ENV_HAS_STD_TUPLE_
+# include <tuple>
+// C++11 puts its tuple into the ::std namespace rather than
+// ::std::tr1. gtest expects tuple to live in ::std::tr1, so put it there.
+// This causes undefined behavior, but supported compilers react in
+// the way we intend.
+namespace std {
+namespace tr1 {
+using ::std::get;
+using ::std::make_tuple;
+using ::std::tuple;
+using ::std::tuple_element;
+using ::std::tuple_size;
+}
+}
+
+# elif GTEST_OS_SYMBIAN
+
+// On Symbian, BOOST_HAS_TR1_TUPLE causes Boost's TR1 tuple library to
+// use STLport's tuple implementation, which unfortunately doesn't
+// work as the copy of STLport distributed with Symbian is incomplete.
+// By making sure BOOST_HAS_TR1_TUPLE is undefined, we force Boost to
+// use its own tuple implementation.
+# ifdef BOOST_HAS_TR1_TUPLE
+# undef BOOST_HAS_TR1_TUPLE
+# endif // BOOST_HAS_TR1_TUPLE
+
+// This prevents <boost/tr1/detail/config.hpp>, which defines
+// BOOST_HAS_TR1_TUPLE, from being #included by Boost's <tuple>.
+# define BOOST_TR1_DETAIL_CONFIG_HPP_INCLUDED
+# include <tuple>
+
+# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40000)
+// GCC 4.0+ implements tr1/tuple in the <tr1/tuple> header. This does
+// not conform to the TR1 spec, which requires the header to be <tuple>.
+
+# if !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302
+// Until version 4.3.2, gcc has a bug that causes <tr1/functional>,
+// which is #included by <tr1/tuple>, to not compile when RTTI is
+// disabled. _TR1_FUNCTIONAL is the header guard for
+// <tr1/functional>. Hence the following #define is a hack to prevent
+// <tr1/functional> from being included.
+# define _TR1_FUNCTIONAL 1
+# include <tr1/tuple>
+# undef _TR1_FUNCTIONAL // Allows the user to #include
+                          // <tr1/functional> if they choose to.
+# else
+# include <tr1/tuple> // NOLINT
+# endif // !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302
+
+# else
+// If the compiler is not GCC 4.0+, we assume the user is using a
+// spec-conforming TR1 implementation.
+# include <tuple> // NOLINT
+# endif // GTEST_USE_OWN_TR1_TUPLE
+
+#endif // GTEST_HAS_TR1_TUPLE
+
+// Determines whether clone(2) is supported.
+// Usually it will only be available on Linux, excluding
+// Linux on the Itanium architecture.
+// Also see http://linux.die.net/man/2/clone.
+#ifndef GTEST_HAS_CLONE
+// The user didn't tell us, so we need to figure it out.
+
+# if GTEST_OS_LINUX && !defined(__ia64__)
+# if GTEST_OS_LINUX_ANDROID
+// On Android, clone() is only available on ARM starting with Gingerbread.
+# if defined(__arm__) && __ANDROID_API__ >= 9
+# define GTEST_HAS_CLONE 1
+# else
+# define GTEST_HAS_CLONE 0
+# endif
+# else
+# define GTEST_HAS_CLONE 1
+# endif
+# else
+# define GTEST_HAS_CLONE 0
+# endif // GTEST_OS_LINUX && !defined(__ia64__)
+
+#endif // GTEST_HAS_CLONE
+
+// Determines whether to support stream redirection. This is used to test
+// output correctness and to implement death tests.
+#ifndef GTEST_HAS_STREAM_REDIRECTION
+// By default, we assume that stream redirection is supported on all
+// platforms except known mobile ones.
+# if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN
+# define GTEST_HAS_STREAM_REDIRECTION 0
+# else
+# define GTEST_HAS_STREAM_REDIRECTION 1
+# endif // !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_SYMBIAN
+#endif // GTEST_HAS_STREAM_REDIRECTION
+
+// Determines whether to support death tests.
+// Google Test does not support death tests for VC 7.1 and earlier as
+// abort() in a VC 7.1 application compiled as GUI in debug config
+// pops up a dialog window that cannot be suppressed programmatically.
+#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \
+ (GTEST_OS_MAC && !GTEST_OS_IOS) || GTEST_OS_IOS_SIMULATOR || \
+ (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER >= 1400) || \
+ GTEST_OS_WINDOWS_MINGW || GTEST_OS_AIX || GTEST_OS_HPUX || \
+ GTEST_OS_OPENBSD || GTEST_OS_QNX)
+# define GTEST_HAS_DEATH_TEST 1
+# include <vector> // NOLINT
+#endif
+
+// We don't support MSVC 7.1 with exceptions disabled now. Therefore
+// all the compilers we care about are adequate for supporting
+// value-parameterized tests.
+#define GTEST_HAS_PARAM_TEST 1
+
+// Determines whether to support type-driven tests.
+
+// Typed tests need <typeinfo> and variadic macros, which GCC, VC++ 8.0,
+// Sun Pro CC, IBM Visual Age, and HP aCC support.
+#if defined(__GNUC__) || (_MSC_VER >= 1400) || defined(__SUNPRO_CC) || \
+ defined(__IBMCPP__) || defined(__HP_aCC)
+# define GTEST_HAS_TYPED_TEST 1
+# define GTEST_HAS_TYPED_TEST_P 1
+#endif
+
+// Determines whether to support Combine(). This only makes sense when
+// value-parameterized tests are enabled. The implementation doesn't
+// work on Sun Studio since it doesn't understand templated conversion
+// operators.
+#if GTEST_HAS_PARAM_TEST && GTEST_HAS_TR1_TUPLE && !defined(__SUNPRO_CC)
+# define GTEST_HAS_COMBINE 1
+#endif
+
+// Determines whether the system compiler uses UTF-16 for encoding wide strings.
+#define GTEST_WIDE_STRING_USES_UTF16_ \
+ (GTEST_OS_WINDOWS || GTEST_OS_CYGWIN || GTEST_OS_SYMBIAN || GTEST_OS_AIX)
+
+// Determines whether test results can be streamed to a socket.
+#if GTEST_OS_LINUX
+# define GTEST_CAN_STREAM_RESULTS_ 1
+#endif
+
+// Defines some utility macros.
+
+// The GNU compiler emits a warning if nested "if" statements are followed by
+// an "else" statement and braces are not used to explicitly disambiguate the
+// "else" binding. This leads to problems with code like:
+//
+// if (gate)
+// ASSERT_*(condition) << "Some message";
+//
+// The "switch (0) case 0:" idiom is used to suppress this.
+#ifdef __INTEL_COMPILER
+# define GTEST_AMBIGUOUS_ELSE_BLOCKER_
+#else
+# define GTEST_AMBIGUOUS_ELSE_BLOCKER_ switch (0) case 0: default: // NOLINT
+#endif
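+
+// Illustration (editorial sketch, hypothetical macro): a construct such as
+//
+//   #define MY_ASSERT_(cond) \
+//     GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+//     if (cond) \
+//       ; \
+//     else \
+//       ReportFailure()
+//
+// expands so that the "else" can only bind to the macro's own "if"; the
+// enclosing "switch (0) case 0: default:" keeps GCC from warning about an
+// ambiguous else when the macro is used inside an unbraced user "if".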
+
+// Use this annotation at the end of a struct/class definition to
+// prevent the compiler from optimizing away instances that are never
+// used. This is useful when all interesting logic happens inside the
+// c'tor and / or d'tor. Example:
+//
+// struct Foo {
+// Foo() { ... }
+// } GTEST_ATTRIBUTE_UNUSED_;
+//
+// Also use it after a variable or parameter declaration to tell the
+// compiler the variable/parameter does not have to be used.
+#if defined(__GNUC__) && !defined(COMPILER_ICC)
+# define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused))
+#else
+# define GTEST_ATTRIBUTE_UNUSED_
+#endif
+
+// A macro to disallow operator=
+// This should be used in the private: declarations for a class.
+#define GTEST_DISALLOW_ASSIGN_(type)\
+ void operator=(type const &)
+
+// A macro to disallow copy constructor and operator=
+// This should be used in the private: declarations for a class.
+#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type)\
+ type(type const &);\
+ GTEST_DISALLOW_ASSIGN_(type)
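+
+// Usage sketch (editorial, hypothetical class):
+//
+//   class FileGuard {
+//    public:
+//     FileGuard() {}
+//    private:
+//     GTEST_DISALLOW_COPY_AND_ASSIGN_(FileGuard);
+//   };
+//
+// Any attempt to copy or assign a FileGuard now fails at compile time (or
+// at link time from members and friends), as the declarations are private
+// and never defined.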
+
+// Tell the compiler to warn about unused return values for functions declared
+// with this macro. The macro should be used on function declarations
+// following the argument list:
+//
+// Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT_;
+#if defined(__GNUC__) && (GTEST_GCC_VER_ >= 30400) && !defined(COMPILER_ICC)
+# define GTEST_MUST_USE_RESULT_ __attribute__ ((warn_unused_result))
+#else
+# define GTEST_MUST_USE_RESULT_
+#endif // __GNUC__ && (GTEST_GCC_VER_ >= 30400) && !COMPILER_ICC
+
+// Determine whether the compiler supports Microsoft's Structured Exception
+// Handling. This is supported by several Windows compilers but generally
+// does not exist on any other system.
+#ifndef GTEST_HAS_SEH
+// The user didn't tell us, so we need to figure it out.
+
+# if defined(_MSC_VER) || defined(__BORLANDC__)
+// These two compilers are known to support SEH.
+# define GTEST_HAS_SEH 1
+# else
+// Assume no SEH.
+# define GTEST_HAS_SEH 0
+# endif
+
+#endif // GTEST_HAS_SEH
+
+#ifdef _MSC_VER
+
+# if GTEST_LINKED_AS_SHARED_LIBRARY
+# define GTEST_API_ __declspec(dllimport)
+# elif GTEST_CREATE_SHARED_LIBRARY
+# define GTEST_API_ __declspec(dllexport)
+# endif
+
+#endif // _MSC_VER
+
+#ifndef GTEST_API_
+# define GTEST_API_
+#endif
+
+#ifdef __GNUC__
+// Ask the compiler to never inline a given function.
+# define GTEST_NO_INLINE_ __attribute__((noinline))
+#else
+# define GTEST_NO_INLINE_
+#endif
+
+// _LIBCPP_VERSION is defined by the libc++ library from the LLVM project.
+#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION)
+# define GTEST_HAS_CXXABI_H_ 1
+#else
+# define GTEST_HAS_CXXABI_H_ 0
+#endif
+
+namespace testing {
+
+class Message;
+
+namespace internal {
+
+// A secret type that Google Test users don't know about. It has no
+// definition on purpose. Therefore it's impossible to create a
+// Secret object, which is what we want.
+class Secret;
+
+// The GTEST_COMPILE_ASSERT_ macro can be used to verify that a compile time
+// expression is true. For example, you could use it to verify the
+// size of a static array:
+//
+// GTEST_COMPILE_ASSERT_(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
+// content_type_names_incorrect_size);
+//
+// or to make sure a struct is smaller than a certain size:
+//
+// GTEST_COMPILE_ASSERT_(sizeof(foo) < 128, foo_too_large);
+//
+// The second argument to the macro is the name of the variable. If
+// the expression is false, most compilers will issue a warning/error
+// containing the name of the variable.
+
+template <bool>
+struct CompileAssert {
+};
+
+#define GTEST_COMPILE_ASSERT_(expr, msg) \
+ typedef ::testing::internal::CompileAssert<(static_cast<bool>(expr))> \
+ msg[static_cast<bool>(expr) ? 1 : -1] GTEST_ATTRIBUTE_UNUSED_
+
+// Implementation details of GTEST_COMPILE_ASSERT_:
+//
+// - GTEST_COMPILE_ASSERT_ works by defining an array type that has -1
+// elements (and thus is invalid) when the expression is false.
+//
+// - The simpler definition
+//
+// #define GTEST_COMPILE_ASSERT_(expr, msg) typedef char msg[(expr) ? 1 : -1]
+//
+// does not work, as gcc supports variable-length arrays whose sizes
+//   are determined at run-time (this is a gcc extension and not part
+// of the C++ standard). As a result, gcc fails to reject the
+// following code with the simple definition:
+//
+// int foo;
+// GTEST_COMPILE_ASSERT_(foo, msg); // not supposed to compile as foo is
+// // not a compile-time constant.
+//
+// - By using the type CompileAssert<(bool(expr))>, we ensure that
+// expr is a compile-time constant. (Template arguments must be
+// determined at compile-time.)
+//
+// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
+// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
+//
+// CompileAssert<bool(expr)>
+//
+//   instead, these compilers would refuse to compile
+//
+// GTEST_COMPILE_ASSERT_(5 > 0, some_message);
+//
+// (They seem to think the ">" in "5 > 0" marks the end of the
+// template argument list.)
+//
+// - The array size is (bool(expr) ? 1 : -1), instead of simply
+//
+// ((expr) ? 1 : -1).
+//
+// This is to avoid running into a bug in MS VC 7.1, which
+// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
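+
+// Usage sketch (editorial, hypothetical assertions on a typical platform):
+//
+//   GTEST_COMPILE_ASSERT_(sizeof(int) >= 4, int_is_too_small);
+//   // roughly: typedef CompileAssert<true> int_is_too_small[1]; -- accepted
+//   GTEST_COMPILE_ASSERT_(sizeof(char) == 2, char_size_wrong);
+//   // roughly: typedef CompileAssert<false> char_size_wrong[-1];
+//   // -- rejected with an error that names char_size_wrong.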
+
+// StaticAssertTypeEqHelper is used by StaticAssertTypeEq defined in gtest.h.
+//
+// This template is declared, but intentionally undefined.
+template <typename T1, typename T2>
+struct StaticAssertTypeEqHelper;
+
+template <typename T>
+struct StaticAssertTypeEqHelper<T, T> {};
+
+#if GTEST_HAS_GLOBAL_STRING
+typedef ::string string;
+#else
+typedef ::std::string string;
+#endif // GTEST_HAS_GLOBAL_STRING
+
+#if GTEST_HAS_GLOBAL_WSTRING
+typedef ::wstring wstring;
+#elif GTEST_HAS_STD_WSTRING
+typedef ::std::wstring wstring;
+#endif // GTEST_HAS_GLOBAL_WSTRING
+
+// A helper for suppressing warnings on constant condition. It just
+// returns 'condition'.
+GTEST_API_ bool IsTrue(bool condition);
+
+// Defines scoped_ptr.
+
+// This implementation of scoped_ptr is PARTIAL - it only contains
+// enough stuff to satisfy Google Test's needs.
+template <typename T>
+class scoped_ptr {
+ public:
+ typedef T element_type;
+
+ explicit scoped_ptr(T* p = NULL) : ptr_(p) {}
+ ~scoped_ptr() { reset(); }
+
+ T& operator*() const { return *ptr_; }
+ T* operator->() const { return ptr_; }
+ T* get() const { return ptr_; }
+
+ T* release() {
+ T* const ptr = ptr_;
+ ptr_ = NULL;
+ return ptr;
+ }
+
+ void reset(T* p = NULL) {
+ if (p != ptr_) {
+ if (IsTrue(sizeof(T) > 0)) { // Makes sure T is a complete type.
+ delete ptr_;
+ }
+ ptr_ = p;
+ }
+ }
+
+ private:
+ T* ptr_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr);
+};
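+
+// Usage sketch (editorial):
+//
+//   {
+//     scoped_ptr< ::std::string> name(new ::std::string("gtest"));
+//     name->append("-port");  // operator-> forwards to the raw pointer.
+//   }  // ~scoped_ptr() calls reset(), which deletes the string here.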
+
+// Defines RE.
+
+// A simple C++ wrapper for <regex.h>. It uses the POSIX Extended
+// Regular Expression syntax.
+class GTEST_API_ RE {
+ public:
+ // A copy constructor is required by the Standard to initialize object
+ // references from r-values.
+ RE(const RE& other) { Init(other.pattern()); }
+
+ // Constructs an RE from a string.
+ RE(const ::std::string& regex) { Init(regex.c_str()); } // NOLINT
+
+#if GTEST_HAS_GLOBAL_STRING
+
+ RE(const ::string& regex) { Init(regex.c_str()); } // NOLINT
+
+#endif // GTEST_HAS_GLOBAL_STRING
+
+ RE(const char* regex) { Init(regex); } // NOLINT
+ ~RE();
+
+ // Returns the string representation of the regex.
+ const char* pattern() const { return pattern_; }
+
+ // FullMatch(str, re) returns true iff regular expression re matches
+ // the entire str.
+ // PartialMatch(str, re) returns true iff regular expression re
+ // matches a substring of str (including str itself).
+ //
+ // TODO(wan@google.com): make FullMatch() and PartialMatch() work
+ // when str contains NUL characters.
+ static bool FullMatch(const ::std::string& str, const RE& re) {
+ return FullMatch(str.c_str(), re);
+ }
+ static bool PartialMatch(const ::std::string& str, const RE& re) {
+ return PartialMatch(str.c_str(), re);
+ }
+
+#if GTEST_HAS_GLOBAL_STRING
+
+ static bool FullMatch(const ::string& str, const RE& re) {
+ return FullMatch(str.c_str(), re);
+ }
+ static bool PartialMatch(const ::string& str, const RE& re) {
+ return PartialMatch(str.c_str(), re);
+ }
+
+#endif // GTEST_HAS_GLOBAL_STRING
+
+ static bool FullMatch(const char* str, const RE& re);
+ static bool PartialMatch(const char* str, const RE& re);
+
+ private:
+ void Init(const char* regex);
+
+ // We use a const char* instead of an std::string, as Google Test used to be
+ // used where std::string is not available. TODO(wan@google.com): change to
+ // std::string.
+ const char* pattern_;
+ bool is_valid_;
+
+#if GTEST_USES_POSIX_RE
+
+ regex_t full_regex_; // For FullMatch().
+ regex_t partial_regex_; // For PartialMatch().
+
+#else // GTEST_USES_SIMPLE_RE
+
+  const char* full_pattern_;  // For FullMatch().
+
+#endif
+
+ GTEST_DISALLOW_ASSIGN_(RE);
+};
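+
+// Usage sketch (editorial, hypothetical pattern):
+//
+//   const RE re("a.*z");
+//   RE::FullMatch("abcz", re);         // true: the entire string matches.
+//   RE::FullMatch("xxabczxx", re);     // false: extra characters remain.
+//   RE::PartialMatch("xxabczxx", re);  // true: a substring matches.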
+
+// Formats a source file path and a line number as they would appear
+// in an error message from the compiler used to compile this code.
+GTEST_API_ ::std::string FormatFileLocation(const char* file, int line);
+
+// Formats a file location for compiler-independent XML output.
+// Although this function is not platform dependent, we put it next to
+// FormatFileLocation in order to contrast the two functions.
+GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file,
+ int line);
+
+// Defines logging utilities:
+// GTEST_LOG_(severity) - logs messages at the specified severity level. The
+// message itself is streamed into the macro.
+// LogToStderr() - directs all log messages to stderr.
+// FlushInfoLog() - flushes informational log messages.
+
+enum GTestLogSeverity {
+ GTEST_INFO,
+ GTEST_WARNING,
+ GTEST_ERROR,
+ GTEST_FATAL
+};
+
+// Formats log entry severity, provides a stream object for streaming the
+// log message, and terminates the message with a newline when going out of
+// scope.
+class GTEST_API_ GTestLog {
+ public:
+ GTestLog(GTestLogSeverity severity, const char* file, int line);
+
+ // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
+ ~GTestLog();
+
+ ::std::ostream& GetStream() { return ::std::cerr; }
+
+ private:
+ const GTestLogSeverity severity_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestLog);
+};
+
+#define GTEST_LOG_(severity) \
+ ::testing::internal::GTestLog(::testing::internal::GTEST_##severity, \
+ __FILE__, __LINE__).GetStream()
+
+inline void LogToStderr() {}
+inline void FlushInfoLog() { fflush(NULL); }
+
+// INTERNAL IMPLEMENTATION - DO NOT USE.
+//
+// GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition
+// is not satisfied.
+// Synopsis:
+// GTEST_CHECK_(boolean_condition);
+// or
+// GTEST_CHECK_(boolean_condition) << "Additional message";
+//
+// This checks the condition, and if the condition is not satisfied
+// it prints a message about the condition violation, including the
+// condition itself, plus any additional message streamed into it,
+// and then aborts the program. It aborts irrespective of
+// whether it is built in debug mode or not.
+#define GTEST_CHECK_(condition) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::IsTrue(condition)) \
+ ; \
+ else \
+ GTEST_LOG_(FATAL) << "Condition " #condition " failed. "
+
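+// Usage sketch (editorial, hypothetical condition):
+//
+//   GTEST_CHECK_(fd >= 0) << "open() returned " << fd;
+//
+// When fd is negative this logs the stringized condition plus the streamed
+// message at FATAL severity and aborts; otherwise it evaluates to nothing.
+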
+// An all-mode assert to verify that the given POSIX-style function
+// call returns 0 (indicating success). Known limitation: this
+// doesn't expand to a balanced 'if' statement, so enclose the macro
+// in {} if you need to use it as the only statement in an 'if'
+// branch.
+#define GTEST_CHECK_POSIX_SUCCESS_(posix_call) \
+ if (const int gtest_error = (posix_call)) \
+    GTEST_LOG_(FATAL) << #posix_call << " failed with error " \
+ << gtest_error
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Use ImplicitCast_ as a safe version of static_cast for upcasting in
+// the type hierarchy (e.g. casting a Foo* to a SuperclassOfFoo* or a
+// const Foo*). When you use ImplicitCast_, the compiler checks that
+// the cast is safe. Such explicit ImplicitCast_s are necessary in
+// surprisingly many situations where C++ demands an exact type match
+// instead of an argument type convertible to a target type.
+//
+// The syntax for using ImplicitCast_ is the same as for static_cast:
+//
+// ImplicitCast_<ToType>(expr)
+//
+// ImplicitCast_ would have been part of the C++ standard library,
+// but the proposal was submitted too late. It will probably make
+// its way into the language in the future.
+//
+// This relatively ugly name is intentional. It prevents clashes with
+// similar functions users may have (e.g., implicit_cast). The internal
+// namespace alone is not enough because the function can be found by ADL.
+template<typename To>
+inline To ImplicitCast_(To x) { return x; }
+
+// When you upcast (that is, cast a pointer from type Foo to type
+// SuperclassOfFoo), it's fine to use ImplicitCast_<>, since upcasts
+// always succeed. When you downcast (that is, cast a pointer from
+// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
+// how do you know the pointer is really of type SubclassOfFoo? It
+// could be a bare Foo, or of type DifferentSubclassOfFoo. Thus,
+// when you downcast, you should use this macro. In debug mode, we
+// use dynamic_cast<> to double-check the downcast is legal (we die
+// if it's not). In normal mode, we do the efficient static_cast<>
+// instead. Thus, it's important to test in debug mode to make sure
+// the cast is legal!
+// This is the only place in the code we should use dynamic_cast<>.
+// In particular, you SHOULDN'T be using dynamic_cast<> in order to
+// do RTTI (e.g., code like this:
+//   if (dynamic_cast<Subclass1>(foo)) HandleASubclass1Object(foo);
+//   if (dynamic_cast<Subclass2>(foo)) HandleASubclass2Object(foo);
+// ). You should design the code some other way so that it does not
+// need this.
+//
+// This relatively ugly name is intentional. It prevents clashes with
+// similar functions users may have (e.g., down_cast). The internal
+// namespace alone is not enough because the function can be found by ADL.
+template<typename To, typename From> // use like this: DownCast_<T*>(foo);
+inline To DownCast_(From* f) { // so we only accept pointers
+ // Ensures that To is a sub-type of From *. This test is here only
+ // for compile-time type checking, and has no overhead in an
+ // optimized build at run-time, as it will be optimized away
+ // completely.
+ if (false) {
+ const To to = NULL;
+ ::testing::internal::ImplicitCast_<From*>(to);
+ }
+
+#if GTEST_HAS_RTTI
+ // RTTI: debug mode only!
+ GTEST_CHECK_(f == NULL || dynamic_cast<To>(f) != NULL);
+#endif
+ return static_cast<To>(f);
+}
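+
+// Usage sketch (editorial, hypothetical types):
+//
+//   class Animal { public: virtual ~Animal() {} };
+//   class Dog : public Animal {};
+//
+//   Animal* a = new Dog;
+//   Dog* d = DownCast_<Dog*>(a);  // dynamic_cast-checked when RTTI is on.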
+
+// Downcasts the pointer of type Base to Derived.
+// Derived must be a subclass of Base. The parameter MUST
+// point to a class of type Derived, not any subclass of it.
+// When RTTI is available, the function performs a runtime
+// check to enforce this.
+template <class Derived, class Base>
+Derived* CheckedDowncastToActualType(Base* base) {
+#if GTEST_HAS_RTTI
+ GTEST_CHECK_(typeid(*base) == typeid(Derived));
+ return dynamic_cast<Derived*>(base); // NOLINT
+#else
+ return static_cast<Derived*>(base); // Poor man's downcast.
+#endif
+}
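+
+// Usage sketch (editorial, reusing the hypothetical Animal/Dog above):
+// unlike DownCast_, the argument must point to exactly the named type,
+// not a further subclass of it.
+//
+//   Animal* a = new Dog;
+//   Dog* d = CheckedDowncastToActualType<Dog>(a);  // OK: *a is a Dog.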
+
+#if GTEST_HAS_STREAM_REDIRECTION
+
+// Defines the stderr capturer:
+// CaptureStdout - starts capturing stdout.
+// GetCapturedStdout - stops capturing stdout and returns the captured string.
+// CaptureStderr - starts capturing stderr.
+// GetCapturedStderr - stops capturing stderr and returns the captured string.
+//
+GTEST_API_ void CaptureStdout();
+GTEST_API_ std::string GetCapturedStdout();
+GTEST_API_ void CaptureStderr();
+GTEST_API_ std::string GetCapturedStderr();
+
+#endif // GTEST_HAS_STREAM_REDIRECTION
+
+
+#if GTEST_HAS_DEATH_TEST
+
+const ::std::vector<testing::internal::string>& GetInjectableArgvs();
+void SetInjectableArgvs(const ::std::vector<testing::internal::string>*
+ new_argvs);
+
+// A copy of all command line arguments. Set by InitGoogleTest().
+extern ::std::vector<testing::internal::string> g_argvs;
+
+#endif // GTEST_HAS_DEATH_TEST
+
+// Defines synchronization primitives.
+
+#if GTEST_HAS_PTHREAD
+
+// Sleeps for (roughly) n milliseconds. This function is only for
+// testing Google Test's own constructs. Don't use it in user tests,
+// either directly or indirectly.
+inline void SleepMilliseconds(int n) {
+ const timespec time = {
+ 0, // 0 seconds.
+ n * 1000L * 1000L, // And n ms.
+ };
+ nanosleep(&time, NULL);
+}
+
+// Allows a controller thread to pause execution of newly created
+// threads until notified. Instances of this class must be created
+// and destroyed in the controller thread.
+//
+// This class is only for testing Google Test's own constructs. Do not
+// use it in user tests, either directly or indirectly.
+class Notification {
+ public:
+ Notification() : notified_(false) {
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL));
+ }
+ ~Notification() {
+ pthread_mutex_destroy(&mutex_);
+ }
+
+ // Notifies all threads created with this notification to start. Must
+ // be called from the controller thread.
+ void Notify() {
+ pthread_mutex_lock(&mutex_);
+ notified_ = true;
+ pthread_mutex_unlock(&mutex_);
+ }
+
+ // Blocks until the controller thread notifies. Must be called from a test
+ // thread.
+ void WaitForNotification() {
+ for (;;) {
+ pthread_mutex_lock(&mutex_);
+ const bool notified = notified_;
+ pthread_mutex_unlock(&mutex_);
+ if (notified)
+ break;
+ SleepMilliseconds(10);
+ }
+ }
+
+ private:
+ pthread_mutex_t mutex_;
+ bool notified_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification);
+};
+
+// As a C-function, ThreadFuncWithCLinkage cannot be templated itself.
+// Consequently, it cannot select a correct instantiation of ThreadWithParam
+// in order to call its Run(). Introducing ThreadWithParamBase as a
+// non-templated base class for ThreadWithParam allows us to bypass this
+// problem.
+class ThreadWithParamBase {
+ public:
+ virtual ~ThreadWithParamBase() {}
+ virtual void Run() = 0;
+};
+
+// pthread_create() accepts a pointer to a function type with the C linkage.
+// According to the Standard (7.5/1), function types with different linkages
+// are different even if they are otherwise identical. Some compilers (for
+// example, SunStudio) treat them as different types. Since class methods
+// cannot be defined with C-linkage we need to define a free C-function to
+// pass into pthread_create().
+extern "C" inline void* ThreadFuncWithCLinkage(void* thread) {
+ static_cast<ThreadWithParamBase*>(thread)->Run();
+ return NULL;
+}
+
+// Helper class for testing Google Test's multi-threading constructs.
+// To use it, write:
+//
+// void ThreadFunc(int param) { /* Do things with param */ }
+// Notification thread_can_start;
+// ...
+// // The thread_can_start parameter is optional; you can supply NULL.
+// ThreadWithParam<int> thread(&ThreadFunc, 5, &thread_can_start);
+// thread_can_start.Notify();
+//
+// These classes are only for testing Google Test's own constructs. Do
+// not use them in user tests, either directly or indirectly.
+template <typename T>
+class ThreadWithParam : public ThreadWithParamBase {
+ public:
+ typedef void (*UserThreadFunc)(T);
+
+ ThreadWithParam(
+ UserThreadFunc func, T param, Notification* thread_can_start)
+ : func_(func),
+ param_(param),
+ thread_can_start_(thread_can_start),
+ finished_(false) {
+ ThreadWithParamBase* const base = this;
+ // The thread can be created only after all fields except thread_
+ // have been initialized.
+ GTEST_CHECK_POSIX_SUCCESS_(
+ pthread_create(&thread_, 0, &ThreadFuncWithCLinkage, base));
+ }
+ ~ThreadWithParam() { Join(); }
+
+ void Join() {
+ if (!finished_) {
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_join(thread_, 0));
+ finished_ = true;
+ }
+ }
+
+ virtual void Run() {
+ if (thread_can_start_ != NULL)
+ thread_can_start_->WaitForNotification();
+ func_(param_);
+ }
+
+ private:
+ const UserThreadFunc func_; // User-supplied thread function.
+ const T param_; // User-supplied parameter to the thread function.
+ // When non-NULL, used to block execution until the controller thread
+ // notifies.
+ Notification* const thread_can_start_;
+ bool finished_; // true iff we know that the thread function has finished.
+ pthread_t thread_; // The native thread object.
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam);
+};
+
+// MutexBase and Mutex implement mutex on pthreads-based platforms. They
+// are used in conjunction with class MutexLock:
+//
+// Mutex mutex;
+// ...
+// MutexLock lock(&mutex); // Acquires the mutex and releases it at the end
+// // of the current scope.
+//
+// MutexBase implements behavior for both statically and dynamically
+// allocated mutexes. Do not use MutexBase directly. Instead, write
+// the following to define a static mutex:
+//
+// GTEST_DEFINE_STATIC_MUTEX_(g_some_mutex);
+//
+// You can forward declare a static mutex like this:
+//
+// GTEST_DECLARE_STATIC_MUTEX_(g_some_mutex);
+//
+// To create a dynamic mutex, just define an object of type Mutex.
+class MutexBase {
+ public:
+ // Acquires this mutex.
+ void Lock() {
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&mutex_));
+ owner_ = pthread_self();
+ has_owner_ = true;
+ }
+
+ // Releases this mutex.
+ void Unlock() {
+ // Since the lock is being released the owner_ field should no longer be
+ // considered valid. We don't protect writing to has_owner_ here, as it's
+ // the caller's responsibility to ensure that the current thread holds the
+ // mutex when this is called.
+ has_owner_ = false;
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&mutex_));
+ }
+
+ // Does nothing if the current thread holds the mutex. Otherwise, crashes
+ // with high probability.
+ void AssertHeld() const {
+ GTEST_CHECK_(has_owner_ && pthread_equal(owner_, pthread_self()))
+ << "The current thread is not holding the mutex @" << this;
+ }
+
+ // A static mutex may be used before main() is entered. It may even
+ // be used before the dynamic initialization stage. Therefore we
+ // must be able to initialize a static mutex object at link time.
+ // This means MutexBase has to be a POD and its member variables
+ // have to be public.
+ public:
+ pthread_mutex_t mutex_; // The underlying pthread mutex.
+ // has_owner_ indicates whether the owner_ field below contains a valid thread
+ // ID and is therefore safe to inspect (e.g., to use in pthread_equal()). All
+ // accesses to the owner_ field should be protected by a check of this field.
+ // An alternative might be to memset() owner_ to all zeros, but there's no
+  // guarantee that a zeroed pthread_t is necessarily invalid or even different
+ // from pthread_self().
+ bool has_owner_;
+ pthread_t owner_; // The thread holding the mutex.
+};
+
+// Forward-declares a static mutex.
+# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
+ extern ::testing::internal::MutexBase mutex
+
+// Defines and statically (i.e. at link time) initializes a static mutex.
+// The initialization list here does not explicitly initialize each field,
+// instead relying on default initialization for the unspecified fields. In
+// particular, the owner_ field (a pthread_t) is not explicitly initialized.
+// This allows initialization to work whether pthread_t is a scalar or struct.
+// The flag -Wmissing-field-initializers must not be specified for this to work.
+# define GTEST_DEFINE_STATIC_MUTEX_(mutex) \
+ ::testing::internal::MutexBase mutex = { PTHREAD_MUTEX_INITIALIZER, false }
+
+// The Mutex class can only be used for mutexes created at runtime. It
+// shares its API with MutexBase otherwise.
+class Mutex : public MutexBase {
+ public:
+ Mutex() {
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL));
+ has_owner_ = false;
+ }
+ ~Mutex() {
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_));
+ }
+
+ private:
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex);
+};
+
+// We cannot name this class MutexLock as the ctor declaration would
+// conflict with a macro named MutexLock, which is defined on some
+// platforms. Hence the typedef trick below.
+class GTestMutexLock {
+ public:
+ explicit GTestMutexLock(MutexBase* mutex)
+ : mutex_(mutex) { mutex_->Lock(); }
+
+ ~GTestMutexLock() { mutex_->Unlock(); }
+
+ private:
+ MutexBase* const mutex_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock);
+};
+
+typedef GTestMutexLock MutexLock;
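+
+// Usage sketch (editorial, hypothetical mutex and function):
+//
+//   GTEST_DEFINE_STATIC_MUTEX_(g_log_mutex);  // initialized at link time
+//
+//   void AppendLogLine(const char* line) {
+//     MutexLock lock(&g_log_mutex);  // Lock() now, Unlock() on scope exit.
+//     // ... touch shared state ...
+//   }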
+
+// Helpers for ThreadLocal.
+
+// pthread_key_create() requires DeleteThreadLocalValue() to have
+// C-linkage. Therefore it cannot be templatized to access
+// ThreadLocal<T>. Hence the need for class
+// ThreadLocalValueHolderBase.
+class ThreadLocalValueHolderBase {
+ public:
+ virtual ~ThreadLocalValueHolderBase() {}
+};
+
+// Called by pthread to delete thread-local data stored by
+// pthread_setspecific().
+extern "C" inline void DeleteThreadLocalValue(void* value_holder) {
+ delete static_cast<ThreadLocalValueHolderBase*>(value_holder);
+}
+
+// Implements thread-local storage on pthreads-based systems.
+//
+// // Thread 1
+// ThreadLocal<int> tl(100); // 100 is the default value for each thread.
+//
+// // Thread 2
+// tl.set(150); // Changes the value for thread 2 only.
+// EXPECT_EQ(150, tl.get());
+//
+// // Thread 1
+// EXPECT_EQ(100, tl.get()); // In thread 1, tl has the original value.
+// tl.set(200);
+// EXPECT_EQ(200, tl.get());
+//
+// The template type argument T must have a public copy constructor.
+// In addition, the default ThreadLocal constructor requires T to have
+// a public default constructor.
+//
+// An object managed for a thread by a ThreadLocal instance is deleted
+// when the thread exits. Or, if the ThreadLocal instance dies in
+// that thread, when the ThreadLocal dies. It's the user's
+// responsibility to ensure that all other threads using a ThreadLocal
+// have exited when it dies, or the per-thread objects for those
+// threads will not be deleted.
+//
+// Google Test only uses global ThreadLocal objects. That means they
+// will die after main() has returned. Therefore, no per-thread
+// object managed by Google Test will be leaked as long as all threads
+// using Google Test have exited when main() returns.
+template <typename T>
+class ThreadLocal {
+ public:
+ ThreadLocal() : key_(CreateKey()),
+ default_() {}
+ explicit ThreadLocal(const T& value) : key_(CreateKey()),
+ default_(value) {}
+
+ ~ThreadLocal() {
+ // Destroys the managed object for the current thread, if any.
+ DeleteThreadLocalValue(pthread_getspecific(key_));
+
+ // Releases resources associated with the key. This will *not*
+ // delete managed objects for other threads.
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_));
+ }
+
+ T* pointer() { return GetOrCreateValue(); }
+ const T* pointer() const { return GetOrCreateValue(); }
+ const T& get() const { return *pointer(); }
+ void set(const T& value) { *pointer() = value; }
+
+ private:
+ // Holds a value of type T.
+ class ValueHolder : public ThreadLocalValueHolderBase {
+ public:
+ explicit ValueHolder(const T& value) : value_(value) {}
+
+ T* pointer() { return &value_; }
+
+ private:
+ T value_;
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder);
+ };
+
+ static pthread_key_t CreateKey() {
+ pthread_key_t key;
+ // When a thread exits, DeleteThreadLocalValue() will be called on
+ // the object managed for that thread.
+ GTEST_CHECK_POSIX_SUCCESS_(
+ pthread_key_create(&key, &DeleteThreadLocalValue));
+ return key;
+ }
+
+ T* GetOrCreateValue() const {
+ ThreadLocalValueHolderBase* const holder =
+ static_cast<ThreadLocalValueHolderBase*>(pthread_getspecific(key_));
+ if (holder != NULL) {
+ return CheckedDowncastToActualType<ValueHolder>(holder)->pointer();
+ }
+
+ ValueHolder* const new_holder = new ValueHolder(default_);
+ ThreadLocalValueHolderBase* const holder_base = new_holder;
+ GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base));
+ return new_holder->pointer();
+ }
+
+ // A key pthreads uses for looking up per-thread values.
+ const pthread_key_t key_;
+ const T default_; // The default value for each thread.
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal);
+};
+
+# define GTEST_IS_THREADSAFE 1
+
+#else // GTEST_HAS_PTHREAD
+
+// A dummy implementation of synchronization primitives (mutex, lock,
+// and thread-local variable). Necessary for compiling Google Test where
+// mutex is not supported - using Google Test in multiple threads is not
+// supported on such platforms.
+
+class Mutex {
+ public:
+ Mutex() {}
+ void Lock() {}
+ void Unlock() {}
+ void AssertHeld() const {}
+};
+
+# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
+ extern ::testing::internal::Mutex mutex
+
+# define GTEST_DEFINE_STATIC_MUTEX_(mutex) ::testing::internal::Mutex mutex
+
+class GTestMutexLock {
+ public:
+ explicit GTestMutexLock(Mutex*) {} // NOLINT
+};
+
+typedef GTestMutexLock MutexLock;
+
+template <typename T>
+class ThreadLocal {
+ public:
+ ThreadLocal() : value_() {}
+ explicit ThreadLocal(const T& value) : value_(value) {}
+ T* pointer() { return &value_; }
+ const T* pointer() const { return &value_; }
+ const T& get() const { return value_; }
+ void set(const T& value) { value_ = value; }
+ private:
+ T value_;
+};
+
+// The above synchronization primitives have dummy implementations.
+// Therefore Google Test is not thread-safe.
+# define GTEST_IS_THREADSAFE 0
+
+#endif // GTEST_HAS_PTHREAD
+
+// Returns the number of threads running in the process, or 0 to indicate that
+// we cannot detect it.
+GTEST_API_ size_t GetThreadCount();
+
+// Passing non-POD classes through ellipsis (...) crashes the ARM
+// compiler and generates a warning in Sun Studio. The Nokia Symbian
+// and the IBM XL C/C++ compiler try to instantiate a copy constructor
+// for objects passed through ellipsis (...), failing for uncopyable
+// objects. We define this to ensure that only POD is passed through
+// ellipsis on these systems.
+#if defined(__SYMBIAN32__) || defined(__IBMCPP__) || defined(__SUNPRO_CC)
+// We lose support for NULL detection where the compiler doesn't like
+// passing non-POD classes through ellipsis (...).
+# define GTEST_ELLIPSIS_NEEDS_POD_ 1
+#else
+# define GTEST_CAN_COMPARE_NULL 1
+#endif
+
+// The Nokia Symbian and IBM XL C/C++ compilers cannot decide between
+// const T& and const T* in a function template. These compilers
+// _can_ decide between class template specializations for T and T*,
+// so a tr1::type_traits-like is_pointer works.
+#if defined(__SYMBIAN32__) || defined(__IBMCPP__)
+# define GTEST_NEEDS_IS_POINTER_ 1
+#endif
+
+template <bool bool_value>
+struct bool_constant {
+ typedef bool_constant<bool_value> type;
+ static const bool value = bool_value;
+};
+template <bool bool_value> const bool bool_constant<bool_value>::value;
+
+typedef bool_constant<false> false_type;
+typedef bool_constant<true> true_type;
+
+template <typename T>
+struct is_pointer : public false_type {};
+
+template <typename T>
+struct is_pointer<T*> : public true_type {};
+
+template <typename Iterator>
+struct IteratorTraits {
+ typedef typename Iterator::value_type value_type;
+};
+
+template <typename T>
+struct IteratorTraits<T*> {
+ typedef T value_type;
+};
+
+template <typename T>
+struct IteratorTraits<const T*> {
+ typedef T value_type;
+};
+
+#if GTEST_OS_WINDOWS
+# define GTEST_PATH_SEP_ "\\"
+# define GTEST_HAS_ALT_PATH_SEP_ 1
+// The biggest signed integer type the compiler supports.
+typedef __int64 BiggestInt;
+#else
+# define GTEST_PATH_SEP_ "/"
+# define GTEST_HAS_ALT_PATH_SEP_ 0
+typedef long long BiggestInt; // NOLINT
+#endif // GTEST_OS_WINDOWS
+
+// Utilities for char.
+
+// isspace(int ch) and friends accept an unsigned char or EOF. char
+// may be signed, depending on the compiler (or compiler flags).
+// Therefore we need to cast a char to unsigned char before calling
+// isspace(), etc.
+
+inline bool IsAlpha(char ch) {
+ return isalpha(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsAlNum(char ch) {
+ return isalnum(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsDigit(char ch) {
+ return isdigit(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsLower(char ch) {
+ return islower(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsSpace(char ch) {
+ return isspace(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsUpper(char ch) {
+ return isupper(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsXDigit(char ch) {
+ return isxdigit(static_cast<unsigned char>(ch)) != 0;
+}
+inline bool IsXDigit(wchar_t ch) {
+ const unsigned char low_byte = static_cast<unsigned char>(ch);
+ return ch == low_byte && isxdigit(low_byte) != 0;
+}
+
+inline char ToLower(char ch) {
+ return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
+}
+inline char ToUpper(char ch) {
+ return static_cast<char>(toupper(static_cast<unsigned char>(ch)));
+}
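+
+// Rationale example (editorial): on a platform where char is signed,
+// passing a char such as '\xE9' straight to isspace() hands it a negative
+// value, which is undefined behavior; the static_cast<unsigned char> in
+// the wrappers above keeps the argument within the range the <ctype.h>
+// functions require.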
+
+// The testing::internal::posix namespace holds wrappers for common
+// POSIX functions. These wrappers hide the differences between
+// Windows/MSVC and POSIX systems. Since some compilers define these
+// standard functions as macros, the wrapper cannot have the same name
+// as the wrapped function.
+
+namespace posix {
+
+// Functions with a different name on Windows.
+
+#if GTEST_OS_WINDOWS
+
+typedef struct _stat StatStruct;
+
+# ifdef __BORLANDC__
+inline int IsATTY(int fd) { return isatty(fd); }
+inline int StrCaseCmp(const char* s1, const char* s2) {
+ return stricmp(s1, s2);
+}
+inline char* StrDup(const char* src) { return strdup(src); }
+# else // !__BORLANDC__
+# if GTEST_OS_WINDOWS_MOBILE
+inline int IsATTY(int /* fd */) { return 0; }
+# else
+inline int IsATTY(int fd) { return _isatty(fd); }
+# endif // GTEST_OS_WINDOWS_MOBILE
+inline int StrCaseCmp(const char* s1, const char* s2) {
+ return _stricmp(s1, s2);
+}
+inline char* StrDup(const char* src) { return _strdup(src); }
+# endif // __BORLANDC__
+
+# if GTEST_OS_WINDOWS_MOBILE
+inline int FileNo(FILE* file) { return reinterpret_cast<int>(_fileno(file)); }
+// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this
+// time and thus not defined there.
+# else
+inline int FileNo(FILE* file) { return _fileno(file); }
+inline int Stat(const char* path, StatStruct* buf) { return _stat(path, buf); }
+inline int RmDir(const char* dir) { return _rmdir(dir); }
+inline bool IsDir(const StatStruct& st) {
+ return (_S_IFDIR & st.st_mode) != 0;
+}
+# endif // GTEST_OS_WINDOWS_MOBILE
+
+#else
+
+typedef struct stat StatStruct;
+
+inline int FileNo(FILE* file) { return fileno(file); }
+inline int IsATTY(int fd) { return isatty(fd); }
+inline int Stat(const char* path, StatStruct* buf) { return stat(path, buf); }
+inline int StrCaseCmp(const char* s1, const char* s2) {
+ return strcasecmp(s1, s2);
+}
+inline char* StrDup(const char* src) { return strdup(src); }
+inline int RmDir(const char* dir) { return rmdir(dir); }
+inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); }
+
+#endif // GTEST_OS_WINDOWS
+
+// Functions deprecated by MSVC 8.0.
+
+#ifdef _MSC_VER
+// Temporarily disable warning 4996 (deprecated function).
+# pragma warning(push)
+# pragma warning(disable:4996)
+#endif
+
+inline const char* StrNCpy(char* dest, const char* src, size_t n) {
+ return strncpy(dest, src, n);
+}
+
+// ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and
+// StrError() aren't needed on Windows CE at this time and thus not
+// defined there.
+
+#if !GTEST_OS_WINDOWS_MOBILE
+inline int ChDir(const char* dir) { return chdir(dir); }
+#endif
+inline FILE* FOpen(const char* path, const char* mode) {
+ return fopen(path, mode);
+}
+#if !GTEST_OS_WINDOWS_MOBILE
+inline FILE *FReopen(const char* path, const char* mode, FILE* stream) {
+ return freopen(path, mode, stream);
+}
+inline FILE* FDOpen(int fd, const char* mode) { return fdopen(fd, mode); }
+#endif
+inline int FClose(FILE* fp) { return fclose(fp); }
+#if !GTEST_OS_WINDOWS_MOBILE
+inline int Read(int fd, void* buf, unsigned int count) {
+ return static_cast<int>(read(fd, buf, count));
+}
+inline int Write(int fd, const void* buf, unsigned int count) {
+ return static_cast<int>(write(fd, buf, count));
+}
+inline int Close(int fd) { return close(fd); }
+inline const char* StrError(int errnum) { return strerror(errnum); }
+#endif
+inline const char* GetEnv(const char* name) {
+#if GTEST_OS_WINDOWS_MOBILE
+ // We are on Windows CE, which has no environment variables.
+ return NULL;
+#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
+ // Environment variables which we programmatically clear will be set to the
+ // empty string rather than unset (NULL). Handle that case.
+ const char* const env = getenv(name);
+ return (env != NULL && env[0] != '\0') ? env : NULL;
+#else
+ return getenv(name);
+#endif
+}
+
+#ifdef _MSC_VER
+# pragma warning(pop) // Restores the warning state.
+#endif
+
+#if GTEST_OS_WINDOWS_MOBILE
+// Windows CE has no C library. The abort() function is used in
+// several places in Google Test. This implementation provides a reasonable
+// imitation of standard behavior.
+void Abort();
+#else
+inline void Abort() { abort(); }
+#endif // GTEST_OS_WINDOWS_MOBILE
+
+} // namespace posix
+
+// MSVC "deprecates" snprintf and issues warnings wherever it is used. In
+// order to avoid these warnings, we need to use _snprintf or _snprintf_s on
+// MSVC-based platforms. We map the GTEST_SNPRINTF_ macro to the appropriate
+// function in order to achieve that. We use macro definition here because
+// snprintf is a variadic function.
+#if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE
+// MSVC 2005 and above support variadic macros.
+# define GTEST_SNPRINTF_(buffer, size, format, ...) \
+ _snprintf_s(buffer, size, size, format, __VA_ARGS__)
+#elif defined(_MSC_VER)
+// Windows CE does not define _snprintf_s and MSVC prior to 2005 doesn't
+// complain about _snprintf.
+# define GTEST_SNPRINTF_ _snprintf
+#else
+# define GTEST_SNPRINTF_ snprintf
+#endif
+
+// The maximum number a BiggestInt can represent. This definition
+// works no matter BiggestInt is represented in one's complement or
+// two's complement.
+//
+// We cannot rely on numeric_limits in STL, as __int64 and long long
+// are not part of standard C++ and numeric_limits doesn't need to be
+// defined for them.
+const BiggestInt kMaxBiggestInt =
+ ~(static_cast<BiggestInt>(1) << (8*sizeof(BiggestInt) - 1));
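+
+// Worked example (editorial, assuming a 64-bit two's-complement
+// BiggestInt): static_cast<BiggestInt>(1) << 63 sets only the sign bit
+// (0x8000000000000000), and ~ flips every bit, yielding
+// 0x7FFFFFFFFFFFFFFF -- the maximum value in two's complement, and
+// equally valid under one's complement.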
+
+// This template class serves as a compile-time function from size to
+// type. It maps a size in bytes to a primitive type with that
+// size. e.g.
+//
+// TypeWithSize<4>::UInt
+//
+// is typedef-ed to be unsigned int (unsigned integer made up of 4
+// bytes).
+//
+// Such functionality should belong to STL, but I cannot find it
+// there.
+//
+// Google Test uses this class in the implementation of floating-point
+// comparison.
+//
+// For now it only handles UInt (unsigned int) as that's all Google Test
+// needs. Other types can be easily added in the future if need
+// arises.
+template <size_t size>
+class TypeWithSize {
+ public:
+ // This prevents the user from using TypeWithSize<N> with incorrect
+ // values of N.
+ typedef void UInt;
+};
+
+// The specialization for size 4.
+template <>
+class TypeWithSize<4> {
+ public:
+ // unsigned int has size 4 in both gcc and MSVC.
+ //
+ // As base/basictypes.h doesn't compile on Windows, we cannot use
+  // uint32, uint64, etc. here.
+ typedef int Int;
+ typedef unsigned int UInt;
+};
+
+// The specialization for size 8.
+template <>
+class TypeWithSize<8> {
+ public:
+#if GTEST_OS_WINDOWS
+ typedef __int64 Int;
+ typedef unsigned __int64 UInt;
+#else
+ typedef long long Int; // NOLINT
+ typedef unsigned long long UInt; // NOLINT
+#endif // GTEST_OS_WINDOWS
+};
+
+// Integer types of known sizes.
+typedef TypeWithSize<4>::Int Int32;
+typedef TypeWithSize<4>::UInt UInt32;
+typedef TypeWithSize<8>::Int Int64;
+typedef TypeWithSize<8>::UInt UInt64;
+typedef TypeWithSize<8>::Int TimeInMillis; // Represents time in milliseconds.
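+
+// Sanity-check sketch (editorial): the typedefs above can be verified with
+// the compile-time assert defined earlier in this header, e.g.
+//
+//   GTEST_COMPILE_ASSERT_(sizeof(Int32) == 4, Int32_has_wrong_size);
+//   GTEST_COMPILE_ASSERT_(sizeof(Int64) == 8, Int64_has_wrong_size);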
+
+// Utilities for command line flags and environment variables.
+
+// Macro for referencing flags.
+#define GTEST_FLAG(name) FLAGS_gtest_##name
+
+// Macros for declaring flags.
+#define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name)
+#define GTEST_DECLARE_int32_(name) \
+ GTEST_API_ extern ::testing::internal::Int32 GTEST_FLAG(name)
+#define GTEST_DECLARE_string_(name) \
+ GTEST_API_ extern ::std::string GTEST_FLAG(name)
+
+// Macros for defining flags.
+#define GTEST_DEFINE_bool_(name, default_val, doc) \
+ GTEST_API_ bool GTEST_FLAG(name) = (default_val)
+#define GTEST_DEFINE_int32_(name, default_val, doc) \
+ GTEST_API_ ::testing::internal::Int32 GTEST_FLAG(name) = (default_val)
+#define GTEST_DEFINE_string_(name, default_val, doc) \
+ GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val)
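+
+// Usage sketch (editorial, hypothetical flag named fail_fast):
+//
+//   GTEST_DECLARE_bool_(fail_fast);  // header: extern bool FLAGS_gtest_fail_fast;
+//   GTEST_DEFINE_bool_(fail_fast, false, "Stop at the first failure.");  // .cc
+//
+//   if (GTEST_FLAG(fail_fast)) { /* ... */ }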
+
+// Thread annotations
+#define GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)
+#define GTEST_LOCK_EXCLUDED_(locks)
+
+// Parses 'str' for a 32-bit signed integer. If successful, writes the result
+// to *value and returns true; otherwise leaves *value unchanged and returns
+// false.
+// TODO(chandlerc): Find a better way to refactor flag and environment parsing
+// out of both gtest-port.cc and gtest.cc to avoid exporting this utility
+// function.
+bool ParseInt32(const Message& src_text, const char* str, Int32* value);
+
+// Parses a bool/Int32/string from the environment variable
+// corresponding to the given Google Test flag.
+bool BoolFromGTestEnv(const char* flag, bool default_val);
+GTEST_API_ Int32 Int32FromGTestEnv(const char* flag, Int32 default_val);
+const char* StringFromGTestEnv(const char* flag, const char* default_val);
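+
+// Usage sketch (editorial, hypothetical flag): BoolFromGTestEnv("fail_fast",
+// false) consults the environment variable that corresponds to the flag
+// (GTEST_FAIL_FAST under the usual naming scheme) and falls back to the
+// given default when the variable is unset.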
+
+} // namespace internal
+} // namespace testing
+
+#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
+
+#if GTEST_OS_LINUX
+# include <stdlib.h>
+# include <sys/types.h>
+# include <sys/wait.h>
+# include <unistd.h>
+#endif // GTEST_OS_LINUX
+
+#if GTEST_HAS_EXCEPTIONS
+# include <stdexcept>
+#endif
+
+#include <ctype.h>
+#include <float.h>
+#include <string.h>
+#include <iomanip>
+#include <limits>
+#include <set>
+
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file defines the Message class.
+//
+// IMPORTANT NOTE: Due to limitations of the C++ language, we have to
+// leave some internal implementation details in this header file.
+// They are clearly marked by comments like this:
+//
+// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+//
+// Such code is NOT meant to be used by a user directly, and is subject
+// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
+// program!
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+
+#include <limits>
+
+
+// Ensures that there is at least one operator<< in the global namespace.
+// See Message& operator<<(...) below for why.
+void operator<<(const testing::internal::Secret&, int);
+
+namespace testing {
+
+// The Message class works like an ostream repeater.
+//
+// Typical usage:
+//
+// 1. You stream a bunch of values to a Message object.
+// It will remember the text in a stringstream.
+// 2. Then you stream the Message object to an ostream.
+// This causes the text in the Message to be streamed
+// to the ostream.
+//
+// For example,
+//
+// testing::Message foo;
+// foo << 1 << " != " << 2;
+// std::cout << foo;
+//
+// will print "1 != 2".
+//
+// Message is not intended to be inherited from. In particular, its
+// destructor is not virtual.
+//
+// Note that stringstream behaves differently in gcc and in MSVC. You
+// can stream a NULL char pointer to it in the former, but not in the
+// latter (it causes an access violation if you do). The Message
+// class hides this difference by treating a NULL char pointer as
+// "(null)".
+class GTEST_API_ Message {
+ private:
+ // The type of basic IO manipulators (endl, ends, and flush) for
+ // narrow streams.
+ typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);
+
+ public:
+ // Constructs an empty Message.
+ Message();
+
+ // Copy constructor.
+ Message(const Message& msg) : ss_(new ::std::stringstream) { // NOLINT
+ *ss_ << msg.GetString();
+ }
+
+ // Constructs a Message from a C-string.
+ explicit Message(const char* str) : ss_(new ::std::stringstream) {
+ *ss_ << str;
+ }
+
+#if GTEST_OS_SYMBIAN
+ // Streams a value (either a pointer or not) to this object.
+ template <typename T>
+ inline Message& operator <<(const T& value) {
+ StreamHelper(typename internal::is_pointer<T>::type(), value);
+ return *this;
+ }
+#else
+ // Streams a non-pointer value to this object.
+ template <typename T>
+ inline Message& operator <<(const T& val) {
+ // Some libraries overload << for STL containers. These
+ // overloads are defined in the global namespace instead of ::std.
+ //
+ // C++'s symbol lookup rule (i.e. Koenig lookup) says that these
+ // overloads are visible in either the std namespace or the global
+ // namespace, but not other namespaces, including the testing
+ // namespace which Google Test's Message class is in.
+ //
+  // To allow STL containers (and other types that have a << operator
+ // defined in the global namespace) to be used in Google Test
+ // assertions, testing::Message must access the custom << operator
+ // from the global namespace. With this using declaration,
+ // overloads of << defined in the global namespace and those
+ // visible via Koenig lookup are both exposed in this function.
+ using ::operator <<;
+ *ss_ << val;
+ return *this;
+ }
+
+ // Streams a pointer value to this object.
+ //
+ // This function is an overload of the previous one. When you
+ // stream a pointer to a Message, this definition will be used as it
+ // is more specialized. (The C++ Standard, section
+ // [temp.func.order].) If you stream a non-pointer, then the
+ // previous definition will be used.
+ //
+ // The reason for this overload is that streaming a NULL pointer to
+ // ostream is undefined behavior. Depending on the compiler, you
+ // may get "0", "(nil)", "(null)", or an access violation. To
+ // ensure consistent result across compilers, we always treat NULL
+ // as "(null)".
+ template <typename T>
+ inline Message& operator <<(T* const& pointer) { // NOLINT
+ if (pointer == NULL) {
+ *ss_ << "(null)";
+ } else {
+ *ss_ << pointer;
+ }
+ return *this;
+ }
+#endif // GTEST_OS_SYMBIAN
+
+ // Since the basic IO manipulators are overloaded for both narrow
+ // and wide streams, we have to provide this specialized definition
+ // of operator <<, even though its body is the same as the
+ // templatized version above. Without this definition, streaming
+ // endl or other basic IO manipulators to Message will confuse the
+ // compiler.
+ Message& operator <<(BasicNarrowIoManip val) {
+ *ss_ << val;
+ return *this;
+ }
+
+ // Instead of 1/0, we want to see true/false for bool values.
+ Message& operator <<(bool b) {
+ return *this << (b ? "true" : "false");
+ }
+
+ // These two overloads allow streaming a wide C string to a Message
+ // using the UTF-8 encoding.
+ Message& operator <<(const wchar_t* wide_c_str);
+ Message& operator <<(wchar_t* wide_c_str);
+
+#if GTEST_HAS_STD_WSTRING
+ // Converts the given wide string to a narrow string using the UTF-8
+ // encoding, and streams the result to this Message object.
+ Message& operator <<(const ::std::wstring& wstr);
+#endif // GTEST_HAS_STD_WSTRING
+
+#if GTEST_HAS_GLOBAL_WSTRING
+ // Converts the given wide string to a narrow string using the UTF-8
+ // encoding, and streams the result to this Message object.
+ Message& operator <<(const ::wstring& wstr);
+#endif // GTEST_HAS_GLOBAL_WSTRING
+
+ // Gets the text streamed to this object so far as an std::string.
+ // Each '\0' character in the buffer is replaced with "\\0".
+ //
+ // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+ std::string GetString() const;
+
+ private:
+
+#if GTEST_OS_SYMBIAN
+ // These are needed as the Nokia Symbian Compiler cannot decide between
+ // const T& and const T* in a function template. The Nokia compiler _can_
+ // decide between class template specializations for T and T*, so a
+ // tr1::type_traits-like is_pointer works, and we can overload on that.
+ template <typename T>
+ inline void StreamHelper(internal::true_type /*is_pointer*/, T* pointer) {
+ if (pointer == NULL) {
+ *ss_ << "(null)";
+ } else {
+ *ss_ << pointer;
+ }
+ }
+ template <typename T>
+ inline void StreamHelper(internal::false_type /*is_pointer*/,
+ const T& value) {
+ // See the comments in Message& operator <<(const T&) above for why
+ // we need this using statement.
+ using ::operator <<;
+ *ss_ << value;
+ }
+#endif // GTEST_OS_SYMBIAN
+
+ // We'll hold the text streamed to this object here.
+ const internal::scoped_ptr< ::std::stringstream> ss_;
+
+ // We declare (but don't implement) this to prevent the compiler
+ // from implementing the assignment operator.
+ void operator=(const Message&);
+};
+
+// Streams a Message to an ostream.
+inline std::ostream& operator <<(std::ostream& os, const Message& sb) {
+ return os << sb.GetString();
+}
+
+namespace internal {
+
+// Converts a streamable value to an std::string. A NULL pointer is
+// converted to "(null)". When the input value is a ::string,
+// ::std::string, ::wstring, or ::std::wstring object, each NUL
+// character in it is replaced with "\\0".
+template <typename T>
+std::string StreamableToString(const T& streamable) {
+ return (Message() << streamable).GetString();
+}
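+
+// Usage sketch (illustrative only, hypothetical values):
+//
+//   StreamableToString(42);                      // "42"
+//   StreamableToString(true);                    // "true" (via Message)
+//   StreamableToString(std::string("a\0b", 3));  // "a\\0b"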
+
+} // namespace internal
+} // namespace testing
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file declares the String class and functions used internally by
+// Google Test. They are subject to change without notice. They should not
+// be used by code external to Google Test.
+//
+// This header file is #included by <gtest/internal/gtest-internal.h>.
+// It should not be #included by other files.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+
+#ifdef __BORLANDC__
+// string.h is not guaranteed to provide strcpy on C++ Builder.
+# include <mem.h>
+#endif
+
+#include <string.h>
+#include <string>
+
+
+namespace testing {
+namespace internal {
+
+// String - an abstract class holding static string utilities.
+class GTEST_API_ String {
+ public:
+ // Static utility methods
+
+ // Clones a 0-terminated C string, allocating memory using new. The
+ // caller is responsible for deleting the return value using
+ // delete[]. Returns the cloned string, or NULL if the input is
+ // NULL.
+ //
+ // This is different from strdup() in string.h, which allocates
+ // memory using malloc().
+ static const char* CloneCString(const char* c_str);
+
+#if GTEST_OS_WINDOWS_MOBILE
+ // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be
+ // able to pass strings to Win32 APIs on CE we need to convert them
+ // to 'Unicode', UTF-16.
+
+ // Creates a UTF-16 wide string from the given ANSI string, allocating
+ // memory using new. The caller is responsible for deleting the return
+ // value using delete[]. Returns the wide string, or NULL if the
+ // input is NULL.
+ //
+ // The wide string is created using the ANSI codepage (CP_ACP) to
+ // match the behaviour of the ANSI versions of Win32 calls and the
+ // C runtime.
+ static LPCWSTR AnsiToUtf16(const char* c_str);
+
+ // Creates an ANSI string from the given wide string, allocating
+ // memory using new. The caller is responsible for deleting the return
+ // value using delete[]. Returns the ANSI string, or NULL if the
+ // input is NULL.
+ //
+ // The returned string is created using the ANSI codepage (CP_ACP) to
+ // match the behaviour of the ANSI versions of Win32 calls and the
+ // C runtime.
+ static const char* Utf16ToAnsi(LPCWSTR utf16_str);
+#endif
+
+ // Compares two C strings. Returns true iff they have the same content.
+ //
+ // Unlike strcmp(), this function can handle NULL argument(s). A
+ // NULL C string is considered different to any non-NULL C string,
+ // including the empty string.
+ static bool CStringEquals(const char* lhs, const char* rhs);
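+
+  // Illustration (hypothetical values, consistent with the rules above):
+  //   CStringEquals(NULL, NULL)  returns true
+  //   CStringEquals(NULL, "")    returns false
+  //   CStringEquals("ab", "ab")  returns true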
+
+ // Converts a wide C string to a String using the UTF-8 encoding.
+ // NULL will be converted to "(null)". If an error occurred during
+ // the conversion, "(failed to convert from wide string)" is
+ // returned.
+ static std::string ShowWideCString(const wchar_t* wide_c_str);
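+
+  // Illustration (hypothetical values):
+  //   ShowWideCString(L"hi")  returns "hi"
+  //   ShowWideCString(NULL)   returns "(null)"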
+
+ // Compares two wide C strings. Returns true iff they have the same
+ // content.
+ //
+ // Unlike wcscmp(), this function can handle NULL argument(s). A
+ // NULL C string is considered different to any non-NULL C string,
+ // including the empty string.
+ static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs);
+
+ // Compares two C strings, ignoring case. Returns true iff they
+ // have the same content.
+ //
+ // Unlike strcasecmp(), this function can handle NULL argument(s).
+ // A NULL C string is considered different to any non-NULL C string,
+ // including the empty string.
+ static bool CaseInsensitiveCStringEquals(const char* lhs,
+ const char* rhs);
+
+ // Compares two wide C strings, ignoring case. Returns true iff they
+ // have the same content.
+ //
+ // Unlike wcscasecmp(), this function can handle NULL argument(s).
+  // A NULL wide C string is considered different to any non-NULL wide C
+  // string, including the empty string.
+ // NB: The implementations on different platforms slightly differ.
+  // On Windows, this method uses _wcsicmp, which compares according to the
+  // LC_CTYPE environment variable. On GNU platforms, this method uses
+  // wcscasecmp, which compares according to the LC_CTYPE category of the
+  // current locale. On Mac OS X, it uses towlower, which also uses the
+  // LC_CTYPE category of the current locale.
+ static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
+ const wchar_t* rhs);
+
+ // Returns true iff the given string ends with the given suffix, ignoring
+ // case. Any string is considered to end with an empty suffix.
+ static bool EndsWithCaseInsensitive(
+ const std::string& str, const std::string& suffix);
+
+ // Formats an int value as "%02d".
+ static std::string FormatIntWidth2(int value); // "%02d" for width == 2
+
+ // Formats an int value as "%X".
+ static std::string FormatHexInt(int value);
+
+ // Formats a byte as "%02X".
+ static std::string FormatByte(unsigned char value);
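+
+  // Illustrations of the formatters above (hypothetical inputs):
+  //   FormatIntWidth2(7)  returns "07"
+  //   FormatHexInt(255)   returns "FF"
+  //   FormatByte(0x0A)    returns "0A"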
+
+ private:
+ String(); // Not meant to be instantiated.
+}; // class String
+
+// Gets the content of the stringstream's buffer as an std::string. Each '\0'
+// character in the buffer is replaced with "\\0".
+GTEST_API_ std::string StringStreamToString(::std::stringstream* stream);
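+
+// Usage sketch (illustrative only):
+//
+//   ::std::stringstream ss;
+//   ss << "a" << '\0' << "b";
+//   StringStreamToString(&ss);  // returns "a\\0b"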
+
+} // namespace internal
+} // namespace testing
+
+#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: keith.ray@gmail.com (Keith Ray)
+//
+// Google Test filepath utilities
+//
+// This header file declares classes and functions used internally by
+// Google Test. They are subject to change without notice.
+//
+// This file is #included in <gtest/internal/gtest-internal.h>.
+// Do not include this header file separately!
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+
+
+namespace testing {
+namespace internal {
+
+// FilePath - a class for file and directory pathname manipulation which
+// handles platform-specific conventions (like the pathname separator).
+// Used for helper functions for naming files in a directory for xml output.
+// Except for Set methods, all methods are const or static, which provides an
+// "immutable value object" -- useful for peace of mind.
+// A FilePath with a value ending in a path separator ("like/this/") represents
+// a directory; otherwise it is assumed to represent a file. In either case,
+// it may or may not represent an actual file or directory in the file system.
+// Names are NOT checked for syntax correctness -- no checking for illegal
+// characters, malformed paths, etc.
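+//
+// Illustrative sketch (not part of the original header):
+//
+//   FilePath dir("some/dir/");           // trailing separator: a directory
+//   dir.IsDirectory();                   // true
+//   FilePath("file.txt").IsDirectory();  // false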
+
+class GTEST_API_ FilePath {
+ public:
+ FilePath() : pathname_("") { }
+ FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { }
+
+ explicit FilePath(const std::string& pathname) : pathname_(pathname) {
+ Normalize();
+ }
+
+ FilePath& operator=(const FilePath& rhs) {
+ Set(rhs);
+ return *this;
+ }
+
+ void Set(const FilePath& rhs) {
+ pathname_ = rhs.pathname_;
+ }
+
+ const std::string& string() const { return pathname_; }
+ const char* c_str() const { return pathname_.c_str(); }
+
+ // Returns the current working directory, or "" if unsuccessful.
+ static FilePath GetCurrentDir();
+
+ // Given directory = "dir", base_name = "test", number = 0,
+ // extension = "xml", returns "dir/test.xml". If number is greater
+ // than zero (e.g., 12), returns "dir/test_12.xml".
+ // On Windows platform, uses \ as the separator rather than /.
+ static FilePath MakeFileName(const FilePath& directory,
+ const FilePath& base_name,
+ int number,
+ const char* extension);
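+
+  // Usage sketch (hypothetical values; '/' separator assumed):
+  //   MakeFileName(FilePath("dir"), FilePath("test"), 0, "xml")
+  //       returns FilePath("dir/test.xml");
+  //   MakeFileName(FilePath("dir"), FilePath("test"), 12, "xml")
+  //       returns FilePath("dir/test_12.xml").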
+
+ // Given directory = "dir", relative_path = "test.xml",
+ // returns "dir/test.xml".
+ // On Windows, uses \ as the separator rather than /.
+ static FilePath ConcatPaths(const FilePath& directory,
+ const FilePath& relative_path);
+
+ // Returns a pathname for a file that does not currently exist. The pathname
+ // will be directory/base_name.extension or
+ // directory/base_name_<number>.extension if directory/base_name.extension
+ // already exists. The number will be incremented until a pathname is found
+ // that does not already exist.
+ // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
+ // There could be a race condition if two or more processes are calling this
+ // function at the same time -- they could both pick the same filename.
+ static FilePath GenerateUniqueFileName(const FilePath& directory,
+ const FilePath& base_name,
+ const char* extension);
+
+ // Returns true iff the path is "".
+ bool IsEmpty() const { return pathname_.empty(); }
+
+  // If the input name has a trailing separator character, removes it and
+  // returns the name; otherwise returns the name string unmodified.
+ // On Windows platform, uses \ as the separator, other platforms use /.
+ FilePath RemoveTrailingPathSeparator() const;
+
+ // Returns a copy of the FilePath with the directory part removed.
+ // Example: FilePath("path/to/file").RemoveDirectoryName() returns
+ // FilePath("file"). If there is no directory part ("just_a_file"), it returns
+ // the FilePath unmodified. If there is no file part ("just_a_dir/") it
+ // returns an empty FilePath ("").
+ // On Windows platform, '\' is the path separator, otherwise it is '/'.
+ FilePath RemoveDirectoryName() const;
+
+ // RemoveFileName returns the directory path with the filename removed.
+ // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/".
+ // If the FilePath is "a_file" or "/a_file", RemoveFileName returns
+ // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does
+ // not have a file, like "just/a/dir/", it returns the FilePath unmodified.
+ // On Windows platform, '\' is the path separator, otherwise it is '/'.
+ FilePath RemoveFileName() const;
+
+ // Returns a copy of the FilePath with the case-insensitive extension removed.
+ // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
+ // FilePath("dir/file"). If a case-insensitive extension is not
+ // found, returns a copy of the original FilePath.
+ FilePath RemoveExtension(const char* extension) const;
+
+ // Creates directories so that path exists. Returns true if successful or if
+ // the directories already exist; returns false if unable to create
+ // directories for any reason. Will also return false if the FilePath does
+ // not represent a directory (that is, it doesn't end with a path separator).
+ bool CreateDirectoriesRecursively() const;
+
+ // Create the directory so that path exists. Returns true if successful or
+ // if the directory already exists; returns false if unable to create the
+ // directory for any reason, including if the parent directory does not
+ // exist. Not named "CreateDirectory" because that's a macro on Windows.
+ bool CreateFolder() const;
+
+ // Returns true if FilePath describes something in the file-system,
+ // either a file, directory, or whatever, and that something exists.
+ bool FileOrDirectoryExists() const;
+
+ // Returns true if pathname describes a directory in the file-system
+ // that exists.
+ bool DirectoryExists() const;
+
+ // Returns true if FilePath ends with a path separator, which indicates that
+ // it is intended to represent a directory. Returns false otherwise.
+ // This does NOT check that a directory (or file) actually exists.
+ bool IsDirectory() const;
+
+ // Returns true if pathname describes a root directory. (Windows has one
+ // root directory per disk drive.)
+ bool IsRootDirectory() const;
+
+ // Returns true if pathname describes an absolute path.
+ bool IsAbsolutePath() const;
+
+ private:
+ // Replaces multiple consecutive separators with a single separator.
+ // For example, "bar///foo" becomes "bar/foo". Does not eliminate other
+ // redundancies that might be in a pathname involving "." or "..".
+ //
+ // A pathname with multiple consecutive separators may occur either through
+ // user error or as a result of some scripts or APIs that generate a pathname
+ // with a trailing separator. On other platforms the same API or script
+ // may NOT generate a pathname with a trailing "/". Then elsewhere that
+ // pathname may have another "/" and pathname components added to it,
+ // without checking for the separator already being there.
+ // The script language and operating system may allow paths like "foo//bar"
+ // but some of the functions in FilePath will not handle that correctly. In
+ // particular, RemoveTrailingPathSeparator() only removes one separator, and
+ // it is called in CreateDirectoriesRecursively() assuming that it will change
+ // a pathname from directory syntax (trailing separator) to filename syntax.
+ //
+ // On Windows this method also replaces the alternate path separator '/' with
+ // the primary path separator '\\', so that for example "bar\\/\\foo" becomes
+ // "bar\\foo".
+
+ void Normalize();
+
+  // Returns a pointer to the last occurrence of a valid path separator in
+ // the FilePath. On Windows, for example, both '/' and '\' are valid path
+ // separators. Returns NULL if no path separator was found.
+ const char* FindLastPathSeparator() const;
+
+ std::string pathname_;
+}; // class FilePath
+
+} // namespace internal
+} // namespace testing
+
+#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+// This file was GENERATED by command:
+// pump.py gtest-type-util.h.pump
+// DO NOT EDIT BY HAND!!!
+
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+// Type utilities needed for implementing typed and type-parameterized
+// tests. This file is generated by a SCRIPT. DO NOT EDIT BY HAND!
+//
+// Currently we support at most 50 types in a list, and at most 50
+// type-parameterized tests in one type-parameterized test case.
+// Please contact googletestframework@googlegroups.com if you need
+// more.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+
+
+// #ifdef __GNUC__ is too general here. It is possible to use gcc without using
+// libstdc++ (which is where cxxabi.h comes from).
+# if GTEST_HAS_CXXABI_H_
+# include <cxxabi.h>
+# elif defined(__HP_aCC)
+# include <acxx_demangle.h>
+# endif  // GTEST_HAS_CXXABI_H_
+
+namespace testing {
+namespace internal {
+
+// GetTypeName<T>() returns a human-readable name of type T.
+// NB: This function is also used in Google Mock, so don't move it inside of
+// the typed-test-only section below.
+template <typename T>
+std::string GetTypeName() {
+# if GTEST_HAS_RTTI
+
+ const char* const name = typeid(T).name();
+# if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
+ int status = 0;
+ // gcc's implementation of typeid(T).name() mangles the type name,
+ // so we have to demangle it.
+# if GTEST_HAS_CXXABI_H_
+ using abi::__cxa_demangle;
+# endif // GTEST_HAS_CXXABI_H_
+ char* const readable_name = __cxa_demangle(name, 0, 0, &status);
+ const std::string name_str(status == 0 ? readable_name : name);
+ free(readable_name);
+ return name_str;
+# else
+ return name;
+# endif // GTEST_HAS_CXXABI_H_ || __HP_aCC
+
+# else
+
+ return "<type>";
+
+# endif // GTEST_HAS_RTTI
+}
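+
+// Illustrative sketch (the exact output is compiler-dependent):
+//
+//   GetTypeName<int>();  // e.g. "int" with RTTI and demangling available,
+//                        // "<type>" when RTTI is disabled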
+
+#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
+
+// AssertTypeEq<T1, T2>::type is defined iff T1 and T2 are the same
+// type. This can be used as a compile-time assertion to ensure that
+// two types are equal.
+
+template <typename T1, typename T2>
+struct AssertTypeEq;
+
+template <typename T>
+struct AssertTypeEq<T, T> {
+ typedef bool type;
+};
+
+// A unique type used as the default value for the arguments of class
+// template Types. This allows us to simulate variadic templates
+// (e.g. Types<int>, Types<int, double>, etc.), which C++ doesn't
+// support directly.
+struct None {};
+
+// The following family of struct and struct templates are used to
+// represent type lists. In particular, TypesN<T1, T2, ..., TN>
+// represents a type list with N types (T1, T2, ..., and TN) in it.
+// Except for Types0, every struct in the family has two member types:
+// Head for the first type in the list, and Tail for the rest of the
+// list.
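+//
+// For instance (illustrative): Types2<int, double> has Head == int and
+// Tail == Types1<double>, whose own Tail is the empty list Types0.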
+
+// The empty type list.
+struct Types0 {};
+
+// Type lists of length 1, 2, 3, and so on.
+
+template <typename T1>
+struct Types1 {
+ typedef T1 Head;
+ typedef Types0 Tail;
+};
+template <typename T1, typename T2>
+struct Types2 {
+ typedef T1 Head;
+ typedef Types1<T2> Tail;
+};
+
+template <typename T1, typename T2, typename T3>
+struct Types3 {
+ typedef T1 Head;
+ typedef Types2<T2, T3> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4>
+struct Types4 {
+ typedef T1 Head;
+ typedef Types3<T2, T3, T4> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5>
+struct Types5 {
+ typedef T1 Head;
+ typedef Types4<T2, T3, T4, T5> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6>
+struct Types6 {
+ typedef T1 Head;
+ typedef Types5<T2, T3, T4, T5, T6> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7>
+struct Types7 {
+ typedef T1 Head;
+ typedef Types6<T2, T3, T4, T5, T6, T7> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8>
+struct Types8 {
+ typedef T1 Head;
+ typedef Types7<T2, T3, T4, T5, T6, T7, T8> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9>
+struct Types9 {
+ typedef T1 Head;
+ typedef Types8<T2, T3, T4, T5, T6, T7, T8, T9> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10>
+struct Types10 {
+ typedef T1 Head;
+ typedef Types9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11>
+struct Types11 {
+ typedef T1 Head;
+ typedef Types10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12>
+struct Types12 {
+ typedef T1 Head;
+ typedef Types11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13>
+struct Types13 {
+ typedef T1 Head;
+ typedef Types12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14>
+struct Types14 {
+ typedef T1 Head;
+ typedef Types13<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15>
+struct Types15 {
+ typedef T1 Head;
+ typedef Types14<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16>
+struct Types16 {
+ typedef T1 Head;
+ typedef Types15<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17>
+struct Types17 {
+ typedef T1 Head;
+ typedef Types16<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18>
+struct Types18 {
+ typedef T1 Head;
+ typedef Types17<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19>
+struct Types19 {
+ typedef T1 Head;
+ typedef Types18<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20>
+struct Types20 {
+ typedef T1 Head;
+ typedef Types19<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21>
+struct Types21 {
+ typedef T1 Head;
+ typedef Types20<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22>
+struct Types22 {
+ typedef T1 Head;
+ typedef Types21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23>
+struct Types23 {
+ typedef T1 Head;
+ typedef Types22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24>
+struct Types24 {
+ typedef T1 Head;
+ typedef Types23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25>
+struct Types25 {
+ typedef T1 Head;
+ typedef Types24<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26>
+struct Types26 {
+ typedef T1 Head;
+ typedef Types25<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27>
+struct Types27 {
+ typedef T1 Head;
+ typedef Types26<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28>
+struct Types28 {
+ typedef T1 Head;
+ typedef Types27<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29>
+struct Types29 {
+ typedef T1 Head;
+ typedef Types28<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30>
+struct Types30 {
+ typedef T1 Head;
+ typedef Types29<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31>
+struct Types31 {
+ typedef T1 Head;
+ typedef Types30<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32>
+struct Types32 {
+ typedef T1 Head;
+ typedef Types31<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33>
+struct Types33 {
+ typedef T1 Head;
+ typedef Types32<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34>
+struct Types34 {
+ typedef T1 Head;
+ typedef Types33<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35>
+struct Types35 {
+ typedef T1 Head;
+ typedef Types34<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36>
+struct Types36 {
+ typedef T1 Head;
+ typedef Types35<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37>
+struct Types37 {
+ typedef T1 Head;
+ typedef Types36<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38>
+struct Types38 {
+ typedef T1 Head;
+ typedef Types37<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39>
+struct Types39 {
+ typedef T1 Head;
+ typedef Types38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40>
+struct Types40 {
+ typedef T1 Head;
+ typedef Types39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41>
+struct Types41 {
+ typedef T1 Head;
+ typedef Types40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42>
+struct Types42 {
+ typedef T1 Head;
+ typedef Types41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43>
+struct Types43 {
+ typedef T1 Head;
+ typedef Types42<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
+ T43> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44>
+struct Types44 {
+ typedef T1 Head;
+ typedef Types43<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45>
+struct Types45 {
+ typedef T1 Head;
+ typedef Types44<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46>
+struct Types46 {
+ typedef T1 Head;
+ typedef Types45<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45, T46> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47>
+struct Types47 {
+ typedef T1 Head;
+ typedef Types46<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45, T46, T47> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47, typename T48>
+struct Types48 {
+ typedef T1 Head;
+ typedef Types47<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45, T46, T47, T48> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47, typename T48, typename T49>
+struct Types49 {
+ typedef T1 Head;
+ typedef Types48<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45, T46, T47, T48, T49> Tail;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47, typename T48, typename T49, typename T50>
+struct Types50 {
+ typedef T1 Head;
+ typedef Types49<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45, T46, T47, T48, T49, T50> Tail;
+};
+
+
+} // namespace internal
+
+// We don't want to require the users to write TypesN<...> directly,
+// as that would require them to count the length. Types<...> is much
+// easier to write, but generates horrible messages when there is a
+// compiler error, as gcc insists on printing out each template
+// argument, even if it has the default value (this means Types<int>
+// will appear as Types<int, None, None, ..., None> in the compiler
+// errors).
+//
+// Our solution is to combine the best part of the two approaches: a
+// user would write Types<T1, ..., TN>, and Google Test will translate
+// that to TypesN<T1, ..., TN> internally to make error messages
+// readable. The translation is done by the 'type' member of the
+// Types template.
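+//
+// Usage sketch (illustrative): a user writes
+//
+//   typedef testing::Types<char, int, double> MyTypes;
+//
+// and MyTypes::type is internal::Types3<char, int, double>; the padding
+// internal::None arguments are stripped by the specializations below.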
+template <typename T1 = internal::None, typename T2 = internal::None,
+ typename T3 = internal::None, typename T4 = internal::None,
+ typename T5 = internal::None, typename T6 = internal::None,
+ typename T7 = internal::None, typename T8 = internal::None,
+ typename T9 = internal::None, typename T10 = internal::None,
+ typename T11 = internal::None, typename T12 = internal::None,
+ typename T13 = internal::None, typename T14 = internal::None,
+ typename T15 = internal::None, typename T16 = internal::None,
+ typename T17 = internal::None, typename T18 = internal::None,
+ typename T19 = internal::None, typename T20 = internal::None,
+ typename T21 = internal::None, typename T22 = internal::None,
+ typename T23 = internal::None, typename T24 = internal::None,
+ typename T25 = internal::None, typename T26 = internal::None,
+ typename T27 = internal::None, typename T28 = internal::None,
+ typename T29 = internal::None, typename T30 = internal::None,
+ typename T31 = internal::None, typename T32 = internal::None,
+ typename T33 = internal::None, typename T34 = internal::None,
+ typename T35 = internal::None, typename T36 = internal::None,
+ typename T37 = internal::None, typename T38 = internal::None,
+ typename T39 = internal::None, typename T40 = internal::None,
+ typename T41 = internal::None, typename T42 = internal::None,
+ typename T43 = internal::None, typename T44 = internal::None,
+ typename T45 = internal::None, typename T46 = internal::None,
+ typename T47 = internal::None, typename T48 = internal::None,
+ typename T49 = internal::None, typename T50 = internal::None>
+struct Types {
+ typedef internal::Types50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41, T42, T43, T44, T45, T46, T47, T48, T49, T50> type;
+};
+
+template <>
+struct Types<internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types0 type;
+};
+template <typename T1>
+struct Types<T1, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types1<T1> type;
+};
+template <typename T1, typename T2>
+struct Types<T1, T2, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None> {
+ typedef internal::Types2<T1, T2> type;
+};
+template <typename T1, typename T2, typename T3>
+struct Types<T1, T2, T3, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None> {
+ typedef internal::Types3<T1, T2, T3> type;
+};
+template <typename T1, typename T2, typename T3, typename T4>
+struct Types<T1, T2, T3, T4, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None> {
+ typedef internal::Types4<T1, T2, T3, T4> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5>
+struct Types<T1, T2, T3, T4, T5, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None> {
+ typedef internal::Types5<T1, T2, T3, T4, T5> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6>
+struct Types<T1, T2, T3, T4, T5, T6, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types6<T1, T2, T3, T4, T5, T6> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7>
+struct Types<T1, T2, T3, T4, T5, T6, T7, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None> {
+ typedef internal::Types7<T1, T2, T3, T4, T5, T6, T7> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None> {
+ typedef internal::Types8<T1, T2, T3, T4, T5, T6, T7, T8> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None> {
+ typedef internal::Types9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None> {
+ typedef internal::Types10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None> {
+ typedef internal::Types12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None> {
+ typedef internal::Types13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None> {
+ typedef internal::Types14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None> {
+ typedef internal::Types15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None> {
+ typedef internal::Types17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None> {
+ typedef internal::Types18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None> {
+ typedef internal::Types19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None> {
+ typedef internal::Types20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None> {
+ typedef internal::Types21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None> {
+ typedef internal::Types23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None> {
+ typedef internal::Types24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None> {
+ typedef internal::Types25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None> {
+ typedef internal::Types26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None> {
+ typedef internal::Types29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None> {
+ typedef internal::Types30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None> {
+ typedef internal::Types31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None> {
+ typedef internal::Types34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None> {
+ typedef internal::Types35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None> {
+ typedef internal::Types36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None> {
+ typedef internal::Types37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, T38, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, T38, T39, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None> {
+ typedef internal::Types40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
+ T40> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None, internal::None> {
+ typedef internal::Types41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, internal::None,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None> {
+ typedef internal::Types42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41, T42> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None, internal::None> {
+ typedef internal::Types43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41, T42, T43> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None, internal::None> {
+ typedef internal::Types44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41, T42, T43, T44> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
+ internal::None, internal::None, internal::None, internal::None,
+ internal::None> {
+ typedef internal::Types45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41, T42, T43, T44, T45> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
+ T46, internal::None, internal::None, internal::None, internal::None> {
+ typedef internal::Types46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41, T42, T43, T44, T45, T46> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
+ T46, T47, internal::None, internal::None, internal::None> {
+ typedef internal::Types47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41, T42, T43, T44, T45, T46, T47> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47, typename T48>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
+ T46, T47, T48, internal::None, internal::None> {
+ typedef internal::Types48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41, T42, T43, T44, T45, T46, T47, T48> type;
+};
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47, typename T48, typename T49>
+struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
+ T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
+ T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
+ T46, T47, T48, T49, internal::None> {
+ typedef internal::Types49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41, T42, T43, T44, T45, T46, T47, T48, T49> type;
+};
+
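+// A minimal usage sketch (illustrative only; it assumes the internal::TypesN
+// lists defined earlier in this header): trailing internal::None arguments
+// select the specialization of matching arity, so
+//
+//   Types<int, double>::type
+//
+// is internal::Types2<int, double>, whose Head is int and whose Tail is
+// internal::Types1<double>.
+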
+namespace internal {
+
+# define GTEST_TEMPLATE_ template <typename T> class
+
+// The template "selector" struct TemplateSel<Tmpl> is used to
+// represent Tmpl, which must be a class template with one type
+// parameter, as a type. TemplateSel<Tmpl>::Bind<T>::type is defined
+// as the type Tmpl<T>. This allows us to actually instantiate the
+// template "selected" by TemplateSel<Tmpl>.
+//
+// This trick is necessary for simulating typedef for class templates,
+// which C++ doesn't support directly.
+template <GTEST_TEMPLATE_ Tmpl>
+struct TemplateSel {
+ template <typename T>
+ struct Bind {
+ typedef Tmpl<T> type;
+ };
+};
+
+# define GTEST_BIND_(TmplSel, T) \
+ TmplSel::template Bind<T>::type
+
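+// A minimal sketch of how the selector is meant to be used (illustrative
+// only; MyTemplate is a hypothetical class template, not part of this
+// header):
+//
+//   template <typename T> class MyTemplate {};
+//   typedef TemplateSel<MyTemplate> Sel;
+//   typedef GTEST_BIND_(Sel, int) Bound;  // Bound is MyTemplate<int>.
+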
+// A unique struct template used as the default value for the
+// arguments of class template Templates. This allows us to simulate
+// variadic templates (e.g. Templates<int>, Templates<int, double>,
+// etc.), which C++ doesn't support directly.
+template <typename T>
+struct NoneT {};
+
+// The following family of struct and struct templates are used to
+// represent template lists. In particular, TemplatesN<T1, T2, ...,
+// TN> represents a list of N templates (T1, T2, ..., and TN). Except
+// for Templates0, every struct in the family has two member types:
+// Head for the selector of the first template in the list, and Tail
+// for the rest of the list.
+
+// The empty template list.
+struct Templates0 {};
+
+// Template lists of length 1, 2, 3, and so on.
+
+template <GTEST_TEMPLATE_ T1>
+struct Templates1 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates0 Tail;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2>
+struct Templates2 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates1<T2> Tail;
+};
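+
+// A minimal sketch of walking such a list (illustrative only; Foo and Bar
+// are hypothetical class templates): for Templates2<Foo, Bar>, Head is
+// TemplateSel<Foo> and Tail is Templates1<Bar>, so recursing on Tail until
+// Templates0 is reached visits every selector, e.g.
+//
+//   typedef Templates2<Foo, Bar> List;
+//   typedef GTEST_BIND_(List::Head, int) First;         // Foo<int>
+//   typedef GTEST_BIND_(List::Tail::Head, int) Second;  // Bar<int>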
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3>
+struct Templates3 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates2<T2, T3> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4>
+struct Templates4 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates3<T2, T3, T4> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5>
+struct Templates5 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates4<T2, T3, T4, T5> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6>
+struct Templates6 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates5<T2, T3, T4, T5, T6> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7>
+struct Templates7 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates6<T2, T3, T4, T5, T6, T7> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8>
+struct Templates8 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates7<T2, T3, T4, T5, T6, T7, T8> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9>
+struct Templates9 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates8<T2, T3, T4, T5, T6, T7, T8, T9> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10>
+struct Templates10 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11>
+struct Templates11 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12>
+struct Templates12 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13>
+struct Templates13 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14>
+struct Templates14 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates13<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15>
+struct Templates15 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates14<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16>
+struct Templates16 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates15<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17>
+struct Templates17 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates16<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18>
+struct Templates18 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates17<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19>
+struct Templates19 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates18<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20>
+struct Templates20 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates19<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21>
+struct Templates21 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates20<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22>
+struct Templates22 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23>
+struct Templates23 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24>
+struct Templates24 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25>
+struct Templates25 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates24<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26>
+struct Templates26 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates25<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27>
+struct Templates27 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates26<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28>
+struct Templates28 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates27<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29>
+struct Templates29 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates28<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30>
+struct Templates30 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates29<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31>
+struct Templates31 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates30<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32>
+struct Templates32 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates31<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33>
+struct Templates33 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates32<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34>
+struct Templates34 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates33<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35>
+struct Templates35 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates34<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36>
+struct Templates36 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates35<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37>
+struct Templates37 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates36<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38>
+struct Templates38 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates37<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39>
+struct Templates39 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40>
+struct Templates40 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41>
+struct Templates41 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42>
+struct Templates42 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
+ T42> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43>
+struct Templates43 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates42<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
+ T43> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44>
+struct Templates44 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates43<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
+ T43, T44> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45>
+struct Templates45 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates44<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
+ T43, T44, T45> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
+ GTEST_TEMPLATE_ T46>
+struct Templates46 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates45<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
+ T43, T44, T45, T46> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
+ GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47>
+struct Templates47 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates46<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
+ T43, T44, T45, T46, T47> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
+ GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48>
+struct Templates48 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates47<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
+ T43, T44, T45, T46, T47, T48> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
+ GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
+ GTEST_TEMPLATE_ T49>
+struct Templates49 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates48<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
+ T43, T44, T45, T46, T47, T48, T49> Tail;
+};
+
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
+ GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
+ GTEST_TEMPLATE_ T49, GTEST_TEMPLATE_ T50>
+struct Templates50 {
+ typedef TemplateSel<T1> Head;
+ typedef Templates49<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
+ T43, T44, T45, T46, T47, T48, T49, T50> Tail;
+};
+
+
+// We don't want to require the users to write TemplatesN<...> directly,
+// as that would require them to count the length. Templates<...> is much
+// easier to write, but generates horrible messages when there is a
+// compiler error, as gcc insists on printing out each template
+// argument, even if it has the default value (this means Templates<list>
+// will appear as Templates<list, NoneT, NoneT, ..., NoneT> in the compiler
+// errors).
+//
+// Our solution is to combine the best part of the two approaches: a
+// user would write Templates<T1, ..., TN>, and Google Test will translate
+// that to TemplatesN<T1, ..., TN> internally to make error messages
+// readable. The translation is done by the 'type' member of the
+// Templates template.
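+//
+// For example (an illustrative sketch; MyA and MyB stand for any
+// templates accepted by GTEST_TEMPLATE_): Templates<MyA, MyB>::type is
+// Templates2<MyA, MyB>, whose Head is TemplateSel<MyA> and whose Tail
+// is Templates1<MyB>, matching the arity-2 specialization below.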
+template <GTEST_TEMPLATE_ T1 = NoneT, GTEST_TEMPLATE_ T2 = NoneT,
+ GTEST_TEMPLATE_ T3 = NoneT, GTEST_TEMPLATE_ T4 = NoneT,
+ GTEST_TEMPLATE_ T5 = NoneT, GTEST_TEMPLATE_ T6 = NoneT,
+ GTEST_TEMPLATE_ T7 = NoneT, GTEST_TEMPLATE_ T8 = NoneT,
+ GTEST_TEMPLATE_ T9 = NoneT, GTEST_TEMPLATE_ T10 = NoneT,
+ GTEST_TEMPLATE_ T11 = NoneT, GTEST_TEMPLATE_ T12 = NoneT,
+ GTEST_TEMPLATE_ T13 = NoneT, GTEST_TEMPLATE_ T14 = NoneT,
+ GTEST_TEMPLATE_ T15 = NoneT, GTEST_TEMPLATE_ T16 = NoneT,
+ GTEST_TEMPLATE_ T17 = NoneT, GTEST_TEMPLATE_ T18 = NoneT,
+ GTEST_TEMPLATE_ T19 = NoneT, GTEST_TEMPLATE_ T20 = NoneT,
+ GTEST_TEMPLATE_ T21 = NoneT, GTEST_TEMPLATE_ T22 = NoneT,
+ GTEST_TEMPLATE_ T23 = NoneT, GTEST_TEMPLATE_ T24 = NoneT,
+ GTEST_TEMPLATE_ T25 = NoneT, GTEST_TEMPLATE_ T26 = NoneT,
+ GTEST_TEMPLATE_ T27 = NoneT, GTEST_TEMPLATE_ T28 = NoneT,
+ GTEST_TEMPLATE_ T29 = NoneT, GTEST_TEMPLATE_ T30 = NoneT,
+ GTEST_TEMPLATE_ T31 = NoneT, GTEST_TEMPLATE_ T32 = NoneT,
+ GTEST_TEMPLATE_ T33 = NoneT, GTEST_TEMPLATE_ T34 = NoneT,
+ GTEST_TEMPLATE_ T35 = NoneT, GTEST_TEMPLATE_ T36 = NoneT,
+ GTEST_TEMPLATE_ T37 = NoneT, GTEST_TEMPLATE_ T38 = NoneT,
+ GTEST_TEMPLATE_ T39 = NoneT, GTEST_TEMPLATE_ T40 = NoneT,
+ GTEST_TEMPLATE_ T41 = NoneT, GTEST_TEMPLATE_ T42 = NoneT,
+ GTEST_TEMPLATE_ T43 = NoneT, GTEST_TEMPLATE_ T44 = NoneT,
+ GTEST_TEMPLATE_ T45 = NoneT, GTEST_TEMPLATE_ T46 = NoneT,
+ GTEST_TEMPLATE_ T47 = NoneT, GTEST_TEMPLATE_ T48 = NoneT,
+ GTEST_TEMPLATE_ T49 = NoneT, GTEST_TEMPLATE_ T50 = NoneT>
+struct Templates {
+ typedef Templates50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
+ T42, T43, T44, T45, T46, T47, T48, T49, T50> type;
+};
+
+template <>
+struct Templates<NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT> {
+ typedef Templates0 type;
+};
+template <GTEST_TEMPLATE_ T1>
+struct Templates<T1, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT> {
+ typedef Templates1<T1> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2>
+struct Templates<T1, T2, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT> {
+ typedef Templates2<T1, T2> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3>
+struct Templates<T1, T2, T3, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates3<T1, T2, T3> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4>
+struct Templates<T1, T2, T3, T4, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates4<T1, T2, T3, T4> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5>
+struct Templates<T1, T2, T3, T4, T5, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates5<T1, T2, T3, T4, T5> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6>
+struct Templates<T1, T2, T3, T4, T5, T6, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates6<T1, T2, T3, T4, T5, T6> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates7<T1, T2, T3, T4, T5, T6, T7> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates8<T1, T2, T3, T4, T5, T6, T7, T8> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT> {
+ typedef Templates22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT> {
+ typedef Templates23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT> {
+ typedef Templates24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT> {
+ typedef Templates25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT> {
+ typedef Templates26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT> {
+ typedef Templates27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT> {
+ typedef Templates28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT> {
+ typedef Templates29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, NoneT, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, NoneT, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, NoneT, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, NoneT, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, NoneT,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
+ T42> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
+ T42, T43> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
+ NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
+ T42, T43, T44> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
+ T45, NoneT, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
+ T42, T43, T44, T45> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
+ GTEST_TEMPLATE_ T46>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
+ T45, T46, NoneT, NoneT, NoneT, NoneT> {
+ typedef Templates46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
+ T42, T43, T44, T45, T46> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
+ GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
+ T45, T46, T47, NoneT, NoneT, NoneT> {
+ typedef Templates47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
+ T42, T43, T44, T45, T46, T47> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
+ GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
+ T45, T46, T47, T48, NoneT, NoneT> {
+ typedef Templates48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
+ T42, T43, T44, T45, T46, T47, T48> type;
+};
+template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
+ GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
+ GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
+ GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
+ GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
+ GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
+ GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
+ GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
+ GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
+ GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
+ GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
+ GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
+ GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
+ GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
+ GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
+ GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
+ GTEST_TEMPLATE_ T49>
+struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
+ T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
+ T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
+ T45, T46, T47, T48, T49, NoneT> {
+ typedef Templates49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
+ T42, T43, T44, T45, T46, T47, T48, T49> type;
+};
+
+// The TypeList template makes it possible to use either a single type
+// or a Types<...> list in TYPED_TEST_CASE() and
+// INSTANTIATE_TYPED_TEST_CASE_P().
+
+template <typename T>
+struct TypeList {
+ typedef Types1<T> type;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47, typename T48, typename T49, typename T50>
+struct TypeList<Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45, T46, T47, T48, T49, T50> > {
+ typedef typename Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>::type type;
+};
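+
+// For example, both of the following hypothetical declarations resolve
+// to a proper Types<...> list internally (illustrative sketch; FooTest
+// and BarTest are made-up fixture names):
+//
+//   TYPED_TEST_CASE(FooTest, int);                   // single type
+//   TYPED_TEST_CASE(BarTest, Types<int, double>);    // explicit list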
+
+#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
+
+} // namespace internal
+} // namespace testing
+
+#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+
+// Due to C++ preprocessor weirdness, we need double indirection to
+// concatenate two tokens when one of them is __LINE__. Writing
+//
+// foo ## __LINE__
+//
+// will result in the token foo__LINE__, instead of foo followed by
+// the current line number. For more details, see
+// http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6
+#define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar)
+#define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar
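+
+// For example, if the macro is used on line 42, expansion proceeds in
+// two steps (illustrative):
+//
+//   GTEST_CONCAT_TOKEN_(foo, __LINE__)
+//       => GTEST_CONCAT_TOKEN_IMPL_(foo, 42)  // __LINE__ expands first
+//       => foo42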
+
+class ProtocolMessage;
+namespace proto2 { class Message; }
+
+namespace testing {
+
+// Forward declarations.
+
+class AssertionResult; // Result of an assertion.
+class Message; // Represents a failure message.
+class Test; // Represents a test.
+class TestInfo; // Information about a test.
+class TestPartResult; // Result of a test part.
+class UnitTest; // A collection of test cases.
+
+template <typename T>
+::std::string PrintToString(const T& value);
+
+namespace internal {
+
+struct TraceInfo; // Information about a trace point.
+class ScopedTrace; // Implements scoped trace.
+class TestInfoImpl; // Opaque implementation of TestInfo
+class UnitTestImpl; // Opaque implementation of UnitTest
+
+// How many times InitGoogleTest() has been called.
+GTEST_API_ extern int g_init_gtest_count;
+
+// The text used in failure messages to indicate the start of the
+// stack trace.
+GTEST_API_ extern const char kStackTraceMarker[];
+
+// Two overloaded helpers for checking at compile time whether an
+// expression is a null pointer literal (i.e. NULL or any 0-valued
+// compile-time integral constant). Their return values have
+// different sizes, so we can use sizeof() to test which version is
+// picked by the compiler. These helpers have no implementations, as
+// we only need their signatures.
+//
+// Given IsNullLiteralHelper(x), the compiler will pick the first
+// version if x can be implicitly converted to Secret*, and pick the
+// second version otherwise. Since Secret is a secret and incomplete
+// type, the only expression a user can write that has type Secret* is
+// a null pointer literal. Therefore, we know that x is a null
+// pointer literal if and only if the first version is picked by the
+// compiler.
+char IsNullLiteralHelper(Secret* p);
+char (&IsNullLiteralHelper(...))[2]; // NOLINT
+
+// A compile-time bool constant that is true if and only if x is a
+// null pointer literal (i.e. NULL or any 0-valued compile-time
+// integral constant).
+#ifdef GTEST_ELLIPSIS_NEEDS_POD_
+// We lose support for NULL detection where the compiler doesn't like
+// passing non-POD classes through ellipsis (...).
+# define GTEST_IS_NULL_LITERAL_(x) false
+#else
+# define GTEST_IS_NULL_LITERAL_(x) \
+ (sizeof(::testing::internal::IsNullLiteralHelper(x)) == 1)
+#endif // GTEST_ELLIPSIS_NEEDS_POD_
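+
+// For example (illustrative; ptr is a hypothetical non-literal pointer):
+//
+//   GTEST_IS_NULL_LITERAL_(NULL)   // true: NULL converts to Secret*
+//   GTEST_IS_NULL_LITERAL_(0)      // true: 0 is a null pointer literal
+//   GTEST_IS_NULL_LITERAL_(ptr)    // false: not a null pointer literal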
+
+// Appends the user-supplied message to the Google-Test-generated message.
+GTEST_API_ std::string AppendUserMessage(
+ const std::string& gtest_msg, const Message& user_msg);
+
+#if GTEST_HAS_EXCEPTIONS
+
+// This exception is thrown by (and only by) a failed Google Test
+// assertion when GTEST_FLAG(throw_on_failure) is true (if exceptions
+// are enabled). We derive it from std::runtime_error, which is for
+// errors presumably detectable only at run time. Since
+// std::runtime_error inherits from std::exception, many testing
+// frameworks know how to extract and print the message inside it.
+class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error {
+ public:
+ explicit GoogleTestFailureException(const TestPartResult& failure);
+};
+
+#endif // GTEST_HAS_EXCEPTIONS
+
+// A helper class for creating scoped traces in user programs.
+class GTEST_API_ ScopedTrace {
+ public:
+ // The c'tor pushes the given source file location and message onto
+ // a trace stack maintained by Google Test.
+ ScopedTrace(const char* file, int line, const Message& message);
+
+ // The d'tor pops the info pushed by the c'tor.
+ //
+ // Note that the d'tor is not virtual in order to be efficient.
+ // Don't inherit from ScopedTrace!
+ ~ScopedTrace();
+
+ private:
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace);
+} GTEST_ATTRIBUTE_UNUSED_; // A ScopedTrace object does its job in its
+ // c'tor and d'tor. Therefore it doesn't
+ // need to be used otherwise.
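+
+// A minimal usage sketch (users normally go through the SCOPED_TRACE
+// macro, which generates an equivalent object; i is hypothetical):
+//
+//   {
+//     ::testing::internal::ScopedTrace trace(
+//         __FILE__, __LINE__, ::testing::Message() << "iteration " << i);
+//     // Failures here include the trace; it is popped at the brace.
+//   }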
+
+// Constructs and returns the message for an equality assertion
+// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
+//
+// The first four parameters are the expressions used in the assertion
+// and their values, as strings. For example, for ASSERT_EQ(foo, bar)
+// where foo is 5 and bar is 6, we have:
+//
+// expected_expression: "foo"
+// actual_expression: "bar"
+// expected_value: "5"
+// actual_value: "6"
+//
+// The ignoring_case parameter is true iff the assertion is a
+// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will
+// be inserted into the message.
+GTEST_API_ AssertionResult EqFailure(const char* expected_expression,
+ const char* actual_expression,
+ const std::string& expected_value,
+ const std::string& actual_value,
+ bool ignoring_case);
+
+// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
+GTEST_API_ std::string GetBoolAssertionFailureMessage(
+ const AssertionResult& assertion_result,
+ const char* expression_text,
+ const char* actual_predicate_value,
+ const char* expected_predicate_value);
+
+// This template class represents an IEEE floating-point number
+// (either single-precision or double-precision, depending on the
+// template parameters).
+//
+// The purpose of this class is to do more sophisticated number
+// comparison. (Due to round-off error, etc, it's very unlikely that
+// two floating-points will be equal exactly. Hence a naive
+// comparison by the == operation often doesn't work.)
+//
+// Format of IEEE floating-point:
+//
+// The most-significant bit being the leftmost, an IEEE
+// floating-point looks like
+//
+// sign_bit exponent_bits fraction_bits
+//
+// Here, sign_bit is a single bit that designates the sign of the
+// number.
+//
+// For float, there are 8 exponent bits and 23 fraction bits.
+//
+// For double, there are 11 exponent bits and 52 fraction bits.
+//
+// More details can be found at
+// http://en.wikipedia.org/wiki/IEEE_floating-point_standard.
+//
+// Template parameter:
+//
+// RawType: the raw floating-point type (either float or double)
+template <typename RawType>
+class FloatingPoint {
+ public:
+ // Defines the unsigned integer type that has the same size as the
+ // floating point number.
+ typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits;
+
+ // Constants.
+
+ // # of bits in a number.
+ static const size_t kBitCount = 8*sizeof(RawType);
+
+ // # of fraction bits in a number.
+ static const size_t kFractionBitCount =
+ std::numeric_limits<RawType>::digits - 1;
+
+ // # of exponent bits in a number.
+ static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount;
+
+ // The mask for the sign bit.
+ static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1);
+
+ // The mask for the fraction bits.
+ static const Bits kFractionBitMask =
+ ~static_cast<Bits>(0) >> (kExponentBitCount + 1);
+
+ // The mask for the exponent bits.
+ static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask);
+
+ // How many ULP's (Units in the Last Place) we want to tolerate when
+ // comparing two numbers. The larger the value, the more error we
+ // allow. A 0 value means that two numbers must be exactly the same
+ // to be considered equal.
+ //
+ // The maximum error of a single floating-point operation is 0.5
+ // units in the last place. On Intel CPU's, all floating-point
+ // calculations are done with 80-bit precision, while double has 64
+ // bits. Therefore, 4 should be enough for ordinary use.
+ //
+ // See the following article for more details on ULP:
+ // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
+ static const size_t kMaxUlps = 4;
+
+ // Constructs a FloatingPoint from a raw floating-point number.
+ //
+  // On an Intel CPU, passing a non-normalized NAN (Not a Number)
+  // around may change its bits, although the new value is guaranteed
+  // also to be a NAN.  Therefore, don't expect this constructor to
+  // preserve the bits in x when x is a NAN.
+ explicit FloatingPoint(const RawType& x) { u_.value_ = x; }
+
+ // Static methods
+
+ // Reinterprets a bit pattern as a floating-point number.
+ //
+ // This function is needed to test the AlmostEquals() method.
+ static RawType ReinterpretBits(const Bits bits) {
+ FloatingPoint fp(0);
+ fp.u_.bits_ = bits;
+ return fp.u_.value_;
+ }
+
+  // Returns the floating-point number that represents positive infinity.
+ static RawType Infinity() {
+ return ReinterpretBits(kExponentBitMask);
+ }
+
+ // Returns the maximum representable finite floating-point number.
+ static RawType Max();
+
+ // Non-static methods
+
+  // Returns the bits that represent this number.
+ const Bits &bits() const { return u_.bits_; }
+
+ // Returns the exponent bits of this number.
+ Bits exponent_bits() const { return kExponentBitMask & u_.bits_; }
+
+ // Returns the fraction bits of this number.
+ Bits fraction_bits() const { return kFractionBitMask & u_.bits_; }
+
+ // Returns the sign bit of this number.
+ Bits sign_bit() const { return kSignBitMask & u_.bits_; }
+
+ // Returns true iff this is NAN (not a number).
+ bool is_nan() const {
+ // It's a NAN if the exponent bits are all ones and the fraction
+ // bits are not entirely zeros.
+ return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0);
+ }
+
+ // Returns true iff this number is at most kMaxUlps ULP's away from
+ // rhs. In particular, this function:
+ //
+ // - returns false if either number is (or both are) NAN.
+ // - treats really large numbers as almost equal to infinity.
+  //   - thinks +0.0 and -0.0 are 0 ULP's apart.
+ bool AlmostEquals(const FloatingPoint& rhs) const {
+ // The IEEE standard says that any comparison operation involving
+ // a NAN must return false.
+ if (is_nan() || rhs.is_nan()) return false;
+
+ return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_)
+ <= kMaxUlps;
+ }
+
+ private:
+ // The data type used to store the actual floating-point number.
+ union FloatingPointUnion {
+ RawType value_; // The raw floating-point number.
+ Bits bits_; // The bits that represent the number.
+ };
+
+ // Converts an integer from the sign-and-magnitude representation to
+ // the biased representation. More precisely, let N be 2 to the
+  // power of (kBitCount - 1); then an integer x is represented by the
+ // unsigned number x + N.
+ //
+ // For instance,
+ //
+ // -N + 1 (the most negative number representable using
+ // sign-and-magnitude) is represented by 1;
+ // 0 is represented by N; and
+ // N - 1 (the biggest number representable using
+ // sign-and-magnitude) is represented by 2N - 1.
+ //
+ // Read http://en.wikipedia.org/wiki/Signed_number_representations
+ // for more details on signed number representations.
+ static Bits SignAndMagnitudeToBiased(const Bits &sam) {
+ if (kSignBitMask & sam) {
+ // sam represents a negative number.
+ return ~sam + 1;
+ } else {
+ // sam represents a positive number.
+ return kSignBitMask | sam;
+ }
+ }
+
+ // Given two numbers in the sign-and-magnitude representation,
+ // returns the distance between them as an unsigned number.
+ static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1,
+ const Bits &sam2) {
+ const Bits biased1 = SignAndMagnitudeToBiased(sam1);
+ const Bits biased2 = SignAndMagnitudeToBiased(sam2);
+ return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1);
+ }
+
+ FloatingPointUnion u_;
+};
+
+// We cannot use std::numeric_limits<T>::max() as it clashes with the max()
+// macro defined by <windows.h>.
+template <>
+inline float FloatingPoint<float>::Max() { return FLT_MAX; }
+template <>
+inline double FloatingPoint<double>::Max() { return DBL_MAX; }
+
+// Typedefs the instances of the FloatingPoint template class that we
+// care to use.
+typedef FloatingPoint<float> Float;
+typedef FloatingPoint<double> Double;
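+
+// Usage sketch (illustrative): two values one representable double apart
+// are almost equal under the default 4-ULP tolerance.
+//
+//   const double a = 1.0;
+//   const double b = Double::ReinterpretBits(Double(a).bits() + 1);
+//   Double(a).AlmostEquals(Double(b));   // true: only 1 ULP apart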
+
+// In order to catch the mistake of putting tests that use different
+// test fixture classes in the same test case, we need to assign
+// unique IDs to fixture classes and compare them. The TypeId type is
+// used to hold such IDs. The user should treat TypeId as an opaque
+// type: the only operation allowed on TypeId values is to compare
+// them for equality using the == operator.
+typedef const void* TypeId;
+
+template <typename T>
+class TypeIdHelper {
+ public:
+ // dummy_ must not have a const type. Otherwise an overly eager
+ // compiler (e.g. MSVC 7.1 & 8.0) may try to merge
+ // TypeIdHelper<T>::dummy_ for different Ts as an "optimization".
+ static bool dummy_;
+};
+
+template <typename T>
+bool TypeIdHelper<T>::dummy_ = false;
+
+// GetTypeId<T>() returns the ID of type T. Different values will be
+// returned for different types. Calling the function twice with the
+// same type argument is guaranteed to return the same ID.
+template <typename T>
+TypeId GetTypeId() {
+ // The compiler is required to allocate a different
+ // TypeIdHelper<T>::dummy_ variable for each T used to instantiate
+ // the template. Therefore, the address of dummy_ is guaranteed to
+ // be unique.
+ return &(TypeIdHelper<T>::dummy_);
+}
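+
+// For example (illustrative):
+//
+//   GetTypeId<int>() == GetTypeId<int>();      // true: same dummy_ address
+//   GetTypeId<int>() == GetTypeId<double>();   // false: distinct dummy_'s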
+
+// Returns the type ID of ::testing::Test. Always call this instead
+// of GetTypeId< ::testing::Test>() to get the type ID of
+// ::testing::Test, as the latter may give the wrong result due to a
+// suspected linker bug when compiling Google Test as a Mac OS X
+// framework.
+GTEST_API_ TypeId GetTestTypeId();
+
+// Defines the abstract factory interface that creates instances
+// of a Test object.
+class TestFactoryBase {
+ public:
+ virtual ~TestFactoryBase() {}
+
+ // Creates a test instance to run. The instance is both created and destroyed
+ // within TestInfoImpl::Run()
+ virtual Test* CreateTest() = 0;
+
+ protected:
+ TestFactoryBase() {}
+
+ private:
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(TestFactoryBase);
+};
+
+// This class provides an implementation of the TestFactoryBase interface.
+// It is used in the TEST and TEST_F macros.
+template <class TestClass>
+class TestFactoryImpl : public TestFactoryBase {
+ public:
+ virtual Test* CreateTest() { return new TestClass; }
+};
+
+#if GTEST_OS_WINDOWS
+
+// Predicate-formatters for implementing the HRESULT checking macros
+// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}
+// We pass a long instead of HRESULT to avoid causing an
+// include dependency for the HRESULT type.
+GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr,
+ long hr); // NOLINT
+GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr,
+ long hr); // NOLINT
+
+#endif // GTEST_OS_WINDOWS
+
+// Types of SetUpTestCase() and TearDownTestCase() functions.
+typedef void (*SetUpTestCaseFunc)();
+typedef void (*TearDownTestCaseFunc)();
+
+// Creates a new TestInfo object and registers it with Google Test;
+// returns the created object.
+//
+// Arguments:
+//
+// test_case_name: name of the test case
+// name: name of the test
+//   type_param:      the name of the test's type parameter, or NULL if
+//                    this is not a typed or a type-parameterized test.
+//   value_param:     text representation of the test's value parameter,
+//                    or NULL if this is not a value-parameterized test.
+// fixture_class_id: ID of the test fixture class
+// set_up_tc: pointer to the function that sets up the test case
+// tear_down_tc: pointer to the function that tears down the test case
+// factory: pointer to the factory that creates a test object.
+// The newly created TestInfo instance will assume
+// ownership of the factory object.
+GTEST_API_ TestInfo* MakeAndRegisterTestInfo(
+ const char* test_case_name,
+ const char* name,
+ const char* type_param,
+ const char* value_param,
+ TypeId fixture_class_id,
+ SetUpTestCaseFunc set_up_tc,
+ TearDownTestCaseFunc tear_down_tc,
+ TestFactoryBase* factory);
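+
+// A hand-written registration sketch (FooTest is a hypothetical fixture;
+// the TEST macro normally emits the equivalent call):
+//
+//   MakeAndRegisterTestInfo(
+//       "FooCase", "DoesBar",
+//       NULL, NULL,              // neither typed nor value-parameterized
+//       GetTestTypeId(),
+//       Test::SetUpTestCase, Test::TearDownTestCase,
+//       new TestFactoryImpl<FooTest>);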
+
+// If *pstr starts with the given prefix, modifies *pstr to be right
+// past the prefix and returns true; otherwise leaves *pstr unchanged
+// and returns false. None of pstr, *pstr, and prefix can be NULL.
+GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr);
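+
+// For example (illustrative):
+//
+//   const char* p = "DISABLED_Foo";
+//   SkipPrefix("DISABLED_", &p);   // returns true; p now points at "Foo"
+//   SkipPrefix("XYZ_", &p);        // returns false; p is unchanged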
+
+#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
+
+// State of the definition of a type-parameterized test case.
+class GTEST_API_ TypedTestCasePState {
+ public:
+ TypedTestCasePState() : registered_(false) {}
+
+  // Adds the given test name to defined_test_names_ and returns true
+  // if the test case hasn't been registered; otherwise aborts the
+  // program.
+ bool AddTestName(const char* file, int line, const char* case_name,
+ const char* test_name) {
+ if (registered_) {
+ fprintf(stderr, "%s Test %s must be defined before "
+ "REGISTER_TYPED_TEST_CASE_P(%s, ...).\n",
+ FormatFileLocation(file, line).c_str(), test_name, case_name);
+ fflush(stderr);
+ posix::Abort();
+ }
+ defined_test_names_.insert(test_name);
+ return true;
+ }
+
+ // Verifies that registered_tests match the test names in
+ // defined_test_names_; returns registered_tests if successful, or
+ // aborts the program otherwise.
+ const char* VerifyRegisteredTestNames(
+ const char* file, int line, const char* registered_tests);
+
+ private:
+ bool registered_;
+ ::std::set<const char*> defined_test_names_;
+};
+
+// Skips to the first non-space char after the first comma in 'str';
+// returns NULL if no comma is found in 'str'.
+inline const char* SkipComma(const char* str) {
+ const char* comma = strchr(str, ',');
+ if (comma == NULL) {
+ return NULL;
+ }
+ while (IsSpace(*(++comma))) {}
+ return comma;
+}
+
+// Returns the prefix of 'str' before the first comma in it; returns
+// the entire string if it contains no comma.
+inline std::string GetPrefixUntilComma(const char* str) {
+ const char* comma = strchr(str, ',');
+ return comma == NULL ? str : std::string(str, comma);
+}
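+
+// For example, given str = "Foo, Bar" (illustrative):
+//
+//   SkipComma(str)             // points at "Bar"
+//   GetPrefixUntilComma(str)   // "Foo"
+//   SkipComma("Foo")           // NULL: no comma in the string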
+
+// TypeParameterizedTest<Fixture, TestSel, Types>::Register()
+// registers a list of type-parameterized tests with Google Test. The
+// return value is insignificant - we just need to return something
+// such that we can call this function in a namespace scope.
+//
+// Implementation note: The GTEST_TEMPLATE_ macro declares a template
+// template parameter. It's defined in gtest-type-util.h.
+template <GTEST_TEMPLATE_ Fixture, class TestSel, typename Types>
+class TypeParameterizedTest {
+ public:
+ // 'index' is the index of the test in the type list 'Types'
+ // specified in INSTANTIATE_TYPED_TEST_CASE_P(Prefix, TestCase,
+ // Types). Valid values for 'index' are [0, N - 1] where N is the
+ // length of Types.
+ static bool Register(const char* prefix, const char* case_name,
+ const char* test_names, int index) {
+ typedef typename Types::Head Type;
+ typedef Fixture<Type> FixtureClass;
+ typedef typename GTEST_BIND_(TestSel, Type) TestClass;
+
+ // First, registers the first type-parameterized test in the type
+ // list.
+ MakeAndRegisterTestInfo(
+ (std::string(prefix) + (prefix[0] == '\0' ? "" : "/") + case_name + "/"
+ + StreamableToString(index)).c_str(),
+ GetPrefixUntilComma(test_names).c_str(),
+ GetTypeName<Type>().c_str(),
+ NULL, // No value parameter.
+ GetTypeId<FixtureClass>(),
+ TestClass::SetUpTestCase,
+ TestClass::TearDownTestCase,
+ new TestFactoryImpl<TestClass>);
+
+ // Next, recurses (at compile time) with the tail of the type list.
+ return TypeParameterizedTest<Fixture, TestSel, typename Types::Tail>
+ ::Register(prefix, case_name, test_names, index + 1);
+ }
+};
+
+// The base case for the compile time recursion.
+template <GTEST_TEMPLATE_ Fixture, class TestSel>
+class TypeParameterizedTest<Fixture, TestSel, Types0> {
+ public:
+ static bool Register(const char* /*prefix*/, const char* /*case_name*/,
+ const char* /*test_names*/, int /*index*/) {
+ return true;
+ }
+};
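+
+// For instance, with a two-element list Types2<int, double> the
+// recursion unrolls at compile time as (illustrative):
+//
+//   Register(prefix, case_name, test_names, 0)      // registers for int
+//     -> Register(prefix, case_name, test_names, 1) // registers for double
+//       -> the Types0 base case above, which just returns true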
+
+// TypeParameterizedTestCase<Fixture, Tests, Types>::Register()
+// registers *all combinations* of 'Tests' and 'Types' with Google
+// Test. The return value is insignificant - we just need to return
+// something such that we can call this function in a namespace scope.
+template <GTEST_TEMPLATE_ Fixture, typename Tests, typename Types>
+class TypeParameterizedTestCase {
+ public:
+ static bool Register(const char* prefix, const char* case_name,
+ const char* test_names) {
+ typedef typename Tests::Head Head;
+
+    // First, registers the first test in 'Tests' for each type in 'Types'.
+ TypeParameterizedTest<Fixture, Head, Types>::Register(
+ prefix, case_name, test_names, 0);
+
+ // Next, recurses (at compile time) with the tail of the test list.
+ return TypeParameterizedTestCase<Fixture, typename Tests::Tail, Types>
+ ::Register(prefix, case_name, SkipComma(test_names));
+ }
+};
+
+// The base case for the compile time recursion.
+template <GTEST_TEMPLATE_ Fixture, typename Types>
+class TypeParameterizedTestCase<Fixture, Templates0, Types> {
+ public:
+ static bool Register(const char* /*prefix*/, const char* /*case_name*/,
+ const char* /*test_names*/) {
+ return true;
+ }
+};
+
+#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
+
+// Returns the current OS stack trace as an std::string.
+//
+// The maximum number of stack frames to be included is specified by
+// the gtest_stack_trace_depth flag. The skip_count parameter
+// specifies the number of top frames to be skipped, which doesn't
+// count against the number of frames to be included.
+//
+// For example, if Foo() calls Bar(), which in turn calls
+// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
+// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
+GTEST_API_ std::string GetCurrentOsStackTraceExceptTop(
+ UnitTest* unit_test, int skip_count);
+
+// Helpers for suppressing warnings on unreachable code or constant
+// condition.
+
+// Always returns true.
+GTEST_API_ bool AlwaysTrue();
+
+// Always returns false.
+inline bool AlwaysFalse() { return !AlwaysTrue(); }
+
+// Helper for suppressing a false warning from Clang on a const char*
+// variable declared in a conditional expression always being NULL in
+// the else branch.
+struct GTEST_API_ ConstCharPtr {
+ ConstCharPtr(const char* str) : value(str) {}
+ operator bool() const { return true; }
+ const char* value;
+};
+
+// A simple Linear Congruential Generator for generating random
+// numbers with a uniform distribution. Unlike rand() and srand(), it
+// doesn't use global state (and therefore can't interfere with user
+// code). Unlike rand_r(), it's portable. An LCG isn't very random,
+// but it's good enough for our purposes.
+class GTEST_API_ Random {
+ public:
+ static const UInt32 kMaxRange = 1u << 31;
+
+ explicit Random(UInt32 seed) : state_(seed) {}
+
+ void Reseed(UInt32 seed) { state_ = seed; }
+
+ // Generates a random number from [0, range). Crashes if 'range' is
+ // 0 or greater than kMaxRange.
+ UInt32 Generate(UInt32 range);
+
+ private:
+ UInt32 state_;
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(Random);
+};
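+
+// Usage sketch (illustrative):
+//
+//   Random random(42);                      // deterministic for a fixed seed
+//   const UInt32 r = random.Generate(10);   // some value in [0, 10)
+//   random.Reseed(42);                      // replays the same sequence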
+
+// Defining a variable of type CompileAssertTypesEqual<T1, T2> will cause a
+// compiler error iff T1 and T2 are different types.
+template <typename T1, typename T2>
+struct CompileAssertTypesEqual;
+
+template <typename T>
+struct CompileAssertTypesEqual<T, T> {
+};
+
+// Removes the reference from a type if it is a reference type,
+// otherwise leaves it unchanged. This is the same as
+// tr1::remove_reference, which is not widely available yet.
+template <typename T>
+struct RemoveReference { typedef T type; }; // NOLINT
+template <typename T>
+struct RemoveReference<T&> { typedef T type; }; // NOLINT
+
+// A handy wrapper around RemoveReference that works when the argument
+// T depends on template parameters.
+#define GTEST_REMOVE_REFERENCE_(T) \
+ typename ::testing::internal::RemoveReference<T>::type
+
+// Removes const from a type if it is a const type, otherwise leaves
+// it unchanged. This is the same as tr1::remove_const, which is not
+// widely available yet.
+template <typename T>
+struct RemoveConst { typedef T type; }; // NOLINT
+template <typename T>
+struct RemoveConst<const T> { typedef T type; }; // NOLINT
+
+// MSVC 8.0, Sun C++, and IBM XL C++ have a bug which causes the above
+// definition to fail to remove the const in 'const int[3]' and 'const
+// char[3][4]'. The following specialization works around the bug.
+template <typename T, size_t N>
+struct RemoveConst<const T[N]> {
+ typedef typename RemoveConst<T>::type type[N];
+};
+
+#if defined(_MSC_VER) && _MSC_VER < 1400
+// This is the only specialization that allows VC++ 7.1 to remove const in
+// 'const int[3]' and 'const int[3][4]'.  However, it causes trouble with GCC
+// and thus needs to be conditionally compiled.
+template <typename T, size_t N>
+struct RemoveConst<T[N]> {
+ typedef typename RemoveConst<T>::type type[N];
+};
+#endif
+
+// A handy wrapper around RemoveConst that works when the argument
+// T depends on template parameters.
+#define GTEST_REMOVE_CONST_(T) \
+ typename ::testing::internal::RemoveConst<T>::type
+
+// Turns const U&, U&, const U, and U all into U.
+#define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \
+ GTEST_REMOVE_CONST_(GTEST_REMOVE_REFERENCE_(T))
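+
+// For example (illustrative):
+//
+//   GTEST_REMOVE_REFERENCE_AND_CONST_(const int&)   // int
+//   GTEST_REMOVE_REFERENCE_AND_CONST_(int&)         // int
+//   GTEST_REMOVE_REFERENCE_AND_CONST_(const int)    // int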
+
+// Adds reference to a type if it is not a reference type,
+// otherwise leaves it unchanged. This is the same as
+// tr1::add_reference, which is not widely available yet.
+template <typename T>
+struct AddReference { typedef T& type; }; // NOLINT
+template <typename T>
+struct AddReference<T&> { typedef T& type; }; // NOLINT
+
+// A handy wrapper around AddReference that works when the argument T
+// depends on template parameters.
+#define GTEST_ADD_REFERENCE_(T) \
+ typename ::testing::internal::AddReference<T>::type
+
+// Adds a reference to const on top of T as necessary. For example,
+// it transforms
+//
+// char ==> const char&
+// const char ==> const char&
+// char& ==> const char&
+// const char& ==> const char&
+//
+// The argument T must depend on some template parameters.
+#define GTEST_REFERENCE_TO_CONST_(T) \
+ GTEST_ADD_REFERENCE_(const GTEST_REMOVE_REFERENCE_(T))
+
+// ImplicitlyConvertible<From, To>::value is a compile-time bool
+// constant that's true iff type From can be implicitly converted to
+// type To.
+template <typename From, typename To>
+class ImplicitlyConvertible {
+ private:
+ // We need the following helper functions only for their types.
+ // They have no implementations.
+
+ // MakeFrom() is an expression whose type is From. We cannot simply
+ // use From(), as the type From may not have a public default
+ // constructor.
+ static From MakeFrom();
+
+ // These two functions are overloaded. Given an expression
+ // Helper(x), the compiler will pick the first version if x can be
+ // implicitly converted to type To; otherwise it will pick the
+ // second version.
+ //
+ // The first version returns a value of size 1, and the second
+ // version returns a value of size 2. Therefore, by checking the
+ // size of Helper(x), which can be done at compile time, we can tell
+ // which version of Helper() is used, and hence whether x can be
+ // implicitly converted to type To.
+ static char Helper(To);
+ static char (&Helper(...))[2]; // NOLINT
+
+ // We have to put the 'public' section after the 'private' section,
+ // or MSVC refuses to compile the code.
+ public:
+ // MSVC warns about implicitly converting from double to int for
+ // possible loss of data, so we need to temporarily disable the
+ // warning.
+#ifdef _MSC_VER
+# pragma warning(push) // Saves the current warning state.
+# pragma warning(disable:4244) // Temporarily disables warning 4244.
+
+ static const bool value =
+ sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1;
+# pragma warning(pop) // Restores the warning state.
+#elif defined(__BORLANDC__)
+ // C++Builder cannot use member overload resolution during template
+ // instantiation. The simplest workaround is to use its C++0x type traits
+ // functions (C++Builder 2009 and above only).
+ static const bool value = __is_convertible(From, To);
+#else
+ static const bool value =
+ sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1;
+#endif // _MSC_VER
+};
+template <typename From, typename To>
+const bool ImplicitlyConvertible<From, To>::value;
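+
+// Illustration (not part of the original header): the trait yields a
+// compile-time bool. Derived/Base below are example class names.
+//
+//   ImplicitlyConvertible<int, double>::value      // true
+//   ImplicitlyConvertible<double, int*>::value     // false
+//   ImplicitlyConvertible<Derived*, Base*>::value  // true, given public
+//                                                  // inheritance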
+
+// IsAProtocolMessage<T>::value is a compile-time bool constant that's
+// true iff T is type ProtocolMessage, proto2::Message, or a subclass
+// of those.
+template <typename T>
+struct IsAProtocolMessage
+ : public bool_constant<
+ ImplicitlyConvertible<const T*, const ::ProtocolMessage*>::value ||
+ ImplicitlyConvertible<const T*, const ::proto2::Message*>::value> {
+};
+
+// When the compiler sees expression IsContainerTest<C>(0), if C is an
+// STL-style container class, the first overload of IsContainerTest
+// will be viable (since both C::iterator* and C::const_iterator* are
+// valid types and NULL can be implicitly converted to them). It will
+// be picked over the second overload as 'int' is a perfect match for
+// the type of argument 0. If C::iterator or C::const_iterator is not
+// a valid type, the first overload is not viable, and the second
+// overload will be picked. Therefore, we can determine whether C is
+// a container class by checking the type of IsContainerTest<C>(0).
+// The value of the expression is insignificant.
+//
+// Note that we look for both C::iterator and C::const_iterator. The
+// reason is that C++ injects the name of a class as a member of the
+// class itself (e.g. you can refer to class iterator as either
+// 'iterator' or 'iterator::iterator'). If we look for C::iterator
+// only, for example, we would mistakenly think that a class named
+// iterator is an STL container.
+//
+// Also note that the simpler approach of overloading
+// IsContainerTest(typename C::const_iterator*) and
+// IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++.
+typedef int IsContainer;
+template <class C>
+IsContainer IsContainerTest(int /* dummy */,
+ typename C::iterator* /* it */ = NULL,
+ typename C::const_iterator* /* const_it */ = NULL) {
+ return 0;
+}
+
+typedef char IsNotContainer;
+template <class C>
+IsNotContainer IsContainerTest(long /* dummy */) { return '\0'; }
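+
+// Illustration (not part of the original header): the answer is read
+// off the size of the selected overload's return type, e.g.:
+//
+//   const bool vector_is_container =
+//       sizeof(IsContainerTest<std::vector<int> >(0)) == sizeof(IsContainer);
+//   // vector_is_container is true; for C = int the first overload is
+//   // not viable (int::iterator is ill-formed), so the result is false.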
+
+// EnableIf<condition>::type is void when 'Cond' is true, and
+// undefined when 'Cond' is false. To use SFINAE to make a function
+// overload only apply when a particular expression is true, add
+// "typename EnableIf<expression>::type* = 0" as the last parameter.
+template<bool> struct EnableIf;
+template<> struct EnableIf<true> { typedef void type; }; // NOLINT
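+
+// Illustration (not part of the original header): a hypothetical
+// function that only participates in overload resolution when a
+// compile-time condition holds.
+//
+//   template <size_t N>
+//   void NonEmptyOnly(typename EnableIf<(N > 0)>::type* = 0) {}
+//
+// NonEmptyOnly<3>() compiles; NonEmptyOnly<0>() is rejected by SFINAE.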
+
+// Utilities for native arrays.
+
+// ArrayEq() compares two k-dimensional native arrays using the
+// elements' operator==, where k can be any integer >= 0. When k is
+// 0, ArrayEq() degenerates into comparing a single pair of values.
+
+template <typename T, typename U>
+bool ArrayEq(const T* lhs, size_t size, const U* rhs);
+
+// This generic version is used when k is 0.
+template <typename T, typename U>
+inline bool ArrayEq(const T& lhs, const U& rhs) { return lhs == rhs; }
+
+// This overload is used when k >= 1.
+template <typename T, typename U, size_t N>
+inline bool ArrayEq(const T(&lhs)[N], const U(&rhs)[N]) {
+ return internal::ArrayEq(lhs, N, rhs);
+}
+
+// This helper reduces code bloat. If we instead put its logic inside
+// the previous ArrayEq() function, arrays with different sizes would
+// lead to different copies of the template code.
+template <typename T, typename U>
+bool ArrayEq(const T* lhs, size_t size, const U* rhs) {
+ for (size_t i = 0; i != size; i++) {
+ if (!internal::ArrayEq(lhs[i], rhs[i]))
+ return false;
+ }
+ return true;
+}
+
+// Finds the first element in the iterator range [begin, end) that
+// equals elem. Element may be a native array type itself.
+template <typename Iter, typename Element>
+Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) {
+ for (Iter it = begin; it != end; ++it) {
+ if (internal::ArrayEq(*it, elem))
+ return it;
+ }
+ return end;
+}
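+
+// Illustration (not part of the original header): because ArrayEq()
+// recurses per dimension, nested native arrays compare element-wise,
+// and ArrayAwareFind() can search for whole rows.
+//
+//   const int rows[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } };
+//   const int key[3] = { 4, 5, 6 };
+//   // Points at rows[1]; would be rows + 2 (the end) if absent.
+//   const int (*found)[3] = ArrayAwareFind(rows, rows + 2, key);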
+
+// CopyArray() copies a k-dimensional native array using the elements'
+// operator=, where k can be any integer >= 0. When k is 0,
+// CopyArray() degenerates into copying a single value.
+
+template <typename T, typename U>
+void CopyArray(const T* from, size_t size, U* to);
+
+// This generic version is used when k is 0.
+template <typename T, typename U>
+inline void CopyArray(const T& from, U* to) { *to = from; }
+
+// This overload is used when k >= 1.
+template <typename T, typename U, size_t N>
+inline void CopyArray(const T(&from)[N], U(*to)[N]) {
+ internal::CopyArray(from, N, *to);
+}
+
+// This helper reduces code bloat. If we instead put its logic inside
+// the previous CopyArray() function, arrays with different sizes
+// would lead to different copies of the template code.
+template <typename T, typename U>
+void CopyArray(const T* from, size_t size, U* to) {
+ for (size_t i = 0; i != size; i++) {
+ internal::CopyArray(from[i], to + i);
+ }
+}
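+
+// Illustration (not part of the original header): deep-copying a
+// two-dimensional native array.
+//
+//   const int src[2][2] = { { 1, 2 }, { 3, 4 } };
+//   int dst[2][2];
+//   internal::CopyArray(src, &dst);  // dst now holds the same values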
+
+// The relation between a NativeArray object (see below) and the
+// native array it represents.
+enum RelationToSource {
+ kReference, // The NativeArray references the native array.
+ kCopy // The NativeArray makes a copy of the native array and
+ // owns the copy.
+};
+
+// Adapts a native array to a read-only STL-style container. Instead
+// of the complete STL container concept, this adaptor only implements
+// members useful for Google Mock's container matchers. New members
+// should be added as needed. To simplify the implementation, we only
+// support Element being a raw type (i.e. having no top-level const or
+// reference modifier). It's the client's responsibility to satisfy
+// this requirement. Element can be an array type itself (hence
+// multi-dimensional arrays are supported).
+template <typename Element>
+class NativeArray {
+ public:
+ // STL-style container typedefs.
+ typedef Element value_type;
+ typedef Element* iterator;
+ typedef const Element* const_iterator;
+
+ // Constructs from a native array.
+ NativeArray(const Element* array, size_t count, RelationToSource relation) {
+ Init(array, count, relation);
+ }
+
+ // Copy constructor.
+ NativeArray(const NativeArray& rhs) {
+ Init(rhs.array_, rhs.size_, rhs.relation_to_source_);
+ }
+
+ ~NativeArray() {
+ // Ensures that the user doesn't instantiate NativeArray with a
+ // const or reference type.
+ static_cast<void>(StaticAssertTypeEqHelper<Element,
+ GTEST_REMOVE_REFERENCE_AND_CONST_(Element)>());
+ if (relation_to_source_ == kCopy)
+ delete[] array_;
+ }
+
+ // STL-style container methods.
+ size_t size() const { return size_; }
+ const_iterator begin() const { return array_; }
+ const_iterator end() const { return array_ + size_; }
+ bool operator==(const NativeArray& rhs) const {
+ return size() == rhs.size() &&
+ ArrayEq(begin(), size(), rhs.begin());
+ }
+
+ private:
+ // Initializes this object; makes a copy of the input array if
+ // 'relation' is kCopy.
+ void Init(const Element* array, size_t a_size, RelationToSource relation) {
+ if (relation == kReference) {
+ array_ = array;
+ } else {
+ Element* const copy = new Element[a_size];
+ CopyArray(array, a_size, copy);
+ array_ = copy;
+ }
+ size_ = a_size;
+ relation_to_source_ = relation;
+ }
+
+ const Element* array_;
+ size_t size_;
+ RelationToSource relation_to_source_;
+
+ GTEST_DISALLOW_ASSIGN_(NativeArray);
+};
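+
+// Illustration (not part of the original header): a hypothetical use,
+// adapting a stack array for STL-style read-only iteration.
+//
+//   const int values[3] = { 1, 2, 3 };
+//   NativeArray<int> adapter(values, 3, kReference);
+//   for (NativeArray<int>::const_iterator it = adapter.begin();
+//        it != adapter.end(); ++it) {
+//     // *it visits 1, 2, 3; with kCopy the adapter would own a copy.
+//   }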
+
+} // namespace internal
+} // namespace testing
+
+#define GTEST_MESSAGE_AT_(file, line, message, result_type) \
+ ::testing::internal::AssertHelper(result_type, file, line, message) \
+ = ::testing::Message()
+
+#define GTEST_MESSAGE_(message, result_type) \
+ GTEST_MESSAGE_AT_(__FILE__, __LINE__, message, result_type)
+
+#define GTEST_FATAL_FAILURE_(message) \
+ return GTEST_MESSAGE_(message, ::testing::TestPartResult::kFatalFailure)
+
+#define GTEST_NONFATAL_FAILURE_(message) \
+ GTEST_MESSAGE_(message, ::testing::TestPartResult::kNonFatalFailure)
+
+#define GTEST_SUCCESS_(message) \
+ GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess)
+
+// Suppresses MSVC warning 4702 (unreachable code) for the code following
+// 'statement' if it returns or throws (or doesn't return or throw in some
+// situations).
+#define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \
+ if (::testing::internal::AlwaysTrue()) { statement; }
+
+#define GTEST_TEST_THROW_(statement, expected_exception, fail) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::ConstCharPtr gtest_msg = "") { \
+ bool gtest_caught_expected = false; \
+ try { \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ } \
+ catch (expected_exception const&) { \
+ gtest_caught_expected = true; \
+ } \
+ catch (...) { \
+ gtest_msg.value = \
+ "Expected: " #statement " throws an exception of type " \
+ #expected_exception ".\n Actual: it throws a different type."; \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
+ } \
+ if (!gtest_caught_expected) { \
+ gtest_msg.value = \
+ "Expected: " #statement " throws an exception of type " \
+ #expected_exception ".\n Actual: it throws nothing."; \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
+ } \
+ } else \
+ GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__): \
+ fail(gtest_msg.value)
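+
+// For reference: elsewhere (in gtest.h) the public throw assertions are
+// built on this macro, essentially as:
+//
+//   #define EXPECT_THROW(statement, expected_exception) \
+//     GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_)
+//   #define ASSERT_THROW(statement, expected_exception) \
+//     GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_)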
+
+#define GTEST_TEST_NO_THROW_(statement, fail) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::AlwaysTrue()) { \
+ try { \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ } \
+ catch (...) { \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
+ } \
+ } else \
+ GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__): \
+ fail("Expected: " #statement " doesn't throw an exception.\n" \
+ " Actual: it throws.")
+
+#define GTEST_TEST_ANY_THROW_(statement, fail) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::AlwaysTrue()) { \
+ bool gtest_caught_any = false; \
+ try { \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ } \
+ catch (...) { \
+ gtest_caught_any = true; \
+ } \
+ if (!gtest_caught_any) { \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__); \
+ } \
+ } else \
+ GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__): \
+ fail("Expected: " #statement " throws an exception.\n" \
+ " Actual: it doesn't.")
+
+
+// Implements Boolean test assertions such as EXPECT_TRUE. expression can be
+// either a boolean expression or an AssertionResult. text is a textual
+// representation of expression as it was passed into EXPECT_TRUE.
+#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (const ::testing::AssertionResult gtest_ar_ = \
+ ::testing::AssertionResult(expression)) \
+ ; \
+ else \
+ fail(::testing::internal::GetBoolAssertionFailureMessage(\
+ gtest_ar_, text, #actual, #expected).c_str())
+
+#define GTEST_TEST_NO_FATAL_FAILURE_(statement, fail) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::AlwaysTrue()) { \
+ ::testing::internal::HasNewFatalFailureHelper gtest_fatal_failure_checker; \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ if (gtest_fatal_failure_checker.has_new_fatal_failure()) { \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__); \
+ } \
+ } else \
+ GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__): \
+ fail("Expected: " #statement " doesn't generate new fatal " \
+ "failures in the current thread.\n" \
+ " Actual: it does.")
+
+// Expands to the name of the class that implements the given test.
+#define GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
+ test_case_name##_##test_name##_Test
+
+// Helper macro for defining tests.
+#define GTEST_TEST_(test_case_name, test_name, parent_class, parent_id)\
+class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\
+ public:\
+ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\
+ private:\
+ virtual void TestBody();\
+ static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(\
+ GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\
+};\
+\
+::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\
+ ::test_info_ =\
+ ::testing::internal::MakeAndRegisterTestInfo(\
+ #test_case_name, #test_name, NULL, NULL, \
+ (parent_id), \
+ parent_class::SetUpTestCase, \
+ parent_class::TearDownTestCase, \
+ new ::testing::internal::TestFactoryImpl<\
+ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\
+void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()
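+
+// For reference: the public TEST() and TEST_F() macros (defined in
+// gtest.h, not here) forward to GTEST_TEST_, roughly as:
+//
+//   #define TEST(test_case_name, test_name) \
+//     GTEST_TEST_(test_case_name, test_name, \
+//                 ::testing::Test, ::testing::internal::GetTestTypeId())
+//   #define TEST_F(test_fixture, test_name) \
+//     GTEST_TEST_(test_fixture, test_name, test_fixture, \
+//                 ::testing::internal::GetTypeId<test_fixture>())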
+
+#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file defines the public API for death tests. It is
+// #included by gtest.h so a user doesn't need to include this
+// directly.
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file defines internal utilities needed for implementing
+// death tests. They are subject to change without notice.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+
+
+#include <stdio.h>
+
+namespace testing {
+namespace internal {
+
+GTEST_DECLARE_string_(internal_run_death_test);
+
+// Names of the flags (needed for parsing Google Test flags).
+const char kDeathTestStyleFlag[] = "death_test_style";
+const char kDeathTestUseFork[] = "death_test_use_fork";
+const char kInternalRunDeathTestFlag[] = "internal_run_death_test";
+
+#if GTEST_HAS_DEATH_TEST
+
+// DeathTest is a class that hides much of the complexity of the
+// GTEST_DEATH_TEST_ macro. It is abstract; its static Create method
+// returns a concrete class that depends on the prevailing death test
+// style, as defined by the --gtest_death_test_style and/or
+// --gtest_internal_run_death_test flags.
+
+// In describing the results of death tests, these terms are used with
+// the corresponding definitions:
+//
+// exit status: The integer exit information in the format specified
+// by wait(2)
+// exit code: The integer code passed to exit(3), _exit(2), or
+// returned from main()
+class GTEST_API_ DeathTest {
+ public:
+ // Create returns false if there was an error determining the
+ // appropriate action to take for the current death test; for example,
+ // if the gtest_death_test_style flag is set to an invalid value.
+ // The LastMessage method will return a more detailed message in that
+ // case. Otherwise, the DeathTest pointer pointed to by the "test"
+ // argument is set. If the death test should be skipped, the pointer
+ // is set to NULL; otherwise, it is set to the address of a new concrete
+ // DeathTest object that controls the execution of the current test.
+ static bool Create(const char* statement, const RE* regex,
+ const char* file, int line, DeathTest** test);
+ DeathTest();
+ virtual ~DeathTest() { }
+
+ // A helper class that aborts a death test when it's deleted.
+ class ReturnSentinel {
+ public:
+ explicit ReturnSentinel(DeathTest* test) : test_(test) { }
+ ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); }
+ private:
+ DeathTest* const test_;
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel);
+ } GTEST_ATTRIBUTE_UNUSED_;
+
+ // An enumeration of possible roles that may be taken when a death
+ // test is encountered. EXECUTE means that the death test logic should
+ // be executed immediately. OVERSEE means that the program should prepare
+ // the appropriate environment for a child process to execute the death
+ // test, then wait for it to complete.
+ enum TestRole { OVERSEE_TEST, EXECUTE_TEST };
+
+ // An enumeration of the three reasons that a test might be aborted.
+ enum AbortReason {
+ TEST_ENCOUNTERED_RETURN_STATEMENT,
+ TEST_THREW_EXCEPTION,
+ TEST_DID_NOT_DIE
+ };
+
+ // Assumes one of the above roles.
+ virtual TestRole AssumeRole() = 0;
+
+ // Waits for the death test to finish and returns its status.
+ virtual int Wait() = 0;
+
+ // Returns true if the death test passed; that is, the test process
+ // exited during the test, its exit status matches a user-supplied
+ // predicate, and its stderr output matches a user-supplied regular
+ // expression.
+ // The user-supplied predicate may be a macro expression rather
+ // than a function pointer or functor, or else Wait and Passed could
+ // be combined.
+ virtual bool Passed(bool exit_status_ok) = 0;
+
+ // Signals that the death test did not die as expected.
+ virtual void Abort(AbortReason reason) = 0;
+
+ // Returns a human-readable outcome message regarding the outcome of
+ // the last death test.
+ static const char* LastMessage();
+
+ static void set_last_death_test_message(const std::string& message);
+
+ private:
+ // A string containing a description of the outcome of the last death test.
+ static std::string last_death_test_message_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest);
+};
+
+// Factory interface for death tests. May be mocked out for testing.
+class DeathTestFactory {
+ public:
+ virtual ~DeathTestFactory() { }
+ virtual bool Create(const char* statement, const RE* regex,
+ const char* file, int line, DeathTest** test) = 0;
+};
+
+// A concrete DeathTestFactory implementation for normal use.
+class DefaultDeathTestFactory : public DeathTestFactory {
+ public:
+ virtual bool Create(const char* statement, const RE* regex,
+ const char* file, int line, DeathTest** test);
+};
+
+// Returns true if exit_status describes a process that was terminated
+// by a signal, or exited normally with a nonzero exit code.
+GTEST_API_ bool ExitedUnsuccessfully(int exit_status);
+
+// Traps C++ exceptions escaping statement and reports them as test
+// failures. Note that trapping SEH exceptions is not implemented here.
+# if GTEST_HAS_EXCEPTIONS
+# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
+ try { \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ } catch (const ::std::exception& gtest_exception) { \
+ fprintf(\
+ stderr, \
+ "\n%s: Caught std::exception-derived exception escaping the " \
+ "death test statement. Exception message: %s\n", \
+ ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \
+ gtest_exception.what()); \
+ fflush(stderr); \
+ death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
+ } catch (...) { \
+ death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \
+ }
+
+# else
+# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
+
+# endif
+
+// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*,
+// ASSERT_EXIT*, and EXPECT_EXIT*.
+# define GTEST_DEATH_TEST_(statement, predicate, regex, fail) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::AlwaysTrue()) { \
+ const ::testing::internal::RE& gtest_regex = (regex); \
+ ::testing::internal::DeathTest* gtest_dt; \
+ if (!::testing::internal::DeathTest::Create(#statement, &gtest_regex, \
+ __FILE__, __LINE__, &gtest_dt)) { \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
+ } \
+ if (gtest_dt != NULL) { \
+ ::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \
+ gtest_dt_ptr(gtest_dt); \
+ switch (gtest_dt->AssumeRole()) { \
+ case ::testing::internal::DeathTest::OVERSEE_TEST: \
+ if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \
+ } \
+ break; \
+ case ::testing::internal::DeathTest::EXECUTE_TEST: { \
+ ::testing::internal::DeathTest::ReturnSentinel \
+ gtest_sentinel(gtest_dt); \
+ GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \
+ gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \
+ break; \
+ } \
+ default: \
+ break; \
+ } \
+ } \
+ } else \
+ GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__): \
+ fail(::testing::internal::DeathTest::LastMessage())
+// The symbol "fail" here expands to something into which a message
+// can be streamed.
+
+// This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in
+// NDEBUG mode. In this case we need the statements to be executed, the regex is
+// ignored, and the macro must accept a streamed message even though the message
+// is never printed.
+# define GTEST_EXECUTE_STATEMENT_(statement, regex) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::AlwaysTrue()) { \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ } else \
+ ::testing::Message()
+
+// A class representing the parsed contents of the
+// --gtest_internal_run_death_test flag, as it existed when
+// RUN_ALL_TESTS was called.
+class InternalRunDeathTestFlag {
+ public:
+ InternalRunDeathTestFlag(const std::string& a_file,
+ int a_line,
+ int an_index,
+ int a_write_fd)
+ : file_(a_file), line_(a_line), index_(an_index),
+ write_fd_(a_write_fd) {}
+
+ ~InternalRunDeathTestFlag() {
+ if (write_fd_ >= 0)
+ posix::Close(write_fd_);
+ }
+
+ const std::string& file() const { return file_; }
+ int line() const { return line_; }
+ int index() const { return index_; }
+ int write_fd() const { return write_fd_; }
+
+ private:
+ std::string file_;
+ int line_;
+ int index_;
+ int write_fd_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag);
+};
+
+// Returns a newly created InternalRunDeathTestFlag object with fields
+// initialized from the GTEST_FLAG(internal_run_death_test) flag if
+// the flag is specified; otherwise returns NULL.
+InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();
+
+#else // GTEST_HAS_DEATH_TEST
+
+// This macro is used for implementing macros such as
+// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
+// death tests are not supported. Those macros must compile on such systems
+// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on
+// systems that support death tests. This allows one to write such a macro
+// on a system that does not support death tests and be sure that it will
+// compile on a death-test supporting system.
+//
+// Parameters:
+// statement - A statement that a macro such as EXPECT_DEATH would test
+// for program termination. This macro has to make sure this
+// statement is compiled but not executed, to ensure that
+// EXPECT_DEATH_IF_SUPPORTED compiles with a certain
+// parameter iff EXPECT_DEATH compiles with it.
+// regex - A regex that a macro such as EXPECT_DEATH would use to test
+// the output of statement. This parameter has to be
+// compiled but not evaluated by this macro, to ensure that
+// this macro only accepts expressions that a macro such as
+// EXPECT_DEATH would accept.
+// terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED
+// and a return statement for ASSERT_DEATH_IF_SUPPORTED.
+// This ensures that ASSERT_DEATH_IF_SUPPORTED will not
+// compile inside functions where ASSERT_DEATH doesn't
+// compile.
+//
+// The branch that has an always false condition is used to ensure that
+// statement and regex are compiled (and thus syntactically correct) but
+// never executed. The unreachable code macro protects the terminator
+// statement from generating an 'unreachable code' warning in case
+// statement unconditionally returns or throws. The Message constructor at
+// the end allows the syntax of streaming additional messages into the
+// macro, for compile-time compatibility with EXPECT_DEATH/ASSERT_DEATH.
+# define GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, terminator) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (::testing::internal::AlwaysTrue()) { \
+ GTEST_LOG_(WARNING) \
+ << "Death tests are not supported on this platform.\n" \
+ << "Statement '" #statement "' cannot be verified."; \
+ } else if (::testing::internal::AlwaysFalse()) { \
+ ::testing::internal::RE::PartialMatch(".*", (regex)); \
+ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+ terminator; \
+ } else \
+ ::testing::Message()
+
+#endif // GTEST_HAS_DEATH_TEST
+
+} // namespace internal
+} // namespace testing
+
+#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+
+namespace testing {
+
+// This flag controls the style of death tests. Valid values are "threadsafe",
+// meaning that the death test child process will re-execute the test binary
+// from the start, running only a single death test, or "fast",
+// meaning that the child process will execute the test logic immediately
+// after forking.
+GTEST_DECLARE_string_(death_test_style);
+
+#if GTEST_HAS_DEATH_TEST
+
+namespace internal {
+
+// Returns a Boolean value indicating whether the caller is currently
+// executing in the context of the death test child process. Tools such as
+// Valgrind heap checkers may need this to modify their behavior in death
+// tests. IMPORTANT: This is an internal utility. Using it may break the
+// implementation of death tests. User code MUST NOT use it.
+GTEST_API_ bool InDeathTestChild();
+
+} // namespace internal
+
+// The following macros are useful for writing death tests.
+
+// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is
+// executed:
+//
+// 1. It generates a warning if there is more than one active
+// thread. This is because it's safe to fork() or clone() only
+// when there is a single thread.
+//
+// 2. The parent process clone()s a sub-process and runs the death
+// test in it; the sub-process exits with code 0 at the end of the
+// death test, if it hasn't exited already.
+//
+// 3. The parent process waits for the sub-process to terminate.
+//
+// 4. The parent process checks the exit code and error message of
+// the sub-process.
+//
+// Examples:
+//
+// ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number");
+// for (int i = 0; i < 5; i++) {
+// EXPECT_DEATH(server.ProcessRequest(i),
+// "Invalid request .* in ProcessRequest()")
+// << "Failed to die on request " << i;
+// }
+//
+// ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting");
+//
+// bool KilledBySIGHUP(int exit_code) {
+// return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP;
+// }
+//
+// ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
+//
+// On the regular expressions used in death tests:
+//
+// On POSIX-compliant systems (*nix), we use the <regex.h> library,
+// which uses the POSIX extended regex syntax.
+//
+// On other platforms (e.g. Windows), we only support a simple regex
+// syntax implemented as part of Google Test. This limited
+// implementation should be enough most of the time when writing
+// death tests; though it lacks many features you can find in PCRE
+// or POSIX extended regex syntax. For example, we don't support
+// union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and
+// repetition count ("x{5,7}"), among others.
+//
+// Below is the syntax that we do support. We chose it to be a
+// subset of both PCRE and POSIX extended regex, so it's easy to
+// learn wherever you come from. In the following: 'A' denotes a
+// literal character, period (.), or a single \\ escape sequence;
+// 'x' and 'y' denote regular expressions; 'm' and 'n' are for
+// natural numbers.
+//
+// c matches any literal character c
+// \\d matches any decimal digit
+// \\D matches any character that's not a decimal digit
+// \\f matches \f
+// \\n matches \n
+// \\r matches \r
+// \\s matches any ASCII whitespace, including \n
+// \\S matches any character that's not a whitespace
+// \\t matches \t
+// \\v matches \v
+// \\w matches any letter, _, or decimal digit
+// \\W matches any character that \\w doesn't match
+// \\c matches any literal character c, which must be a punctuation character
+// . matches any single character except \n
+// A? matches 0 or 1 occurrences of A
+// A* matches 0 or many occurrences of A
+// A+ matches 1 or many occurrences of A
+// ^ matches the beginning of a string (not that of each line)
+// $ matches the end of a string (not that of each line)
+// xy matches x followed by y
+//
+// If you accidentally use PCRE or POSIX extended regex features
+// not implemented by us, you will get a run-time failure. In that
+// case, please try to rewrite your regular expression within the
+// above syntax.
+//
+// This implementation is *not* meant to be as highly tuned or robust
+// as a compiled regex library, but should perform well enough for a
+// death test, which already incurs significant overhead by launching
+// a child process.
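+//
+// Illustration (not part of the original comment): the double
+// backslashes above are already C string escapes, so a death test
+// regex is written the same way in source. The names are hypothetical:
+//
+//   // Matches stderr output such as "Invalid port 8080".
+//   EXPECT_DEATH(server.Start(-1), "Invalid port \\d+");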
+//
+// Known caveats:
+//
+// A "threadsafe" style death test obtains the path to the test
+// program from argv[0] and re-executes it in the sub-process. For
+// simplicity, the current implementation doesn't search the PATH
+// when launching the sub-process. This means that the user must
+// invoke the test program via a path that contains at least one
+// path separator (e.g. path/to/foo_test and
+// /absolute/path/to/bar_test are fine, but foo_test is not). This
+// is rarely a problem as people usually don't put the test binary
+// directory in PATH.
+//
+// TODO(wan@google.com): make thread-safe death tests search the PATH.
+
+// Asserts that a given statement causes the program to exit, with an
+// integer exit status that satisfies predicate, and emitting error output
+// that matches regex.
+# define ASSERT_EXIT(statement, predicate, regex) \
+ GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_)
+
+// Like ASSERT_EXIT, but continues on to successive tests in the
+// test case, if any:
+# define EXPECT_EXIT(statement, predicate, regex) \
+ GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_)
+
+// Asserts that a given statement causes the program to exit, either by
+// explicitly exiting with a nonzero exit code or being killed by a
+// signal, and emitting error output that matches regex.
+# define ASSERT_DEATH(statement, regex) \
+ ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
+
+// Like ASSERT_DEATH, but continues on to successive tests in the
+// test case, if any:
+# define EXPECT_DEATH(statement, regex) \
+ EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
+
+// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:
+
+// Tests that an exit code describes a normal exit with a given exit code.
+class GTEST_API_ ExitedWithCode {
+ public:
+ explicit ExitedWithCode(int exit_code);
+ bool operator()(int exit_status) const;
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ExitedWithCode& other);
+
+ const int exit_code_;
+};
+
+# if !GTEST_OS_WINDOWS
+// Tests that an exit code describes an exit due to termination by a
+// given signal.
+class GTEST_API_ KilledBySignal {
+ public:
+ explicit KilledBySignal(int signum);
+ bool operator()(int exit_status) const;
+ private:
+ const int signum_;
+};
+# endif // !GTEST_OS_WINDOWS
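+
+// Illustration (not part of the original header): hypothetical uses of
+// the two predicates with EXPECT_EXIT/ASSERT_EXIT.
+//
+//   EXPECT_EXIT(NormalShutdown(), ::testing::ExitedWithCode(0), "bye");
+//   // On non-Windows platforms only:
+//   ASSERT_EXIT(RaiseSegv(), ::testing::KilledBySignal(SIGSEGV), ".*");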
+
+// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode.
+// The death testing framework causes this to have interesting semantics,
+// since the side effects of the call are only visible in opt mode, and not
+// in debug mode.
+//
+// In practice, this can be used to test functions that utilize the
+// LOG(DFATAL) macro using the following style:
+//
+// int DieInDebugOr12(int* sideeffect) {
+// if (sideeffect) {
+// *sideeffect = 12;
+// }
+// LOG(DFATAL) << "death";
+// return 12;
+// }
+//
+// TEST(TestCase, TestDieOr12WorksInDgbAndOpt) {
+// int sideeffect = 0;
+// // Only asserts in dbg.
+// EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death");
+//
+// #ifdef NDEBUG
+// // opt-mode has sideeffect visible.
+// EXPECT_EQ(12, sideeffect);
+// #else
+// // dbg-mode no visible sideeffect.
+// EXPECT_EQ(0, sideeffect);
+// #endif
+// }
+//
+// This will assert that DieInDebugOr12() crashes in debug
+// mode, usually due to a DCHECK or LOG(DFATAL), but returns the
+// appropriate fallback value (12 in this case) in opt mode. If you
+// need to test that a function has appropriate side-effects in opt
+// mode, include assertions against the side-effects. A general
+// pattern for this is:
+//
+// EXPECT_DEBUG_DEATH({
+// // Side-effects here will have an effect after this statement in
+// // opt mode, but none in debug mode.
+// EXPECT_EQ(12, DieInDebugOr12(&sideeffect));
+// }, "death");
+//
+# ifdef NDEBUG
+
+# define EXPECT_DEBUG_DEATH(statement, regex) \
+ GTEST_EXECUTE_STATEMENT_(statement, regex)
+
+# define ASSERT_DEBUG_DEATH(statement, regex) \
+ GTEST_EXECUTE_STATEMENT_(statement, regex)
+
+# else
+
+# define EXPECT_DEBUG_DEATH(statement, regex) \
+ EXPECT_DEATH(statement, regex)
+
+# define ASSERT_DEBUG_DEATH(statement, regex) \
+ ASSERT_DEATH(statement, regex)
+
+# endif // NDEBUG for EXPECT_DEBUG_DEATH
+#endif // GTEST_HAS_DEATH_TEST
+
+// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and
+// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if
+// death tests are supported; otherwise they just issue a warning. This is
+// useful when you are combining death test assertions with normal test
+// assertions in one test.
+#if GTEST_HAS_DEATH_TEST
+# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
+ EXPECT_DEATH(statement, regex)
+# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
+ ASSERT_DEATH(statement, regex)
+#else
+# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
+ GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, )
+# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
+ GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, return)
+#endif
+
+} // namespace testing
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+// This file was GENERATED by command:
+// pump.py gtest-param-test.h.pump
+// DO NOT EDIT BY HAND!!!
+
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: vladl@google.com (Vlad Losev)
+//
+// Macros and functions for implementing parameterized tests
+// in Google C++ Testing Framework (Google Test)
+//
+// This file is generated by a SCRIPT. DO NOT EDIT BY HAND!
+//
+#ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+#define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+
+
+// Value-parameterized tests allow you to test your code with different
+// parameters without writing multiple copies of the same test.
+//
+// Here is how you use value-parameterized tests:
+
+#if 0
+
+// To write value-parameterized tests, first you should define a fixture
+// class. It is usually derived from testing::TestWithParam<T> (see below for
+// another inheritance scheme that's sometimes useful in more complicated
+// class hierarchies), where T is the type of your parameter values.
+// TestWithParam<T> is itself derived from testing::Test. T can be any
+// copyable type. If it's a raw pointer, you are responsible for managing the
+// lifespan of the pointed values.
+
+class FooTest : public ::testing::TestWithParam<const char*> {
+ // You can implement all the usual class fixture members here.
+};
+
+// Then, use the TEST_P macro to define as many parameterized tests
+// for this fixture as you want. The _P suffix is for "parameterized"
+// or "pattern", whichever you prefer to think.
+
+TEST_P(FooTest, DoesBlah) {
+ // Inside a test, access the test parameter with the GetParam() method
+ // of the TestWithParam<T> class:
+ EXPECT_TRUE(foo.Blah(GetParam()));
+ ...
+}
+
+TEST_P(FooTest, HasBlahBlah) {
+ ...
+}
+
+// Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test
+// case with any set of parameters you want. Google Test defines a number
+// of functions for generating test parameters. They return what we call
+// (surprise!) parameter generators. Here is a summary of them, which
+// are all in the testing namespace:
+//
+//
+// Range(begin, end [, step]) - Yields values {begin, begin+step,
+// begin+step+step, ...}. The values do not
+// include end. step defaults to 1.
+// Values(v1, v2, ..., vN) - Yields values {v1, v2, ..., vN}.
+// ValuesIn(container) - Yields values from a C-style array, an STL
+// ValuesIn(begin,end) container, or an iterator range [begin, end).
+// Bool() - Yields sequence {false, true}.
+// Combine(g1, g2, ..., gN) - Yields all combinations (the Cartesian product
+// for the math savvy) of the values generated
+// by the N generators.
+//
+// For more details, see comments at the definitions of these functions below
+// in this file.
+//
+// The following statement will instantiate tests from the FooTest test case
+// each with parameter values "meeny", "miny", and "moe".
+
+INSTANTIATE_TEST_CASE_P(InstantiationName,
+ FooTest,
+ Values("meeny", "miny", "moe"));
+
+// To distinguish different instances of the pattern (yes, you
+// can instantiate it more than once), the first argument to the
+// INSTANTIATE_TEST_CASE_P macro is a prefix that will be added to the
+// actual test case name. Remember to pick unique prefixes for different
+// instantiations. The tests from the instantiation above will have
+// these names:
+//
+// * InstantiationName/FooTest.DoesBlah/0 for "meeny"
+// * InstantiationName/FooTest.DoesBlah/1 for "miny"
+// * InstantiationName/FooTest.DoesBlah/2 for "moe"
+// * InstantiationName/FooTest.HasBlahBlah/0 for "meeny"
+// * InstantiationName/FooTest.HasBlahBlah/1 for "miny"
+// * InstantiationName/FooTest.HasBlahBlah/2 for "moe"
+//
+// You can use these names in --gtest_filter.
+//
+// This statement will instantiate all tests from FooTest again, each
+// with parameter values "cat" and "dog":
+
+const char* pets[] = {"cat", "dog"};
+INSTANTIATE_TEST_CASE_P(AnotherInstantiationName, FooTest, ValuesIn(pets));
+
+// The tests from the instantiation above will have these names:
+//
+// * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat"
+// * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog"
+// * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat"
+// * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog"
+//
+// Please note that INSTANTIATE_TEST_CASE_P will instantiate all tests
+// in the given test case, whether their definitions come before or
+// AFTER the INSTANTIATE_TEST_CASE_P statement.
+//
+// Please also note that generator expressions (including parameters to the
+// generators) are evaluated in InitGoogleTest(), after main() has started.
+// This allows the user, on one hand, to adjust generator parameters in
+// order to dynamically determine the set of tests to run and, on the
+// other hand, to inspect the generated tests with the Google Test
+// reflection API before RUN_ALL_TESTS() is executed.
+//
+// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc
+// for more examples.
+//
+// In the future, we plan to publish the API for defining new parameter
+// generators. But for now this interface remains part of the internal
+// implementation and is subject to change.
+//
+//
+// A parameterized test fixture must be derived from testing::Test and from
+// testing::WithParamInterface<T>, where T is the type of the parameter
+// values. Inheriting from TestWithParam<T> satisfies that requirement because
+// TestWithParam<T> inherits from both Test and WithParamInterface. In more
+// complicated hierarchies, however, it is occasionally useful to inherit
+// separately from Test and WithParamInterface. For example:
+
+class BaseTest : public ::testing::Test {
+ // You can inherit all the usual members for a non-parameterized test
+ // fixture here.
+};
+
+class DerivedTest : public BaseTest, public ::testing::WithParamInterface<int> {
+ // The usual test fixture members go here too.
+};
+
+TEST_F(BaseTest, HasFoo) {
+ // This is an ordinary non-parameterized test.
+}
+
+TEST_P(DerivedTest, DoesBlah) {
+ // GetParam works just the same here as if you inherit from TestWithParam.
+ EXPECT_TRUE(foo.Blah(GetParam()));
+}
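+
+// As with any TEST_P, the tests in DerivedTest will not run until the
+// fixture is instantiated. A sketch (the name and values are examples):
+
+INSTANTIATE_TEST_CASE_P(SomeInstantiation, DerivedTest,
+ ::testing::Range(0, 3));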
+
+#endif // 0
+
+
+#if !GTEST_OS_SYMBIAN
+# include <utility>
+#endif
+
+// scripts/fuse_gtest.py depends on gtest's own header being #included
+// *unconditionally*. Therefore these #includes cannot be moved
+// inside #if GTEST_HAS_PARAM_TEST.
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: vladl@google.com (Vlad Losev)
+
+// Type and function utilities for implementing parameterized tests.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+
+#include <iterator>
+#include <utility>
+#include <vector>
+
+// scripts/fuse_gtest.py depends on gtest's own header being #included
+// *unconditionally*. Therefore these #includes cannot be moved
+// inside #if GTEST_HAS_PARAM_TEST.
+// Copyright 2003 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Dan Egnor (egnor@google.com)
+//
+// A "smart" pointer type with reference tracking. Every pointer to a
+// particular object is kept on a circular linked list. When the last pointer
+// to an object is destroyed or reassigned, the object is deleted.
+//
+// Used properly, this deletes the object when the last reference goes away.
+// There are several caveats:
+// - Like all reference counting schemes, cycles lead to leaks.
+// - Each smart pointer is actually two pointers (twice the size of a raw pointer).
+// - Every time a pointer is assigned, the entire list of pointers to that
+// object is traversed. This class is therefore NOT SUITABLE when there
+// will often be more than two or three pointers to a particular object.
+// - References are only tracked as long as linked_ptr<> objects are copied.
+// If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS
+// will happen (double deletion).
+//
+// A good use of this class is storing object references in STL containers.
+// You can safely put linked_ptr<> in a vector<>.
+// Other uses may not be as good.
+//
+// Note: If you use an incomplete type with linked_ptr<>, the class
+// *containing* linked_ptr<> must have a constructor and destructor (even
+// if they do nothing!).
+//
+// Bill Gibbons suggested we use something like this.
+//
+// Thread Safety:
+// Unlike other linked_ptr implementations, in this implementation
+// a linked_ptr object is thread-safe in the sense that:
+// - it's safe to copy linked_ptr objects concurrently,
+// - it's safe to copy *from* a linked_ptr and read its underlying
+// raw pointer (e.g. via get()) concurrently, and
+// - it's safe to write to two linked_ptrs that point to the same
+// shared object concurrently.
+// TODO(wan@google.com): rename this to safe_linked_ptr to avoid
+// confusion with normal linked_ptr.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
+
+#include <stdlib.h>
+#include <assert.h>
+
+
+namespace testing {
+namespace internal {
+
+// Protects copying of all linked_ptr objects.
+GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex);
+
+// This is used internally by all instances of linked_ptr<>. It needs to be
+// a non-template class because different types of linked_ptr<> can refer to
+// the same object (linked_ptr<Superclass>(obj) vs linked_ptr<Subclass>(obj)).
+// Since different types of linked_ptr must be able to participate in the
+// same circular linked list, we need a single non-template class type here.
+//
+// DO NOT USE THIS CLASS DIRECTLY YOURSELF. Use linked_ptr<T>.
+class linked_ptr_internal {
+ public:
+ // Create a new circle that includes only this instance.
+ void join_new() {
+ next_ = this;
+ }
+
+ // Many linked_ptr operations may change p.link_ for some linked_ptr
+ // variable p in the same circle as this object. Therefore we need
+ // to prevent two such operations from occurring concurrently.
+ //
+ // Note that different types of linked_ptr objects can coexist in a
+ // circle (e.g. linked_ptr<Base>, linked_ptr<Derived1>, and
+ // linked_ptr<Derived2>). Therefore we must use a single mutex to
+ // protect all linked_ptr objects. This can create serious
+ // contention in production code, but is acceptable in a testing
+ // framework.
+
+ // Join an existing circle.
+ void join(linked_ptr_internal const* ptr)
+ GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
+ MutexLock lock(&g_linked_ptr_mutex);
+
+ linked_ptr_internal const* p = ptr;
+ while (p->next_ != ptr) p = p->next_;
+ p->next_ = this;
+ next_ = ptr;
+ }
+
+ // Leave whatever circle we're part of. Returns true if we were the
+ // last member of the circle. Once this is done, you can join() another.
+ bool depart()
+ GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) {
+ MutexLock lock(&g_linked_ptr_mutex);
+
+ if (next_ == this) return true;
+ linked_ptr_internal const* p = next_;
+ while (p->next_ != this) p = p->next_;
+ p->next_ = next_;
+ return false;
+ }
+
+ private:
+ mutable linked_ptr_internal const* next_;
+};
+
+template <typename T>
+class linked_ptr {
+ public:
+ typedef T element_type;
+
+ // Take over ownership of a raw pointer. This should happen as soon as
+ // possible after the object is created.
+ explicit linked_ptr(T* ptr = NULL) { capture(ptr); }
+ ~linked_ptr() { depart(); }
+
+ // Copy an existing linked_ptr<>, adding ourselves to the list of references.
+ template <typename U> linked_ptr(linked_ptr<U> const& ptr) { copy(&ptr); }
+ linked_ptr(linked_ptr const& ptr) { // NOLINT
+ assert(&ptr != this);
+ copy(&ptr);
+ }
+
+ // Assignment releases the old value and acquires the new.
+ template <typename U> linked_ptr& operator=(linked_ptr<U> const& ptr) {
+ depart();
+ copy(&ptr);
+ return *this;
+ }
+
+ linked_ptr& operator=(linked_ptr const& ptr) {
+ if (&ptr != this) {
+ depart();
+ copy(&ptr);
+ }
+ return *this;
+ }
+
+ // Smart pointer members.
+ void reset(T* ptr = NULL) {
+ depart();
+ capture(ptr);
+ }
+ T* get() const { return value_; }
+ T* operator->() const { return value_; }
+ T& operator*() const { return *value_; }
+
+ bool operator==(T* p) const { return value_ == p; }
+ bool operator!=(T* p) const { return value_ != p; }
+ template <typename U>
+ bool operator==(linked_ptr<U> const& ptr) const {
+ return value_ == ptr.get();
+ }
+ template <typename U>
+ bool operator!=(linked_ptr<U> const& ptr) const {
+ return value_ != ptr.get();
+ }
+
+ private:
+ template <typename U>
+ friend class linked_ptr;
+
+ T* value_;
+ linked_ptr_internal link_;
+
+ void depart() {
+ if (link_.depart()) delete value_;
+ }
+
+ void capture(T* ptr) {
+ value_ = ptr;
+ link_.join_new();
+ }
+
+ template <typename U> void copy(linked_ptr<U> const* ptr) {
+ value_ = ptr->get();
+ if (value_)
+ link_.join(&ptr->link_);
+ else
+ link_.join_new();
+ }
+};
+
+template<typename T> inline
+bool operator==(T* ptr, const linked_ptr<T>& x) {
+ return ptr == x.get();
+}
+
+template<typename T> inline
+bool operator!=(T* ptr, const linked_ptr<T>& x) {
+ return ptr != x.get();
+}
+
+// A function to convert T* into linked_ptr<T>.
+// Writing, e.g., make_linked_ptr(new FooBarBaz<type>(arg)) is a shorter
+// notation for linked_ptr<FooBarBaz<type> >(new FooBarBaz<type>(arg)).
+template <typename T>
+linked_ptr<T> make_linked_ptr(T* ptr) {
+ return linked_ptr<T>(ptr);
+}
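+
+// Illustration (not part of the original header): a typical use is
+// sharing heap objects inside STL containers. Foo is hypothetical.
+//
+//   std::vector<linked_ptr<Foo> > foos;
+//   foos.push_back(make_linked_ptr(new Foo));
+//   linked_ptr<Foo> alias = foos[0];  // joins the same circle
+//   // The Foo is deleted once both foos[0] and alias are gone.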
+
+} // namespace internal
+} // namespace testing
+
+#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+// Google Test - The Google C++ Testing Framework
+//
+// This file implements a universal value printer that can print a
+// value of any type T:
+//
+// void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
+//
+// A user can teach this function how to print a class type T by
+// defining either operator<<() or PrintTo() in the namespace that
+// defines T. More specifically, the FIRST defined function in the
+// following list will be used (assuming T is defined in namespace
+// foo):
+//
+// 1. foo::PrintTo(const T&, ostream*)
+// 2. operator<<(ostream&, const T&) defined in either foo or the
+// global namespace.
+//
+// If none of the above is defined, it will print the debug string of
+// the value if it is a protocol buffer, or print the raw bytes in the
+// value otherwise.
+//
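+// For example (a hedged sketch; the type Bar and its field are made up),
+// defining this in Bar's namespace makes Bar printable:
+//
+//   namespace bar {
+//   struct Bar { int x; };
+//   void PrintTo(const Bar& b, ::std::ostream* os) {
+//     *os << "Bar(" << b.x << ")";
+//   }
+//   }  // namespace bar
+//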
+// To aid debugging: when T is a reference type, the address of the
+// value is also printed; when T is a (const) char pointer, both the
+// pointer value and the NUL-terminated string it points to are
+// printed.
+//
+// We also provide some convenient wrappers:
+//
+// // Prints a value to a string. For a (const or not) char
+// // pointer, the NUL-terminated string (but not the pointer) is
+// // printed.
+// std::string ::testing::PrintToString(const T& value);
+//
+// // Prints a value tersely: for a reference type, the referenced
+// // value (but not the address) is printed; for a (const or not) char
+// // pointer, the NUL-terminated string (but not the pointer) is
+// // printed.
+// void ::testing::internal::UniversalTersePrint(const T& value, ostream*);
+//
+// // Prints value using the type inferred by the compiler. The difference
+// // from UniversalTersePrint() is that this function prints both the
+// // pointer and the NUL-terminated string for a (const or not) char pointer.
+// void ::testing::internal::UniversalPrint(const T& value, ostream*);
+//
+// // Prints the fields of a tuple tersely to a string vector, one
+// // element for each field. Tuple support must be enabled in
+// // gtest-port.h.
+// std::vector<string> UniversalTersePrintTupleFieldsToStrings(
+// const Tuple& value);
+//
+// Known limitation:
+//
+// The print primitives print the elements of an STL-style container
+// using the compiler-inferred type of *iter where iter is a
+// const_iterator of the container. When const_iterator is an input
+// iterator but not a forward iterator, this inferred type may not
+// match value_type, and the print output may be incorrect. In
+// practice, this is rarely a problem as for most containers
+// const_iterator is a forward iterator. We'll fix this if there's an
+// actual need for it. Note that this fix cannot rely on value_type
+// being defined as many user-defined container types don't have
+// value_type.
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+#define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+
+#include <ostream> // NOLINT
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace testing {
+
+// Definitions in the 'internal' and 'internal2' name spaces are
+// subject to change without notice. DO NOT USE THEM IN USER CODE!
+namespace internal2 {
+
+// Prints the given number of bytes in the given object to the given
+// ostream.
+GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
+ size_t count,
+ ::std::ostream* os);
+
+// For selecting which printer to use when a given type has neither <<
+// nor PrintTo().
+enum TypeKind {
+ kProtobuf, // a protobuf type
+ kConvertibleToInteger, // a type implicitly convertible to BiggestInt
+ // (e.g. a named or unnamed enum type)
+ kOtherType // anything else
+};
+
+// TypeWithoutFormatter<T, kTypeKind>::PrintValue(value, os) is called
+// by the universal printer to print a value of type T when neither
+// operator<< nor PrintTo() is defined for T, where kTypeKind is the
+// "kind" of T as defined by enum TypeKind.
+template <typename T, TypeKind kTypeKind>
+class TypeWithoutFormatter {
+ public:
+ // This default version is called when kTypeKind is kOtherType.
+ static void PrintValue(const T& value, ::std::ostream* os) {
+ PrintBytesInObjectTo(reinterpret_cast<const unsigned char*>(&value),
+ sizeof(value), os);
+ }
+};
+
+// We print a protobuf using its ShortDebugString() when the string
+// doesn't exceed this many characters; otherwise we print it using
+// DebugString() for better readability.
+const size_t kProtobufOneLinerMaxLength = 50;
+
+template <typename T>
+class TypeWithoutFormatter<T, kProtobuf> {
+ public:
+ static void PrintValue(const T& value, ::std::ostream* os) {
+ const ::testing::internal::string short_str = value.ShortDebugString();
+ const ::testing::internal::string pretty_str =
+ short_str.length() <= kProtobufOneLinerMaxLength ?
+ short_str : ("\n" + value.DebugString());
+ *os << ("<" + pretty_str + ">");
+ }
+};
+
+template <typename T>
+class TypeWithoutFormatter<T, kConvertibleToInteger> {
+ public:
+ // Since T has no << operator or PrintTo() but can be implicitly
+ // converted to BiggestInt, we print it as a BiggestInt.
+ //
+ // Most likely T is an enum type (either named or unnamed), in which
+ // case printing it as an integer is the desired behavior. In case
+ // T is not an enum, printing it as an integer is the best we can do
+ // given that it has no user-defined printer.
+ static void PrintValue(const T& value, ::std::ostream* os) {
+ const internal::BiggestInt kBigInt = value;
+ *os << kBigInt;
+ }
+};
+
+// Prints the given value to the given ostream. If the value is a
+// protocol message, its debug string is printed; if it's an enum or
+// of a type implicitly convertible to BiggestInt, it's printed as an
+// integer; otherwise the bytes in the value are printed. This is
+// what UniversalPrinter<T>::Print() does when it knows nothing about
+// type T and T has neither << operator nor PrintTo().
+//
+// A user can override this behavior for a class type Foo by defining
+// a << operator in the namespace where Foo is defined.
+//
+// We put this operator in namespace 'internal2' instead of 'internal'
+// to simplify the implementation, as much code in 'internal' needs to
+// use << in STL, which would conflict with our own << were it defined
+// in 'internal'.
+//
+// Note that this operator<< takes a generic std::basic_ostream<Char,
+// CharTraits> type instead of the more restricted std::ostream. If
+// we define it to take an std::ostream instead, we'll get an
+// "ambiguous overloads" compiler error when trying to print a type
+// Foo that supports streaming to std::basic_ostream<Char,
+// CharTraits>, as the compiler cannot tell whether
+// operator<<(std::ostream&, const T&) or
+// operator<<(std::basic_ostream<Char, CharTraits>&, const Foo&) is more
+// specific.
+template <typename Char, typename CharTraits, typename T>
+::std::basic_ostream<Char, CharTraits>& operator<<(
+ ::std::basic_ostream<Char, CharTraits>& os, const T& x) {
+ TypeWithoutFormatter<T,
+ (internal::IsAProtocolMessage<T>::value ? kProtobuf :
+ internal::ImplicitlyConvertible<const T&, internal::BiggestInt>::value ?
+ kConvertibleToInteger : kOtherType)>::PrintValue(x, &os);
+ return os;
+}
+
+} // namespace internal2
+} // namespace testing
+
+// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up
+// magic needed for implementing UniversalPrinter won't work.
+namespace testing_internal {
+
+// Used to print a value that is not an STL-style container when the
+// user doesn't define PrintTo() for it.
+template <typename T>
+void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) {
+ // With the following statement, during unqualified name lookup,
+ // testing::internal2::operator<< appears as if it was declared in
+ // the nearest enclosing namespace that contains both
+ // ::testing_internal and ::testing::internal2, i.e. the global
+ // namespace. For more details, refer to the C++ Standard section
+ // 7.3.4-1 [namespace.udir]. This allows us to fall back onto
+ // testing::internal2::operator<< in case T doesn't come with a <<
+ // operator.
+ //
+ // We cannot write 'using ::testing::internal2::operator<<;', as
+ // gcc 3.3 fails to compile it due to a compiler bug.
+ using namespace ::testing::internal2; // NOLINT
+
+ // Assuming T is defined in namespace foo, in the next statement,
+ // the compiler will consider all of:
+ //
+ // 1. foo::operator<< (thanks to Koenig look-up),
+ // 2. ::operator<< (as the current namespace is enclosed in ::),
+ // 3. testing::internal2::operator<< (thanks to the using statement above).
+ //
+ // The operator<< whose type matches T best will be picked.
+ //
+ // We deliberately allow #2 to be a candidate, as sometimes it's
+ // impossible to define #1 (e.g. when foo is ::std, defining
+ // anything in it is undefined behavior unless you are a compiler
+ // vendor).
+ *os << value;
+}
+
+} // namespace testing_internal
+
+namespace testing {
+namespace internal {
+
+// UniversalPrinter<T>::Print(value, ostream_ptr) prints the given
+// value to the given ostream. The caller must ensure that
+// 'ostream_ptr' is not NULL, or the behavior is undefined.
+//
+// We define UniversalPrinter as a class template (as opposed to a
+// function template), as we need to partially specialize it for
+// reference types, which cannot be done with function templates.
+template <typename T>
+class UniversalPrinter;
+
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os);
+
+// Used to print an STL-style container when the user doesn't define
+// a PrintTo() for it.
+template <typename C>
+void DefaultPrintTo(IsContainer /* dummy */,
+ false_type /* is not a pointer */,
+ const C& container, ::std::ostream* os) {
+ const size_t kMaxCount = 32; // The maximum number of elements to print.
+ *os << '{';
+ size_t count = 0;
+ for (typename C::const_iterator it = container.begin();
+ it != container.end(); ++it, ++count) {
+ if (count > 0) {
+ *os << ',';
+ if (count == kMaxCount) { // Enough has been printed.
+ *os << " ...";
+ break;
+ }
+ }
+ *os << ' ';
+ // We cannot call PrintTo(*it, os) here as PrintTo() doesn't
+ // handle *it being a native array.
+ internal::UniversalPrint(*it, os);
+ }
+
+ if (count > 0) {
+ *os << ' ';
+ }
+ *os << '}';
+}
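+
+// For instance, a ::std::vector<int> holding 1, 2, 3 is rendered by the
+// overload above as "{ 1, 2, 3 }"; containers with more than kMaxCount
+// elements are cut off with " ..." (illustrative note, not a format spec).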
+
+// Used to print a pointer that is neither a char pointer nor a member
+// pointer, when the user doesn't define PrintTo() for it. (A member
+// variable pointer or member function pointer doesn't really point to
+// a location in the address space; its representation is
+// implementation-defined. Therefore such a pointer is printed as raw
+// bytes.)
+template <typename T>
+void DefaultPrintTo(IsNotContainer /* dummy */,
+ true_type /* is a pointer */,
+ T* p, ::std::ostream* os) {
+ if (p == NULL) {
+ *os << "NULL";
+ } else {
+ // C++ doesn't allow casting from a function pointer to any object
+ // pointer.
+ //
+ // IsTrue() silences warnings: "Condition is always true",
+ // "unreachable code".
+ if (IsTrue(ImplicitlyConvertible<T*, const void*>::value)) {
+ // T is not a function type. We just call << to print p,
+ // relying on ADL to pick up user-defined << for their pointer
+ // types, if any.
+ *os << p;
+ } else {
+ // T is a function type, so '*os << p' doesn't do what we want
+ // (it just prints p as bool). We want to print p as a const
+ // void*. However, we cannot cast it to const void* directly,
+ // even using reinterpret_cast, as earlier versions of gcc
+ // (e.g. 3.4.5) cannot compile the cast when p is a function
+ // pointer. Casting to UInt64 first solves the problem.
+ *os << reinterpret_cast<const void*>(
+ reinterpret_cast<internal::UInt64>(p));
+ }
+ }
+}
+
+// Used to print a non-container, non-pointer value when the user
+// doesn't define PrintTo() for it.
+template <typename T>
+void DefaultPrintTo(IsNotContainer /* dummy */,
+ false_type /* is not a pointer */,
+ const T& value, ::std::ostream* os) {
+ ::testing_internal::DefaultPrintNonContainerTo(value, os);
+}
+
+// Prints the given value using the << operator if it has one;
+// otherwise prints the bytes in it. This is what
+// UniversalPrinter<T>::Print() does when PrintTo() is not specialized
+// or overloaded for type T.
+//
+// A user can override this behavior for a class type Foo by defining
+// an overload of PrintTo() in the namespace where Foo is defined. We
+// give the user this option as sometimes defining a << operator for
+// Foo is not desirable (e.g. the coding style may prevent doing it,
+// or there is already a << operator but it doesn't do what the user
+// wants).
+template <typename T>
+void PrintTo(const T& value, ::std::ostream* os) {
+ // DefaultPrintTo() is overloaded. The type of its first two
+ // arguments determine which version will be picked. If T is an
+ // STL-style container, the version for container will be called; if
+ // T is a pointer, the pointer version will be called; otherwise the
+ // generic version will be called.
+ //
+ // Note that we check for container types here, before we check
+ // for protocol message types in our operator<<. The rationale is:
+ //
+ // For protocol messages, we want to give people a chance to
+ // override Google Mock's format by defining a PrintTo() or
+ // operator<<. For STL containers, other formats can be
+ // incompatible with Google Mock's format for the container
+ // elements; therefore we check for container types here to ensure
+ // that our format is used.
+ //
+ // The second argument of DefaultPrintTo() is needed to bypass a bug
+ // in Symbian's C++ compiler that prevents it from picking the right
+ // overload between:
+ //
+ // PrintTo(const T& x, ...);
+ // PrintTo(T* x, ...);
+ DefaultPrintTo(IsContainerTest<T>(0), is_pointer<T>(), value, os);
+}
+
+// The following list of PrintTo() overloads tells
+// UniversalPrinter<T>::Print() how to print standard types (built-in
+// types, strings, plain arrays, and pointers).
+
+// Overloads for various char types.
+GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os);
+GTEST_API_ void PrintTo(signed char c, ::std::ostream* os);
+inline void PrintTo(char c, ::std::ostream* os) {
+ // When printing a plain char, we always treat it as unsigned. This
+ // way, the output won't be affected by whether the compiler thinks
+ // char is signed or not.
+ PrintTo(static_cast<unsigned char>(c), os);
+}
+
+// Overloads for other simple built-in types.
+inline void PrintTo(bool x, ::std::ostream* os) {
+ *os << (x ? "true" : "false");
+}
+
+// Overload for wchar_t type.
+// Prints a wchar_t as a symbol if it is printable or as its internal
+// code otherwise and also as its decimal code (except for L'\0').
+// The L'\0' char is printed as "L'\\0'". The decimal code is printed
+// as signed integer when wchar_t is implemented by the compiler
+// as a signed type and is printed as an unsigned integer when wchar_t
+// is implemented as an unsigned type.
+GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os);
+
+// Overloads for C strings.
+GTEST_API_ void PrintTo(const char* s, ::std::ostream* os);
+inline void PrintTo(char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const char*>(s), os);
+}
+
+// signed/unsigned char is often used for representing binary data, so
+// we print pointers to it as void* to be safe.
+inline void PrintTo(const signed char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(signed char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(const unsigned char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(unsigned char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const void*>(s), os);
+}
+
+// MSVC can be configured to define wchar_t as a typedef of unsigned
+// short. It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native
+// type. When wchar_t is a typedef, defining an overload for const
+// wchar_t* would cause unsigned short* to be printed as a wide string,
+// possibly causing invalid memory accesses.
+#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
+// Overloads for wide C strings
+GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os);
+inline void PrintTo(wchar_t* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const wchar_t*>(s), os);
+}
+#endif
+
+// Overload for C arrays. Multi-dimensional arrays are printed
+// properly.
+
+// Prints the given number of elements in an array, without printing
+// the curly braces.
+template <typename T>
+void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) {
+ UniversalPrint(a[0], os);
+ for (size_t i = 1; i != count; i++) {
+ *os << ", ";
+ UniversalPrint(a[i], os);
+ }
+}
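+
+// Note: PrintRawArrayTo() reads a[0] unconditionally, so callers (such as
+// UniversalPrintArray() below) must ensure that count >= 1.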
+
+// Overloads for ::string and ::std::string.
+#if GTEST_HAS_GLOBAL_STRING
+GTEST_API_ void PrintStringTo(const ::string&s, ::std::ostream* os);
+inline void PrintTo(const ::string& s, ::std::ostream* os) {
+ PrintStringTo(s, os);
+}
+#endif // GTEST_HAS_GLOBAL_STRING
+
+GTEST_API_ void PrintStringTo(const ::std::string&s, ::std::ostream* os);
+inline void PrintTo(const ::std::string& s, ::std::ostream* os) {
+ PrintStringTo(s, os);
+}
+
+// Overloads for ::wstring and ::std::wstring.
+#if GTEST_HAS_GLOBAL_WSTRING
+GTEST_API_ void PrintWideStringTo(const ::wstring&s, ::std::ostream* os);
+inline void PrintTo(const ::wstring& s, ::std::ostream* os) {
+ PrintWideStringTo(s, os);
+}
+#endif // GTEST_HAS_GLOBAL_WSTRING
+
+#if GTEST_HAS_STD_WSTRING
+GTEST_API_ void PrintWideStringTo(const ::std::wstring&s, ::std::ostream* os);
+inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) {
+ PrintWideStringTo(s, os);
+}
+#endif // GTEST_HAS_STD_WSTRING
+
+#if GTEST_HAS_TR1_TUPLE
+// Overload for ::std::tr1::tuple. Needed for printing function arguments,
+// which are packed as tuples.
+
+// Helper function for printing a tuple. T must be instantiated with
+// a tuple type.
+template <typename T>
+void PrintTupleTo(const T& t, ::std::ostream* os);
+
+// Overloaded PrintTo() for tuples of various arities. We support
+// tuples of up to 10 fields. The following implementation works
+// regardless of whether tr1::tuple is implemented using the
+// non-standard variadic template feature or not.
+
+inline void PrintTo(const ::std::tr1::tuple<>& t, ::std::ostream* os) {
+ PrintTupleTo(t, os);
+}
+
+template <typename T1>
+void PrintTo(const ::std::tr1::tuple<T1>& t, ::std::ostream* os) {
+ PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2>
+void PrintTo(const ::std::tr1::tuple<T1, T2>& t, ::std::ostream* os) {
+ PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3>& t, ::std::ostream* os) {
+ PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4>& t, ::std::ostream* os) {
+ PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5>& t,
+ ::std::ostream* os) {
+ PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6>& t,
+ ::std::ostream* os) {
+ PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7>& t,
+ ::std::ostream* os) {
+ PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8>& t,
+ ::std::ostream* os) {
+ PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9>& t,
+ ::std::ostream* os) {
+ PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10>
+void PrintTo(
+ const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>& t,
+ ::std::ostream* os) {
+ PrintTupleTo(t, os);
+}
+#endif // GTEST_HAS_TR1_TUPLE
+
+// Overload for std::pair.
+template <typename T1, typename T2>
+void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) {
+ *os << '(';
+ // We cannot use UniversalPrint(value.first, os) here, as T1 may be
+ // a reference type. The same for printing value.second.
+ UniversalPrinter<T1>::Print(value.first, os);
+ *os << ", ";
+ UniversalPrinter<T2>::Print(value.second, os);
+ *os << ')';
+}
+
+// Implements printing a non-reference type T by letting the compiler
+// pick the right overload of PrintTo() for T.
+template <typename T>
+class UniversalPrinter {
+ public:
+ // MSVC warns about adding const to a function type, so we want to
+ // disable the warning.
+#ifdef _MSC_VER
+# pragma warning(push) // Saves the current warning state.
+# pragma warning(disable:4180) // Temporarily disables warning 4180.
+#endif // _MSC_VER
+
+ // Note: we deliberately don't call this PrintTo(), as that name
+ // conflicts with ::testing::internal::PrintTo in the body of the
+ // function.
+ static void Print(const T& value, ::std::ostream* os) {
+ // By default, ::testing::internal::PrintTo() is used for printing
+ // the value.
+ //
+ // Thanks to Koenig look-up, if T is a class and has its own
+ // PrintTo() function defined in its namespace, that function will
+ // be visible here. Since it is more specific than the generic ones
+ // in ::testing::internal, it will be picked by the compiler in the
+ // following statement - exactly what we want.
+ PrintTo(value, os);
+ }
+
+#ifdef _MSC_VER
+# pragma warning(pop) // Restores the warning state.
+#endif // _MSC_VER
+};
+
+// UniversalPrintArray(begin, len, os) prints an array of 'len'
+// elements, starting at address 'begin'.
+template <typename T>
+void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) {
+ if (len == 0) {
+ *os << "{}";
+ } else {
+ *os << "{ ";
+ const size_t kThreshold = 18;
+ const size_t kChunkSize = 8;
+ // If the array has more than kThreshold elements, we'll have to
+ // omit some details by printing only the first and the last
+ // kChunkSize elements.
+ // TODO(wan@google.com): let the user control the threshold using a flag.
+ if (len <= kThreshold) {
+ PrintRawArrayTo(begin, len, os);
+ } else {
+ PrintRawArrayTo(begin, kChunkSize, os);
+ *os << ", ..., ";
+ PrintRawArrayTo(begin + len - kChunkSize, kChunkSize, os);
+ }
+ *os << " }";
+ }
+}
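+// E.g. with the constants above, an array of 18 or fewer elements is
+// printed in full; a longer one shows only its first 8 and last 8
+// elements, separated by ", ..., " (illustrative note).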
+// This overload prints a (const) char array compactly.
+GTEST_API_ void UniversalPrintArray(
+ const char* begin, size_t len, ::std::ostream* os);
+
+// This overload prints a (const) wchar_t array compactly.
+GTEST_API_ void UniversalPrintArray(
+ const wchar_t* begin, size_t len, ::std::ostream* os);
+
+// Implements printing an array type T[N].
+template <typename T, size_t N>
+class UniversalPrinter<T[N]> {
+ public:
+ // Prints the given array, omitting some elements when there are too
+ // many.
+ static void Print(const T (&a)[N], ::std::ostream* os) {
+ UniversalPrintArray(a, N, os);
+ }
+};
+
+// Implements printing a reference type T&.
+template <typename T>
+class UniversalPrinter<T&> {
+ public:
+ // MSVC warns about adding const to a function type, so we want to
+ // disable the warning.
+#ifdef _MSC_VER
+# pragma warning(push) // Saves the current warning state.
+# pragma warning(disable:4180) // Temporarily disables warning 4180.
+#endif // _MSC_VER
+
+ static void Print(const T& value, ::std::ostream* os) {
+ // Prints the address of the value. We use reinterpret_cast here
+ // as static_cast doesn't compile when T is a function type.
+ *os << "@" << reinterpret_cast<const void*>(&value) << " ";
+
+ // Then prints the value itself.
+ UniversalPrint(value, os);
+ }
+
+#ifdef _MSC_VER
+# pragma warning(pop) // Restores the warning state.
+#endif // _MSC_VER
+};
+
+// Prints a value tersely: for a reference type, the referenced value
+// (but not the address) is printed; for a (const) char pointer, the
+// NUL-terminated string (but not the pointer) is printed.
+
+template <typename T>
+class UniversalTersePrinter {
+ public:
+ static void Print(const T& value, ::std::ostream* os) {
+ UniversalPrint(value, os);
+ }
+};
+template <typename T>
+class UniversalTersePrinter<T&> {
+ public:
+ static void Print(const T& value, ::std::ostream* os) {
+ UniversalPrint(value, os);
+ }
+};
+template <typename T, size_t N>
+class UniversalTersePrinter<T[N]> {
+ public:
+ static void Print(const T (&value)[N], ::std::ostream* os) {
+ UniversalPrinter<T[N]>::Print(value, os);
+ }
+};
+template <>
+class UniversalTersePrinter<const char*> {
+ public:
+ static void Print(const char* str, ::std::ostream* os) {
+ if (str == NULL) {
+ *os << "NULL";
+ } else {
+ UniversalPrint(string(str), os);
+ }
+ }
+};
+template <>
+class UniversalTersePrinter<char*> {
+ public:
+ static void Print(char* str, ::std::ostream* os) {
+ UniversalTersePrinter<const char*>::Print(str, os);
+ }
+};
+
+#if GTEST_HAS_STD_WSTRING
+template <>
+class UniversalTersePrinter<const wchar_t*> {
+ public:
+ static void Print(const wchar_t* str, ::std::ostream* os) {
+ if (str == NULL) {
+ *os << "NULL";
+ } else {
+ UniversalPrint(::std::wstring(str), os);
+ }
+ }
+};
+#endif
+
+template <>
+class UniversalTersePrinter<wchar_t*> {
+ public:
+ static void Print(wchar_t* str, ::std::ostream* os) {
+ UniversalTersePrinter<const wchar_t*>::Print(str, os);
+ }
+};
+
+template <typename T>
+void UniversalTersePrint(const T& value, ::std::ostream* os) {
+ UniversalTersePrinter<T>::Print(value, os);
+}
+
+// Prints a value using the type inferred by the compiler. The
+// difference between this and UniversalTersePrint() is that for a
+// (const) char pointer, this prints both the pointer and the
+// NUL-terminated string.
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os) {
+ // A workaround for the bug in VC++ 7.1 that prevents us from instantiating
+ // UniversalPrinter with T directly.
+ typedef T T1;
+ UniversalPrinter<T1>::Print(value, os);
+}
+
+#if GTEST_HAS_TR1_TUPLE
+typedef ::std::vector<string> Strings;
+
+// This helper template allows PrintTo() for tuples and
+// UniversalTersePrintTupleFieldsToStrings() to be defined by
+// induction on the number of tuple fields. The idea is that
+// TuplePrefixPrinter<N>::PrintPrefixTo(t, os) prints the first N
+// fields in tuple t, and can be defined in terms of
+// TuplePrefixPrinter<N - 1>.
+
+// The inductive case.
+template <size_t N>
+struct TuplePrefixPrinter {
+ // Prints the first N fields of a tuple.
+ template <typename Tuple>
+ static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
+ TuplePrefixPrinter<N - 1>::PrintPrefixTo(t, os);
+ *os << ", ";
+ UniversalPrinter<typename ::std::tr1::tuple_element<N - 1, Tuple>::type>
+ ::Print(::std::tr1::get<N - 1>(t), os);
+ }
+
+ // Tersely prints the first N fields of a tuple to a string vector,
+ // one element for each field.
+ template <typename Tuple>
+ static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
+ TuplePrefixPrinter<N - 1>::TersePrintPrefixToStrings(t, strings);
+ ::std::stringstream ss;
+ UniversalTersePrint(::std::tr1::get<N - 1>(t), &ss);
+ strings->push_back(ss.str());
+ }
+};
+
+// Base cases.
+template <>
+struct TuplePrefixPrinter<0> {
+ template <typename Tuple>
+ static void PrintPrefixTo(const Tuple&, ::std::ostream*) {}
+
+ template <typename Tuple>
+ static void TersePrintPrefixToStrings(const Tuple&, Strings*) {}
+};
+// We have to specialize the entire TuplePrefixPrinter<> class
+// template here, even though the definition of
+// TersePrintPrefixToStrings() is the same as the generic version, as
+// Embarcadero (formerly CodeGear, formerly Borland) C++ doesn't
+// support specializing a method template of a class template.
+template <>
+struct TuplePrefixPrinter<1> {
+ template <typename Tuple>
+ static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
+ UniversalPrinter<typename ::std::tr1::tuple_element<0, Tuple>::type>::
+ Print(::std::tr1::get<0>(t), os);
+ }
+
+ template <typename Tuple>
+ static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
+ ::std::stringstream ss;
+ UniversalTersePrint(::std::tr1::get<0>(t), &ss);
+ strings->push_back(ss.str());
+ }
+};
+
+// Helper function for printing a tuple. T must be instantiated with
+// a tuple type.
+template <typename T>
+void PrintTupleTo(const T& t, ::std::ostream* os) {
+ *os << "(";
+ TuplePrefixPrinter< ::std::tr1::tuple_size<T>::value>::
+ PrintPrefixTo(t, os);
+ *os << ")";
+}
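+
+// E.g. a ::std::tr1::tuple<int, int> holding (1, 2) comes out of the
+// function above as "(1, 2)" (illustrative).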
+
+// Prints the fields of a tuple tersely to a string vector, one
+// element for each field. See the comment before
+// UniversalTersePrint() for how we define "tersely".
+template <typename Tuple>
+Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) {
+ Strings result;
+ TuplePrefixPrinter< ::std::tr1::tuple_size<Tuple>::value>::
+ TersePrintPrefixToStrings(value, &result);
+ return result;
+}
+#endif // GTEST_HAS_TR1_TUPLE
+
+} // namespace internal
+
+template <typename T>
+::std::string PrintToString(const T& value) {
+ ::std::stringstream ss;
+ internal::UniversalTersePrinter<T>::Print(value, &ss);
+ return ss.str();
+}
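+
+// Usage sketch (hedged; rendering of non-trivial types may differ):
+//
+//   const ::std::string s = ::testing::PrintToString(::std::make_pair(1, 2));
+//   // s is "(1, 2)" for a pair of ints.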
+
+} // namespace testing
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+
+#if GTEST_HAS_PARAM_TEST
+
+namespace testing {
+namespace internal {
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Outputs a message explaining invalid registration of different
+// fixture class for the same test case. This may happen when
+// TEST_P macro is used to define two tests with the same name
+// but in different namespaces.
+GTEST_API_ void ReportInvalidTestCaseType(const char* test_case_name,
+ const char* file, int line);
+
+template <typename> class ParamGeneratorInterface;
+template <typename> class ParamGenerator;
+
+// Interface for iterating over elements provided by an implementation
+// of ParamGeneratorInterface<T>.
+template <typename T>
+class ParamIteratorInterface {
+ public:
+ virtual ~ParamIteratorInterface() {}
+ // A pointer to the base generator instance.
+ // Used only for the purposes of iterator comparison
+ // to make sure that two iterators belong to the same generator.
+ virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0;
+ // Advances iterator to point to the next element
+ // provided by the generator. The caller is responsible
+ // for not calling Advance() on an iterator equal to
+ // BaseGenerator()->End().
+ virtual void Advance() = 0;
+ // Clones the iterator object. Used for implementing copy semantics
+ // of ParamIterator<T>.
+ virtual ParamIteratorInterface* Clone() const = 0;
+ // Dereferences the current iterator and provides (read-only) access
+ // to the pointed value. It is the caller's responsibility not to call
+ // Current() on an iterator equal to BaseGenerator()->End().
+ // Used for implementing ParamGenerator<T>::operator*().
+ virtual const T* Current() const = 0;
+ // Determines whether the given iterator and other point to the same
+ // element in the sequence generated by the generator.
+ // Used for implementing ParamGenerator<T>::operator==().
+ virtual bool Equals(const ParamIteratorInterface& other) const = 0;
+};
+
+// Class iterating over elements provided by an implementation of
+// ParamGeneratorInterface<T>. It wraps ParamIteratorInterface<T>
+// and implements the const forward iterator concept.
+template <typename T>
+class ParamIterator {
+ public:
+ typedef T value_type;
+ typedef const T& reference;
+ typedef ptrdiff_t difference_type;
+
+ // ParamIterator assumes ownership of the impl_ pointer.
+ ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {}
+ ParamIterator& operator=(const ParamIterator& other) {
+ if (this != &other)
+ impl_.reset(other.impl_->Clone());
+ return *this;
+ }
+
+ const T& operator*() const { return *impl_->Current(); }
+ const T* operator->() const { return impl_->Current(); }
+ // Prefix version of operator++.
+ ParamIterator& operator++() {
+ impl_->Advance();
+ return *this;
+ }
+ // Postfix version of operator++.
+ ParamIterator operator++(int /*unused*/) {
+ ParamIteratorInterface<T>* clone = impl_->Clone();
+ impl_->Advance();
+ return ParamIterator(clone);
+ }
+ bool operator==(const ParamIterator& other) const {
+ return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_);
+ }
+ bool operator!=(const ParamIterator& other) const {
+ return !(*this == other);
+ }
+
+ private:
+ friend class ParamGenerator<T>;
+ explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {}
+ scoped_ptr<ParamIteratorInterface<T> > impl_;
+};
+
+// ParamGeneratorInterface<T> is the binary interface to access generators
+// defined in other translation units.
+template <typename T>
+class ParamGeneratorInterface {
+ public:
+ typedef T ParamType;
+
+ virtual ~ParamGeneratorInterface() {}
+
+ // Generator interface definition
+ virtual ParamIteratorInterface<T>* Begin() const = 0;
+ virtual ParamIteratorInterface<T>* End() const = 0;
+};
+
+// Wraps ParamGeneratorInterface<T> and provides general generator syntax
+// compatible with the STL Container concept.
+// This class implements copy initialization semantics and the contained
+// ParamGeneratorInterface<T> instance is shared among all copies
+// of the original object. This is possible because that instance is immutable.
+template<typename T>
+class ParamGenerator {
+ public:
+ typedef ParamIterator<T> iterator;
+
+ explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {}
+ ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {}
+
+ ParamGenerator& operator=(const ParamGenerator& other) {
+ impl_ = other.impl_;
+ return *this;
+ }
+
+ iterator begin() const { return iterator(impl_->Begin()); }
+ iterator end() const { return iterator(impl_->End()); }
+
+ private:
+ linked_ptr<const ParamGeneratorInterface<T> > impl_;
+};
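+
+// Iteration sketch (hedged; assumes 'gen' was obtained from a factory such
+// as Range() or ValuesIn(), and DoSomethingWith() is hypothetical):
+//
+//   ParamGenerator<int> gen = ...;
+//   for (ParamGenerator<int>::iterator it = gen.begin(); it != gen.end();
+//        ++it) {
+//     DoSomethingWith(*it);
+//   }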
+
+// Generates values from a range of two comparable values. Can be used to
+// generate sequences of user-defined types that implement operator+() and
+// operator<().
+// This class is used in the Range() function.
+template <typename T, typename IncrementT>
+class RangeGenerator : public ParamGeneratorInterface<T> {
+ public:
+ RangeGenerator(T begin, T end, IncrementT step)
+ : begin_(begin), end_(end),
+ step_(step), end_index_(CalculateEndIndex(begin, end, step)) {}
+ virtual ~RangeGenerator() {}
+
+ virtual ParamIteratorInterface<T>* Begin() const {
+ return new Iterator(this, begin_, 0, step_);
+ }
+ virtual ParamIteratorInterface<T>* End() const {
+ return new Iterator(this, end_, end_index_, step_);
+ }
+
+ private:
+ class Iterator : public ParamIteratorInterface<T> {
+ public:
+ Iterator(const ParamGeneratorInterface<T>* base, T value, int index,
+ IncrementT step)
+ : base_(base), value_(value), index_(index), step_(step) {}
+ virtual ~Iterator() {}
+
+ virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
+ return base_;
+ }
+ virtual void Advance() {
+ value_ = value_ + step_;
+ index_++;
+ }
+ virtual ParamIteratorInterface<T>* Clone() const {
+ return new Iterator(*this);
+ }
+ virtual const T* Current() const { return &value_; }
+ virtual bool Equals(const ParamIteratorInterface<T>& other) const {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ const int other_index =
+ CheckedDowncastToActualType<const Iterator>(&other)->index_;
+ return index_ == other_index;
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ : ParamIteratorInterface<T>(),
+ base_(other.base_), value_(other.value_), index_(other.index_),
+ step_(other.step_) {}
+
+ // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other);
+
+ const ParamGeneratorInterface<T>* const base_;
+ T value_;
+ int index_;
+ const IncrementT step_;
+ }; // class RangeGenerator::Iterator
+
+ static int CalculateEndIndex(const T& begin,
+ const T& end,
+ const IncrementT& step) {
+ int end_index = 0;
+ for (T i = begin; i < end; i = i + step)
+ end_index++;
+ return end_index;
+ }
+
+ // No implementation - assignment is unsupported.
+ void operator=(const RangeGenerator& other);
+
+ const T begin_;
+ const T end_;
+ const IncrementT step_;
+ // The index for the end() iterator. All the elements in the generated
+ // sequence are indexed (0-based) to aid iterator comparison.
+ const int end_index_;
+}; // class RangeGenerator
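+
+// For example, RangeGenerator<int, int>(0, 10, 3) generates the sequence
+// 0, 3, 6, 9; the end value is exclusive (illustrative note).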
+
+
+// Generates values from a pair of STL-style iterators. Used in the
+// ValuesIn() function. The elements are copied from the source range
+// since the source can be located on the stack, and the generator
+// is likely to persist beyond that stack frame.
+template <typename T>
+class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
+ public:
+ template <typename ForwardIterator>
+ ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end)
+ : container_(begin, end) {}
+ virtual ~ValuesInIteratorRangeGenerator() {}
+
+ virtual ParamIteratorInterface<T>* Begin() const {
+ return new Iterator(this, container_.begin());
+ }
+ virtual ParamIteratorInterface<T>* End() const {
+ return new Iterator(this, container_.end());
+ }
+
+ private:
+ typedef typename ::std::vector<T> ContainerType;
+
+ class Iterator : public ParamIteratorInterface<T> {
+ public:
+ Iterator(const ParamGeneratorInterface<T>* base,
+ typename ContainerType::const_iterator iterator)
+ : base_(base), iterator_(iterator) {}
+ virtual ~Iterator() {}
+
+ virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
+ return base_;
+ }
+ virtual void Advance() {
+ ++iterator_;
+ value_.reset();
+ }
+ virtual ParamIteratorInterface<T>* Clone() const {
+ return new Iterator(*this);
+ }
+ // We need to use the cached value referenced by iterator_ because
+ // *iterator_ can return a temporary object (and of a type other than T),
+ // so just having "return &*iterator_;" doesn't work.
+ // value_ is updated here and not in Advance() because Advance()
+ // can advance iterator_ beyond the end of the range, and we cannot
+ // detect that fact. The client code, on the other hand, is
+ // responsible for not calling Current() on an out-of-range iterator.
+ virtual const T* Current() const {
+ if (value_.get() == NULL)
+ value_.reset(new T(*iterator_));
+ return value_.get();
+ }
+ virtual bool Equals(const ParamIteratorInterface<T>& other) const {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ return iterator_ ==
+ CheckedDowncastToActualType<const Iterator>(&other)->iterator_;
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ // The explicit constructor call suppresses a false warning
+ // emitted by gcc when supplied with the -Wextra option.
+ : ParamIteratorInterface<T>(),
+ base_(other.base_),
+ iterator_(other.iterator_) {}
+
+ const ParamGeneratorInterface<T>* const base_;
+ typename ContainerType::const_iterator iterator_;
+ // A cached value of *iterator_. We keep it here to allow access by
+ // pointer in the wrapping iterator's operator->().
+ // value_ needs to be mutable to be accessed in Current().
+ // Use of scoped_ptr helps manage cached value's lifetime,
+ // which is bound by the lifespan of the iterator itself.
+ mutable scoped_ptr<const T> value_;
+ }; // class ValuesInIteratorRangeGenerator::Iterator
+
+ // No implementation - assignment is unsupported.
+ void operator=(const ValuesInIteratorRangeGenerator& other);
+
+ const ContainerType container_;
+}; // class ValuesInIteratorRangeGenerator
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Stores a parameter value and later creates tests parameterized with that
+// value.
+template <class TestClass>
+class ParameterizedTestFactory : public TestFactoryBase {
+ public:
+ typedef typename TestClass::ParamType ParamType;
+ explicit ParameterizedTestFactory(ParamType parameter) :
+ parameter_(parameter) {}
+ virtual Test* CreateTest() {
+ TestClass::SetParam(&parameter_);
+ return new TestClass();
+ }
+
+ private:
+ const ParamType parameter_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// TestMetaFactoryBase is a base class for meta-factories that create
+// test factories for passing into MakeAndRegisterTestInfo function.
+template <class ParamType>
+class TestMetaFactoryBase {
+ public:
+ virtual ~TestMetaFactoryBase() {}
+
+ virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0;
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// TestMetaFactory creates test factories for passing into
+// MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo takes
+// ownership of the test factory pointer, the same factory object cannot be
+// passed into that method twice. But ParameterizedTestCaseInfo is going to
+// call it for each Test/Parameter value combination, so it needs a meta
+// factory creator class.
+template <class TestCase>
+class TestMetaFactory
+ : public TestMetaFactoryBase<typename TestCase::ParamType> {
+ public:
+ typedef typename TestCase::ParamType ParamType;
+
+ TestMetaFactory() {}
+
+ virtual TestFactoryBase* CreateTestFactory(ParamType parameter) {
+ return new ParameterizedTestFactory<TestCase>(parameter);
+ }
+
+ private:
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestCaseInfoBase is a generic interface
+// to ParameterizedTestCaseInfo classes. ParameterizedTestCaseInfoBase
+// accumulates test information provided by TEST_P macro invocations
+// and generators provided by INSTANTIATE_TEST_CASE_P macro invocations
+// and uses that information to register all resulting test instances
+// in the RegisterTests method. The ParameterizedTestCaseRegistry class holds
+// a collection of pointers to the ParameterizedTestCaseInfo objects
+// and calls RegisterTests() on each of them when asked.
+class ParameterizedTestCaseInfoBase {
+ public:
+ virtual ~ParameterizedTestCaseInfoBase() {}
+
+ // Base part of test case name for display purposes.
+ virtual const string& GetTestCaseName() const = 0;
+ // Test case id to verify identity.
+ virtual TypeId GetTestCaseTypeId() const = 0;
+ // UnitTest class invokes this method to register tests in this
+ // test case right before running them in the RUN_ALL_TESTS macro.
+ // This method should not be called more than once on any single
+ // instance of a ParameterizedTestCaseInfoBase derived class.
+ virtual void RegisterTests() = 0;
+
+ protected:
+ ParameterizedTestCaseInfoBase() {}
+
+ private:
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfoBase);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestCaseInfo accumulates tests obtained from TEST_P
+// macro invocations for a particular test case and generators
+// obtained from INSTANTIATE_TEST_CASE_P macro invocations for that
+// test case. It registers tests with all values generated by all
+// generators when asked.
+template <class TestCase>
+class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase {
+ public:
+ // ParamType and GeneratorCreationFunc are private types but are required
+ // for declarations of public methods AddTestPattern() and
+ // AddTestCaseInstantiation().
+ typedef typename TestCase::ParamType ParamType;
+ // A function that returns an instance of appropriate generator type.
+ typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();
+
+ explicit ParameterizedTestCaseInfo(const char* name)
+ : test_case_name_(name) {}
+
+ // Test case base name for display purposes.
+ virtual const string& GetTestCaseName() const { return test_case_name_; }
+ // Test case id to verify identity.
+ virtual TypeId GetTestCaseTypeId() const { return GetTypeId<TestCase>(); }
+ // TEST_P macro uses AddTestPattern() to record information
+ // about a single test in a LocalTestInfo structure.
+ // test_case_name is the base name of the test case (without invocation
+ // prefix). test_base_name is the name of an individual test without
+ // parameter index. For the test SequenceA/FooTest.DoBar/1, FooTest is
+ // the test case base name and DoBar is the test base name.
+ void AddTestPattern(const char* test_case_name,
+ const char* test_base_name,
+ TestMetaFactoryBase<ParamType>* meta_factory) {
+ tests_.push_back(linked_ptr<TestInfo>(new TestInfo(test_case_name,
+ test_base_name,
+ meta_factory)));
+ }
+ // INSTANTIATE_TEST_CASE_P macro uses AddGenerator() to record information
+ // about a generator.
+ int AddTestCaseInstantiation(const string& instantiation_name,
+ GeneratorCreationFunc* func,
+ const char* /* file */,
+ int /* line */) {
+ instantiations_.push_back(::std::make_pair(instantiation_name, func));
+ return 0; // Return value used only to run this method in namespace scope.
+ }
+ // UnitTest class invokes this method to register tests in this test
+ // case right before running tests in the RUN_ALL_TESTS macro.
+ // This method should not be called more than once on any single
+ // instance of a ParameterizedTestCaseInfoBase derived class.
+ // UnitTest has a guard to prevent calling this method more than once.
+ virtual void RegisterTests() {
+ for (typename TestInfoContainer::iterator test_it = tests_.begin();
+ test_it != tests_.end(); ++test_it) {
+ linked_ptr<TestInfo> test_info = *test_it;
+ for (typename InstantiationContainer::iterator gen_it =
+ instantiations_.begin(); gen_it != instantiations_.end();
+ ++gen_it) {
+ const string& instantiation_name = gen_it->first;
+ ParamGenerator<ParamType> generator((*gen_it->second)());
+
+ string test_case_name;
+ if ( !instantiation_name.empty() )
+ test_case_name = instantiation_name + "/";
+ test_case_name += test_info->test_case_base_name;
+
+ int i = 0;
+ for (typename ParamGenerator<ParamType>::iterator param_it =
+ generator.begin();
+ param_it != generator.end(); ++param_it, ++i) {
+ Message test_name_stream;
+ test_name_stream << test_info->test_base_name << "/" << i;
+ MakeAndRegisterTestInfo(
+ test_case_name.c_str(),
+ test_name_stream.GetString().c_str(),
+ NULL, // No type parameter.
+ PrintToString(*param_it).c_str(),
+ GetTestCaseTypeId(),
+ TestCase::SetUpTestCase,
+ TestCase::TearDownTestCase,
+ test_info->test_meta_factory->CreateTestFactory(*param_it));
+ } // for param_it
+ } // for gen_it
+ } // for test_it
+ } // RegisterTests
+
+ private:
+ // LocalTestInfo structure keeps information about a single test registered
+ // with TEST_P macro.
+ struct TestInfo {
+ TestInfo(const char* a_test_case_base_name,
+ const char* a_test_base_name,
+ TestMetaFactoryBase<ParamType>* a_test_meta_factory) :
+ test_case_base_name(a_test_case_base_name),
+ test_base_name(a_test_base_name),
+ test_meta_factory(a_test_meta_factory) {}
+
+ const string test_case_base_name;
+ const string test_base_name;
+ const scoped_ptr<TestMetaFactoryBase<ParamType> > test_meta_factory;
+ };
+ typedef ::std::vector<linked_ptr<TestInfo> > TestInfoContainer;
+ // Keeps pairs of <Instantiation name, Sequence generator creation function>
+ // received from INSTANTIATE_TEST_CASE_P macros.
+ typedef ::std::vector<std::pair<string, GeneratorCreationFunc*> >
+ InstantiationContainer;
+
+ const string test_case_name_;
+ TestInfoContainer tests_;
+ InstantiationContainer instantiations_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfo);
+}; // class ParameterizedTestCaseInfo
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestCaseRegistry contains a map of ParameterizedTestCaseInfoBase
+// classes accessed by test case names. TEST_P and INSTANTIATE_TEST_CASE_P
+// macros use it to locate their corresponding ParameterizedTestCaseInfo
+// descriptors.
+class ParameterizedTestCaseRegistry {
+ public:
+ ParameterizedTestCaseRegistry() {}
+ ~ParameterizedTestCaseRegistry() {
+ for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
+ it != test_case_infos_.end(); ++it) {
+ delete *it;
+ }
+ }
+
+ // Looks up or creates and returns a structure containing information about
+ // tests and instantiations of a particular test case.
+ template <class TestCase>
+ ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder(
+ const char* test_case_name,
+ const char* file,
+ int line) {
+ ParameterizedTestCaseInfo<TestCase>* typed_test_info = NULL;
+ for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
+ it != test_case_infos_.end(); ++it) {
+ if ((*it)->GetTestCaseName() == test_case_name) {
+ if ((*it)->GetTestCaseTypeId() != GetTypeId<TestCase>()) {
+ // Complain about incorrect usage of Google Test facilities
+ // and terminate the program since we cannot guarantee correct
+ // test case setup and tear-down in this case.
+ ReportInvalidTestCaseType(test_case_name, file, line);
+ posix::Abort();
+ } else {
+ // At this point we are sure that the object we found is of the same
+ // type we are looking for, so we downcast it to that type
+ // without further checks.
+ typed_test_info = CheckedDowncastToActualType<
+ ParameterizedTestCaseInfo<TestCase> >(*it);
+ }
+ break;
+ }
+ }
+ if (typed_test_info == NULL) {
+ typed_test_info = new ParameterizedTestCaseInfo<TestCase>(test_case_name);
+ test_case_infos_.push_back(typed_test_info);
+ }
+ return typed_test_info;
+ }
+ void RegisterTests() {
+ for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
+ it != test_case_infos_.end(); ++it) {
+ (*it)->RegisterTests();
+ }
+ }
+
+ private:
+ typedef ::std::vector<ParameterizedTestCaseInfoBase*> TestCaseInfoContainer;
+
+ TestCaseInfoContainer test_case_infos_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseRegistry);
+};
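+
+// User-level flow that these internals support (a hedged sketch using the
+// public TEST_P/INSTANTIATE_TEST_CASE_P macros; FooTest is made up):
+//
+//   class FooTest : public ::testing::TestWithParam<int> {};
+//   TEST_P(FooTest, IsNonNegative) { EXPECT_GE(GetParam(), 0); }
+//   INSTANTIATE_TEST_CASE_P(Seq, FooTest, ::testing::Values(1, 2, 3));
+//
+// This registers tests named Seq/FooTest.IsNonNegative/0 through /2.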
+
+} // namespace internal
+} // namespace testing
+
+#endif // GTEST_HAS_PARAM_TEST
+
+#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+// This file was GENERATED by command:
+// pump.py gtest-param-util-generated.h.pump
+// DO NOT EDIT BY HAND!!!
+
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: vladl@google.com (Vlad Losev)
+
+// Type and function utilities for implementing parameterized tests.
+// This file is generated by a SCRIPT. DO NOT EDIT BY HAND!
+//
+// Currently Google Test supports at most 50 arguments in Values,
+// and at most 10 arguments in Combine. Please contact
+// googletestframework@googlegroups.com if you need more.
+// Please note that the number of arguments to Combine is limited
+// by the maximum arity of the implementation of tr1::tuple which is
+// currently set at 10.
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
+
+// scripts/fuse_gtest.py depends on gtest's own header being #included
+// *unconditionally*. Therefore these #includes cannot be moved
+// inside #if GTEST_HAS_PARAM_TEST.
+
+#if GTEST_HAS_PARAM_TEST
+
+namespace testing {
+
+// Forward declarations of ValuesIn(), which is implemented in
+// include/gtest/gtest-param-test.h.
+template <typename ForwardIterator>
+internal::ParamGenerator<
+ typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
+ValuesIn(ForwardIterator begin, ForwardIterator end);
+
+template <typename T, size_t N>
+internal::ParamGenerator<T> ValuesIn(const T (&array)[N]);
+
+template <class Container>
+internal::ParamGenerator<typename Container::value_type> ValuesIn(
+ const Container& container);
+
+namespace internal {
+
+// Used in the Values() function to provide polymorphic capabilities.
+template <typename T1>
+class ValueArray1 {
+ public:
+ explicit ValueArray1(T1 v1) : v1_(v1) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const { return ValuesIn(&v1_, &v1_ + 1); }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray1& other);
+
+ const T1 v1_;
+};
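+
+// ValueArrayN objects are what the public Values() function returns; the
+// templated conversion operator above lets one Values(...) expression serve
+// different parameter types (a hedged note: for ValueArray1 the stored type
+// must already match the target type, since no static_cast is applied).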
+
+template <typename T1, typename T2>
+class ValueArray2 {
+ public:
+ ValueArray2(T1 v1, T2 v2) : v1_(v1), v2_(v2) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray2& other);
+
+ const T1 v1_;
+ const T2 v2_;
+};
+
+template <typename T1, typename T2, typename T3>
+class ValueArray3 {
+ public:
+ ValueArray3(T1 v1, T2 v2, T3 v3) : v1_(v1), v2_(v2), v3_(v3) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray3& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4>
+class ValueArray4 {
+ public:
+ ValueArray4(T1 v1, T2 v2, T3 v3, T4 v4) : v1_(v1), v2_(v2), v3_(v3),
+ v4_(v4) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray4& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5>
+class ValueArray5 {
+ public:
+ ValueArray5(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) : v1_(v1), v2_(v2), v3_(v3),
+ v4_(v4), v5_(v5) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray5& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6>
+class ValueArray6 {
+ public:
+ ValueArray6(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) : v1_(v1), v2_(v2),
+ v3_(v3), v4_(v4), v5_(v5), v6_(v6) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray6& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7>
+class ValueArray7 {
+ public:
+ ValueArray7(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) : v1_(v1),
+ v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray7& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8>
+class ValueArray8 {
+ public:
+ ValueArray8(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
+ T8 v8) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray8& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9>
+class ValueArray9 {
+ public:
+ ValueArray9(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+ T9 v9) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8), v9_(v9) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray9& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10>
+class ValueArray10 {
+ public:
+ ValueArray10(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8), v9_(v9), v10_(v10) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray10& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11>
+class ValueArray11 {
+ public:
+ ValueArray11(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+ v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray11& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12>
+class ValueArray12 {
+ public:
+ ValueArray12(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
+ v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray12& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13>
+class ValueArray13 {
+ public:
+ ValueArray13(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
+ v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
+ v12_(v12), v13_(v13) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray13& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14>
+class ValueArray14 {
+ public:
+ ValueArray14(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) : v1_(v1), v2_(v2), v3_(v3),
+ v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
+ v11_(v11), v12_(v12), v13_(v13), v14_(v14) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray14& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15>
+class ValueArray15 {
+ public:
+ ValueArray15(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) : v1_(v1), v2_(v2),
+ v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
+ v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray15& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16>
+class ValueArray16 {
+ public:
+ ValueArray16(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16) : v1_(v1),
+ v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
+ v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
+ v16_(v16) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray16& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17>
+class ValueArray17 {
+ public:
+ ValueArray17(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+ T17 v17) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
+ v15_(v15), v16_(v16), v17_(v17) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray17& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18>
+class ValueArray18 {
+ public:
+ ValueArray18(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
+ v15_(v15), v16_(v16), v17_(v17), v18_(v18) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray18& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19>
+class ValueArray19 {
+ public:
+ ValueArray19(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+ v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
+ v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray19& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20>
+class ValueArray20 {
+ public:
+ ValueArray20(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
+ v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+ v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+ v19_(v19), v20_(v20) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray20& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21>
+class ValueArray21 {
+ public:
+ ValueArray21(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
+ v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
+ v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
+ v18_(v18), v19_(v19), v20_(v20), v21_(v21) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray21& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22>
+class ValueArray22 {
+ public:
+ ValueArray22(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22) : v1_(v1), v2_(v2), v3_(v3),
+ v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
+ v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
+ v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray22& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23>
+class ValueArray23 {
+ public:
+ ValueArray23(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23) : v1_(v1), v2_(v2),
+ v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
+ v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
+ v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
+ v23_(v23) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray23& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24>
+class ValueArray24 {
+ public:
+ ValueArray24(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24) : v1_(v1),
+ v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
+ v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
+ v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
+ v22_(v22), v23_(v23), v24_(v24) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray24& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25>
+class ValueArray25 {
+ public:
+ ValueArray25(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+ T25 v25) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
+ v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
+ v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray25& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26>
+class ValueArray26 {
+ public:
+ ValueArray26(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
+ v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
+ v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray26& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27>
+class ValueArray27 {
+ public:
+ ValueArray27(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+ v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
+ v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19),
+ v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
+ v26_(v26), v27_(v27) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray27& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28>
+class ValueArray28 {
+ public:
+ ValueArray28(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
+ v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+ v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+ v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+ v25_(v25), v26_(v26), v27_(v27), v28_(v28) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray28& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29>
+class ValueArray29 {
+ public:
+ ValueArray29(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
+ v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
+ v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
+ v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23),
+ v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray29& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30>
+class ValueArray30 {
+ public:
+ ValueArray30(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) : v1_(v1), v2_(v2), v3_(v3),
+ v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
+ v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
+ v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
+ v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
+ v29_(v29), v30_(v30) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray30& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31>
+class ValueArray31 {
+ public:
+ ValueArray31(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) : v1_(v1), v2_(v2),
+ v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
+ v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
+ v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
+ v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
+ v29_(v29), v30_(v30), v31_(v31) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray31& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32>
+class ValueArray32 {
+ public:
+ ValueArray32(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32) : v1_(v1),
+ v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
+ v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
+ v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
+ v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27),
+ v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray32& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33>
+class ValueArray33 {
+ public:
+ ValueArray33(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+ T33 v33) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
+ v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
+ v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
+ v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
+ v33_(v33) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray33& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34>
+class ValueArray34 {
+ public:
+ ValueArray34(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
+ v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
+ v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
+ v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
+ v33_(v33), v34_(v34) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray34& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35>
+class ValueArray35 {
+ public:
+ ValueArray35(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+ v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
+ v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19),
+ v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
+ v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31),
+ v32_(v32), v33_(v33), v34_(v34), v35_(v35) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray35& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36>
+class ValueArray36 {
+ public:
+ ValueArray36(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
+ v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+ v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+ v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+ v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+ v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray36& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37>
+class ValueArray37 {
+ public:
+ ValueArray37(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
+ v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
+ v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
+ v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23),
+ v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29),
+ v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35),
+ v36_(v36), v37_(v37) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray37& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38>
+class ValueArray38 {
+ public:
+ ValueArray38(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38) : v1_(v1), v2_(v2), v3_(v3),
+ v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
+ v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
+ v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
+ v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
+ v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
+ v35_(v35), v36_(v36), v37_(v37), v38_(v38) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray38& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39>
+class ValueArray39 {
+ public:
+ ValueArray39(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39) : v1_(v1), v2_(v2),
+ v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
+ v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
+ v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
+ v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
+ v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
+ v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+ static_cast<T>(v39_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray39& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+ const T39 v39_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40>
+class ValueArray40 {
+ public:
+ ValueArray40(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) : v1_(v1),
+ v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
+ v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
+ v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
+ v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27),
+ v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33),
+ v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39),
+ v40_(v40) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+ static_cast<T>(v39_), static_cast<T>(v40_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray40& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+ const T39 v39_;
+ const T40 v40_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41>
+class ValueArray41 {
+ public:
+ ValueArray41(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
+ T41 v41) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
+ v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
+ v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
+ v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
+ v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
+ v39_(v39), v40_(v40), v41_(v41) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+ static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray41& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+ const T39 v39_;
+ const T40 v40_;
+ const T41 v41_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42>
+class ValueArray42 {
+ public:
+ ValueArray42(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
+ T42 v42) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
+ v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
+ v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
+ v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
+ v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
+ v39_(v39), v40_(v40), v41_(v41), v42_(v42) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+ static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
+ static_cast<T>(v42_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray42& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+ const T39 v39_;
+ const T40 v40_;
+ const T41 v41_;
+ const T42 v42_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43>
+class ValueArray43 {
+ public:
+ ValueArray43(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
+ T42 v42, T43 v43) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6),
+ v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13),
+ v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19),
+ v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25),
+ v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31),
+ v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37),
+ v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+ static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
+ static_cast<T>(v42_), static_cast<T>(v43_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray43& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+ const T39 v39_;
+ const T40 v40_;
+ const T41 v41_;
+ const T42 v42_;
+ const T43 v43_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44>
+class ValueArray44 {
+ public:
+ ValueArray44(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
+ T42 v42, T43 v43, T44 v44) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5),
+ v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12),
+ v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18),
+ v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24),
+ v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30),
+ v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36),
+ v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42),
+ v43_(v43), v44_(v44) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+ static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
+ static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray44& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+ const T39 v39_;
+ const T40 v40_;
+ const T41 v41_;
+ const T42 v42_;
+ const T43 v43_;
+ const T44 v44_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45>
+class ValueArray45 {
+ public:
+ ValueArray45(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
+ T42 v42, T43 v43, T44 v44, T45 v45) : v1_(v1), v2_(v2), v3_(v3), v4_(v4),
+ v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11),
+ v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17),
+ v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23),
+ v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29),
+ v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35),
+ v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41),
+ v42_(v42), v43_(v43), v44_(v44), v45_(v45) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+ static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
+ static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
+ static_cast<T>(v45_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray45& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+ const T39 v39_;
+ const T40 v40_;
+ const T41 v41_;
+ const T42 v42_;
+ const T43 v43_;
+ const T44 v44_;
+ const T45 v45_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46>
+class ValueArray46 {
+ public:
+ ValueArray46(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
+ T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) : v1_(v1), v2_(v2), v3_(v3),
+ v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
+ v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
+ v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
+ v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
+ v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
+ v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40),
+ v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+ static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
+ static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
+ static_cast<T>(v45_), static_cast<T>(v46_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray46& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+ const T39 v39_;
+ const T40 v40_;
+ const T41 v41_;
+ const T42 v42_;
+ const T43 v43_;
+ const T44 v44_;
+ const T45 v45_;
+ const T46 v46_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47>
+class ValueArray47 {
+ public:
+ ValueArray47(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
+ T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) : v1_(v1), v2_(v2),
+ v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10),
+ v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16),
+ v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22),
+ v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28),
+ v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34),
+ v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40),
+ v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46),
+ v47_(v47) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+ static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
+ static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
+ static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray47& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+ const T39 v39_;
+ const T40 v40_;
+ const T41 v41_;
+ const T42 v42_;
+ const T43 v43_;
+ const T44 v44_;
+ const T45 v45_;
+ const T46 v46_;
+ const T47 v47_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47, typename T48>
+class ValueArray48 {
+ public:
+ ValueArray48(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
+ T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48) : v1_(v1),
+ v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9),
+ v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15),
+ v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21),
+ v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27),
+ v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33),
+ v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39),
+ v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45),
+ v46_(v46), v47_(v47), v48_(v48) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+ static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
+ static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
+ static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_),
+ static_cast<T>(v48_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray48& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+ const T39 v39_;
+ const T40 v40_;
+ const T41 v41_;
+ const T42 v42_;
+ const T43 v43_;
+ const T44 v44_;
+ const T45 v45_;
+ const T46 v46_;
+ const T47 v47_;
+ const T48 v48_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47, typename T48, typename T49>
+class ValueArray49 {
+ public:
+ ValueArray49(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
+ T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48,
+ T49 v49) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
+ v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
+ v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
+ v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
+ v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
+ v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44),
+ v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+ static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
+ static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
+ static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_),
+ static_cast<T>(v48_), static_cast<T>(v49_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray49& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+ const T39 v39_;
+ const T40 v40_;
+ const T41 v41_;
+ const T42 v42_;
+ const T43 v43_;
+ const T44 v44_;
+ const T45 v45_;
+ const T46 v46_;
+ const T47 v47_;
+ const T48 v48_;
+ const T49 v49_;
+};
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47, typename T48, typename T49, typename T50>
+class ValueArray50 {
+ public:
+ ValueArray50(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
+ T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, T49 v49,
+ T50 v50) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7),
+ v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14),
+ v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20),
+ v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26),
+ v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32),
+ v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38),
+ v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44),
+ v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49), v50_(v50) {}
+
+ template <typename T>
+ operator ParamGenerator<T>() const {
+ const T array[] = {static_cast<T>(v1_), static_cast<T>(v2_),
+ static_cast<T>(v3_), static_cast<T>(v4_), static_cast<T>(v5_),
+ static_cast<T>(v6_), static_cast<T>(v7_), static_cast<T>(v8_),
+ static_cast<T>(v9_), static_cast<T>(v10_), static_cast<T>(v11_),
+ static_cast<T>(v12_), static_cast<T>(v13_), static_cast<T>(v14_),
+ static_cast<T>(v15_), static_cast<T>(v16_), static_cast<T>(v17_),
+ static_cast<T>(v18_), static_cast<T>(v19_), static_cast<T>(v20_),
+ static_cast<T>(v21_), static_cast<T>(v22_), static_cast<T>(v23_),
+ static_cast<T>(v24_), static_cast<T>(v25_), static_cast<T>(v26_),
+ static_cast<T>(v27_), static_cast<T>(v28_), static_cast<T>(v29_),
+ static_cast<T>(v30_), static_cast<T>(v31_), static_cast<T>(v32_),
+ static_cast<T>(v33_), static_cast<T>(v34_), static_cast<T>(v35_),
+ static_cast<T>(v36_), static_cast<T>(v37_), static_cast<T>(v38_),
+ static_cast<T>(v39_), static_cast<T>(v40_), static_cast<T>(v41_),
+ static_cast<T>(v42_), static_cast<T>(v43_), static_cast<T>(v44_),
+ static_cast<T>(v45_), static_cast<T>(v46_), static_cast<T>(v47_),
+ static_cast<T>(v48_), static_cast<T>(v49_), static_cast<T>(v50_)};
+ return ValuesIn(array);
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const ValueArray50& other);
+
+ const T1 v1_;
+ const T2 v2_;
+ const T3 v3_;
+ const T4 v4_;
+ const T5 v5_;
+ const T6 v6_;
+ const T7 v7_;
+ const T8 v8_;
+ const T9 v9_;
+ const T10 v10_;
+ const T11 v11_;
+ const T12 v12_;
+ const T13 v13_;
+ const T14 v14_;
+ const T15 v15_;
+ const T16 v16_;
+ const T17 v17_;
+ const T18 v18_;
+ const T19 v19_;
+ const T20 v20_;
+ const T21 v21_;
+ const T22 v22_;
+ const T23 v23_;
+ const T24 v24_;
+ const T25 v25_;
+ const T26 v26_;
+ const T27 v27_;
+ const T28 v28_;
+ const T29 v29_;
+ const T30 v30_;
+ const T31 v31_;
+ const T32 v32_;
+ const T33 v33_;
+ const T34 v34_;
+ const T35 v35_;
+ const T36 v36_;
+ const T37 v37_;
+ const T38 v38_;
+ const T39 v39_;
+ const T40 v40_;
+ const T41 v41_;
+ const T42 v42_;
+ const T43 v43_;
+ const T44 v44_;
+ const T45 v45_;
+ const T46 v46_;
+ const T47 v47_;
+ const T48 v48_;
+ const T49 v49_;
+ const T50 v50_;
+};
+
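+// A hedged usage sketch (fixture and instantiation names are
+// hypothetical): gtest's Values(v1, ..., vN) factory returns the
+// matching ValueArrayN, whose templated conversion operator above
+// turns it into a ParamGenerator of the test's parameter type:
+//
+//   class WidthTest : public ::testing::TestWithParam<int> {};
+//   TEST_P(WidthTest, StaysPositive) { EXPECT_GT(GetParam(), 0); }
+//   INSTANTIATE_TEST_CASE_P(Sizes, WidthTest,
+//                           ::testing::Values(1, 2, 3));  // ValueArray3
+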
+# if GTEST_HAS_COMBINE
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Generates values from the Cartesian product of values produced
+// by the argument generators.
+//
+template <typename T1, typename T2>
+class CartesianProductGenerator2
+ : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2> > {
+ public:
+ typedef ::std::tr1::tuple<T1, T2> ParamType;
+
+ CartesianProductGenerator2(const ParamGenerator<T1>& g1,
+ const ParamGenerator<T2>& g2)
+ : g1_(g1), g2_(g2) {}
+ virtual ~CartesianProductGenerator2() {}
+
+ virtual ParamIteratorInterface<ParamType>* Begin() const {
+ return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin());
+ }
+ virtual ParamIteratorInterface<ParamType>* End() const {
+ return new Iterator(this, g1_, g1_.end(), g2_, g2_.end());
+ }
+
+ private:
+ class Iterator : public ParamIteratorInterface<ParamType> {
+ public:
+ Iterator(const ParamGeneratorInterface<ParamType>* base,
+ const ParamGenerator<T1>& g1,
+ const typename ParamGenerator<T1>::iterator& current1,
+ const ParamGenerator<T2>& g2,
+ const typename ParamGenerator<T2>::iterator& current2)
+ : base_(base),
+ begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+ begin2_(g2.begin()), end2_(g2.end()), current2_(current2) {
+ ComputeCurrentValue();
+ }
+ virtual ~Iterator() {}
+
+ virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+ return base_;
+ }
+  // Advance() should never be called on an iterator that is past the
+  // end of the range, so no component iterator can be past the end of
+  // its range here, either.
+ virtual void Advance() {
+ assert(!AtEnd());
+ ++current2_;
+ if (current2_ == end2_) {
+ current2_ = begin2_;
+ ++current1_;
+ }
+ ComputeCurrentValue();
+ }
+ virtual ParamIteratorInterface<ParamType>* Clone() const {
+ return new Iterator(*this);
+ }
+ virtual const ParamType* Current() const { return &current_value_; }
+ virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ const Iterator* typed_other =
+ CheckedDowncastToActualType<const Iterator>(&other);
+ // We must report iterators equal if they both point beyond their
+ // respective ranges. That can happen in a variety of fashions,
+ // so we have to consult AtEnd().
+ return (AtEnd() && typed_other->AtEnd()) ||
+ (
+ current1_ == typed_other->current1_ &&
+ current2_ == typed_other->current2_);
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ : base_(other.base_),
+ begin1_(other.begin1_),
+ end1_(other.end1_),
+ current1_(other.current1_),
+ begin2_(other.begin2_),
+ end2_(other.end2_),
+ current2_(other.current2_) {
+ ComputeCurrentValue();
+ }
+
+ void ComputeCurrentValue() {
+ if (!AtEnd())
+ current_value_ = ParamType(*current1_, *current2_);
+ }
+ bool AtEnd() const {
+      // We must report the iterator as past the end of the range as soon
+      // as either component iterator has reached the end of its range.
+ return
+ current1_ == end1_ ||
+ current2_ == end2_;
+ }
+
+ // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other);
+
+ const ParamGeneratorInterface<ParamType>* const base_;
+ // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+ // current[i]_ is the actual traversing iterator.
+ const typename ParamGenerator<T1>::iterator begin1_;
+ const typename ParamGenerator<T1>::iterator end1_;
+ typename ParamGenerator<T1>::iterator current1_;
+ const typename ParamGenerator<T2>::iterator begin2_;
+ const typename ParamGenerator<T2>::iterator end2_;
+ typename ParamGenerator<T2>::iterator current2_;
+ ParamType current_value_;
+ }; // class CartesianProductGenerator2::Iterator
+
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator2& other);
+
+ const ParamGenerator<T1> g1_;
+ const ParamGenerator<T2> g2_;
+}; // class CartesianProductGenerator2
+
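+// A hedged sketch of how this generator is typically reached (the
+// fixture name is hypothetical): Combine() wraps two ParamGenerators
+// in a CartesianProductHolder2, which converts on demand to a
+// ParamGenerator of tuples backed by the class above:
+//
+//   class PairTest
+//       : public ::testing::TestWithParam< ::std::tr1::tuple<int, bool> > {};
+//   INSTANTIATE_TEST_CASE_P(Grid, PairTest,
+//       ::testing::Combine(::testing::Values(1, 2), ::testing::Bool()));
+//   // Visits (1,false), (1,true), (2,false), (2,true): Advance() bumps
+//   // the last component first and carries into the first.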
+
+template <typename T1, typename T2, typename T3>
+class CartesianProductGenerator3
+ : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3> > {
+ public:
+ typedef ::std::tr1::tuple<T1, T2, T3> ParamType;
+
+ CartesianProductGenerator3(const ParamGenerator<T1>& g1,
+ const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3)
+ : g1_(g1), g2_(g2), g3_(g3) {}
+ virtual ~CartesianProductGenerator3() {}
+
+ virtual ParamIteratorInterface<ParamType>* Begin() const {
+ return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
+ g3_.begin());
+ }
+ virtual ParamIteratorInterface<ParamType>* End() const {
+ return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end());
+ }
+
+ private:
+ class Iterator : public ParamIteratorInterface<ParamType> {
+ public:
+ Iterator(const ParamGeneratorInterface<ParamType>* base,
+ const ParamGenerator<T1>& g1,
+ const typename ParamGenerator<T1>::iterator& current1,
+ const ParamGenerator<T2>& g2,
+ const typename ParamGenerator<T2>::iterator& current2,
+ const ParamGenerator<T3>& g3,
+ const typename ParamGenerator<T3>::iterator& current3)
+ : base_(base),
+ begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+ begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
+ begin3_(g3.begin()), end3_(g3.end()), current3_(current3) {
+ ComputeCurrentValue();
+ }
+ virtual ~Iterator() {}
+
+ virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+ return base_;
+ }
+  // Advance() should never be called on an iterator that is past the
+  // end of the range, so no component iterator can be past the end of
+  // its range here, either.
+ virtual void Advance() {
+ assert(!AtEnd());
+ ++current3_;
+ if (current3_ == end3_) {
+ current3_ = begin3_;
+ ++current2_;
+ }
+ if (current2_ == end2_) {
+ current2_ = begin2_;
+ ++current1_;
+ }
+ ComputeCurrentValue();
+ }
+ virtual ParamIteratorInterface<ParamType>* Clone() const {
+ return new Iterator(*this);
+ }
+ virtual const ParamType* Current() const { return &current_value_; }
+ virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ const Iterator* typed_other =
+ CheckedDowncastToActualType<const Iterator>(&other);
+ // We must report iterators equal if they both point beyond their
+ // respective ranges. That can happen in a variety of fashions,
+ // so we have to consult AtEnd().
+ return (AtEnd() && typed_other->AtEnd()) ||
+ (
+ current1_ == typed_other->current1_ &&
+ current2_ == typed_other->current2_ &&
+ current3_ == typed_other->current3_);
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ : base_(other.base_),
+ begin1_(other.begin1_),
+ end1_(other.end1_),
+ current1_(other.current1_),
+ begin2_(other.begin2_),
+ end2_(other.end2_),
+ current2_(other.current2_),
+ begin3_(other.begin3_),
+ end3_(other.end3_),
+ current3_(other.current3_) {
+ ComputeCurrentValue();
+ }
+
+ void ComputeCurrentValue() {
+ if (!AtEnd())
+ current_value_ = ParamType(*current1_, *current2_, *current3_);
+ }
+ bool AtEnd() const {
+      // We must report the iterator as past the end of the range as soon
+      // as any of the component iterators has reached the end of its range.
+ return
+ current1_ == end1_ ||
+ current2_ == end2_ ||
+ current3_ == end3_;
+ }
+
+ // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other);
+
+ const ParamGeneratorInterface<ParamType>* const base_;
+ // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+ // current[i]_ is the actual traversing iterator.
+ const typename ParamGenerator<T1>::iterator begin1_;
+ const typename ParamGenerator<T1>::iterator end1_;
+ typename ParamGenerator<T1>::iterator current1_;
+ const typename ParamGenerator<T2>::iterator begin2_;
+ const typename ParamGenerator<T2>::iterator end2_;
+ typename ParamGenerator<T2>::iterator current2_;
+ const typename ParamGenerator<T3>::iterator begin3_;
+ const typename ParamGenerator<T3>::iterator end3_;
+ typename ParamGenerator<T3>::iterator current3_;
+ ParamType current_value_;
+ }; // class CartesianProductGenerator3::Iterator
+
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator3& other);
+
+ const ParamGenerator<T1> g1_;
+ const ParamGenerator<T2> g2_;
+ const ParamGenerator<T3> g3_;
+}; // class CartesianProductGenerator3
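+
+// Editor's note (illustration, not part of the upstream header): Advance()
+// above works like an odometer in which the last component varies fastest.
+// A minimal sketch of the traversal order, assuming the public
+// Combine()/Values() entry points that are built on these generators:
+//
+//   // 2 x 2 x 2 = 8 tuples, with the third slot varying fastest:
+//   // (1,'a',true), (1,'a',false), (1,'b',true), (1,'b',false),
+//   // (2,'a',true), (2,'a',false), (2,'b',true), (2,'b',false)
+//   ::testing::Combine(::testing::Values(1, 2),
+//                      ::testing::Values('a', 'b'),
+//                      ::testing::Values(true, false));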
+
+
+template <typename T1, typename T2, typename T3, typename T4>
+class CartesianProductGenerator4
+ : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4> > {
+ public:
+ typedef ::std::tr1::tuple<T1, T2, T3, T4> ParamType;
+
+ CartesianProductGenerator4(const ParamGenerator<T1>& g1,
+ const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
+ const ParamGenerator<T4>& g4)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {}
+ virtual ~CartesianProductGenerator4() {}
+
+ virtual ParamIteratorInterface<ParamType>* Begin() const {
+ return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
+ g3_.begin(), g4_, g4_.begin());
+ }
+ virtual ParamIteratorInterface<ParamType>* End() const {
+ return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
+ g4_, g4_.end());
+ }
+
+ private:
+ class Iterator : public ParamIteratorInterface<ParamType> {
+ public:
+ Iterator(const ParamGeneratorInterface<ParamType>* base,
+ const ParamGenerator<T1>& g1,
+ const typename ParamGenerator<T1>::iterator& current1,
+ const ParamGenerator<T2>& g2,
+ const typename ParamGenerator<T2>::iterator& current2,
+ const ParamGenerator<T3>& g3,
+ const typename ParamGenerator<T3>::iterator& current3,
+ const ParamGenerator<T4>& g4,
+ const typename ParamGenerator<T4>::iterator& current4)
+ : base_(base),
+ begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+ begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
+ begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
+ begin4_(g4.begin()), end4_(g4.end()), current4_(current4) {
+ ComputeCurrentValue();
+ }
+ virtual ~Iterator() {}
+
+ virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+ return base_;
+ }
+ // Advance() should not be called on past-the-end iterators, so none of
+ // the component iterators may be past the end of its range either.
+ virtual void Advance() {
+ assert(!AtEnd());
+ ++current4_;
+ if (current4_ == end4_) {
+ current4_ = begin4_;
+ ++current3_;
+ }
+ if (current3_ == end3_) {
+ current3_ = begin3_;
+ ++current2_;
+ }
+ if (current2_ == end2_) {
+ current2_ = begin2_;
+ ++current1_;
+ }
+ ComputeCurrentValue();
+ }
+ virtual ParamIteratorInterface<ParamType>* Clone() const {
+ return new Iterator(*this);
+ }
+ virtual const ParamType* Current() const { return &current_value_; }
+ virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ const Iterator* typed_other =
+ CheckedDowncastToActualType<const Iterator>(&other);
+ // We must report iterators equal if they both point beyond their
+ // respective ranges. That can happen in a variety of ways,
+ // so we have to consult AtEnd().
+ return (AtEnd() && typed_other->AtEnd()) ||
+ (
+ current1_ == typed_other->current1_ &&
+ current2_ == typed_other->current2_ &&
+ current3_ == typed_other->current3_ &&
+ current4_ == typed_other->current4_);
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ : base_(other.base_),
+ begin1_(other.begin1_),
+ end1_(other.end1_),
+ current1_(other.current1_),
+ begin2_(other.begin2_),
+ end2_(other.end2_),
+ current2_(other.current2_),
+ begin3_(other.begin3_),
+ end3_(other.end3_),
+ current3_(other.current3_),
+ begin4_(other.begin4_),
+ end4_(other.end4_),
+ current4_(other.current4_) {
+ ComputeCurrentValue();
+ }
+
+ void ComputeCurrentValue() {
+ if (!AtEnd())
+ current_value_ = ParamType(*current1_, *current2_, *current3_,
+ *current4_);
+ }
+ bool AtEnd() const {
+ // We must report the iterator as past the end of the range as soon as any
+ // of the component iterators has reached the end of its range.
+ return
+ current1_ == end1_ ||
+ current2_ == end2_ ||
+ current3_ == end3_ ||
+ current4_ == end4_;
+ }
+
+ // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other);
+
+ const ParamGeneratorInterface<ParamType>* const base_;
+ // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+ // current[i]_ is the actual traversing iterator.
+ const typename ParamGenerator<T1>::iterator begin1_;
+ const typename ParamGenerator<T1>::iterator end1_;
+ typename ParamGenerator<T1>::iterator current1_;
+ const typename ParamGenerator<T2>::iterator begin2_;
+ const typename ParamGenerator<T2>::iterator end2_;
+ typename ParamGenerator<T2>::iterator current2_;
+ const typename ParamGenerator<T3>::iterator begin3_;
+ const typename ParamGenerator<T3>::iterator end3_;
+ typename ParamGenerator<T3>::iterator current3_;
+ const typename ParamGenerator<T4>::iterator begin4_;
+ const typename ParamGenerator<T4>::iterator end4_;
+ typename ParamGenerator<T4>::iterator current4_;
+ ParamType current_value_;
+ }; // class CartesianProductGenerator4::Iterator
+
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator4& other);
+
+ const ParamGenerator<T1> g1_;
+ const ParamGenerator<T2> g2_;
+ const ParamGenerator<T3> g3_;
+ const ParamGenerator<T4> g4_;
+}; // class CartesianProductGenerator4
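+
+// Editor's note (illustration, not part of the upstream header): a direct
+// consequence of AtEnd() above is that the whole Cartesian product is empty
+// as soon as any single component range is empty; the iterator returned by
+// Begin() then already reports AtEnd() and compares equal to End(). A
+// minimal sketch, assuming the public Combine()/Values()/ValuesIn() API:
+//
+//   std::vector<int> none;  // deliberately empty
+//   ::testing::Combine(::testing::Values(1, 2, 3),
+//                      ::testing::ValuesIn(none));  // yields no tuples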
+
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5>
+class CartesianProductGenerator5
+ : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5> > {
+ public:
+ typedef ::std::tr1::tuple<T1, T2, T3, T4, T5> ParamType;
+
+ CartesianProductGenerator5(const ParamGenerator<T1>& g1,
+ const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
+ const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {}
+ virtual ~CartesianProductGenerator5() {}
+
+ virtual ParamIteratorInterface<ParamType>* Begin() const {
+ return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
+ g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin());
+ }
+ virtual ParamIteratorInterface<ParamType>* End() const {
+ return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
+ g4_, g4_.end(), g5_, g5_.end());
+ }
+
+ private:
+ class Iterator : public ParamIteratorInterface<ParamType> {
+ public:
+ Iterator(const ParamGeneratorInterface<ParamType>* base,
+ const ParamGenerator<T1>& g1,
+ const typename ParamGenerator<T1>::iterator& current1,
+ const ParamGenerator<T2>& g2,
+ const typename ParamGenerator<T2>::iterator& current2,
+ const ParamGenerator<T3>& g3,
+ const typename ParamGenerator<T3>::iterator& current3,
+ const ParamGenerator<T4>& g4,
+ const typename ParamGenerator<T4>::iterator& current4,
+ const ParamGenerator<T5>& g5,
+ const typename ParamGenerator<T5>::iterator& current5)
+ : base_(base),
+ begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+ begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
+ begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
+ begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
+ begin5_(g5.begin()), end5_(g5.end()), current5_(current5) {
+ ComputeCurrentValue();
+ }
+ virtual ~Iterator() {}
+
+ virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+ return base_;
+ }
+ // Advance() should not be called on past-the-end iterators, so none of
+ // the component iterators may be past the end of its range either.
+ virtual void Advance() {
+ assert(!AtEnd());
+ ++current5_;
+ if (current5_ == end5_) {
+ current5_ = begin5_;
+ ++current4_;
+ }
+ if (current4_ == end4_) {
+ current4_ = begin4_;
+ ++current3_;
+ }
+ if (current3_ == end3_) {
+ current3_ = begin3_;
+ ++current2_;
+ }
+ if (current2_ == end2_) {
+ current2_ = begin2_;
+ ++current1_;
+ }
+ ComputeCurrentValue();
+ }
+ virtual ParamIteratorInterface<ParamType>* Clone() const {
+ return new Iterator(*this);
+ }
+ virtual const ParamType* Current() const { return &current_value_; }
+ virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ const Iterator* typed_other =
+ CheckedDowncastToActualType<const Iterator>(&other);
+ // We must report iterators equal if they both point beyond their
+ // respective ranges. That can happen in a variety of ways,
+ // so we have to consult AtEnd().
+ return (AtEnd() && typed_other->AtEnd()) ||
+ (
+ current1_ == typed_other->current1_ &&
+ current2_ == typed_other->current2_ &&
+ current3_ == typed_other->current3_ &&
+ current4_ == typed_other->current4_ &&
+ current5_ == typed_other->current5_);
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ : base_(other.base_),
+ begin1_(other.begin1_),
+ end1_(other.end1_),
+ current1_(other.current1_),
+ begin2_(other.begin2_),
+ end2_(other.end2_),
+ current2_(other.current2_),
+ begin3_(other.begin3_),
+ end3_(other.end3_),
+ current3_(other.current3_),
+ begin4_(other.begin4_),
+ end4_(other.end4_),
+ current4_(other.current4_),
+ begin5_(other.begin5_),
+ end5_(other.end5_),
+ current5_(other.current5_) {
+ ComputeCurrentValue();
+ }
+
+ void ComputeCurrentValue() {
+ if (!AtEnd())
+ current_value_ = ParamType(*current1_, *current2_, *current3_,
+ *current4_, *current5_);
+ }
+ bool AtEnd() const {
+ // We must report the iterator as past the end of the range as soon as any
+ // of the component iterators has reached the end of its range.
+ return
+ current1_ == end1_ ||
+ current2_ == end2_ ||
+ current3_ == end3_ ||
+ current4_ == end4_ ||
+ current5_ == end5_;
+ }
+
+ // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other);
+
+ const ParamGeneratorInterface<ParamType>* const base_;
+ // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+ // current[i]_ is the actual traversing iterator.
+ const typename ParamGenerator<T1>::iterator begin1_;
+ const typename ParamGenerator<T1>::iterator end1_;
+ typename ParamGenerator<T1>::iterator current1_;
+ const typename ParamGenerator<T2>::iterator begin2_;
+ const typename ParamGenerator<T2>::iterator end2_;
+ typename ParamGenerator<T2>::iterator current2_;
+ const typename ParamGenerator<T3>::iterator begin3_;
+ const typename ParamGenerator<T3>::iterator end3_;
+ typename ParamGenerator<T3>::iterator current3_;
+ const typename ParamGenerator<T4>::iterator begin4_;
+ const typename ParamGenerator<T4>::iterator end4_;
+ typename ParamGenerator<T4>::iterator current4_;
+ const typename ParamGenerator<T5>::iterator begin5_;
+ const typename ParamGenerator<T5>::iterator end5_;
+ typename ParamGenerator<T5>::iterator current5_;
+ ParamType current_value_;
+ }; // class CartesianProductGenerator5::Iterator
+
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator5& other);
+
+ const ParamGenerator<T1> g1_;
+ const ParamGenerator<T2> g2_;
+ const ParamGenerator<T3> g3_;
+ const ParamGenerator<T4> g4_;
+ const ParamGenerator<T5> g5_;
+}; // class CartesianProductGenerator5
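+
+// Editor's note (illustration, not part of the upstream header): Equals()
+// above deliberately treats all past-the-end iterators of one generator as
+// equal, no matter which component range ran out first; that is what lets
+// the usual iteration idiom terminate. A minimal sketch, assuming the
+// ParamGenerator/ParamIterator wrappers declared in gtest-param-util.h:
+//
+//   ParamGenerator<ParamType> gen = ...;  // e.g. obtained via Combine()
+//   for (ParamIterator<ParamType> it = gen.begin(); it != gen.end(); ++it)
+//     Use(*it);  // hypothetical consumer of the current tuple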
+
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6>
+class CartesianProductGenerator6
+ : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5,
+ T6> > {
+ public:
+ typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> ParamType;
+
+ CartesianProductGenerator6(const ParamGenerator<T1>& g1,
+ const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
+ const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
+ const ParamGenerator<T6>& g6)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {}
+ virtual ~CartesianProductGenerator6() {}
+
+ virtual ParamIteratorInterface<ParamType>* Begin() const {
+ return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
+ g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin());
+ }
+ virtual ParamIteratorInterface<ParamType>* End() const {
+ return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
+ g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end());
+ }
+
+ private:
+ class Iterator : public ParamIteratorInterface<ParamType> {
+ public:
+ Iterator(const ParamGeneratorInterface<ParamType>* base,
+ const ParamGenerator<T1>& g1,
+ const typename ParamGenerator<T1>::iterator& current1,
+ const ParamGenerator<T2>& g2,
+ const typename ParamGenerator<T2>::iterator& current2,
+ const ParamGenerator<T3>& g3,
+ const typename ParamGenerator<T3>::iterator& current3,
+ const ParamGenerator<T4>& g4,
+ const typename ParamGenerator<T4>::iterator& current4,
+ const ParamGenerator<T5>& g5,
+ const typename ParamGenerator<T5>::iterator& current5,
+ const ParamGenerator<T6>& g6,
+ const typename ParamGenerator<T6>::iterator& current6)
+ : base_(base),
+ begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+ begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
+ begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
+ begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
+ begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
+ begin6_(g6.begin()), end6_(g6.end()), current6_(current6) {
+ ComputeCurrentValue();
+ }
+ virtual ~Iterator() {}
+
+ virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+ return base_;
+ }
+ // Advance() should not be called on past-the-end iterators, so none of
+ // the component iterators may be past the end of its range either.
+ virtual void Advance() {
+ assert(!AtEnd());
+ ++current6_;
+ if (current6_ == end6_) {
+ current6_ = begin6_;
+ ++current5_;
+ }
+ if (current5_ == end5_) {
+ current5_ = begin5_;
+ ++current4_;
+ }
+ if (current4_ == end4_) {
+ current4_ = begin4_;
+ ++current3_;
+ }
+ if (current3_ == end3_) {
+ current3_ = begin3_;
+ ++current2_;
+ }
+ if (current2_ == end2_) {
+ current2_ = begin2_;
+ ++current1_;
+ }
+ ComputeCurrentValue();
+ }
+ virtual ParamIteratorInterface<ParamType>* Clone() const {
+ return new Iterator(*this);
+ }
+ virtual const ParamType* Current() const { return &current_value_; }
+ virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ const Iterator* typed_other =
+ CheckedDowncastToActualType<const Iterator>(&other);
+ // We must report iterators equal if they both point beyond their
+ // respective ranges. That can happen in a variety of ways,
+ // so we have to consult AtEnd().
+ return (AtEnd() && typed_other->AtEnd()) ||
+ (
+ current1_ == typed_other->current1_ &&
+ current2_ == typed_other->current2_ &&
+ current3_ == typed_other->current3_ &&
+ current4_ == typed_other->current4_ &&
+ current5_ == typed_other->current5_ &&
+ current6_ == typed_other->current6_);
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ : base_(other.base_),
+ begin1_(other.begin1_),
+ end1_(other.end1_),
+ current1_(other.current1_),
+ begin2_(other.begin2_),
+ end2_(other.end2_),
+ current2_(other.current2_),
+ begin3_(other.begin3_),
+ end3_(other.end3_),
+ current3_(other.current3_),
+ begin4_(other.begin4_),
+ end4_(other.end4_),
+ current4_(other.current4_),
+ begin5_(other.begin5_),
+ end5_(other.end5_),
+ current5_(other.current5_),
+ begin6_(other.begin6_),
+ end6_(other.end6_),
+ current6_(other.current6_) {
+ ComputeCurrentValue();
+ }
+
+ void ComputeCurrentValue() {
+ if (!AtEnd())
+ current_value_ = ParamType(*current1_, *current2_, *current3_,
+ *current4_, *current5_, *current6_);
+ }
+ bool AtEnd() const {
+ // We must report the iterator as past the end of the range as soon as any
+ // of the component iterators has reached the end of its range.
+ return
+ current1_ == end1_ ||
+ current2_ == end2_ ||
+ current3_ == end3_ ||
+ current4_ == end4_ ||
+ current5_ == end5_ ||
+ current6_ == end6_;
+ }
+
+ // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other);
+
+ const ParamGeneratorInterface<ParamType>* const base_;
+ // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+ // current[i]_ is the actual traversing iterator.
+ const typename ParamGenerator<T1>::iterator begin1_;
+ const typename ParamGenerator<T1>::iterator end1_;
+ typename ParamGenerator<T1>::iterator current1_;
+ const typename ParamGenerator<T2>::iterator begin2_;
+ const typename ParamGenerator<T2>::iterator end2_;
+ typename ParamGenerator<T2>::iterator current2_;
+ const typename ParamGenerator<T3>::iterator begin3_;
+ const typename ParamGenerator<T3>::iterator end3_;
+ typename ParamGenerator<T3>::iterator current3_;
+ const typename ParamGenerator<T4>::iterator begin4_;
+ const typename ParamGenerator<T4>::iterator end4_;
+ typename ParamGenerator<T4>::iterator current4_;
+ const typename ParamGenerator<T5>::iterator begin5_;
+ const typename ParamGenerator<T5>::iterator end5_;
+ typename ParamGenerator<T5>::iterator current5_;
+ const typename ParamGenerator<T6>::iterator begin6_;
+ const typename ParamGenerator<T6>::iterator end6_;
+ typename ParamGenerator<T6>::iterator current6_;
+ ParamType current_value_;
+ }; // class CartesianProductGenerator6::Iterator
+
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator6& other);
+
+ const ParamGenerator<T1> g1_;
+ const ParamGenerator<T2> g2_;
+ const ParamGenerator<T3> g3_;
+ const ParamGenerator<T4> g4_;
+ const ParamGenerator<T5> g5_;
+ const ParamGenerator<T6> g6_;
+}; // class CartesianProductGenerator6
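+
+// Editor's note (illustration, not part of the upstream header): Clone() is
+// what makes these heap-allocated iterators copyable through the
+// ParamIteratorInterface pointer; the public ParamIterator wrapper declared
+// in gtest-param-util.h takes ownership of what Begin()/End()/Clone()
+// return, so handing out freshly new'ed iterators here does not leak.
+// A minimal sketch under that assumption:
+//
+//   ParamIterator<ParamType> a = gen.begin();  // wrapper takes ownership
+//   ParamIterator<ParamType> b = a;            // deep copy via Clone()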
+
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7>
+class CartesianProductGenerator7
+ : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
+ T7> > {
+ public:
+ typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> ParamType;
+
+ CartesianProductGenerator7(const ParamGenerator<T1>& g1,
+ const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
+ const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
+ const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {}
+ virtual ~CartesianProductGenerator7() {}
+
+ virtual ParamIteratorInterface<ParamType>* Begin() const {
+ return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
+ g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
+ g7_.begin());
+ }
+ virtual ParamIteratorInterface<ParamType>* End() const {
+ return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
+ g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end());
+ }
+
+ private:
+ class Iterator : public ParamIteratorInterface<ParamType> {
+ public:
+ Iterator(const ParamGeneratorInterface<ParamType>* base,
+ const ParamGenerator<T1>& g1,
+ const typename ParamGenerator<T1>::iterator& current1,
+ const ParamGenerator<T2>& g2,
+ const typename ParamGenerator<T2>::iterator& current2,
+ const ParamGenerator<T3>& g3,
+ const typename ParamGenerator<T3>::iterator& current3,
+ const ParamGenerator<T4>& g4,
+ const typename ParamGenerator<T4>::iterator& current4,
+ const ParamGenerator<T5>& g5,
+ const typename ParamGenerator<T5>::iterator& current5,
+ const ParamGenerator<T6>& g6,
+ const typename ParamGenerator<T6>::iterator& current6,
+ const ParamGenerator<T7>& g7,
+ const typename ParamGenerator<T7>::iterator& current7)
+ : base_(base),
+ begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+ begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
+ begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
+ begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
+ begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
+ begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
+ begin7_(g7.begin()), end7_(g7.end()), current7_(current7) {
+ ComputeCurrentValue();
+ }
+ virtual ~Iterator() {}
+
+ virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+ return base_;
+ }
+ // Advance() should not be called on past-the-end iterators, so none of
+ // the component iterators may be past the end of its range either.
+ virtual void Advance() {
+ assert(!AtEnd());
+ ++current7_;
+ if (current7_ == end7_) {
+ current7_ = begin7_;
+ ++current6_;
+ }
+ if (current6_ == end6_) {
+ current6_ = begin6_;
+ ++current5_;
+ }
+ if (current5_ == end5_) {
+ current5_ = begin5_;
+ ++current4_;
+ }
+ if (current4_ == end4_) {
+ current4_ = begin4_;
+ ++current3_;
+ }
+ if (current3_ == end3_) {
+ current3_ = begin3_;
+ ++current2_;
+ }
+ if (current2_ == end2_) {
+ current2_ = begin2_;
+ ++current1_;
+ }
+ ComputeCurrentValue();
+ }
+ virtual ParamIteratorInterface<ParamType>* Clone() const {
+ return new Iterator(*this);
+ }
+ virtual const ParamType* Current() const { return &current_value_; }
+ virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ const Iterator* typed_other =
+ CheckedDowncastToActualType<const Iterator>(&other);
+ // We must report iterators equal if they both point beyond their
+ // respective ranges. That can happen in a variety of ways,
+ // so we have to consult AtEnd().
+ return (AtEnd() && typed_other->AtEnd()) ||
+ (
+ current1_ == typed_other->current1_ &&
+ current2_ == typed_other->current2_ &&
+ current3_ == typed_other->current3_ &&
+ current4_ == typed_other->current4_ &&
+ current5_ == typed_other->current5_ &&
+ current6_ == typed_other->current6_ &&
+ current7_ == typed_other->current7_);
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ : base_(other.base_),
+ begin1_(other.begin1_),
+ end1_(other.end1_),
+ current1_(other.current1_),
+ begin2_(other.begin2_),
+ end2_(other.end2_),
+ current2_(other.current2_),
+ begin3_(other.begin3_),
+ end3_(other.end3_),
+ current3_(other.current3_),
+ begin4_(other.begin4_),
+ end4_(other.end4_),
+ current4_(other.current4_),
+ begin5_(other.begin5_),
+ end5_(other.end5_),
+ current5_(other.current5_),
+ begin6_(other.begin6_),
+ end6_(other.end6_),
+ current6_(other.current6_),
+ begin7_(other.begin7_),
+ end7_(other.end7_),
+ current7_(other.current7_) {
+ ComputeCurrentValue();
+ }
+
+ void ComputeCurrentValue() {
+ if (!AtEnd())
+ current_value_ = ParamType(*current1_, *current2_, *current3_,
+ *current4_, *current5_, *current6_, *current7_);
+ }
+ bool AtEnd() const {
+ // We must report the iterator as past the end of the range as soon as any
+ // of the component iterators has reached the end of its range.
+ return
+ current1_ == end1_ ||
+ current2_ == end2_ ||
+ current3_ == end3_ ||
+ current4_ == end4_ ||
+ current5_ == end5_ ||
+ current6_ == end6_ ||
+ current7_ == end7_;
+ }
+
+ // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other);
+
+ const ParamGeneratorInterface<ParamType>* const base_;
+ // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+ // current[i]_ is the actual traversing iterator.
+ const typename ParamGenerator<T1>::iterator begin1_;
+ const typename ParamGenerator<T1>::iterator end1_;
+ typename ParamGenerator<T1>::iterator current1_;
+ const typename ParamGenerator<T2>::iterator begin2_;
+ const typename ParamGenerator<T2>::iterator end2_;
+ typename ParamGenerator<T2>::iterator current2_;
+ const typename ParamGenerator<T3>::iterator begin3_;
+ const typename ParamGenerator<T3>::iterator end3_;
+ typename ParamGenerator<T3>::iterator current3_;
+ const typename ParamGenerator<T4>::iterator begin4_;
+ const typename ParamGenerator<T4>::iterator end4_;
+ typename ParamGenerator<T4>::iterator current4_;
+ const typename ParamGenerator<T5>::iterator begin5_;
+ const typename ParamGenerator<T5>::iterator end5_;
+ typename ParamGenerator<T5>::iterator current5_;
+ const typename ParamGenerator<T6>::iterator begin6_;
+ const typename ParamGenerator<T6>::iterator end6_;
+ typename ParamGenerator<T6>::iterator current6_;
+ const typename ParamGenerator<T7>::iterator begin7_;
+ const typename ParamGenerator<T7>::iterator end7_;
+ typename ParamGenerator<T7>::iterator current7_;
+ ParamType current_value_;
+ }; // class CartesianProductGenerator7::Iterator
+
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator7& other);
+
+ const ParamGenerator<T1> g1_;
+ const ParamGenerator<T2> g2_;
+ const ParamGenerator<T3> g3_;
+ const ParamGenerator<T4> g4_;
+ const ParamGenerator<T5> g5_;
+ const ParamGenerator<T6> g6_;
+ const ParamGenerator<T7> g7_;
+}; // class CartesianProductGenerator7
+
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8>
+class CartesianProductGenerator8
+ : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
+ T7, T8> > {
+ public:
+ typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> ParamType;
+
+ CartesianProductGenerator8(const ParamGenerator<T1>& g1,
+ const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
+ const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
+ const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7,
+ const ParamGenerator<T8>& g8)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7),
+ g8_(g8) {}
+ virtual ~CartesianProductGenerator8() {}
+
+ virtual ParamIteratorInterface<ParamType>* Begin() const {
+ return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
+ g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
+ g7_.begin(), g8_, g8_.begin());
+ }
+ virtual ParamIteratorInterface<ParamType>* End() const {
+ return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
+ g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_,
+ g8_.end());
+ }
+
+ private:
+ class Iterator : public ParamIteratorInterface<ParamType> {
+ public:
+ Iterator(const ParamGeneratorInterface<ParamType>* base,
+ const ParamGenerator<T1>& g1,
+ const typename ParamGenerator<T1>::iterator& current1,
+ const ParamGenerator<T2>& g2,
+ const typename ParamGenerator<T2>::iterator& current2,
+ const ParamGenerator<T3>& g3,
+ const typename ParamGenerator<T3>::iterator& current3,
+ const ParamGenerator<T4>& g4,
+ const typename ParamGenerator<T4>::iterator& current4,
+ const ParamGenerator<T5>& g5,
+ const typename ParamGenerator<T5>::iterator& current5,
+ const ParamGenerator<T6>& g6,
+ const typename ParamGenerator<T6>::iterator& current6,
+ const ParamGenerator<T7>& g7,
+ const typename ParamGenerator<T7>::iterator& current7,
+ const ParamGenerator<T8>& g8,
+ const typename ParamGenerator<T8>::iterator& current8)
+ : base_(base),
+ begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+ begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
+ begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
+ begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
+ begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
+ begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
+ begin7_(g7.begin()), end7_(g7.end()), current7_(current7),
+ begin8_(g8.begin()), end8_(g8.end()), current8_(current8) {
+ ComputeCurrentValue();
+ }
+ virtual ~Iterator() {}
+
+ virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+ return base_;
+ }
+ // Advance() should not be called on past-the-end iterators, so none of
+ // the component iterators may be past the end of its range either.
+ virtual void Advance() {
+ assert(!AtEnd());
+ ++current8_;
+ if (current8_ == end8_) {
+ current8_ = begin8_;
+ ++current7_;
+ }
+ if (current7_ == end7_) {
+ current7_ = begin7_;
+ ++current6_;
+ }
+ if (current6_ == end6_) {
+ current6_ = begin6_;
+ ++current5_;
+ }
+ if (current5_ == end5_) {
+ current5_ = begin5_;
+ ++current4_;
+ }
+ if (current4_ == end4_) {
+ current4_ = begin4_;
+ ++current3_;
+ }
+ if (current3_ == end3_) {
+ current3_ = begin3_;
+ ++current2_;
+ }
+ if (current2_ == end2_) {
+ current2_ = begin2_;
+ ++current1_;
+ }
+ ComputeCurrentValue();
+ }
+ virtual ParamIteratorInterface<ParamType>* Clone() const {
+ return new Iterator(*this);
+ }
+ virtual const ParamType* Current() const { return &current_value_; }
+ virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ const Iterator* typed_other =
+ CheckedDowncastToActualType<const Iterator>(&other);
+ // We must report iterators equal if they both point beyond their
+ // respective ranges. That can happen in a variety of ways,
+ // so we have to consult AtEnd().
+ return (AtEnd() && typed_other->AtEnd()) ||
+ (
+ current1_ == typed_other->current1_ &&
+ current2_ == typed_other->current2_ &&
+ current3_ == typed_other->current3_ &&
+ current4_ == typed_other->current4_ &&
+ current5_ == typed_other->current5_ &&
+ current6_ == typed_other->current6_ &&
+ current7_ == typed_other->current7_ &&
+ current8_ == typed_other->current8_);
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ : base_(other.base_),
+ begin1_(other.begin1_),
+ end1_(other.end1_),
+ current1_(other.current1_),
+ begin2_(other.begin2_),
+ end2_(other.end2_),
+ current2_(other.current2_),
+ begin3_(other.begin3_),
+ end3_(other.end3_),
+ current3_(other.current3_),
+ begin4_(other.begin4_),
+ end4_(other.end4_),
+ current4_(other.current4_),
+ begin5_(other.begin5_),
+ end5_(other.end5_),
+ current5_(other.current5_),
+ begin6_(other.begin6_),
+ end6_(other.end6_),
+ current6_(other.current6_),
+ begin7_(other.begin7_),
+ end7_(other.end7_),
+ current7_(other.current7_),
+ begin8_(other.begin8_),
+ end8_(other.end8_),
+ current8_(other.current8_) {
+ ComputeCurrentValue();
+ }
+
+ void ComputeCurrentValue() {
+ if (!AtEnd())
+ current_value_ = ParamType(*current1_, *current2_, *current3_,
+ *current4_, *current5_, *current6_, *current7_, *current8_);
+ }
+ bool AtEnd() const {
+ // We must report the iterator as past the end of the range as soon as any
+ // of the component iterators has reached the end of its range.
+ return
+ current1_ == end1_ ||
+ current2_ == end2_ ||
+ current3_ == end3_ ||
+ current4_ == end4_ ||
+ current5_ == end5_ ||
+ current6_ == end6_ ||
+ current7_ == end7_ ||
+ current8_ == end8_;
+ }
+
+ // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other);
+
+ const ParamGeneratorInterface<ParamType>* const base_;
+ // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+ // current[i]_ is the actual traversing iterator.
+ const typename ParamGenerator<T1>::iterator begin1_;
+ const typename ParamGenerator<T1>::iterator end1_;
+ typename ParamGenerator<T1>::iterator current1_;
+ const typename ParamGenerator<T2>::iterator begin2_;
+ const typename ParamGenerator<T2>::iterator end2_;
+ typename ParamGenerator<T2>::iterator current2_;
+ const typename ParamGenerator<T3>::iterator begin3_;
+ const typename ParamGenerator<T3>::iterator end3_;
+ typename ParamGenerator<T3>::iterator current3_;
+ const typename ParamGenerator<T4>::iterator begin4_;
+ const typename ParamGenerator<T4>::iterator end4_;
+ typename ParamGenerator<T4>::iterator current4_;
+ const typename ParamGenerator<T5>::iterator begin5_;
+ const typename ParamGenerator<T5>::iterator end5_;
+ typename ParamGenerator<T5>::iterator current5_;
+ const typename ParamGenerator<T6>::iterator begin6_;
+ const typename ParamGenerator<T6>::iterator end6_;
+ typename ParamGenerator<T6>::iterator current6_;
+ const typename ParamGenerator<T7>::iterator begin7_;
+ const typename ParamGenerator<T7>::iterator end7_;
+ typename ParamGenerator<T7>::iterator current7_;
+ const typename ParamGenerator<T8>::iterator begin8_;
+ const typename ParamGenerator<T8>::iterator end8_;
+ typename ParamGenerator<T8>::iterator current8_;
+ ParamType current_value_;
+ }; // class CartesianProductGenerator8::Iterator
+
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator8& other);
+
+ const ParamGenerator<T1> g1_;
+ const ParamGenerator<T2> g2_;
+ const ParamGenerator<T3> g3_;
+ const ParamGenerator<T4> g4_;
+ const ParamGenerator<T5> g5_;
+ const ParamGenerator<T6> g6_;
+ const ParamGenerator<T7> g7_;
+ const ParamGenerator<T8> g8_;
+}; // class CartesianProductGenerator8
+
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9>
+class CartesianProductGenerator9
+ : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
+ T7, T8, T9> > {
+ public:
+ typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9> ParamType;
+
+ CartesianProductGenerator9(const ParamGenerator<T1>& g1,
+ const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
+ const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
+ const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7,
+ const ParamGenerator<T8>& g8, const ParamGenerator<T9>& g9)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
+ g9_(g9) {}
+ virtual ~CartesianProductGenerator9() {}
+
+ virtual ParamIteratorInterface<ParamType>* Begin() const {
+ return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
+ g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
+ g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin());
+ }
+ virtual ParamIteratorInterface<ParamType>* End() const {
+ return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
+ g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_,
+ g8_.end(), g9_, g9_.end());
+ }
+
+ private:
+ class Iterator : public ParamIteratorInterface<ParamType> {
+ public:
+ Iterator(const ParamGeneratorInterface<ParamType>* base,
+ const ParamGenerator<T1>& g1,
+ const typename ParamGenerator<T1>::iterator& current1,
+ const ParamGenerator<T2>& g2,
+ const typename ParamGenerator<T2>::iterator& current2,
+ const ParamGenerator<T3>& g3,
+ const typename ParamGenerator<T3>::iterator& current3,
+ const ParamGenerator<T4>& g4,
+ const typename ParamGenerator<T4>::iterator& current4,
+ const ParamGenerator<T5>& g5,
+ const typename ParamGenerator<T5>::iterator& current5,
+ const ParamGenerator<T6>& g6,
+ const typename ParamGenerator<T6>::iterator& current6,
+ const ParamGenerator<T7>& g7,
+ const typename ParamGenerator<T7>::iterator& current7,
+ const ParamGenerator<T8>& g8,
+ const typename ParamGenerator<T8>::iterator& current8,
+ const ParamGenerator<T9>& g9,
+ const typename ParamGenerator<T9>::iterator& current9)
+ : base_(base),
+ begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+ begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
+ begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
+ begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
+ begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
+ begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
+ begin7_(g7.begin()), end7_(g7.end()), current7_(current7),
+ begin8_(g8.begin()), end8_(g8.end()), current8_(current8),
+ begin9_(g9.begin()), end9_(g9.end()), current9_(current9) {
+ ComputeCurrentValue();
+ }
+ virtual ~Iterator() {}
+
+ virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+ return base_;
+ }
+ // Advance() should not be called on past-the-end iterators, so none of
+ // the component iterators may be past the end of its range either.
+ virtual void Advance() {
+ assert(!AtEnd());
+ ++current9_;
+ if (current9_ == end9_) {
+ current9_ = begin9_;
+ ++current8_;
+ }
+ if (current8_ == end8_) {
+ current8_ = begin8_;
+ ++current7_;
+ }
+ if (current7_ == end7_) {
+ current7_ = begin7_;
+ ++current6_;
+ }
+ if (current6_ == end6_) {
+ current6_ = begin6_;
+ ++current5_;
+ }
+ if (current5_ == end5_) {
+ current5_ = begin5_;
+ ++current4_;
+ }
+ if (current4_ == end4_) {
+ current4_ = begin4_;
+ ++current3_;
+ }
+ if (current3_ == end3_) {
+ current3_ = begin3_;
+ ++current2_;
+ }
+ if (current2_ == end2_) {
+ current2_ = begin2_;
+ ++current1_;
+ }
+ ComputeCurrentValue();
+ }
+ virtual ParamIteratorInterface<ParamType>* Clone() const {
+ return new Iterator(*this);
+ }
+ virtual const ParamType* Current() const { return &current_value_; }
+ virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ const Iterator* typed_other =
+ CheckedDowncastToActualType<const Iterator>(&other);
+ // We must report iterators equal if they both point beyond their
+ // respective ranges. That can happen in a variety of ways,
+ // so we have to consult AtEnd().
+ return (AtEnd() && typed_other->AtEnd()) ||
+ (
+ current1_ == typed_other->current1_ &&
+ current2_ == typed_other->current2_ &&
+ current3_ == typed_other->current3_ &&
+ current4_ == typed_other->current4_ &&
+ current5_ == typed_other->current5_ &&
+ current6_ == typed_other->current6_ &&
+ current7_ == typed_other->current7_ &&
+ current8_ == typed_other->current8_ &&
+ current9_ == typed_other->current9_);
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ : base_(other.base_),
+ begin1_(other.begin1_),
+ end1_(other.end1_),
+ current1_(other.current1_),
+ begin2_(other.begin2_),
+ end2_(other.end2_),
+ current2_(other.current2_),
+ begin3_(other.begin3_),
+ end3_(other.end3_),
+ current3_(other.current3_),
+ begin4_(other.begin4_),
+ end4_(other.end4_),
+ current4_(other.current4_),
+ begin5_(other.begin5_),
+ end5_(other.end5_),
+ current5_(other.current5_),
+ begin6_(other.begin6_),
+ end6_(other.end6_),
+ current6_(other.current6_),
+ begin7_(other.begin7_),
+ end7_(other.end7_),
+ current7_(other.current7_),
+ begin8_(other.begin8_),
+ end8_(other.end8_),
+ current8_(other.current8_),
+ begin9_(other.begin9_),
+ end9_(other.end9_),
+ current9_(other.current9_) {
+ ComputeCurrentValue();
+ }
+
+ void ComputeCurrentValue() {
+ if (!AtEnd())
+ current_value_ = ParamType(*current1_, *current2_, *current3_,
+ *current4_, *current5_, *current6_, *current7_, *current8_,
+ *current9_);
+ }
+ bool AtEnd() const {
+ // We must report the iterator as past the end of the range as soon as any
+ // of the component iterators has reached the end of its range.
+ return
+ current1_ == end1_ ||
+ current2_ == end2_ ||
+ current3_ == end3_ ||
+ current4_ == end4_ ||
+ current5_ == end5_ ||
+ current6_ == end6_ ||
+ current7_ == end7_ ||
+ current8_ == end8_ ||
+ current9_ == end9_;
+ }
+
+ // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other);
+
+ const ParamGeneratorInterface<ParamType>* const base_;
+ // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+ // current[i]_ is the actual traversing iterator.
+ const typename ParamGenerator<T1>::iterator begin1_;
+ const typename ParamGenerator<T1>::iterator end1_;
+ typename ParamGenerator<T1>::iterator current1_;
+ const typename ParamGenerator<T2>::iterator begin2_;
+ const typename ParamGenerator<T2>::iterator end2_;
+ typename ParamGenerator<T2>::iterator current2_;
+ const typename ParamGenerator<T3>::iterator begin3_;
+ const typename ParamGenerator<T3>::iterator end3_;
+ typename ParamGenerator<T3>::iterator current3_;
+ const typename ParamGenerator<T4>::iterator begin4_;
+ const typename ParamGenerator<T4>::iterator end4_;
+ typename ParamGenerator<T4>::iterator current4_;
+ const typename ParamGenerator<T5>::iterator begin5_;
+ const typename ParamGenerator<T5>::iterator end5_;
+ typename ParamGenerator<T5>::iterator current5_;
+ const typename ParamGenerator<T6>::iterator begin6_;
+ const typename ParamGenerator<T6>::iterator end6_;
+ typename ParamGenerator<T6>::iterator current6_;
+ const typename ParamGenerator<T7>::iterator begin7_;
+ const typename ParamGenerator<T7>::iterator end7_;
+ typename ParamGenerator<T7>::iterator current7_;
+ const typename ParamGenerator<T8>::iterator begin8_;
+ const typename ParamGenerator<T8>::iterator end8_;
+ typename ParamGenerator<T8>::iterator current8_;
+ const typename ParamGenerator<T9>::iterator begin9_;
+ const typename ParamGenerator<T9>::iterator end9_;
+ typename ParamGenerator<T9>::iterator current9_;
+ ParamType current_value_;
+ }; // class CartesianProductGenerator9::Iterator
+
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator9& other);
+
+ const ParamGenerator<T1> g1_;
+ const ParamGenerator<T2> g2_;
+ const ParamGenerator<T3> g3_;
+ const ParamGenerator<T4> g4_;
+ const ParamGenerator<T5> g5_;
+ const ParamGenerator<T6> g6_;
+ const ParamGenerator<T7> g7_;
+ const ParamGenerator<T8> g8_;
+ const ParamGenerator<T9> g9_;
+}; // class CartesianProductGenerator9
+
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10>
+class CartesianProductGenerator10
+ : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
+ T7, T8, T9, T10> > {
+ public:
+ typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> ParamType;
+
+ CartesianProductGenerator10(const ParamGenerator<T1>& g1,
+ const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
+ const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
+ const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7,
+ const ParamGenerator<T8>& g8, const ParamGenerator<T9>& g9,
+ const ParamGenerator<T10>& g10)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
+ g9_(g9), g10_(g10) {}
+ virtual ~CartesianProductGenerator10() {}
+
+ virtual ParamIteratorInterface<ParamType>* Begin() const {
+ return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
+ g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
+ g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin(), g10_, g10_.begin());
+ }
+ virtual ParamIteratorInterface<ParamType>* End() const {
+ return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
+ g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_,
+ g8_.end(), g9_, g9_.end(), g10_, g10_.end());
+ }
+
+ private:
+ class Iterator : public ParamIteratorInterface<ParamType> {
+ public:
+ Iterator(const ParamGeneratorInterface<ParamType>* base,
+ const ParamGenerator<T1>& g1,
+ const typename ParamGenerator<T1>::iterator& current1,
+ const ParamGenerator<T2>& g2,
+ const typename ParamGenerator<T2>::iterator& current2,
+ const ParamGenerator<T3>& g3,
+ const typename ParamGenerator<T3>::iterator& current3,
+ const ParamGenerator<T4>& g4,
+ const typename ParamGenerator<T4>::iterator& current4,
+ const ParamGenerator<T5>& g5,
+ const typename ParamGenerator<T5>::iterator& current5,
+ const ParamGenerator<T6>& g6,
+ const typename ParamGenerator<T6>::iterator& current6,
+ const ParamGenerator<T7>& g7,
+ const typename ParamGenerator<T7>::iterator& current7,
+ const ParamGenerator<T8>& g8,
+ const typename ParamGenerator<T8>::iterator& current8,
+ const ParamGenerator<T9>& g9,
+ const typename ParamGenerator<T9>::iterator& current9,
+ const ParamGenerator<T10>& g10,
+ const typename ParamGenerator<T10>::iterator& current10)
+ : base_(base),
+ begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+ begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
+ begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
+ begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
+ begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
+ begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
+ begin7_(g7.begin()), end7_(g7.end()), current7_(current7),
+ begin8_(g8.begin()), end8_(g8.end()), current8_(current8),
+ begin9_(g9.begin()), end9_(g9.end()), current9_(current9),
+ begin10_(g10.begin()), end10_(g10.end()), current10_(current10) {
+ ComputeCurrentValue();
+ }
+ virtual ~Iterator() {}
+
+ virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+ return base_;
+ }
+ // Advance() should not be called on past-the-end iterators, so none of
+ // the component iterators may be past the end of its range either.
+ virtual void Advance() {
+ assert(!AtEnd());
+ ++current10_;
+ if (current10_ == end10_) {
+ current10_ = begin10_;
+ ++current9_;
+ }
+ if (current9_ == end9_) {
+ current9_ = begin9_;
+ ++current8_;
+ }
+ if (current8_ == end8_) {
+ current8_ = begin8_;
+ ++current7_;
+ }
+ if (current7_ == end7_) {
+ current7_ = begin7_;
+ ++current6_;
+ }
+ if (current6_ == end6_) {
+ current6_ = begin6_;
+ ++current5_;
+ }
+ if (current5_ == end5_) {
+ current5_ = begin5_;
+ ++current4_;
+ }
+ if (current4_ == end4_) {
+ current4_ = begin4_;
+ ++current3_;
+ }
+ if (current3_ == end3_) {
+ current3_ = begin3_;
+ ++current2_;
+ }
+ if (current2_ == end2_) {
+ current2_ = begin2_;
+ ++current1_;
+ }
+ ComputeCurrentValue();
+ }
+ virtual ParamIteratorInterface<ParamType>* Clone() const {
+ return new Iterator(*this);
+ }
+ virtual const ParamType* Current() const { return &current_value_; }
+ virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+ // Having the same base generator guarantees that the other
+ // iterator is of the same type and we can downcast.
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+ << "The program attempted to compare iterators "
+ << "from different generators." << std::endl;
+ const Iterator* typed_other =
+ CheckedDowncastToActualType<const Iterator>(&other);
+ // We must report iterators equal if they both point beyond their
+ // respective ranges. That can happen in a variety of ways,
+ // so we have to consult AtEnd().
+ return (AtEnd() && typed_other->AtEnd()) ||
+ (
+ current1_ == typed_other->current1_ &&
+ current2_ == typed_other->current2_ &&
+ current3_ == typed_other->current3_ &&
+ current4_ == typed_other->current4_ &&
+ current5_ == typed_other->current5_ &&
+ current6_ == typed_other->current6_ &&
+ current7_ == typed_other->current7_ &&
+ current8_ == typed_other->current8_ &&
+ current9_ == typed_other->current9_ &&
+ current10_ == typed_other->current10_);
+ }
+
+ private:
+ Iterator(const Iterator& other)
+ : base_(other.base_),
+ begin1_(other.begin1_),
+ end1_(other.end1_),
+ current1_(other.current1_),
+ begin2_(other.begin2_),
+ end2_(other.end2_),
+ current2_(other.current2_),
+ begin3_(other.begin3_),
+ end3_(other.end3_),
+ current3_(other.current3_),
+ begin4_(other.begin4_),
+ end4_(other.end4_),
+ current4_(other.current4_),
+ begin5_(other.begin5_),
+ end5_(other.end5_),
+ current5_(other.current5_),
+ begin6_(other.begin6_),
+ end6_(other.end6_),
+ current6_(other.current6_),
+ begin7_(other.begin7_),
+ end7_(other.end7_),
+ current7_(other.current7_),
+ begin8_(other.begin8_),
+ end8_(other.end8_),
+ current8_(other.current8_),
+ begin9_(other.begin9_),
+ end9_(other.end9_),
+ current9_(other.current9_),
+ begin10_(other.begin10_),
+ end10_(other.end10_),
+ current10_(other.current10_) {
+ ComputeCurrentValue();
+ }
+
+ void ComputeCurrentValue() {
+ if (!AtEnd())
+ current_value_ = ParamType(*current1_, *current2_, *current3_,
+ *current4_, *current5_, *current6_, *current7_, *current8_,
+ *current9_, *current10_);
+ }
+ bool AtEnd() const {
+ // We must report the iterator as past the end of the range as soon as any
+ // of the component iterators has reached the end of its range.
+ return
+ current1_ == end1_ ||
+ current2_ == end2_ ||
+ current3_ == end3_ ||
+ current4_ == end4_ ||
+ current5_ == end5_ ||
+ current6_ == end6_ ||
+ current7_ == end7_ ||
+ current8_ == end8_ ||
+ current9_ == end9_ ||
+ current10_ == end10_;
+ }
+
+ // No implementation - assignment is unsupported.
+ void operator=(const Iterator& other);
+
+ const ParamGeneratorInterface<ParamType>* const base_;
+ // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+ // current[i]_ is the actual traversing iterator.
+ const typename ParamGenerator<T1>::iterator begin1_;
+ const typename ParamGenerator<T1>::iterator end1_;
+ typename ParamGenerator<T1>::iterator current1_;
+ const typename ParamGenerator<T2>::iterator begin2_;
+ const typename ParamGenerator<T2>::iterator end2_;
+ typename ParamGenerator<T2>::iterator current2_;
+ const typename ParamGenerator<T3>::iterator begin3_;
+ const typename ParamGenerator<T3>::iterator end3_;
+ typename ParamGenerator<T3>::iterator current3_;
+ const typename ParamGenerator<T4>::iterator begin4_;
+ const typename ParamGenerator<T4>::iterator end4_;
+ typename ParamGenerator<T4>::iterator current4_;
+ const typename ParamGenerator<T5>::iterator begin5_;
+ const typename ParamGenerator<T5>::iterator end5_;
+ typename ParamGenerator<T5>::iterator current5_;
+ const typename ParamGenerator<T6>::iterator begin6_;
+ const typename ParamGenerator<T6>::iterator end6_;
+ typename ParamGenerator<T6>::iterator current6_;
+ const typename ParamGenerator<T7>::iterator begin7_;
+ const typename ParamGenerator<T7>::iterator end7_;
+ typename ParamGenerator<T7>::iterator current7_;
+ const typename ParamGenerator<T8>::iterator begin8_;
+ const typename ParamGenerator<T8>::iterator end8_;
+ typename ParamGenerator<T8>::iterator current8_;
+ const typename ParamGenerator<T9>::iterator begin9_;
+ const typename ParamGenerator<T9>::iterator end9_;
+ typename ParamGenerator<T9>::iterator current9_;
+ const typename ParamGenerator<T10>::iterator begin10_;
+ const typename ParamGenerator<T10>::iterator end10_;
+ typename ParamGenerator<T10>::iterator current10_;
+ ParamType current_value_;
+ }; // class CartesianProductGenerator10::Iterator
+
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductGenerator10& other);
+
+ const ParamGenerator<T1> g1_;
+ const ParamGenerator<T2> g2_;
+ const ParamGenerator<T3> g3_;
+ const ParamGenerator<T4> g4_;
+ const ParamGenerator<T5> g5_;
+ const ParamGenerator<T6> g6_;
+ const ParamGenerator<T7> g7_;
+ const ParamGenerator<T8> g8_;
+ const ParamGenerator<T9> g9_;
+ const ParamGenerator<T10> g10_;
+}; // class CartesianProductGenerator10
+
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Helper classes providing Combine() with polymorphic features. They allow
+// casting CartesianProductGeneratorN<T> to ParamGenerator<U> if T is
+// convertible to U.
+//
+template <class Generator1, class Generator2>
+class CartesianProductHolder2 {
+ public:
+ CartesianProductHolder2(const Generator1& g1, const Generator2& g2)
+ : g1_(g1), g2_(g2) {}
+ template <typename T1, typename T2>
+ operator ParamGenerator< ::std::tr1::tuple<T1, T2> >() const {
+ return ParamGenerator< ::std::tr1::tuple<T1, T2> >(
+ new CartesianProductGenerator2<T1, T2>(
+ static_cast<ParamGenerator<T1> >(g1_),
+ static_cast<ParamGenerator<T2> >(g2_)));
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder2& other);
+
+ const Generator1 g1_;
+ const Generator2 g2_;
+}; // class CartesianProductHolder2
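+
+// Editor's note (usage sketch, not part of the upstream header): in the
+// public API, Combine() returns one of these holders, and binding it to a
+// test's parameter type triggers the conversion operator above. A minimal
+// example, assuming the usual TR1-era Google Test entry points:
+//
+//   class MyTest : public ::testing::TestWithParam<
+//       ::std::tr1::tuple<int, bool> > {};
+//   TEST_P(MyTest, Works) {
+//     const int i = ::std::tr1::get<0>(GetParam());
+//     const bool b = ::std::tr1::get<1>(GetParam());
+//     // ... assertions using i and b ...
+//   }
+//   INSTANTIATE_TEST_CASE_P(Pairs, MyTest,
+//                           ::testing::Combine(::testing::Values(1, 2, 3),
+//                                              ::testing::Bool()));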
+
+template <class Generator1, class Generator2, class Generator3>
+class CartesianProductHolder3 {
+ public:
+ CartesianProductHolder3(const Generator1& g1, const Generator2& g2,
+ const Generator3& g3)
+ : g1_(g1), g2_(g2), g3_(g3) {}
+ template <typename T1, typename T2, typename T3>
+ operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3> >() const {
+ return ParamGenerator< ::std::tr1::tuple<T1, T2, T3> >(
+ new CartesianProductGenerator3<T1, T2, T3>(
+ static_cast<ParamGenerator<T1> >(g1_),
+ static_cast<ParamGenerator<T2> >(g2_),
+ static_cast<ParamGenerator<T3> >(g3_)));
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder3& other);
+
+ const Generator1 g1_;
+ const Generator2 g2_;
+ const Generator3 g3_;
+}; // class CartesianProductHolder3
+
+template <class Generator1, class Generator2, class Generator3,
+ class Generator4>
+class CartesianProductHolder4 {
+ public:
+ CartesianProductHolder4(const Generator1& g1, const Generator2& g2,
+ const Generator3& g3, const Generator4& g4)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {}
+ template <typename T1, typename T2, typename T3, typename T4>
+ operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4> >() const {
+ return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4> >(
+ new CartesianProductGenerator4<T1, T2, T3, T4>(
+ static_cast<ParamGenerator<T1> >(g1_),
+ static_cast<ParamGenerator<T2> >(g2_),
+ static_cast<ParamGenerator<T3> >(g3_),
+ static_cast<ParamGenerator<T4> >(g4_)));
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder4& other);
+
+ const Generator1 g1_;
+ const Generator2 g2_;
+ const Generator3 g3_;
+ const Generator4 g4_;
+}; // class CartesianProductHolder4
+
+template <class Generator1, class Generator2, class Generator3,
+ class Generator4, class Generator5>
+class CartesianProductHolder5 {
+ public:
+ CartesianProductHolder5(const Generator1& g1, const Generator2& g2,
+ const Generator3& g3, const Generator4& g4, const Generator5& g5)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {}
+ template <typename T1, typename T2, typename T3, typename T4, typename T5>
+ operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5> >() const {
+ return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5> >(
+ new CartesianProductGenerator5<T1, T2, T3, T4, T5>(
+ static_cast<ParamGenerator<T1> >(g1_),
+ static_cast<ParamGenerator<T2> >(g2_),
+ static_cast<ParamGenerator<T3> >(g3_),
+ static_cast<ParamGenerator<T4> >(g4_),
+ static_cast<ParamGenerator<T5> >(g5_)));
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder5& other);
+
+ const Generator1 g1_;
+ const Generator2 g2_;
+ const Generator3 g3_;
+ const Generator4 g4_;
+ const Generator5 g5_;
+}; // class CartesianProductHolder5
+
+template <class Generator1, class Generator2, class Generator3,
+ class Generator4, class Generator5, class Generator6>
+class CartesianProductHolder6 {
+ public:
+ CartesianProductHolder6(const Generator1& g1, const Generator2& g2,
+ const Generator3& g3, const Generator4& g4, const Generator5& g5,
+ const Generator6& g6)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {}
+ template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6>
+ operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> >() const {
+ return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> >(
+ new CartesianProductGenerator6<T1, T2, T3, T4, T5, T6>(
+ static_cast<ParamGenerator<T1> >(g1_),
+ static_cast<ParamGenerator<T2> >(g2_),
+ static_cast<ParamGenerator<T3> >(g3_),
+ static_cast<ParamGenerator<T4> >(g4_),
+ static_cast<ParamGenerator<T5> >(g5_),
+ static_cast<ParamGenerator<T6> >(g6_)));
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder6& other);
+
+ const Generator1 g1_;
+ const Generator2 g2_;
+ const Generator3 g3_;
+ const Generator4 g4_;
+ const Generator5 g5_;
+ const Generator6 g6_;
+}; // class CartesianProductHolder6
+
+template <class Generator1, class Generator2, class Generator3,
+ class Generator4, class Generator5, class Generator6, class Generator7>
+class CartesianProductHolder7 {
+ public:
+ CartesianProductHolder7(const Generator1& g1, const Generator2& g2,
+ const Generator3& g3, const Generator4& g4, const Generator5& g5,
+ const Generator6& g6, const Generator7& g7)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {}
+ template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7>
+ operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
+ T7> >() const {
+ return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> >(
+ new CartesianProductGenerator7<T1, T2, T3, T4, T5, T6, T7>(
+ static_cast<ParamGenerator<T1> >(g1_),
+ static_cast<ParamGenerator<T2> >(g2_),
+ static_cast<ParamGenerator<T3> >(g3_),
+ static_cast<ParamGenerator<T4> >(g4_),
+ static_cast<ParamGenerator<T5> >(g5_),
+ static_cast<ParamGenerator<T6> >(g6_),
+ static_cast<ParamGenerator<T7> >(g7_)));
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder7& other);
+
+ const Generator1 g1_;
+ const Generator2 g2_;
+ const Generator3 g3_;
+ const Generator4 g4_;
+ const Generator5 g5_;
+ const Generator6 g6_;
+ const Generator7 g7_;
+}; // class CartesianProductHolder7
+
+template <class Generator1, class Generator2, class Generator3,
+ class Generator4, class Generator5, class Generator6, class Generator7,
+ class Generator8>
+class CartesianProductHolder8 {
+ public:
+  CartesianProductHolder8(const Generator1& g1, const Generator2& g2,
+ const Generator3& g3, const Generator4& g4, const Generator5& g5,
+ const Generator6& g6, const Generator7& g7, const Generator8& g8)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7),
+ g8_(g8) {}
+ template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8>
+ operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7,
+ T8> >() const {
+ return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> >(
+ new CartesianProductGenerator8<T1, T2, T3, T4, T5, T6, T7, T8>(
+ static_cast<ParamGenerator<T1> >(g1_),
+ static_cast<ParamGenerator<T2> >(g2_),
+ static_cast<ParamGenerator<T3> >(g3_),
+ static_cast<ParamGenerator<T4> >(g4_),
+ static_cast<ParamGenerator<T5> >(g5_),
+ static_cast<ParamGenerator<T6> >(g6_),
+ static_cast<ParamGenerator<T7> >(g7_),
+ static_cast<ParamGenerator<T8> >(g8_)));
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder8& other);
+
+ const Generator1 g1_;
+ const Generator2 g2_;
+ const Generator3 g3_;
+ const Generator4 g4_;
+ const Generator5 g5_;
+ const Generator6 g6_;
+ const Generator7 g7_;
+ const Generator8 g8_;
+}; // class CartesianProductHolder8
+
+template <class Generator1, class Generator2, class Generator3,
+ class Generator4, class Generator5, class Generator6, class Generator7,
+ class Generator8, class Generator9>
+class CartesianProductHolder9 {
+ public:
+  CartesianProductHolder9(const Generator1& g1, const Generator2& g2,
+ const Generator3& g3, const Generator4& g4, const Generator5& g5,
+ const Generator6& g6, const Generator7& g7, const Generator8& g8,
+ const Generator9& g9)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
+ g9_(g9) {}
+ template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9>
+ operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
+ T9> >() const {
+ return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
+ T9> >(
+ new CartesianProductGenerator9<T1, T2, T3, T4, T5, T6, T7, T8, T9>(
+ static_cast<ParamGenerator<T1> >(g1_),
+ static_cast<ParamGenerator<T2> >(g2_),
+ static_cast<ParamGenerator<T3> >(g3_),
+ static_cast<ParamGenerator<T4> >(g4_),
+ static_cast<ParamGenerator<T5> >(g5_),
+ static_cast<ParamGenerator<T6> >(g6_),
+ static_cast<ParamGenerator<T7> >(g7_),
+ static_cast<ParamGenerator<T8> >(g8_),
+ static_cast<ParamGenerator<T9> >(g9_)));
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder9& other);
+
+ const Generator1 g1_;
+ const Generator2 g2_;
+ const Generator3 g3_;
+ const Generator4 g4_;
+ const Generator5 g5_;
+ const Generator6 g6_;
+ const Generator7 g7_;
+ const Generator8 g8_;
+ const Generator9 g9_;
+}; // class CartesianProductHolder9
+
+template <class Generator1, class Generator2, class Generator3,
+ class Generator4, class Generator5, class Generator6, class Generator7,
+ class Generator8, class Generator9, class Generator10>
+class CartesianProductHolder10 {
+ public:
+  CartesianProductHolder10(const Generator1& g1, const Generator2& g2,
+ const Generator3& g3, const Generator4& g4, const Generator5& g5,
+ const Generator6& g6, const Generator7& g7, const Generator8& g8,
+ const Generator9& g9, const Generator10& g10)
+ : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
+ g9_(g9), g10_(g10) {}
+ template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10>
+ operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
+ T9, T10> >() const {
+ return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
+ T9, T10> >(
+ new CartesianProductGenerator10<T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ T10>(
+ static_cast<ParamGenerator<T1> >(g1_),
+ static_cast<ParamGenerator<T2> >(g2_),
+ static_cast<ParamGenerator<T3> >(g3_),
+ static_cast<ParamGenerator<T4> >(g4_),
+ static_cast<ParamGenerator<T5> >(g5_),
+ static_cast<ParamGenerator<T6> >(g6_),
+ static_cast<ParamGenerator<T7> >(g7_),
+ static_cast<ParamGenerator<T8> >(g8_),
+ static_cast<ParamGenerator<T9> >(g9_),
+ static_cast<ParamGenerator<T10> >(g10_)));
+ }
+
+ private:
+ // No implementation - assignment is unsupported.
+ void operator=(const CartesianProductHolder10& other);
+
+ const Generator1 g1_;
+ const Generator2 g2_;
+ const Generator3 g3_;
+ const Generator4 g4_;
+ const Generator5 g5_;
+ const Generator6 g6_;
+ const Generator7 g7_;
+ const Generator8 g8_;
+ const Generator9 g9_;
+ const Generator10 g10_;
+}; // class CartesianProductHolder10
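+
+// The CartesianProductHolderN classes above back the Combine() facility,
+// which generates tuples drawn from the cross product of up to ten
+// generators. A minimal usage sketch follows; the fixture CombineTest,
+// derived from TestWithParam< ::std::tr1::tuple<int, char> >, is an
+// assumption for illustration and is not defined in this file:
+//
+//   INSTANTIATE_TEST_CASE_P(IntCharPairs, CombineTest,
+//                           Combine(Values(1, 2), Values('a', 'b')));
+//
+// This would instantiate each TEST_P in CombineTest four times, with the
+// tuples (1, 'a'), (1, 'b'), (2, 'a'), and (2, 'b').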
+
+# endif // GTEST_HAS_COMBINE
+
+} // namespace internal
+} // namespace testing
+
+#endif // GTEST_HAS_PARAM_TEST
+
+#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
+
+#if GTEST_HAS_PARAM_TEST
+
+namespace testing {
+
+// Functions producing parameter generators.
+//
+// Google Test uses these generators to produce parameters for value-
+// parameterized tests. When a parameterized test case is instantiated
+// with a particular generator, Google Test creates and runs tests
+// for each element in the sequence produced by the generator.
+//
+// In the following sample, tests from test case FooTest are each
+// instantiated three times with parameter values 3, 5, and 8:
+//
+// class FooTest : public TestWithParam<int> { ... };
+//
+// TEST_P(FooTest, TestThis) {
+// }
+// TEST_P(FooTest, TestThat) {
+// }
+// INSTANTIATE_TEST_CASE_P(TestSequence, FooTest, Values(3, 5, 8));
+//
+
+// Range() returns generators providing sequences of values in a range.
+//
+// Synopsis:
+// Range(start, end)
+// - returns a generator producing a sequence of values {start, start+1,
+// start+2, ...}.
+// Range(start, end, step)
+// - returns a generator producing a sequence of values {start, start+step,
+// start+step+step, ...}.
+// Notes:
+// * The generated sequences never include end. For example, Range(1, 5)
+// returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2)
+// returns a generator producing {1, 3, 5, 7}.
+// * start and end must have the same type. That type may be any integral or
+// floating-point type or a user-defined type satisfying these conditions:
+// * It must be assignable (have operator=() defined).
+// * It must have operator+() (operator+(int-compatible type) for
+// the two-operand version).
+// * It must have operator<() defined.
+// Elements in the resulting sequences will also have that type.
+// * The condition start < end must be satisfied for the resulting sequences
+// to contain any elements.
+//
+template <typename T, typename IncrementT>
+internal::ParamGenerator<T> Range(T start, T end, IncrementT step) {
+ return internal::ParamGenerator<T>(
+ new internal::RangeGenerator<T, IncrementT>(start, end, step));
+}
+
+template <typename T>
+internal::ParamGenerator<T> Range(T start, T end) {
+ return Range(start, end, 1);
+}
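+
+// A brief usage sketch for Range(); the fixture RangeTest, derived from
+// TestWithParam<int>, is an assumption for illustration and is not defined
+// in this file. The line below instantiates every TEST_P in RangeTest with
+// the parameter values 0, 2, 4, 6, and 8 (the end value 10 is excluded):
+//
+//   INSTANTIATE_TEST_CASE_P(EvenNumbers, RangeTest, Range(0, 10, 2));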
+
+// The ValuesIn() function allows generating tests with parameters coming from
+// a container.
+//
+// Synopsis:
+// ValuesIn(const T (&array)[N])
+// - returns a generator producing sequences with elements from
+// a C-style array.
+// ValuesIn(const Container& container)
+// - returns a generator producing sequences with elements from
+// an STL-style container.
+// ValuesIn(Iterator begin, Iterator end)
+// - returns a generator producing sequences with elements from
+// a range [begin, end) defined by a pair of STL-style iterators. These
+// iterators can also be plain C pointers.
+//
+// Please note that ValuesIn() copies the values from the container
+// passed in and keeps them in order to generate tests in RUN_ALL_TESTS().
+//
+// Examples:
+//
+// This instantiates tests from test case StringTest
+// each with C-string values of "foo", "bar", and "baz":
+//
+// const char* strings[] = {"foo", "bar", "baz"};
+// INSTANTIATE_TEST_CASE_P(StringSequence, StringTest, ValuesIn(strings));
+//
+// This instantiates tests from test case StlStringTest
+// each with STL strings with values "a" and "b":
+//
+// ::std::vector< ::std::string> GetParameterStrings() {
+// ::std::vector< ::std::string> v;
+// v.push_back("a");
+// v.push_back("b");
+// return v;
+// }
+//
+// INSTANTIATE_TEST_CASE_P(CharSequence,
+// StlStringTest,
+// ValuesIn(GetParameterStrings()));
+//
+//
+// This will also instantiate tests from CharTest
+// each with parameter values 'a' and 'b':
+//
+// ::std::list<char> GetParameterChars() {
+// ::std::list<char> list;
+// list.push_back('a');
+// list.push_back('b');
+// return list;
+// }
+// ::std::list<char> l = GetParameterChars();
+// INSTANTIATE_TEST_CASE_P(CharSequence2,
+// CharTest,
+// ValuesIn(l.begin(), l.end()));
+//
+template <typename ForwardIterator>
+internal::ParamGenerator<
+ typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
+ValuesIn(ForwardIterator begin, ForwardIterator end) {
+ typedef typename ::testing::internal::IteratorTraits<ForwardIterator>
+ ::value_type ParamType;
+ return internal::ParamGenerator<ParamType>(
+ new internal::ValuesInIteratorRangeGenerator<ParamType>(begin, end));
+}
+
+template <typename T, size_t N>
+internal::ParamGenerator<T> ValuesIn(const T (&array)[N]) {
+ return ValuesIn(array, array + N);
+}
+
+template <class Container>
+internal::ParamGenerator<typename Container::value_type> ValuesIn(
+ const Container& container) {
+ return ValuesIn(container.begin(), container.end());
+}
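+
+// One more usage sketch, for the container overload of ValuesIn(); the
+// fixture VectorTest, derived from TestWithParam<int>, is an assumption for
+// illustration and is not defined in this file:
+//
+//   ::std::vector<int> GetTestValues() {
+//     ::std::vector<int> v;
+//     v.push_back(10);
+//     v.push_back(20);
+//     return v;
+//   }
+//   INSTANTIATE_TEST_CASE_P(FromVector, VectorTest,
+//                           ValuesIn(GetTestValues()));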
+
+// Values() allows generating tests from an explicitly specified list of
+// parameters.
+//
+// Synopsis:
+// Values(T v1, T v2, ..., T vN)
+// - returns a generator producing sequences with elements v1, v2, ..., vN.
+//
+// For example, this instantiates tests from test case BarTest each
+// with values "one", "two", and "three":
+//
+// INSTANTIATE_TEST_CASE_P(NumSequence, BarTest, Values("one", "two", "three"));
+//
+// This instantiates tests from test case BazTest each with the values 1, 2,
+// and 3.5. The exact type of the values will depend on the type of the
+// parameter in BazTest.
+//
+// INSTANTIATE_TEST_CASE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5));
+//
+// Currently, Values() supports from 1 to 50 parameters.
+//
+template <typename T1>
+internal::ValueArray1<T1> Values(T1 v1) {
+ return internal::ValueArray1<T1>(v1);
+}
+
+template <typename T1, typename T2>
+internal::ValueArray2<T1, T2> Values(T1 v1, T2 v2) {
+ return internal::ValueArray2<T1, T2>(v1, v2);
+}
+
+template <typename T1, typename T2, typename T3>
+internal::ValueArray3<T1, T2, T3> Values(T1 v1, T2 v2, T3 v3) {
+ return internal::ValueArray3<T1, T2, T3>(v1, v2, v3);
+}
+
+template <typename T1, typename T2, typename T3, typename T4>
+internal::ValueArray4<T1, T2, T3, T4> Values(T1 v1, T2 v2, T3 v3, T4 v4) {
+ return internal::ValueArray4<T1, T2, T3, T4>(v1, v2, v3, v4);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5>
+internal::ValueArray5<T1, T2, T3, T4, T5> Values(T1 v1, T2 v2, T3 v3, T4 v4,
+ T5 v5) {
+ return internal::ValueArray5<T1, T2, T3, T4, T5>(v1, v2, v3, v4, v5);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6>
+internal::ValueArray6<T1, T2, T3, T4, T5, T6> Values(T1 v1, T2 v2, T3 v3,
+ T4 v4, T5 v5, T6 v6) {
+ return internal::ValueArray6<T1, T2, T3, T4, T5, T6>(v1, v2, v3, v4, v5, v6);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7>
+internal::ValueArray7<T1, T2, T3, T4, T5, T6, T7> Values(T1 v1, T2 v2, T3 v3,
+ T4 v4, T5 v5, T6 v6, T7 v7) {
+ return internal::ValueArray7<T1, T2, T3, T4, T5, T6, T7>(v1, v2, v3, v4, v5,
+ v6, v7);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8>
+internal::ValueArray8<T1, T2, T3, T4, T5, T6, T7, T8> Values(T1 v1, T2 v2,
+ T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) {
+ return internal::ValueArray8<T1, T2, T3, T4, T5, T6, T7, T8>(v1, v2, v3, v4,
+ v5, v6, v7, v8);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9>
+internal::ValueArray9<T1, T2, T3, T4, T5, T6, T7, T8, T9> Values(T1 v1, T2 v2,
+ T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) {
+ return internal::ValueArray9<T1, T2, T3, T4, T5, T6, T7, T8, T9>(v1, v2, v3,
+ v4, v5, v6, v7, v8, v9);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10>
+internal::ValueArray10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> Values(T1 v1,
+ T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) {
+ return internal::ValueArray10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>(v1,
+ v2, v3, v4, v5, v6, v7, v8, v9, v10);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11>
+internal::ValueArray11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
+ T11> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11) {
+ return internal::ValueArray11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10,
+ T11>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12>
+internal::ValueArray12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12) {
+ return internal::ValueArray12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13>
+internal::ValueArray13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
+ T13> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13) {
+ return internal::ValueArray13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14>
+internal::ValueArray14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) {
+ return internal::ValueArray14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
+ v14);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15>
+internal::ValueArray15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+ T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) {
+ return internal::ValueArray15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
+ v13, v14, v15);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16>
+internal::ValueArray16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
+ T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
+ T16 v16) {
+ return internal::ValueArray16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
+ v12, v13, v14, v15, v16);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17>
+internal::ValueArray17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
+ T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
+ T16 v16, T17 v17) {
+ return internal::ValueArray17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
+ v11, v12, v13, v14, v15, v16, v17);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18>
+internal::ValueArray18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
+ T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
+ T16 v16, T17 v17, T18 v18) {
+ return internal::ValueArray18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18>(v1, v2, v3, v4, v5, v6, v7, v8, v9,
+ v10, v11, v12, v13, v14, v15, v16, v17, v18);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19>
+internal::ValueArray19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
+ T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
+ T15 v15, T16 v16, T17 v17, T18 v18, T19 v19) {
+ return internal::ValueArray19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19>(v1, v2, v3, v4, v5, v6, v7, v8,
+ v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20>
+internal::ValueArray20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20> Values(T1 v1, T2 v2, T3 v3, T4 v4,
+ T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
+ T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20) {
+ return internal::ValueArray20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20>(v1, v2, v3, v4, v5, v6, v7,
+ v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21>
+internal::ValueArray21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21> Values(T1 v1, T2 v2, T3 v3, T4 v4,
+ T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
+ T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21) {
+ return internal::ValueArray21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21>(v1, v2, v3, v4, v5, v6,
+ v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22>
+internal::ValueArray22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22> Values(T1 v1, T2 v2, T3 v3,
+ T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+ T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
+ T21 v21, T22 v22) {
+ return internal::ValueArray22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22>(v1, v2, v3, v4,
+ v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
+ v20, v21, v22);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23>
+internal::ValueArray23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> Values(T1 v1, T2 v2,
+ T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+ T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
+ T21 v21, T22 v22, T23 v23) {
+ return internal::ValueArray23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23>(v1, v2, v3,
+ v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
+ v20, v21, v22, v23);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24>
+internal::ValueArray24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> Values(T1 v1, T2 v2,
+ T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+ T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
+ T21 v21, T22 v22, T23 v23, T24 v24) {
+ return internal::ValueArray24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24>(v1, v2,
+ v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18,
+ v19, v20, v21, v22, v23, v24);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25>
+internal::ValueArray25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Values(T1 v1,
+ T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11,
+ T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19,
+ T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25) {
+ return internal::ValueArray25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25>(v1,
+ v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17,
+ v18, v19, v20, v21, v22, v23, v24, v25);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26>
+internal::ValueArray26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26) {
+ return internal::ValueArray26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
+ v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27>
+internal::ValueArray27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
+ T27> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27) {
+ return internal::ValueArray27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14,
+ v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28>
+internal::ValueArray28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
+ T28> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28) {
+ return internal::ValueArray28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
+ v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27,
+ v28);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29>
+internal::ValueArray29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29) {
+ return internal::ValueArray29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
+ v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26,
+ v27, v28, v29);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30>
+internal::ValueArray30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+ T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+ T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+ T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) {
+ return internal::ValueArray30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
+ v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25,
+ v26, v27, v28, v29, v30);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31>
+internal::ValueArray31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
+ T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
+ T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
+ T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) {
+ return internal::ValueArray31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
+ v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24,
+ v25, v26, v27, v28, v29, v30, v31);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32>
+internal::ValueArray32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
+ T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
+ T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
+ T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
+ T32 v32) {
+ return internal::ValueArray32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32>(v1, v2, v3, v4, v5, v6, v7, v8, v9,
+ v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23,
+ v24, v25, v26, v27, v28, v29, v30, v31, v32);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33>
+internal::ValueArray33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
+ T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
+ T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
+ T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
+ T32 v32, T33 v33) {
+ return internal::ValueArray33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33>(v1, v2, v3, v4, v5, v6, v7, v8,
+ v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23,
+ v24, v25, v26, v27, v28, v29, v30, v31, v32, v33);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34>
+internal::ValueArray34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
+ T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
+ T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22,
+ T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
+ T31 v31, T32 v32, T33 v33, T34 v34) {
+ return internal::ValueArray34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34>(v1, v2, v3, v4, v5, v6, v7,
+ v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22,
+ v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35>
+internal::ValueArray35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35> Values(T1 v1, T2 v2, T3 v3, T4 v4,
+ T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
+ T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
+ T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29,
+ T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35) {
+ return internal::ValueArray35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35>(v1, v2, v3, v4, v5, v6,
+ v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21,
+ v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36>
+internal::ValueArray36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36> Values(T1 v1, T2 v2, T3 v3, T4 v4,
+ T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
+ T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
+ T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29,
+ T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36) {
+ return internal::ValueArray36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36>(v1, v2, v3, v4,
+ v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
+ v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33,
+ v34, v35, v36);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37>
+internal::ValueArray37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37> Values(T1 v1, T2 v2, T3 v3,
+ T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+ T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
+ T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
+ T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
+ T37 v37) {
+ return internal::ValueArray37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37>(v1, v2, v3,
+ v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
+ v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33,
+ v34, v35, v36, v37);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38>
+internal::ValueArray38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> Values(T1 v1, T2 v2,
+ T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+ T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
+ T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
+ T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
+ T37 v37, T38 v38) {
+ return internal::ValueArray38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38>(v1, v2,
+ v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18,
+ v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32,
+ v33, v34, v35, v36, v37, v38);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39>
+internal::ValueArray39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Values(T1 v1, T2 v2,
+ T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+ T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20,
+ T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28,
+ T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36,
+ T37 v37, T38 v38, T39 v39) {
+ return internal::ValueArray39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39>(v1,
+ v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17,
+ v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31,
+ v32, v33, v34, v35, v36, v37, v38, v39);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40>
+internal::ValueArray40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Values(T1 v1,
+ T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11,
+ T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19,
+ T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27,
+ T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35,
+ T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) {
+ return internal::ValueArray40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
+ T40>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
+ v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29,
+ v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41>
+internal::ValueArray41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
+ T41> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41) {
+ return internal::ValueArray41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
+ T40, T41>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14,
+ v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28,
+ v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42>
+internal::ValueArray42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
+ T42> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
+ T42 v42) {
+ return internal::ValueArray42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
+ T40, T41, T42>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
+ v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27,
+ v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41,
+ v42);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43>
+internal::ValueArray43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
+ T43> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
+ T42 v42, T43 v43) {
+ return internal::ValueArray43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
+ T40, T41, T42, T43>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
+ v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26,
+ v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40,
+ v41, v42, v43);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44>
+internal::ValueArray44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9,
+ T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17,
+ T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25,
+ T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33,
+ T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41,
+ T42 v42, T43 v43, T44 v44) {
+ return internal::ValueArray44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
+ T40, T41, T42, T43, T44>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
+ v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25,
+ v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39,
+ v40, v41, v42, v43, v44);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45>
+internal::ValueArray45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8,
+ T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16,
+ T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24,
+ T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32,
+ T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40,
+ T41 v41, T42 v42, T43 v43, T44 v44, T45 v45) {
+ return internal::ValueArray45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
+ T40, T41, T42, T43, T44, T45>(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,
+ v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24,
+ v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38,
+ v39, v40, v41, v42, v43, v44, v45);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46>
+internal::ValueArray46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45, T46> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
+ T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
+ T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
+ T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
+ T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39,
+ T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) {
+ return internal::ValueArray46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
+ T40, T41, T42, T43, T44, T45, T46>(v1, v2, v3, v4, v5, v6, v7, v8, v9,
+ v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23,
+ v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37,
+ v38, v39, v40, v41, v42, v43, v44, v45, v46);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47>
+internal::ValueArray47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45, T46, T47> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7,
+ T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
+ T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
+ T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
+ T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39,
+ T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) {
+ return internal::ValueArray47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
+ T40, T41, T42, T43, T44, T45, T46, T47>(v1, v2, v3, v4, v5, v6, v7, v8,
+ v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23,
+ v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37,
+ v38, v39, v40, v41, v42, v43, v44, v45, v46, v47);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47, typename T48>
+internal::ValueArray48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45, T46, T47, T48> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6,
+ T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15,
+ T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23,
+ T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31,
+ T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39,
+ T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47,
+ T48 v48) {
+ return internal::ValueArray48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
+ T40, T41, T42, T43, T44, T45, T46, T47, T48>(v1, v2, v3, v4, v5, v6, v7,
+ v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22,
+ v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36,
+ v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47, typename T48, typename T49>
+internal::ValueArray49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45, T46, T47, T48, T49> Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5,
+ T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14,
+ T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22,
+ T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30,
+ T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38,
+ T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46,
+ T47 v47, T48 v48, T49 v49) {
+ return internal::ValueArray49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
+ T40, T41, T42, T43, T44, T45, T46, T47, T48, T49>(v1, v2, v3, v4, v5, v6,
+ v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21,
+ v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35,
+ v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11, typename T12, typename T13, typename T14, typename T15,
+ typename T16, typename T17, typename T18, typename T19, typename T20,
+ typename T21, typename T22, typename T23, typename T24, typename T25,
+ typename T26, typename T27, typename T28, typename T29, typename T30,
+ typename T31, typename T32, typename T33, typename T34, typename T35,
+ typename T36, typename T37, typename T38, typename T39, typename T40,
+ typename T41, typename T42, typename T43, typename T44, typename T45,
+ typename T46, typename T47, typename T48, typename T49, typename T50>
+internal::ValueArray50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
+ T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
+ T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
+ T44, T45, T46, T47, T48, T49, T50> Values(T1 v1, T2 v2, T3 v3, T4 v4,
+ T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13,
+ T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21,
+ T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29,
+ T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37,
+ T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45,
+ T46 v46, T47 v47, T48 v48, T49 v49, T50 v50) {
+ return internal::ValueArray50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
+ T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
+ T26, T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
+ T40, T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>(v1, v2, v3, v4,
+ v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19,
+ v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33,
+ v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47,
+ v48, v49, v50);
+}
+
+// Bool() allows generating tests with parameters in a set of (false, true).
+//
+// Synopsis:
+// Bool()
+// - returns a generator producing sequences with elements {false, true}.
+//
+// It is useful when testing code that depends on Boolean flags. Combinations
+// of multiple flags can be tested when several Bool()'s are combined using
+// the Combine() function.
+//
+// In the following example all tests in the test case FlagDependentTest
+// will be instantiated twice with parameters false and true.
+//
+// class FlagDependentTest : public testing::TestWithParam<bool> {
+// virtual void SetUp() {
+// external_flag = GetParam();
+// }
+// };
+// INSTANTIATE_TEST_CASE_P(BoolSequence, FlagDependentTest, Bool());
+//
+inline internal::ParamGenerator<bool> Bool() {
+ return Values(false, true);
+}
+
+# if GTEST_HAS_COMBINE
+// Combine() allows the user to combine two or more sequences to produce
+// values of a Cartesian product of those sequences' elements.
+//
+// Synopsis:
+// Combine(gen1, gen2, ..., genN)
+// - returns a generator producing sequences with elements coming from
+// the Cartesian product of elements from the sequences generated by
+// gen1, gen2, ..., genN. The sequence elements will have a type of
+// tuple<T1, T2, ..., TN> where T1, T2, ..., TN are the types
+// of elements from sequences produced by gen1, gen2, ..., genN.
+//
+// Combine can have up to 10 arguments. This number is currently limited
+// by the maximum number of elements in the tuple implementation used by Google
+// Test.
+//
+// Example:
+//
+// This will instantiate the tests in test case AnimalTest, each one with
+// the parameter values tuple("cat", BLACK), tuple("cat", WHITE),
+// tuple("dog", BLACK), and tuple("dog", WHITE):
+//
+// enum Color { BLACK, GRAY, WHITE };
+// class AnimalTest
+// : public testing::TestWithParam<tuple<const char*, Color> > {...};
+//
+// TEST_P(AnimalTest, AnimalLooksNice) {...}
+//
+// INSTANTIATE_TEST_CASE_P(AnimalVariations, AnimalTest,
+// Combine(Values("cat", "dog"),
+// Values(BLACK, WHITE)));
+//
+// This will instantiate tests in FlagDependentTest with all variations of two
+// Boolean flags:
+//
+// class FlagDependentTest
+// : public testing::TestWithParam<tuple<bool, bool> > {
+// virtual void SetUp() {
+// // Assigns external_flag_1 and external_flag_2 values from the tuple.
+// tie(external_flag_1, external_flag_2) = GetParam();
+// }
+// };
+//
+// TEST_P(FlagDependentTest, TestFeature1) {
+// // Test your code using external_flag_1 and external_flag_2 here.
+// }
+// INSTANTIATE_TEST_CASE_P(TwoBoolSequence, FlagDependentTest,
+// Combine(Bool(), Bool()));
+//
+template <typename Generator1, typename Generator2>
+internal::CartesianProductHolder2<Generator1, Generator2> Combine(
+ const Generator1& g1, const Generator2& g2) {
+ return internal::CartesianProductHolder2<Generator1, Generator2>(
+ g1, g2);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3>
+internal::CartesianProductHolder3<Generator1, Generator2, Generator3> Combine(
+ const Generator1& g1, const Generator2& g2, const Generator3& g3) {
+ return internal::CartesianProductHolder3<Generator1, Generator2, Generator3>(
+ g1, g2, g3);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+ typename Generator4>
+internal::CartesianProductHolder4<Generator1, Generator2, Generator3,
+ Generator4> Combine(
+ const Generator1& g1, const Generator2& g2, const Generator3& g3,
+ const Generator4& g4) {
+ return internal::CartesianProductHolder4<Generator1, Generator2, Generator3,
+ Generator4>(
+ g1, g2, g3, g4);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+ typename Generator4, typename Generator5>
+internal::CartesianProductHolder5<Generator1, Generator2, Generator3,
+ Generator4, Generator5> Combine(
+ const Generator1& g1, const Generator2& g2, const Generator3& g3,
+ const Generator4& g4, const Generator5& g5) {
+ return internal::CartesianProductHolder5<Generator1, Generator2, Generator3,
+ Generator4, Generator5>(
+ g1, g2, g3, g4, g5);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+ typename Generator4, typename Generator5, typename Generator6>
+internal::CartesianProductHolder6<Generator1, Generator2, Generator3,
+ Generator4, Generator5, Generator6> Combine(
+ const Generator1& g1, const Generator2& g2, const Generator3& g3,
+ const Generator4& g4, const Generator5& g5, const Generator6& g6) {
+ return internal::CartesianProductHolder6<Generator1, Generator2, Generator3,
+ Generator4, Generator5, Generator6>(
+ g1, g2, g3, g4, g5, g6);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+ typename Generator4, typename Generator5, typename Generator6,
+ typename Generator7>
+internal::CartesianProductHolder7<Generator1, Generator2, Generator3,
+ Generator4, Generator5, Generator6, Generator7> Combine(
+ const Generator1& g1, const Generator2& g2, const Generator3& g3,
+ const Generator4& g4, const Generator5& g5, const Generator6& g6,
+ const Generator7& g7) {
+ return internal::CartesianProductHolder7<Generator1, Generator2, Generator3,
+ Generator4, Generator5, Generator6, Generator7>(
+ g1, g2, g3, g4, g5, g6, g7);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+ typename Generator4, typename Generator5, typename Generator6,
+ typename Generator7, typename Generator8>
+internal::CartesianProductHolder8<Generator1, Generator2, Generator3,
+ Generator4, Generator5, Generator6, Generator7, Generator8> Combine(
+ const Generator1& g1, const Generator2& g2, const Generator3& g3,
+ const Generator4& g4, const Generator5& g5, const Generator6& g6,
+ const Generator7& g7, const Generator8& g8) {
+ return internal::CartesianProductHolder8<Generator1, Generator2, Generator3,
+ Generator4, Generator5, Generator6, Generator7, Generator8>(
+ g1, g2, g3, g4, g5, g6, g7, g8);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+ typename Generator4, typename Generator5, typename Generator6,
+ typename Generator7, typename Generator8, typename Generator9>
+internal::CartesianProductHolder9<Generator1, Generator2, Generator3,
+ Generator4, Generator5, Generator6, Generator7, Generator8,
+ Generator9> Combine(
+ const Generator1& g1, const Generator2& g2, const Generator3& g3,
+ const Generator4& g4, const Generator5& g5, const Generator6& g6,
+ const Generator7& g7, const Generator8& g8, const Generator9& g9) {
+ return internal::CartesianProductHolder9<Generator1, Generator2, Generator3,
+ Generator4, Generator5, Generator6, Generator7, Generator8, Generator9>(
+ g1, g2, g3, g4, g5, g6, g7, g8, g9);
+}
+
+template <typename Generator1, typename Generator2, typename Generator3,
+ typename Generator4, typename Generator5, typename Generator6,
+ typename Generator7, typename Generator8, typename Generator9,
+ typename Generator10>
+internal::CartesianProductHolder10<Generator1, Generator2, Generator3,
+ Generator4, Generator5, Generator6, Generator7, Generator8, Generator9,
+ Generator10> Combine(
+ const Generator1& g1, const Generator2& g2, const Generator3& g3,
+ const Generator4& g4, const Generator5& g5, const Generator6& g6,
+ const Generator7& g7, const Generator8& g8, const Generator9& g9,
+ const Generator10& g10) {
+ return internal::CartesianProductHolder10<Generator1, Generator2, Generator3,
+ Generator4, Generator5, Generator6, Generator7, Generator8, Generator9,
+ Generator10>(
+ g1, g2, g3, g4, g5, g6, g7, g8, g9, g10);
+}
+# endif // GTEST_HAS_COMBINE
+
+
+
+# define TEST_P(test_case_name, test_name) \
+ class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
+ : public test_case_name { \
+ public: \
+ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \
+ virtual void TestBody(); \
+ private: \
+ static int AddToRegistry() { \
+ ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
+ GetTestCasePatternHolder<test_case_name>(\
+ #test_case_name, __FILE__, __LINE__)->AddTestPattern(\
+ #test_case_name, \
+ #test_name, \
+ new ::testing::internal::TestMetaFactory< \
+ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \
+ return 0; \
+ } \
+ static int gtest_registering_dummy_; \
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(\
+ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
+ }; \
+ int GTEST_TEST_CLASS_NAME_(test_case_name, \
+ test_name)::gtest_registering_dummy_ = \
+ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \
+ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()
+
+# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \
+ ::testing::internal::ParamGenerator<test_case_name::ParamType> \
+ gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \
+ int gtest_##prefix##test_case_name##_dummy_ = \
+ ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
+ GetTestCasePatternHolder<test_case_name>(\
+ #test_case_name, __FILE__, __LINE__)->AddTestCaseInstantiation(\
+ #prefix, \
+ &gtest_##prefix##test_case_name##_EvalGenerator_, \
+ __FILE__, __LINE__)
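+
+// As an illustration only (the fixture and test names below are
+// placeholders, not part of this header), a typical pairing of TEST_P
+// with INSTANTIATE_TEST_CASE_P looks like:
+//
+//   class MyParamTest : public ::testing::TestWithParam<int> {};
+//
+//   TEST_P(MyParamTest, IsPositive) {
+//     EXPECT_GT(GetParam(), 0);
+//   }
+//
+//   INSTANTIATE_TEST_CASE_P(PositiveInts, MyParamTest,
+//                           ::testing::Values(1, 2, 3));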
+
+} // namespace testing
+
+#endif // GTEST_HAS_PARAM_TEST
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// Google C++ Testing Framework definitions useful in production code.
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_
+#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_
+
+// When you need to test the private or protected members of a class,
+// use the FRIEND_TEST macro to declare your tests as friends of the
+// class. For example:
+//
+// class MyClass {
+// private:
+// void MyMethod();
+// FRIEND_TEST(MyClassTest, MyMethod);
+// };
+//
+// class MyClassTest : public testing::Test {
+// // ...
+// };
+//
+// TEST_F(MyClassTest, MyMethod) {
+// // Can call MyClass::MyMethod() here.
+// }
+
+#define FRIEND_TEST(test_case_name, test_name)\
+friend class test_case_name##_##test_name##_Test
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_PROD_H_
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: mheule@google.com (Markus Heule)
+//
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+
+#include <iosfwd>
+#include <vector>
+
+namespace testing {
+
+// A copyable object representing the result of a test part (i.e. an
+// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCEED()).
+//
+// Don't inherit from TestPartResult as its destructor is not virtual.
+class GTEST_API_ TestPartResult {
+ public:
+ // The possible outcomes of a test part (i.e. an assertion or an
+ // explicit SUCCEED(), FAIL(), or ADD_FAILURE()).
+ enum Type {
+ kSuccess, // Succeeded.
+ kNonFatalFailure, // Failed but the test can continue.
+ kFatalFailure // Failed and the test should be terminated.
+ };
+
+ // C'tor. TestPartResult does NOT have a default constructor.
+ // Always use this constructor (with parameters) to create a
+ // TestPartResult object.
+ TestPartResult(Type a_type,
+ const char* a_file_name,
+ int a_line_number,
+ const char* a_message)
+ : type_(a_type),
+ file_name_(a_file_name == NULL ? "" : a_file_name),
+ line_number_(a_line_number),
+ summary_(ExtractSummary(a_message)),
+ message_(a_message) {
+ }
+
+ // Gets the outcome of the test part.
+ Type type() const { return type_; }
+
+ // Gets the name of the source file where the test part took place, or
+ // NULL if it's unknown.
+ const char* file_name() const {
+ return file_name_.empty() ? NULL : file_name_.c_str();
+ }
+
+ // Gets the line in the source file where the test part took place,
+ // or -1 if it's unknown.
+ int line_number() const { return line_number_; }
+
+ // Gets the summary of the failure message.
+ const char* summary() const { return summary_.c_str(); }
+
+ // Gets the message associated with the test part.
+ const char* message() const { return message_.c_str(); }
+
+ // Returns true iff the test part passed.
+ bool passed() const { return type_ == kSuccess; }
+
+ // Returns true iff the test part failed.
+ bool failed() const { return type_ != kSuccess; }
+
+ // Returns true iff the test part non-fatally failed.
+ bool nonfatally_failed() const { return type_ == kNonFatalFailure; }
+
+ // Returns true iff the test part fatally failed.
+ bool fatally_failed() const { return type_ == kFatalFailure; }
+
+ private:
+ Type type_;
+
+ // Gets the summary of the failure message by omitting the stack
+ // trace in it.
+ static std::string ExtractSummary(const char* message);
+
+ // The name of the source file where the test part took place, or
+ // "" if the source file is unknown.
+ std::string file_name_;
+ // The line in the source file where the test part took place, or -1
+ // if the line number is unknown.
+ int line_number_;
+ std::string summary_; // The test failure summary.
+ std::string message_; // The test failure message.
+};
+
+// Prints a TestPartResult object.
+std::ostream& operator<<(std::ostream& os, const TestPartResult& result);
+
+// An array of TestPartResult objects.
+//
+// Don't inherit from TestPartResultArray as its destructor is not
+// virtual.
+class GTEST_API_ TestPartResultArray {
+ public:
+ TestPartResultArray() {}
+
+ // Appends the given TestPartResult to the array.
+ void Append(const TestPartResult& result);
+
+ // Returns the TestPartResult at the given index (0-based).
+ const TestPartResult& GetTestPartResult(int index) const;
+
+ // Returns the number of TestPartResult objects in the array.
+ int size() const;
+
+ private:
+ std::vector<TestPartResult> array_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray);
+};
+
+// This interface knows how to report a test part result.
+class TestPartResultReporterInterface {
+ public:
+ virtual ~TestPartResultReporterInterface() {}
+
+ virtual void ReportTestPartResult(const TestPartResult& result) = 0;
+};
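+
+// A minimal sketch of a user-defined reporter (the class name is a
+// placeholder; installing the reporter is done through separate Google
+// Test hooks and is not shown here):
+//
+//   class PrintingReporter : public testing::TestPartResultReporterInterface {
+//    public:
+//     virtual void ReportTestPartResult(const testing::TestPartResult& result) {
+//       const char* file = result.file_name() ? result.file_name() : "unknown";
+//       printf("%s:%d: %s\n", file, result.line_number(), result.summary());
+//     }
+//   };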
+
+namespace internal {
+
+// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a
+// statement generates new fatal failures. To do so it registers itself as the
+// current test part result reporter. Besides checking whether fatal failures
+// were reported, it simply delegates the reporting to the former result
+// reporter.
+// The original result reporter is restored in the destructor.
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+class GTEST_API_ HasNewFatalFailureHelper
+ : public TestPartResultReporterInterface {
+ public:
+ HasNewFatalFailureHelper();
+ virtual ~HasNewFatalFailureHelper();
+ virtual void ReportTestPartResult(const TestPartResult& result);
+ bool has_new_fatal_failure() const { return has_new_fatal_failure_; }
+ private:
+ bool has_new_fatal_failure_;
+ TestPartResultReporterInterface* original_reporter_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper);
+};
+
+} // namespace internal
+
+} // namespace testing
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+#define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+
+// This header implements typed tests and type-parameterized tests.
+
+// Typed (aka type-driven) tests repeat the same test for types in a
+// list. You must know which types you want to test with when writing
+// typed tests. Here's how you do it:
+
+#if 0
+
+// First, define a fixture class template. It should be parameterized
+// by a type. Remember to derive it from testing::Test.
+template <typename T>
+class FooTest : public testing::Test {
+ public:
+ ...
+ typedef std::list<T> List;
+ static T shared_;
+ T value_;
+};
+
+// Next, associate a list of types with the test case, which will be
+// repeated for each type in the list. The typedef is necessary for
+// the macro to parse correctly.
+typedef testing::Types<char, int, unsigned int> MyTypes;
+TYPED_TEST_CASE(FooTest, MyTypes);
+
+// If the type list contains only one type, you can write that type
+// directly without Types<...>:
+// TYPED_TEST_CASE(FooTest, int);
+
+// Then, use TYPED_TEST() instead of TEST_F() to define as many typed
+// tests for this test case as you want.
+TYPED_TEST(FooTest, DoesBlah) {
+ // Inside a test, refer to TypeParam to get the type parameter.
+  // Since we are inside a derived class template, C++ requires us to
+ // visit the members of FooTest via 'this'.
+ TypeParam n = this->value_;
+
+ // To visit static members of the fixture, add the TestFixture::
+ // prefix.
+ n += TestFixture::shared_;
+
+ // To refer to typedefs in the fixture, add the "typename
+ // TestFixture::" prefix.
+ typename TestFixture::List values;
+ values.push_back(n);
+ ...
+}
+
+TYPED_TEST(FooTest, HasPropertyA) { ... }
+
+#endif // 0
+
+// Type-parameterized tests are abstract test patterns parameterized
+// by a type. Compared with typed tests, type-parameterized tests
+// allow you to define the test pattern without knowing what the type
+// parameters are. The defined pattern can be instantiated with
+// different types any number of times, in any number of translation
+// units.
+//
+// If you are designing an interface or concept, you can define a
+// suite of type-parameterized tests to verify properties that any
+// valid implementation of the interface/concept should have. Then,
+// each implementation can easily instantiate the test suite to verify
+// that it conforms to the requirements, without having to write
+// similar tests repeatedly. Here's an example:
+
+#if 0
+
+// First, define a fixture class template. It should be parameterized
+// by a type. Remember to derive it from testing::Test.
+template <typename T>
+class FooTest : public testing::Test {
+ ...
+};
+
+// Next, declare that you will define a type-parameterized test case
+// (the _P suffix is for "parameterized" or "pattern", whichever you
+// prefer):
+TYPED_TEST_CASE_P(FooTest);
+
+// Then, use TYPED_TEST_P() to define as many type-parameterized tests
+// for this type-parameterized test case as you want.
+TYPED_TEST_P(FooTest, DoesBlah) {
+ // Inside a test, refer to TypeParam to get the type parameter.
+ TypeParam n = 0;
+ ...
+}
+
+TYPED_TEST_P(FooTest, HasPropertyA) { ... }
+
+// Now the tricky part: you need to register all test patterns before
+// you can instantiate them. The first argument of the macro is the
+// test case name; the rest are the names of the tests in this test
+// case.
+REGISTER_TYPED_TEST_CASE_P(FooTest,
+ DoesBlah, HasPropertyA);
+
+// Finally, you are free to instantiate the pattern with the types you
+// want. If you put the above code in a header file, you can #include
+// it in multiple C++ source files and instantiate it multiple times.
+//
+// To distinguish different instances of the pattern, the first
+// argument to the INSTANTIATE_* macro is a prefix that will be added
+// to the actual test case name. Remember to pick unique prefixes for
+// different instances.
+typedef testing::Types<char, int, unsigned int> MyTypes;
+INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes);
+
+// If the type list contains only one type, you can write that type
+// directly without Types<...>:
+// INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int);
+
+#endif // 0
+
+
+// Implements typed tests.
+
+#if GTEST_HAS_TYPED_TEST
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Expands to the name of the typedef for the type parameters of the
+// given test case.
+# define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_
+
+// The 'Types' template argument below must have spaces around it
+// since some compilers may choke on '>>' when passing a template
+// instance (e.g. Types<int>)
+# define TYPED_TEST_CASE(CaseName, Types) \
+ typedef ::testing::internal::TypeList< Types >::type \
+ GTEST_TYPE_PARAMS_(CaseName)
+
+# define TYPED_TEST(CaseName, TestName) \
+ template <typename gtest_TypeParam_> \
+ class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \
+ : public CaseName<gtest_TypeParam_> { \
+ private: \
+ typedef CaseName<gtest_TypeParam_> TestFixture; \
+ typedef gtest_TypeParam_ TypeParam; \
+ virtual void TestBody(); \
+ }; \
+ bool gtest_##CaseName##_##TestName##_registered_ GTEST_ATTRIBUTE_UNUSED_ = \
+ ::testing::internal::TypeParameterizedTest< \
+ CaseName, \
+ ::testing::internal::TemplateSel< \
+ GTEST_TEST_CLASS_NAME_(CaseName, TestName)>, \
+ GTEST_TYPE_PARAMS_(CaseName)>::Register(\
+ "", #CaseName, #TestName, 0); \
+ template <typename gtest_TypeParam_> \
+ void GTEST_TEST_CLASS_NAME_(CaseName, TestName)<gtest_TypeParam_>::TestBody()
+
+#endif // GTEST_HAS_TYPED_TEST
+
+// Implements type-parameterized tests.
+
+#if GTEST_HAS_TYPED_TEST_P
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Expands to the namespace name that the type-parameterized tests for
+// the given type-parameterized test case are defined in. The exact
+// name of the namespace is subject to change without notice.
+# define GTEST_CASE_NAMESPACE_(TestCaseName) \
+ gtest_case_##TestCaseName##_
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Expands to the name of the variable used to remember the names of
+// the defined tests in the given test case.
+# define GTEST_TYPED_TEST_CASE_P_STATE_(TestCaseName) \
+ gtest_typed_test_case_p_state_##TestCaseName##_
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY.
+//
+// Expands to the name of the variable used to remember the names of
+// the registered tests in the given test case.
+# define GTEST_REGISTERED_TEST_NAMES_(TestCaseName) \
+ gtest_registered_test_names_##TestCaseName##_
+
+// The variables defined in the type-parameterized test macros are
+// static as typically these macros are used in a .h file that can be
+// #included in multiple translation units linked together.
+# define TYPED_TEST_CASE_P(CaseName) \
+ static ::testing::internal::TypedTestCasePState \
+ GTEST_TYPED_TEST_CASE_P_STATE_(CaseName)
+
+# define TYPED_TEST_P(CaseName, TestName) \
+ namespace GTEST_CASE_NAMESPACE_(CaseName) { \
+ template <typename gtest_TypeParam_> \
+ class TestName : public CaseName<gtest_TypeParam_> { \
+ private: \
+ typedef CaseName<gtest_TypeParam_> TestFixture; \
+ typedef gtest_TypeParam_ TypeParam; \
+ virtual void TestBody(); \
+ }; \
+ static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
+ GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).AddTestName(\
+ __FILE__, __LINE__, #CaseName, #TestName); \
+ } \
+ template <typename gtest_TypeParam_> \
+ void GTEST_CASE_NAMESPACE_(CaseName)::TestName<gtest_TypeParam_>::TestBody()
+
+# define REGISTER_TYPED_TEST_CASE_P(CaseName, ...) \
+ namespace GTEST_CASE_NAMESPACE_(CaseName) { \
+ typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \
+ } \
+ static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) = \
+ GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames(\
+ __FILE__, __LINE__, #__VA_ARGS__)
+
+// The 'Types' template argument below must have spaces around it
+// since some compilers may choke on '>>' when passing a template
+// instance (e.g. Types<int>)
+# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types) \
+ bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \
+ ::testing::internal::TypeParameterizedTestCase<CaseName, \
+ GTEST_CASE_NAMESPACE_(CaseName)::gtest_AllTests_, \
+ ::testing::internal::TypeList< Types >::type>::Register(\
+ #Prefix, #CaseName, GTEST_REGISTERED_TEST_NAMES_(CaseName))
+
+#endif // GTEST_HAS_TYPED_TEST_P
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+
+// Depending on the platform, different string classes are available.
+// On Linux, in addition to ::std::string, Google also makes use of
+// class ::string, which has the same interface as ::std::string, but
+// has a different implementation.
+//
+// The user can define GTEST_HAS_GLOBAL_STRING to 1 to indicate that
+// ::string is available AND is a distinct type from ::std::string, or
+// define it to 0 to indicate otherwise.
+//
+// If the user's ::std::string and ::string are the same class due to
+// aliasing, they should define GTEST_HAS_GLOBAL_STRING to 0.
+//
+// If the user doesn't define GTEST_HAS_GLOBAL_STRING, it is defined
+// heuristically.
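+//
+// For illustration only: a project whose ::string is merely an alias for
+// ::std::string could define the macro (e.g. on the compiler command line
+// or before including this header) as:
+//
+//   #define GTEST_HAS_GLOBAL_STRING 0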
+
+namespace testing {
+
+// Declares the flags.
+
+// This flag temporarily enables the disabled tests.
+GTEST_DECLARE_bool_(also_run_disabled_tests);
+
+// This flag makes Google Test break into the debugger on an assertion failure.
+GTEST_DECLARE_bool_(break_on_failure);
+
+// This flag controls whether Google Test catches all test-thrown exceptions
+// and logs them as failures.
+GTEST_DECLARE_bool_(catch_exceptions);
+
+// This flag enables using colors in terminal output. Available values are
+// "yes" (enable colors), "no" (disable colors), and "auto" (the default),
+// which lets Google Test decide.
+GTEST_DECLARE_string_(color);
+
+// This flag sets the filter that selects, by name and using glob patterns,
+// the tests to run. If no filter is given, all tests are executed.
+GTEST_DECLARE_string_(filter);
+
+// This flag causes Google Test to list the tests. None of the listed tests
+// are actually run if the flag is provided.
+GTEST_DECLARE_bool_(list_tests);
+
+// This flag controls whether Google Test emits a detailed XML report to a file
+// in addition to its normal textual output.
+GTEST_DECLARE_string_(output);
+
+// This flag controls whether Google Test prints the elapsed time for each
+// test.
+GTEST_DECLARE_bool_(print_time);
+
+// This flag specifies the random number seed.
+GTEST_DECLARE_int32_(random_seed);
+
+// This flag sets how many times the tests are repeated. The default value
+// is 1. If the value is -1, the tests are repeated forever.
+GTEST_DECLARE_int32_(repeat);
+
+// This flag controls whether Google Test includes Google Test internal
+// stack frames in failure stack traces.
+GTEST_DECLARE_bool_(show_internal_stack_frames);
+
+// When this flag is specified, tests' order is randomized on every iteration.
+GTEST_DECLARE_bool_(shuffle);
+
+// This flag specifies the maximum number of stack frames to be
+// printed in a failure message.
+GTEST_DECLARE_int32_(stack_trace_depth);
+
+// When this flag is specified, a failed assertion will throw an
+// exception if exceptions are enabled, or exit the program with a
+// non-zero code otherwise.
+GTEST_DECLARE_bool_(throw_on_failure);
+
+// When this flag is set with a "host:port" string, on supported
+// platforms test results are streamed to the specified port on
+// the specified host machine.
+GTEST_DECLARE_string_(stream_result_to);
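+
+// Each flag declared above can also be set on the command line using a
+// "--gtest_" prefix; the values below are illustrative:
+//
+//   ./my_test --gtest_filter=FooTest.*-FooTest.Bar --gtest_repeat=10 \
+//             --gtest_shuffle --gtest_color=yes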
+
+// The upper limit for valid stack trace depths.
+const int kMaxStackTraceDepth = 100;
+
+namespace internal {
+
+class AssertHelper;
+class DefaultGlobalTestPartResultReporter;
+class ExecDeathTest;
+class NoExecDeathTest;
+class FinalSuccessChecker;
+class GTestFlagSaver;
+class StreamingListenerTest;
+class TestResultAccessor;
+class TestEventListenersAccessor;
+class TestEventRepeater;
+class UnitTestRecordPropertyTestHelper;
+class WindowsDeathTest;
+class UnitTestImpl* GetUnitTestImpl();
+void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
+ const std::string& message);
+
+} // namespace internal
+
+// The friend relationship of some of these classes is cyclic.
+// If we don't forward-declare them, the compiler might confuse the classes
+// in friendship clauses with same-named classes in the enclosing scope.
+class Test;
+class TestCase;
+class TestInfo;
+class UnitTest;
+
+// A class for indicating whether an assertion was successful. When
+// the assertion wasn't successful, the AssertionResult object
+// remembers a non-empty message that describes how it failed.
+//
+// To create an instance of this class, use one of the factory functions
+// (AssertionSuccess() and AssertionFailure()).
+//
+// This class is useful for two purposes:
+// 1. Defining predicate functions to be used with Boolean test assertions
+// EXPECT_TRUE/EXPECT_FALSE and their ASSERT_ counterparts
+// 2. Defining predicate-format functions to be
+// used with predicate assertions (ASSERT_PRED_FORMAT*, etc).
+//
+// For example, if you define an IsEven predicate:
+//
+// testing::AssertionResult IsEven(int n) {
+// if ((n % 2) == 0)
+// return testing::AssertionSuccess();
+// else
+// return testing::AssertionFailure() << n << " is odd";
+// }
+//
+// Then the failed expectation EXPECT_TRUE(IsEven(Fib(5)))
+// will print the message
+//
+// Value of: IsEven(Fib(5))
+// Actual: false (5 is odd)
+// Expected: true
+//
+// instead of a more opaque
+//
+// Value of: IsEven(Fib(5))
+// Actual: false
+// Expected: true
+//
+// in case IsEven is a simple Boolean predicate.
+//
+// If you expect your predicate to be reused and want to support informative
+// messages in EXPECT_FALSE and ASSERT_FALSE (negative assertions show up
+// about half as often as positive ones in our tests), supply messages for
+// both success and failure cases:
+//
+// testing::AssertionResult IsEven(int n) {
+// if ((n % 2) == 0)
+// return testing::AssertionSuccess() << n << " is even";
+// else
+// return testing::AssertionFailure() << n << " is odd";
+// }
+//
+// Then a statement EXPECT_FALSE(IsEven(Fib(6))) will print
+//
+// Value of: IsEven(Fib(6))
+// Actual: true (8 is even)
+// Expected: false
+//
+// NB: Predicates that support negative Boolean assertions are slower in
+// positive ones, so be careful not to use them in tests that have lots
+// (tens of thousands) of positive Boolean assertions.
+//
+// To use this class with EXPECT_PRED_FORMAT assertions such as:
+//
+// // Verifies that Foo() returns an even number.
+// EXPECT_PRED_FORMAT1(IsEven, Foo());
+//
+// you need to define:
+//
+// testing::AssertionResult IsEven(const char* expr, int n) {
+// if ((n % 2) == 0)
+// return testing::AssertionSuccess();
+// else
+// return testing::AssertionFailure()
+// << "Expected: " << expr << " is even\n Actual: it's " << n;
+// }
+//
+// If Foo() returns 5, you will see the following message:
+//
+// Expected: Foo() is even
+// Actual: it's 5
+//
+class GTEST_API_ AssertionResult {
+ public:
+ // Copy constructor.
+ // Used in EXPECT_TRUE/FALSE(assertion_result).
+ AssertionResult(const AssertionResult& other);
+  // Used in EXPECT_TRUE/FALSE(bool_expression).
+ explicit AssertionResult(bool success) : success_(success) {}
+
+ // Returns true iff the assertion succeeded.
+ operator bool() const { return success_; } // NOLINT
+
+ // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
+ AssertionResult operator!() const;
+
+ // Returns the text streamed into this AssertionResult. Test assertions
+ // use it when they fail (i.e., the predicate's outcome doesn't match the
+ // assertion's expectation). When nothing has been streamed into the
+ // object, returns an empty string.
+ const char* message() const {
+ return message_.get() != NULL ? message_->c_str() : "";
+ }
+ // TODO(vladl@google.com): Remove this after making sure no clients use it.
+ // Deprecated; please use message() instead.
+ const char* failure_message() const { return message(); }
+
+ // Streams a custom failure message into this object.
+ template <typename T> AssertionResult& operator<<(const T& value) {
+ AppendMessage(Message() << value);
+ return *this;
+ }
+
+ // Allows streaming basic output manipulators such as endl or flush into
+ // this object.
+ AssertionResult& operator<<(
+ ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) {
+ AppendMessage(Message() << basic_manipulator);
+ return *this;
+ }
+
+ private:
+ // Appends the contents of message to message_.
+ void AppendMessage(const Message& a_message) {
+ if (message_.get() == NULL)
+ message_.reset(new ::std::string);
+ message_->append(a_message.GetString().c_str());
+ }
+
+ // Stores result of the assertion predicate.
+ bool success_;
+ // Stores the message describing the condition in case the expectation
+ // construct is not satisfied with the predicate's outcome.
+ // Referenced via a pointer to avoid taking too much stack frame space
+ // with test assertions.
+ internal::scoped_ptr< ::std::string> message_;
+
+ GTEST_DISALLOW_ASSIGN_(AssertionResult);
+};
+
+// Makes a successful assertion result.
+GTEST_API_ AssertionResult AssertionSuccess();
+
+// Makes a failed assertion result.
+GTEST_API_ AssertionResult AssertionFailure();
+
+// Makes a failed assertion result with the given failure message.
+// Deprecated; use AssertionFailure() << msg.
+GTEST_API_ AssertionResult AssertionFailure(const Message& msg);
+
+// The abstract class that all tests inherit from.
+//
+// In Google Test, a unit test program contains one or many TestCases, and
+// each TestCase contains one or many Tests.
+//
+// When you define a test using the TEST macro, you don't need to
+// explicitly derive from Test - the TEST macro automatically does
+// this for you.
+//
+// The only time you derive from Test is when defining a test fixture
+// to be used in TEST_F. For example:
+//
+// class FooTest : public testing::Test {
+// protected:
+// virtual void SetUp() { ... }
+// virtual void TearDown() { ... }
+// ...
+// };
+//
+// TEST_F(FooTest, Bar) { ... }
+// TEST_F(FooTest, Baz) { ... }
+//
+// Test is not copyable.
+class GTEST_API_ Test {
+ public:
+ friend class TestInfo;
+
+ // Defines types for pointers to functions that set up and tear down
+ // a test case.
+ typedef internal::SetUpTestCaseFunc SetUpTestCaseFunc;
+ typedef internal::TearDownTestCaseFunc TearDownTestCaseFunc;
+
+ // The d'tor is virtual as we intend to inherit from Test.
+ virtual ~Test();
+
+ // Sets up the stuff shared by all tests in this test case.
+ //
+ // Google Test will call Foo::SetUpTestCase() before running the first
+ // test in test case Foo. Hence a sub-class can define its own
+ // SetUpTestCase() method to shadow the one defined in the super
+ // class.
+ static void SetUpTestCase() {}
+
+ // Tears down the stuff shared by all tests in this test case.
+ //
+ // Google Test will call Foo::TearDownTestCase() after running the last
+ // test in test case Foo. Hence a sub-class can define its own
+ // TearDownTestCase() method to shadow the one defined in the super
+ // class.
+ static void TearDownTestCase() {}
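+
+  // For example (the fixture and resource names below are placeholders),
+  // a fixture sharing an expensive resource across all tests in the case
+  // could shadow these hooks:
+  //
+  //   class FooTest : public testing::Test {
+  //    protected:
+  //     static void SetUpTestCase() { shared_resource_ = new Resource; }
+  //     static void TearDownTestCase() {
+  //       delete shared_resource_;
+  //       shared_resource_ = NULL;
+  //     }
+  //     static Resource* shared_resource_;
+  //   };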
+
+ // Returns true iff the current test has a fatal failure.
+ static bool HasFatalFailure();
+
+ // Returns true iff the current test has a non-fatal failure.
+ static bool HasNonfatalFailure();
+
+  // Returns true iff the current test has a failure (either fatal or
+  // non-fatal).
+ static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); }
+
+ // Logs a property for the current test, test case, or for the entire
+ // invocation of the test program when used outside of the context of a
+ // test case. Only the last value for a given key is remembered. These
+ // are public static so they can be called from utility functions that are
+  // not members of the test fixture. Calls to RecordProperty made during the
+  // lifespan of the test (from the moment its constructor starts to the
+  // moment its destructor finishes) will be output in XML as attributes of
+  // the <testcase> element. Properties recorded from a fixture's
+  // SetUpTestCase or TearDownTestCase are logged as attributes of the
+ // corresponding <testsuite> element. Calls to RecordProperty made in the
+ // global context (before or after invocation of RUN_ALL_TESTS and from
+ // SetUp/TearDown method of Environment objects registered with Google
+ // Test) will be output as attributes of the <testsuites> element.
+ static void RecordProperty(const std::string& key, const std::string& value);
+ static void RecordProperty(const std::string& key, int value);
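+
+  // For example (the key name is illustrative), a test could attach extra
+  // data to the XML report like this:
+  //
+  //   TEST_F(FooTest, Bar) {
+  //     RecordProperty("MaxMemoryUsedKB", 512);
+  //     ...
+  //   }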
+
+ protected:
+ // Creates a Test object.
+ Test();
+
+ // Sets up the test fixture.
+ virtual void SetUp();
+
+ // Tears down the test fixture.
+ virtual void TearDown();
+
+ private:
+ // Returns true iff the current test has the same fixture class as
+ // the first test in the current test case.
+ static bool HasSameFixtureClass();
+
+ // Runs the test after the test fixture has been set up.
+ //
+ // A sub-class must implement this to define the test logic.
+ //
+ // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM.
+ // Instead, use the TEST or TEST_F macro.
+ virtual void TestBody() = 0;
+
+ // Sets up, executes, and tears down the test.
+ void Run();
+
+ // Deletes self. We deliberately pick an unusual name for this
+ // internal method to avoid clashing with names used in user TESTs.
+ void DeleteSelf_() { delete this; }
+
+ // Uses a GTestFlagSaver to save and restore all Google Test flags.
+ const internal::GTestFlagSaver* const gtest_flag_saver_;
+
+ // Often a user mis-spells SetUp() as Setup() and spends a long time
+ // wondering why it is never called by Google Test. The declaration of
+ // the following method is solely for catching such an error at
+ // compile time:
+ //
+  // - The return type is deliberately chosen to be non-void, so it
+  //   will conflict if a user declares void Setup() in their test
+  //   fixture.
+  //
+  // - This method is private, so it will be another compiler error
+  //   if a user calls it from their test fixture.
+ //
+ // DO NOT OVERRIDE THIS FUNCTION.
+ //
+ // If you see an error about overriding the following function or
+ // about it being private, you have mis-spelled SetUp() as Setup().
+ struct Setup_should_be_spelled_SetUp {};
+ virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }
+
+ // We disallow copying Tests.
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(Test);
+};
+
+typedef internal::TimeInMillis TimeInMillis;
+
+// A copyable object representing a user specified test property which can be
+// output as a key/value string pair.
+//
+// Don't inherit from TestProperty as its destructor is not virtual.
+class TestProperty {
+ public:
+ // C'tor. TestProperty does NOT have a default constructor.
+ // Always use this constructor (with parameters) to create a
+ // TestProperty object.
+ TestProperty(const std::string& a_key, const std::string& a_value) :
+ key_(a_key), value_(a_value) {
+ }
+
+ // Gets the user supplied key.
+ const char* key() const {
+ return key_.c_str();
+ }
+
+ // Gets the user supplied value.
+ const char* value() const {
+ return value_.c_str();
+ }
+
+ // Sets a new value, overriding the one supplied in the constructor.
+ void SetValue(const std::string& new_value) {
+ value_ = new_value;
+ }
+
+ private:
+ // The key supplied by the user.
+ std::string key_;
+ // The value supplied by the user.
+ std::string value_;
+};
+
+// The result of a single Test. This includes a list of
+// TestPartResults, a list of TestProperties, a count of how many
+// death tests there are in the Test, and how much time it took to run
+// the Test.
+//
+// TestResult is not copyable.
+class GTEST_API_ TestResult {
+ public:
+ // Creates an empty TestResult.
+ TestResult();
+
+ // D'tor. Do not inherit from TestResult.
+ ~TestResult();
+
+ // Gets the number of all test parts. This is the sum of the number
+ // of successful test parts and the number of failed test parts.
+ int total_part_count() const;
+
+ // Returns the number of the test properties.
+ int test_property_count() const;
+
+ // Returns true iff the test passed (i.e. no test part failed).
+ bool Passed() const { return !Failed(); }
+
+ // Returns true iff the test failed.
+ bool Failed() const;
+
+ // Returns true iff the test fatally failed.
+ bool HasFatalFailure() const;
+
+ // Returns true iff the test has a non-fatal failure.
+ bool HasNonfatalFailure() const;
+
+ // Returns the elapsed time, in milliseconds.
+ TimeInMillis elapsed_time() const { return elapsed_time_; }
+
+ // Returns the i-th test part result among all the results. i can range
+  // from 0 to total_part_count() - 1. If i is not in that range, aborts
+ // the program.
+ const TestPartResult& GetTestPartResult(int i) const;
+
+ // Returns the i-th test property. i can range from 0 to
+ // test_property_count() - 1. If i is not in that range, aborts the
+ // program.
+ const TestProperty& GetTestProperty(int i) const;
+
+ private:
+ friend class TestInfo;
+ friend class TestCase;
+ friend class UnitTest;
+ friend class internal::DefaultGlobalTestPartResultReporter;
+ friend class internal::ExecDeathTest;
+ friend class internal::TestResultAccessor;
+ friend class internal::UnitTestImpl;
+ friend class internal::WindowsDeathTest;
+
+ // Gets the vector of TestPartResults.
+ const std::vector<TestPartResult>& test_part_results() const {
+ return test_part_results_;
+ }
+
+ // Gets the vector of TestProperties.
+ const std::vector<TestProperty>& test_properties() const {
+ return test_properties_;
+ }
+
+ // Sets the elapsed time.
+ void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; }
+
+ // Adds a test property to the list. The property is validated and may add
+ // a non-fatal failure if invalid (e.g., if it conflicts with reserved
+ // key names). If a property is already recorded for the same key, the
+ // value will be updated, rather than storing multiple values for the same
+ // key. xml_element specifies the element for which the property is being
+ // recorded and is used for validation.
+ void RecordProperty(const std::string& xml_element,
+ const TestProperty& test_property);
+
+ // Adds a failure if the key is a reserved attribute of Google Test
+ // testcase tags. Returns true if the property is valid.
+ // TODO(russr): Validate attribute names are legal and human readable.
+ static bool ValidateTestProperty(const std::string& xml_element,
+ const TestProperty& test_property);
+
+ // Adds a test part result to the list.
+ void AddTestPartResult(const TestPartResult& test_part_result);
+
+ // Returns the death test count.
+ int death_test_count() const { return death_test_count_; }
+
+ // Increments the death test count, returning the new count.
+ int increment_death_test_count() { return ++death_test_count_; }
+
+ // Clears the test part results.
+ void ClearTestPartResults();
+
+ // Clears the object.
+ void Clear();
+
+ // Protects mutable state of the property vector and of owned
+ // properties, whose values may be updated.
+ internal::Mutex test_properites_mutex_;
+
+ // The vector of TestPartResults
+ std::vector<TestPartResult> test_part_results_;
+ // The vector of TestProperties
+ std::vector<TestProperty> test_properties_;
+ // Running count of death tests.
+ int death_test_count_;
+ // The elapsed time, in milliseconds.
+ TimeInMillis elapsed_time_;
+
+ // We disallow copying TestResult.
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(TestResult);
+}; // class TestResult
+
+// A TestInfo object stores the following information about a test:
+//
+// Test case name
+// Test name
+// Whether the test should be run
+// A function pointer that creates the test object when invoked
+// Test result
+//
+// The constructor of TestInfo registers itself with the UnitTest
+// singleton such that the RUN_ALL_TESTS() macro knows which tests to
+// run.
+class GTEST_API_ TestInfo {
+ public:
+ // Destructs a TestInfo object. This function is not virtual, so
+ // don't inherit from TestInfo.
+ ~TestInfo();
+
+ // Returns the test case name.
+ const char* test_case_name() const { return test_case_name_.c_str(); }
+
+ // Returns the test name.
+ const char* name() const { return name_.c_str(); }
+
+ // Returns the name of the parameter type, or NULL if this is not a typed
+ // or a type-parameterized test.
+ const char* type_param() const {
+ if (type_param_.get() != NULL)
+ return type_param_->c_str();
+ return NULL;
+ }
+
+ // Returns the text representation of the value parameter, or NULL if this
+ // is not a value-parameterized test.
+ const char* value_param() const {
+ if (value_param_.get() != NULL)
+ return value_param_->c_str();
+ return NULL;
+ }
+
+ // Returns true if this test should run, that is if the test is not
+ // disabled (or it is disabled but the also_run_disabled_tests flag has
+ // been specified) and its full name matches the user-specified filter.
+ //
+ // Google Test allows the user to filter the tests by their full names.
+ // The full name of a test Bar in test case Foo is defined as
+ // "Foo.Bar". Only the tests that match the filter will run.
+ //
+ // A filter is a colon-separated list of glob (not regex) patterns,
+ // optionally followed by a '-' and a colon-separated list of
+ // negative patterns (tests to exclude). A test is run if it
+ // matches one of the positive patterns and does not match any of
+ // the negative patterns.
+ //
+ // For example, *A*:Foo.* is a filter that matches any string that
+ // contains the character 'A' or starts with "Foo.".
+ bool should_run() const { return should_run_; }
+
+ // Returns true iff this test will appear in the XML report.
+ bool is_reportable() const {
+ // For now, the XML report includes all tests matching the filter.
+ // In the future, we may trim tests that are excluded because of
+ // sharding.
+ return matches_filter_;
+ }
+
+ // Returns the result of the test.
+ const TestResult* result() const { return &result_; }
+
+ private:
+#if GTEST_HAS_DEATH_TEST
+ friend class internal::DefaultDeathTestFactory;
+#endif // GTEST_HAS_DEATH_TEST
+ friend class Test;
+ friend class TestCase;
+ friend class internal::UnitTestImpl;
+ friend class internal::StreamingListenerTest;
+ friend TestInfo* internal::MakeAndRegisterTestInfo(
+ const char* test_case_name,
+ const char* name,
+ const char* type_param,
+ const char* value_param,
+ internal::TypeId fixture_class_id,
+ Test::SetUpTestCaseFunc set_up_tc,
+ Test::TearDownTestCaseFunc tear_down_tc,
+ internal::TestFactoryBase* factory);
+
+ // Constructs a TestInfo object. The newly constructed instance assumes
+ // ownership of the factory object.
+ TestInfo(const std::string& test_case_name,
+ const std::string& name,
+ const char* a_type_param, // NULL if not a type-parameterized test
+ const char* a_value_param, // NULL if not a value-parameterized test
+ internal::TypeId fixture_class_id,
+ internal::TestFactoryBase* factory);
+
+ // Increments the number of death tests encountered in this test so
+ // far.
+ int increment_death_test_count() {
+ return result_.increment_death_test_count();
+ }
+
+ // Creates the test object, runs it, records its result, and then
+ // deletes it.
+ void Run();
+
+ static void ClearTestResult(TestInfo* test_info) {
+ test_info->result_.Clear();
+ }
+
+ // These fields are immutable properties of the test.
+ const std::string test_case_name_; // Test case name
+ const std::string name_; // Test name
+ // Name of the parameter type, or NULL if this is not a typed or a
+ // type-parameterized test.
+ const internal::scoped_ptr<const ::std::string> type_param_;
+ // Text representation of the value parameter, or NULL if this is not a
+ // value-parameterized test.
+ const internal::scoped_ptr<const ::std::string> value_param_;
+ const internal::TypeId fixture_class_id_; // ID of the test fixture class
+ bool should_run_; // True iff this test should run
+ bool is_disabled_; // True iff this test is disabled
+ bool matches_filter_; // True if this test matches the
+ // user-specified filter.
+ internal::TestFactoryBase* const factory_; // The factory that creates
+ // the test object
+
+ // This field is mutable and needs to be reset before running the
+ // test for the second time.
+ TestResult result_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(TestInfo);
+};
+
+// A test case, which consists of a vector of TestInfos.
+//
+// TestCase is not copyable.
+class GTEST_API_ TestCase {
+ public:
+ // Creates a TestCase with the given name.
+ //
+ // TestCase does NOT have a default constructor. Always use this
+ // constructor to create a TestCase object.
+ //
+ // Arguments:
+ //
+ // name: name of the test case
+ // a_type_param: the name of the test's type parameter, or NULL if
+ // this is not a type-parameterized test.
+ // set_up_tc: pointer to the function that sets up the test case
+ // tear_down_tc: pointer to the function that tears down the test case
+ TestCase(const char* name, const char* a_type_param,
+ Test::SetUpTestCaseFunc set_up_tc,
+ Test::TearDownTestCaseFunc tear_down_tc);
+
+ // Destructor of TestCase.
+ virtual ~TestCase();
+
+ // Gets the name of the TestCase.
+ const char* name() const { return name_.c_str(); }
+
+ // Returns the name of the parameter type, or NULL if this is not a
+ // type-parameterized test case.
+ const char* type_param() const {
+ if (type_param_.get() != NULL)
+ return type_param_->c_str();
+ return NULL;
+ }
+
+ // Returns true if any test in this test case should run.
+ bool should_run() const { return should_run_; }
+
+ // Gets the number of successful tests in this test case.
+ int successful_test_count() const;
+
+ // Gets the number of failed tests in this test case.
+ int failed_test_count() const;
+
+ // Gets the number of disabled tests that will be reported in the XML report.
+ int reportable_disabled_test_count() const;
+
+ // Gets the number of disabled tests in this test case.
+ int disabled_test_count() const;
+
+ // Gets the number of tests to be printed in the XML report.
+ int reportable_test_count() const;
+
+ // Get the number of tests in this test case that should run.
+ int test_to_run_count() const;
+
+ // Gets the number of all tests in this test case.
+ int total_test_count() const;
+
+ // Returns true iff the test case passed.
+ bool Passed() const { return !Failed(); }
+
+ // Returns true iff the test case failed.
+ bool Failed() const { return failed_test_count() > 0; }
+
+ // Returns the elapsed time, in milliseconds.
+ TimeInMillis elapsed_time() const { return elapsed_time_; }
+
+ // Returns the i-th test among all the tests. i can range from 0 to
+ // total_test_count() - 1. If i is not in that range, returns NULL.
+ const TestInfo* GetTestInfo(int i) const;
+
+ // Returns the TestResult that holds test properties recorded during
+ // execution of SetUpTestCase and TearDownTestCase.
+ const TestResult& ad_hoc_test_result() const { return ad_hoc_test_result_; }
+
+ private:
+ friend class Test;
+ friend class internal::UnitTestImpl;
+
+ // Gets the (mutable) vector of TestInfos in this TestCase.
+ std::vector<TestInfo*>& test_info_list() { return test_info_list_; }
+
+ // Gets the (immutable) vector of TestInfos in this TestCase.
+ const std::vector<TestInfo*>& test_info_list() const {
+ return test_info_list_;
+ }
+
+ // Returns the i-th test among all the tests. i can range from 0 to
+ // total_test_count() - 1. If i is not in that range, returns NULL.
+ TestInfo* GetMutableTestInfo(int i);
+
+ // Sets the should_run member.
+ void set_should_run(bool should) { should_run_ = should; }
+
+ // Adds a TestInfo to this test case. Will delete the TestInfo upon
+ // destruction of the TestCase object.
+ void AddTestInfo(TestInfo * test_info);
+
+ // Clears the results of all tests in this test case.
+ void ClearResult();
+
+ // Clears the results of all tests in the given test case.
+ static void ClearTestCaseResult(TestCase* test_case) {
+ test_case->ClearResult();
+ }
+
+ // Runs every test in this TestCase.
+ void Run();
+
+ // Runs SetUpTestCase() for this TestCase. This wrapper is needed
+ // for catching exceptions thrown from SetUpTestCase().
+ void RunSetUpTestCase() { (*set_up_tc_)(); }
+
+ // Runs TearDownTestCase() for this TestCase. This wrapper is
+ // needed for catching exceptions thrown from TearDownTestCase().
+ void RunTearDownTestCase() { (*tear_down_tc_)(); }
+
+ // Returns true iff test passed.
+ static bool TestPassed(const TestInfo* test_info) {
+ return test_info->should_run() && test_info->result()->Passed();
+ }
+
+ // Returns true iff test failed.
+ static bool TestFailed(const TestInfo* test_info) {
+ return test_info->should_run() && test_info->result()->Failed();
+ }
+
+ // Returns true iff the test is disabled and will be reported in the XML
+ // report.
+ static bool TestReportableDisabled(const TestInfo* test_info) {
+ return test_info->is_reportable() && test_info->is_disabled_;
+ }
+
+ // Returns true iff test is disabled.
+ static bool TestDisabled(const TestInfo* test_info) {
+ return test_info->is_disabled_;
+ }
+
+ // Returns true iff this test will appear in the XML report.
+ static bool TestReportable(const TestInfo* test_info) {
+ return test_info->is_reportable();
+ }
+
+ // Returns true if the given test should run.
+ static bool ShouldRunTest(const TestInfo* test_info) {
+ return test_info->should_run();
+ }
+
+ // Shuffles the tests in this test case.
+ void ShuffleTests(internal::Random* random);
+
+ // Restores the test order to before the first shuffle.
+ void UnshuffleTests();
+
+ // Name of the test case.
+ std::string name_;
+ // Name of the parameter type, or NULL if this is not a typed or a
+ // type-parameterized test.
+ const internal::scoped_ptr<const ::std::string> type_param_;
+ // The vector of TestInfos in their original order. It owns the
+ // elements in the vector.
+ std::vector<TestInfo*> test_info_list_;
+ // Provides a level of indirection for the test list to allow easy
+ // shuffling and restoring the test order. The i-th element in this
+ // vector is the index of the i-th test in the shuffled test list.
+ std::vector<int> test_indices_;
+ // Pointer to the function that sets up the test case.
+ Test::SetUpTestCaseFunc set_up_tc_;
+ // Pointer to the function that tears down the test case.
+ Test::TearDownTestCaseFunc tear_down_tc_;
+ // True iff any test in this test case should run.
+ bool should_run_;
+ // Elapsed time, in milliseconds.
+ TimeInMillis elapsed_time_;
+ // Holds test properties recorded during execution of SetUpTestCase and
+ // TearDownTestCase.
+ TestResult ad_hoc_test_result_;
+
+ // We disallow copying TestCases.
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(TestCase);
+};
+
+// An Environment object is capable of setting up and tearing down an
+// environment. Users should subclass this to define their own
+// environment(s).
+//
+// An Environment object does the set-up and tear-down in virtual
+// methods SetUp() and TearDown() instead of the constructor and the
+// destructor, as:
+//
+// 1. You cannot safely throw from a destructor. This is a problem
+// as in some cases Google Test is used where exceptions are enabled, and
+// we may want to implement ASSERT_* using exceptions where they are
+// available.
+// 2. You cannot use ASSERT_* directly in a constructor or
+// destructor.
+class Environment {
+ public:
+ // The d'tor is virtual as we need to subclass Environment.
+ virtual ~Environment() {}
+
+ // Override this to define how to set up the environment.
+ virtual void SetUp() {}
+
+ // Override this to define how to tear down the environment.
+ virtual void TearDown() {}
+ private:
+ // If you see an error about overriding the following function or
+ // about it being private, you have mis-spelled SetUp() as Setup().
+ struct Setup_should_be_spelled_SetUp {};
+ virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; }
+};
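+
+// A minimal sketch of a user-defined environment (FooEnvironment is a
+// hypothetical name; see AddGlobalTestEnvironment() below for how to
+// register it):
+//
+//   class FooEnvironment : public testing::Environment {
+//    public:
+//     virtual void SetUp() { /* acquire shared resources here */ }
+//     virtual void TearDown() { /* release them here */ }
+//   };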
+
+// The interface for tracing execution of tests. The methods are organized in
+// the order the corresponding events are fired.
+class TestEventListener {
+ public:
+ virtual ~TestEventListener() {}
+
+ // Fired before any test activity starts.
+ virtual void OnTestProgramStart(const UnitTest& unit_test) = 0;
+
+ // Fired before each iteration of tests starts. There may be more than
+ // one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration
+ // index, starting from 0.
+ virtual void OnTestIterationStart(const UnitTest& unit_test,
+ int iteration) = 0;
+
+ // Fired before environment set-up for each iteration of tests starts.
+ virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0;
+
+ // Fired after environment set-up for each iteration of tests ends.
+ virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0;
+
+ // Fired before the test case starts.
+ virtual void OnTestCaseStart(const TestCase& test_case) = 0;
+
+ // Fired before the test starts.
+ virtual void OnTestStart(const TestInfo& test_info) = 0;
+
+ // Fired after a failed assertion or a SUCCEED() invocation.
+ virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0;
+
+ // Fired after the test ends.
+ virtual void OnTestEnd(const TestInfo& test_info) = 0;
+
+ // Fired after the test case ends.
+ virtual void OnTestCaseEnd(const TestCase& test_case) = 0;
+
+ // Fired before environment tear-down for each iteration of tests starts.
+ virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0;
+
+ // Fired after environment tear-down for each iteration of tests ends.
+ virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0;
+
+ // Fired after each iteration of tests finishes.
+ virtual void OnTestIterationEnd(const UnitTest& unit_test,
+ int iteration) = 0;
+
+ // Fired after all test activities have ended.
+ virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0;
+};
+
+// A convenience class for users who need to override just one or two
+// methods and who accept that a change to the signature of an
+// overridden method will not be caught during the build (the empty
+// implementation below would then be silently used instead). For
+// comments about each method please see the definition of
+// TestEventListener above.
+class EmptyTestEventListener : public TestEventListener {
+ public:
+ virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {}
+ virtual void OnTestIterationStart(const UnitTest& /*unit_test*/,
+ int /*iteration*/) {}
+ virtual void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) {}
+ virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {}
+ virtual void OnTestCaseStart(const TestCase& /*test_case*/) {}
+ virtual void OnTestStart(const TestInfo& /*test_info*/) {}
+ virtual void OnTestPartResult(const TestPartResult& /*test_part_result*/) {}
+ virtual void OnTestEnd(const TestInfo& /*test_info*/) {}
+ virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {}
+ virtual void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) {}
+ virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {}
+ virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/,
+ int /*iteration*/) {}
+ virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {}
+};
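+
+// A sketch of a custom listener built on this class (MinimalistPrinter
+// is a hypothetical name; it prints one line per finished test):
+//
+//   class MinimalistPrinter : public testing::EmptyTestEventListener {
+//    public:
+//     // Called after a test ends.
+//     virtual void OnTestEnd(const testing::TestInfo& test_info) {
+//       printf("*** Test %s.%s ended.\n",
+//              test_info.test_case_name(), test_info.name());
+//     }
+//   };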
+
+// TestEventListeners lets users add listeners to track events in Google Test.
+class GTEST_API_ TestEventListeners {
+ public:
+ TestEventListeners();
+ ~TestEventListeners();
+
+ // Appends an event listener to the end of the list. Google Test assumes
+ // the ownership of the listener (i.e. it will delete the listener when
+ // the test program finishes).
+ void Append(TestEventListener* listener);
+
+ // Removes the given event listener from the list and returns it. It then
+ // becomes the caller's responsibility to delete the listener. Returns
+ // NULL if the listener is not found in the list.
+ TestEventListener* Release(TestEventListener* listener);
+
+ // Returns the standard listener responsible for the default console
+ // output. Can be removed from the listeners list to shut down default
+ // console output. Note that removing this object from the listener list
+ // with Release transfers its ownership to the caller and makes this
+ // function return NULL the next time.
+ TestEventListener* default_result_printer() const {
+ return default_result_printer_;
+ }
+
+ // Returns the standard listener responsible for the default XML output
+ // controlled by the --gtest_output=xml flag. Can be removed from the
+ // listeners list by users who want to shut down the default XML output
+  // controlled by this flag and substitute it with a custom one. Note that
+ // removing this object from the listener list with Release transfers its
+ // ownership to the caller and makes this function return NULL the next
+ // time.
+ TestEventListener* default_xml_generator() const {
+ return default_xml_generator_;
+ }
+
+ private:
+ friend class TestCase;
+ friend class TestInfo;
+ friend class internal::DefaultGlobalTestPartResultReporter;
+ friend class internal::NoExecDeathTest;
+ friend class internal::TestEventListenersAccessor;
+ friend class internal::UnitTestImpl;
+
+ // Returns repeater that broadcasts the TestEventListener events to all
+ // subscribers.
+ TestEventListener* repeater();
+
+ // Sets the default_result_printer attribute to the provided listener.
+ // The listener is also added to the listener list and previous
+ // default_result_printer is removed from it and deleted. The listener can
+ // also be NULL in which case it will not be added to the list. Does
+ // nothing if the previous and the current listener objects are the same.
+ void SetDefaultResultPrinter(TestEventListener* listener);
+
+ // Sets the default_xml_generator attribute to the provided listener. The
+ // listener is also added to the listener list and previous
+ // default_xml_generator is removed from it and deleted. The listener can
+ // also be NULL in which case it will not be added to the list. Does
+ // nothing if the previous and the current listener objects are the same.
+ void SetDefaultXmlGenerator(TestEventListener* listener);
+
+ // Controls whether events will be forwarded by the repeater to the
+ // listeners in the list.
+ bool EventForwardingEnabled() const;
+ void SuppressEventForwarding();
+
+ // The actual list of listeners.
+ internal::TestEventRepeater* repeater_;
+ // Listener responsible for the standard result output.
+ TestEventListener* default_result_printer_;
+ // Listener responsible for the creation of the XML output file.
+ TestEventListener* default_xml_generator_;
+
+ // We disallow copying TestEventListeners.
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventListeners);
+};
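+
+// A usage sketch (illustrative; MinimalistPrinter is the hypothetical
+// listener sketched above):
+//
+//   testing::TestEventListeners& listeners =
+//       testing::UnitTest::GetInstance()->listeners();
+//   // Remove (and delete) the default console printer.
+//   delete listeners.Release(listeners.default_result_printer());
+//   // Google Test assumes ownership of the appended listener.
+//   listeners.Append(new MinimalistPrinter);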
+
+// A UnitTest consists of a vector of TestCases.
+//
+// This is a singleton class. The only instance of UnitTest is
+// created when UnitTest::GetInstance() is first called. This
+// instance is never deleted.
+//
+// UnitTest is not copyable.
+//
+// This class is thread-safe as long as the methods are called
+// according to their specification.
+class GTEST_API_ UnitTest {
+ public:
+ // Gets the singleton UnitTest object. The first time this method
+ // is called, a UnitTest object is constructed and returned.
+ // Consecutive calls will return the same object.
+ static UnitTest* GetInstance();
+
+ // Runs all tests in this UnitTest object and prints the result.
+ // Returns 0 if successful, or 1 otherwise.
+ //
+ // This method can only be called from the main thread.
+ //
+ // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+ int Run() GTEST_MUST_USE_RESULT_;
+
+ // Returns the working directory when the first TEST() or TEST_F()
+ // was executed. The UnitTest object owns the string.
+ const char* original_working_dir() const;
+
+ // Returns the TestCase object for the test that's currently running,
+ // or NULL if no test is running.
+ const TestCase* current_test_case() const
+ GTEST_LOCK_EXCLUDED_(mutex_);
+
+ // Returns the TestInfo object for the test that's currently running,
+ // or NULL if no test is running.
+ const TestInfo* current_test_info() const
+ GTEST_LOCK_EXCLUDED_(mutex_);
+
+ // Returns the random seed used at the start of the current test run.
+ int random_seed() const;
+
+#if GTEST_HAS_PARAM_TEST
+ // Returns the ParameterizedTestCaseRegistry object used to keep track of
+ // value-parameterized tests and instantiate and register them.
+ //
+ // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+ internal::ParameterizedTestCaseRegistry& parameterized_test_registry()
+ GTEST_LOCK_EXCLUDED_(mutex_);
+#endif // GTEST_HAS_PARAM_TEST
+
+ // Gets the number of successful test cases.
+ int successful_test_case_count() const;
+
+ // Gets the number of failed test cases.
+ int failed_test_case_count() const;
+
+ // Gets the number of all test cases.
+ int total_test_case_count() const;
+
+ // Gets the number of all test cases that contain at least one test
+ // that should run.
+ int test_case_to_run_count() const;
+
+ // Gets the number of successful tests.
+ int successful_test_count() const;
+
+ // Gets the number of failed tests.
+ int failed_test_count() const;
+
+ // Gets the number of disabled tests that will be reported in the XML report.
+ int reportable_disabled_test_count() const;
+
+ // Gets the number of disabled tests.
+ int disabled_test_count() const;
+
+ // Gets the number of tests to be printed in the XML report.
+ int reportable_test_count() const;
+
+ // Gets the number of all tests.
+ int total_test_count() const;
+
+ // Gets the number of tests that should run.
+ int test_to_run_count() const;
+
+ // Gets the time of the test program start, in ms from the start of the
+ // UNIX epoch.
+ TimeInMillis start_timestamp() const;
+
+ // Gets the elapsed time, in milliseconds.
+ TimeInMillis elapsed_time() const;
+
+ // Returns true iff the unit test passed (i.e. all test cases passed).
+ bool Passed() const;
+
+ // Returns true iff the unit test failed (i.e. some test case failed
+ // or something outside of all tests failed).
+ bool Failed() const;
+
+ // Gets the i-th test case among all the test cases. i can range from 0 to
+ // total_test_case_count() - 1. If i is not in that range, returns NULL.
+ const TestCase* GetTestCase(int i) const;
+
+ // Returns the TestResult containing information on test failures and
+ // properties logged outside of individual test cases.
+ const TestResult& ad_hoc_test_result() const;
+
+ // Returns the list of event listeners that can be used to track events
+ // inside Google Test.
+ TestEventListeners& listeners();
+
+ private:
+ // Registers and returns a global test environment. When a test
+  // program is run, all global test environments will be set up in
+  // the order they were registered. After all tests in the program
+  // have finished, all global test environments will be torn down in
+ // the *reverse* order they were registered.
+ //
+ // The UnitTest object takes ownership of the given environment.
+ //
+ // This method can only be called from the main thread.
+ Environment* AddEnvironment(Environment* env);
+
+ // Adds a TestPartResult to the current TestResult object. All
+ // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc)
+ // eventually call this to report their results. The user code
+ // should use the assertion macros instead of calling this directly.
+ void AddTestPartResult(TestPartResult::Type result_type,
+ const char* file_name,
+ int line_number,
+ const std::string& message,
+ const std::string& os_stack_trace)
+ GTEST_LOCK_EXCLUDED_(mutex_);
+
+ // Adds a TestProperty to the current TestResult object when invoked from
+ // inside a test, to current TestCase's ad_hoc_test_result_ when invoked
+ // from SetUpTestCase or TearDownTestCase, or to the global property set
+ // when invoked elsewhere. If the result already contains a property with
+ // the same key, the value will be updated.
+ void RecordProperty(const std::string& key, const std::string& value);
+
+ // Gets the i-th test case among all the test cases. i can range from 0 to
+ // total_test_case_count() - 1. If i is not in that range, returns NULL.
+ TestCase* GetMutableTestCase(int i);
+
+ // Accessors for the implementation object.
+ internal::UnitTestImpl* impl() { return impl_; }
+ const internal::UnitTestImpl* impl() const { return impl_; }
+
+  // These classes and functions are friends as they need to access private
+ // members of UnitTest.
+ friend class Test;
+ friend class internal::AssertHelper;
+ friend class internal::ScopedTrace;
+ friend class internal::StreamingListenerTest;
+ friend class internal::UnitTestRecordPropertyTestHelper;
+ friend Environment* AddGlobalTestEnvironment(Environment* env);
+ friend internal::UnitTestImpl* internal::GetUnitTestImpl();
+ friend void internal::ReportFailureInUnknownLocation(
+ TestPartResult::Type result_type,
+ const std::string& message);
+
+ // Creates an empty UnitTest.
+ UnitTest();
+
+ // D'tor
+ virtual ~UnitTest();
+
+ // Pushes a trace defined by SCOPED_TRACE() on to the per-thread
+ // Google Test trace stack.
+ void PushGTestTrace(const internal::TraceInfo& trace)
+ GTEST_LOCK_EXCLUDED_(mutex_);
+
+ // Pops a trace from the per-thread Google Test trace stack.
+ void PopGTestTrace()
+ GTEST_LOCK_EXCLUDED_(mutex_);
+
+ // Protects mutable state in *impl_. This is mutable as some const
+ // methods need to lock it too.
+ mutable internal::Mutex mutex_;
+
+ // Opaque implementation object. This field is never changed once
+ // the object is constructed. We don't mark it as const here, as
+ // doing so will cause a warning in the constructor of UnitTest.
+ // Mutable state in *impl_ is protected by mutex_.
+ internal::UnitTestImpl* impl_;
+
+ // We disallow copying UnitTest.
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTest);
+};
+
+// A convenient wrapper for adding an environment for the test
+// program.
+//
+// You should call this before RUN_ALL_TESTS() is called, probably in
+// main(). If you use gtest_main, you need to call this before main()
+// starts for it to take effect. For example, you can define a global
+// variable like this:
+//
+// testing::Environment* const foo_env =
+// testing::AddGlobalTestEnvironment(new FooEnvironment);
+//
+// However, we strongly recommend writing your own main() and
+// calling AddGlobalTestEnvironment() there, as relying on initialization
+// of global variables makes the code harder to read and may cause
+// problems when you register multiple environments from different
+// translation units and the environments have dependencies among them
+// (remember that the compiler doesn't guarantee the order in which
+// global variables from different translation units are initialized).
+inline Environment* AddGlobalTestEnvironment(Environment* env) {
+ return UnitTest::GetInstance()->AddEnvironment(env);
+}
+
+// Initializes Google Test. This must be called before calling
+// RUN_ALL_TESTS(). In particular, it parses a command line for the
+// flags that Google Test recognizes. Whenever a Google Test flag is
+// seen, it is removed from argv, and *argc is decremented.
+//
+// No value is returned. Instead, the Google Test flag variables are
+// updated.
+//
+// Calling the function for the second time has no user-visible effect.
+GTEST_API_ void InitGoogleTest(int* argc, char** argv);
+
+// This overloaded version can be used in Windows programs compiled in
+// UNICODE mode.
+GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv);
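+
+// A typical main() combining the above (a sketch; FooEnvironment is
+// the hypothetical environment sketched earlier):
+//
+//   int main(int argc, char** argv) {
+//     testing::InitGoogleTest(&argc, argv);
+//     testing::AddGlobalTestEnvironment(new FooEnvironment);
+//     return RUN_ALL_TESTS();
+//   }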
+
+namespace internal {
+
+// FormatForComparison<ToPrint, OtherOperand>::Format(value) formats a
+// value of type ToPrint that is an operand of a comparison assertion
+// (e.g. ASSERT_EQ). OtherOperand is the type of the other operand in
+// the comparison, and is used to help determine the best way to
+// format the value. In particular, when the value is a C string
+// (char pointer) and the other operand is an STL string object, we
+// want to format the C string as a string, since we know it is
+// compared by value with the string object. If the value is a char
+// pointer but the other operand is not an STL string object, we don't
+// know whether the pointer is supposed to point to a NUL-terminated
+// string, and thus want to print it as a pointer to be safe.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+
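+// For instance, under this policy (an illustrative sketch of the
+// resulting failure output):
+//
+//   const char* p = "hello";
+//   char q[] = "hello";
+//   EXPECT_EQ(std::string("hi"), p);  // p is printed as the string "hello".
+//   EXPECT_EQ(p, q);                  // both sides are printed as pointers.
+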
+// The default case.
+template <typename ToPrint, typename OtherOperand>
+class FormatForComparison {
+ public:
+ static ::std::string Format(const ToPrint& value) {
+ return ::testing::PrintToString(value);
+ }
+};
+
+// Array.
+template <typename ToPrint, size_t N, typename OtherOperand>
+class FormatForComparison<ToPrint[N], OtherOperand> {
+ public:
+ static ::std::string Format(const ToPrint* value) {
+ return FormatForComparison<const ToPrint*, OtherOperand>::Format(value);
+ }
+};
+
+// By default, print C strings as pointers to be safe, as we don't know
+// whether they actually point to a NUL-terminated string.
+
+#define GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(CharType) \
+ template <typename OtherOperand> \
+ class FormatForComparison<CharType*, OtherOperand> { \
+ public: \
+ static ::std::string Format(CharType* value) { \
+ return ::testing::PrintToString(static_cast<const void*>(value)); \
+ } \
+ }
+
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(wchar_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t);
+
+#undef GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_
+
+// If a C string is compared with an STL string object, we know it's meant
+// to point to a NUL-terminated string, and thus can print it as a string.
+
+#define GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(CharType, OtherStringType) \
+ template <> \
+ class FormatForComparison<CharType*, OtherStringType> { \
+ public: \
+ static ::std::string Format(CharType* value) { \
+ return ::testing::PrintToString(value); \
+ } \
+ }
+
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::std::string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::std::string);
+
+#if GTEST_HAS_GLOBAL_STRING
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::string);
+#endif
+
+#if GTEST_HAS_GLOBAL_WSTRING
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::wstring);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::wstring);
+#endif
+
+#if GTEST_HAS_STD_WSTRING
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::std::wstring);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::std::wstring);
+#endif
+
+#undef GTEST_IMPL_FORMAT_C_STRING_AS_STRING_
+
+// Formats a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT, etc.)
+// operand to be used in a failure message. The type (but not value)
+// of the other operand may affect the format. This allows us to
+// print a char* as a raw pointer when it is compared against another
+// char* or void*, and print it as a C string when it is compared
+// against an std::string object, for example.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+template <typename T1, typename T2>
+std::string FormatForComparisonFailureMessage(
+ const T1& value, const T2& /* other_operand */) {
+ return FormatForComparison<T1, T2>::Format(value);
+}
+
+// The helper function for {ASSERT|EXPECT}_EQ.
+template <typename T1, typename T2>
+AssertionResult CmpHelperEQ(const char* expected_expression,
+ const char* actual_expression,
+ const T1& expected,
+ const T2& actual) {
+#ifdef _MSC_VER
+# pragma warning(push) // Saves the current warning state.
+# pragma warning(disable:4389) // Temporarily disables warning on
+ // signed/unsigned mismatch.
+#endif
+
+ if (expected == actual) {
+ return AssertionSuccess();
+ }
+
+#ifdef _MSC_VER
+# pragma warning(pop) // Restores the warning state.
+#endif
+
+ return EqFailure(expected_expression,
+ actual_expression,
+ FormatForComparisonFailureMessage(expected, actual),
+ FormatForComparisonFailureMessage(actual, expected),
+ false);
+}
+
+// With this overloaded version, we allow anonymous enums to be used
+// in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous enums
+// can be implicitly cast to BiggestInt.
+GTEST_API_ AssertionResult CmpHelperEQ(const char* expected_expression,
+ const char* actual_expression,
+ BiggestInt expected,
+ BiggestInt actual);
+
+// The helper class for {ASSERT|EXPECT}_EQ. The template argument
+// lhs_is_null_literal is true iff the first argument to ASSERT_EQ()
+// is a null pointer literal. The following default implementation is
+// for lhs_is_null_literal being false.
+template <bool lhs_is_null_literal>
+class EqHelper {
+ public:
+ // This templatized version is for the general case.
+ template <typename T1, typename T2>
+ static AssertionResult Compare(const char* expected_expression,
+ const char* actual_expression,
+ const T1& expected,
+ const T2& actual) {
+ return CmpHelperEQ(expected_expression, actual_expression, expected,
+ actual);
+ }
+
+ // With this overloaded version, we allow anonymous enums to be used
+ // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous
+ // enums can be implicitly cast to BiggestInt.
+ //
+ // Even though its body looks the same as the above version, we
+ // cannot merge the two, as it will make anonymous enums unhappy.
+ static AssertionResult Compare(const char* expected_expression,
+ const char* actual_expression,
+ BiggestInt expected,
+ BiggestInt actual) {
+ return CmpHelperEQ(expected_expression, actual_expression, expected,
+ actual);
+ }
+};
+
+// This specialization is used when the first argument to ASSERT_EQ()
+// is a null pointer literal, like NULL, false, or 0.
+template <>
+class EqHelper<true> {
+ public:
+ // We define two overloaded versions of Compare(). The first
+ // version will be picked when the second argument to ASSERT_EQ() is
+ // NOT a pointer, e.g. ASSERT_EQ(0, AnIntFunction()) or
+ // EXPECT_EQ(false, a_bool).
+ template <typename T1, typename T2>
+ static AssertionResult Compare(
+ const char* expected_expression,
+ const char* actual_expression,
+ const T1& expected,
+ const T2& actual,
+ // The following line prevents this overload from being considered if T2
+ // is not a pointer type. We need this because ASSERT_EQ(NULL, my_ptr)
+ // expands to Compare("", "", NULL, my_ptr), which requires a conversion
+ // to match the Secret* in the other overload, which would otherwise make
+ // this template match better.
+ typename EnableIf<!is_pointer<T2>::value>::type* = 0) {
+ return CmpHelperEQ(expected_expression, actual_expression, expected,
+ actual);
+ }
+
+ // This version will be picked when the second argument to ASSERT_EQ() is a
+ // pointer, e.g. ASSERT_EQ(NULL, a_pointer).
+ template <typename T>
+ static AssertionResult Compare(
+ const char* expected_expression,
+ const char* actual_expression,
+ // We used to have a second template parameter instead of Secret*. That
+ // template parameter would deduce to 'long', making this a better match
+ // than the first overload even without the first overload's EnableIf.
+ // Unfortunately, gcc with -Wconversion-null warns when "passing NULL to
+ // non-pointer argument" (even a deduced integral argument), so the old
+ // implementation caused warnings in user code.
+ Secret* /* expected (NULL) */,
+ T* actual) {
+ // We already know that 'expected' is a null pointer.
+ return CmpHelperEQ(expected_expression, actual_expression,
+ static_cast<T*>(NULL), actual);
+ }
+};
+
+// A macro for implementing the helper functions needed to implement
+// ASSERT_?? and EXPECT_??. It is here just to avoid copy-and-paste
+// of similar code.
+//
+// For each templatized helper function, we also define an overloaded
+// version for BiggestInt in order to reduce code bloat and allow
+// anonymous enums to be used with {ASSERT|EXPECT}_?? when compiled
+// with gcc 4.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
+template <typename T1, typename T2>\
+AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
+ const T1& val1, const T2& val2) {\
+ if (val1 op val2) {\
+ return AssertionSuccess();\
+ } else {\
+ return AssertionFailure() \
+ << "Expected: (" << expr1 << ") " #op " (" << expr2\
+ << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
+ << " vs " << FormatForComparisonFailureMessage(val2, val1);\
+ }\
+}\
+GTEST_API_ AssertionResult CmpHelper##op_name(\
+ const char* expr1, const char* expr2, BiggestInt val1, BiggestInt val2)
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+
+// Implements the helper function for {ASSERT|EXPECT}_NE
+GTEST_IMPL_CMP_HELPER_(NE, !=);
+// Implements the helper function for {ASSERT|EXPECT}_LE
+GTEST_IMPL_CMP_HELPER_(LE, <=);
+// Implements the helper function for {ASSERT|EXPECT}_LT
+GTEST_IMPL_CMP_HELPER_(LT, <);
+// Implements the helper function for {ASSERT|EXPECT}_GE
+GTEST_IMPL_CMP_HELPER_(GE, >=);
+// Implements the helper function for {ASSERT|EXPECT}_GT
+GTEST_IMPL_CMP_HELPER_(GT, >);
+
+#undef GTEST_IMPL_CMP_HELPER_
+
+// The helper function for {ASSERT|EXPECT}_STREQ.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression,
+ const char* actual_expression,
+ const char* expected,
+ const char* actual);
+
+// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression,
+ const char* actual_expression,
+ const char* expected,
+ const char* actual);
+
+// The helper function for {ASSERT|EXPECT}_STRNE.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
+ const char* s2_expression,
+ const char* s1,
+ const char* s2);
+
+// The helper function for {ASSERT|EXPECT}_STRCASENE.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
+ const char* s2_expression,
+ const char* s1,
+ const char* s2);
+
+
+// Helper function for *_STREQ on wide strings.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression,
+ const char* actual_expression,
+ const wchar_t* expected,
+ const wchar_t* actual);
+
+// Helper function for *_STRNE on wide strings.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
+ const char* s2_expression,
+ const wchar_t* s1,
+ const wchar_t* s2);
+
+} // namespace internal
+
+// IsSubstring() and IsNotSubstring() are intended to be used as the
+// first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by
+// themselves. They check whether needle is a substring of haystack
+// (NULL is considered a substring of itself only), and return an
+// appropriate error message when they fail.
+//
+// The {needle,haystack}_expr arguments are the stringified
+// expressions that generated the two real arguments.
+GTEST_API_ AssertionResult IsSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const char* needle, const char* haystack);
+GTEST_API_ AssertionResult IsSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const wchar_t* needle, const wchar_t* haystack);
+GTEST_API_ AssertionResult IsNotSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const char* needle, const char* haystack);
+GTEST_API_ AssertionResult IsNotSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const wchar_t* needle, const wchar_t* haystack);
+GTEST_API_ AssertionResult IsSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const ::std::string& needle, const ::std::string& haystack);
+GTEST_API_ AssertionResult IsNotSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const ::std::string& needle, const ::std::string& haystack);
+
+#if GTEST_HAS_STD_WSTRING
+GTEST_API_ AssertionResult IsSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const ::std::wstring& needle, const ::std::wstring& haystack);
+GTEST_API_ AssertionResult IsNotSubstring(
+ const char* needle_expr, const char* haystack_expr,
+ const ::std::wstring& needle, const ::std::wstring& haystack);
+#endif // GTEST_HAS_STD_WSTRING
+
+namespace internal {
+
+// Helper template function for comparing floating-points.
+//
+// Template parameter:
+//
+// RawType: the raw floating-point type (either float or double)
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+template <typename RawType>
+AssertionResult CmpHelperFloatingPointEQ(const char* expected_expression,
+ const char* actual_expression,
+ RawType expected,
+ RawType actual) {
+ const FloatingPoint<RawType> lhs(expected), rhs(actual);
+
+ if (lhs.AlmostEquals(rhs)) {
+ return AssertionSuccess();
+ }
+
+ ::std::stringstream expected_ss;
+ expected_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+ << expected;
+
+ ::std::stringstream actual_ss;
+ actual_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+ << actual;
+
+ return EqFailure(expected_expression,
+ actual_expression,
+ StringStreamToString(&expected_ss),
+ StringStreamToString(&actual_ss),
+ false);
+}
+
+// Helper function for implementing ASSERT_NEAR.
+//
+// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1,
+ const char* expr2,
+ const char* abs_error_expr,
+ double val1,
+ double val2,
+ double abs_error);
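+
+// These helpers back the {ASSERT|EXPECT}_{FLOAT|DOUBLE}_EQ and
+// {ASSERT|EXPECT}_NEAR assertions. A usage sketch (pi is a
+// hypothetical variable):
+//
+//   EXPECT_FLOAT_EQ(2.0f, sqrtf(4.0f));  // equal within 4 ULPs
+//   EXPECT_NEAR(3.14159, pi, 0.0001);    // |3.14159 - pi| <= 0.0001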
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+// A class that enables one to stream messages to assertion macros
+class GTEST_API_ AssertHelper {
+ public:
+ // Constructor.
+ AssertHelper(TestPartResult::Type type,
+ const char* file,
+ int line,
+ const char* message);
+ ~AssertHelper();
+
+ // Message assignment is a semantic trick to enable assertion
+ // streaming; see the GTEST_MESSAGE_ macro below.
+ void operator=(const Message& message) const;
+
+ private:
+ // We put our data in a struct so that the size of the AssertHelper class can
+ // be as small as possible. This is important because gcc is incapable of
+ // re-using stack space even for temporary variables, so every EXPECT_EQ
+ // reserves stack space for another AssertHelper.
+ struct AssertHelperData {
+ AssertHelperData(TestPartResult::Type t,
+ const char* srcfile,
+ int line_num,
+ const char* msg)
+ : type(t), file(srcfile), line(line_num), message(msg) { }
+
+ TestPartResult::Type const type;
+ const char* const file;
+ int const line;
+ std::string const message;
+
+ private:
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelperData);
+ };
+
+ AssertHelperData* const data_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper);
+};
+
+} // namespace internal
+
+#if GTEST_HAS_PARAM_TEST
+// The pure interface class that all value-parameterized tests inherit from.
+// A value-parameterized class must inherit from both ::testing::Test and
+// ::testing::WithParamInterface. In most cases that just means inheriting
+// from ::testing::TestWithParam, but more complicated test hierarchies
+// may need to inherit from Test and WithParamInterface at different levels.
+//
+// This interface has support for accessing the test parameter value via
+// the GetParam() method.
+//
+// Use it with one of the parameter-generating functions, like Range(),
+// Values(), ValuesIn(), Bool(), and Combine().
+//
+// class FooTest : public ::testing::TestWithParam<int> {
+// protected:
+// FooTest() {
+// // Can use GetParam() here.
+// }
+// virtual ~FooTest() {
+// // Can use GetParam() here.
+// }
+// virtual void SetUp() {
+// // Can use GetParam() here.
+// }
+//   virtual void TearDown() {
+// // Can use GetParam() here.
+// }
+// };
+// TEST_P(FooTest, DoesBar) {
+// // Can use GetParam() method here.
+// Foo foo;
+// ASSERT_TRUE(foo.DoesBar(GetParam()));
+// }
+// INSTANTIATE_TEST_CASE_P(OneToTenRange, FooTest, ::testing::Range(1, 10));
+
+template <typename T>
+class WithParamInterface {
+ public:
+ typedef T ParamType;
+ virtual ~WithParamInterface() {}
+
+  // The current parameter value. It is also available in the test fixture's
+ // constructor. This member function is non-static, even though it only
+ // references static data, to reduce the opportunity for incorrect uses
+ // like writing 'WithParamInterface<bool>::GetParam()' for a test that
+ // uses a fixture whose parameter type is int.
+ const ParamType& GetParam() const {
+ GTEST_CHECK_(parameter_ != NULL)
+ << "GetParam() can only be called inside a value-parameterized test "
+ << "-- did you intend to write TEST_P instead of TEST_F?";
+ return *parameter_;
+ }
+
+ private:
+ // Sets parameter value. The caller is responsible for making sure the value
+ // remains alive and unchanged throughout the current test.
+ static void SetParam(const ParamType* parameter) {
+ parameter_ = parameter;
+ }
+
+ // Static value used for accessing parameter during a test lifetime.
+ static const ParamType* parameter_;
+
+ // TestClass must be a subclass of WithParamInterface<T> and Test.
+ template <class TestClass> friend class internal::ParameterizedTestFactory;
+};
+
+template <typename T>
+const T* WithParamInterface<T>::parameter_ = NULL;
+
+// Most value-parameterized classes can ignore the existence of
+// WithParamInterface, and can just inherit from ::testing::TestWithParam.
+
+template <typename T>
+class TestWithParam : public Test, public WithParamInterface<T> {
+};
+
+#endif // GTEST_HAS_PARAM_TEST
+
+// Macros for indicating success/failure in test code.
+
+// ADD_FAILURE unconditionally adds a failure to the current test.
+// SUCCEED generates a success - it doesn't automatically make the
+// current test successful, as a test is only successful when it has
+// no failure.
+//
+// EXPECT_* verifies that a certain condition is satisfied. If not,
+// it behaves like ADD_FAILURE. In particular:
+//
+// EXPECT_TRUE verifies that a Boolean condition is true.
+// EXPECT_FALSE verifies that a Boolean condition is false.
+//
+// FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except
+// that they will also abort the current function on failure. People
+// usually want the fail-fast behavior of FAIL and ASSERT_*, but those
+// writing data-driven tests often find themselves using ADD_FAILURE
+// and EXPECT_* more.
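+//
+// A short sketch of the difference (DoWork() and Result are
+// hypothetical):
+//
+//   TEST(WorkTest, HandlesInput) {
+//     Result* r = DoWork();
+//     ASSERT_TRUE(r != NULL);       // aborts this test on failure
+//     EXPECT_EQ(42, r->answer());   // records a failure but continues
+//   }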
+
+// Generates a nonfatal failure with a generic message.
+#define ADD_FAILURE() GTEST_NONFATAL_FAILURE_("Failed")
+
+// Generates a nonfatal failure at the given source file location with
+// a generic message.
+#define ADD_FAILURE_AT(file, line) \
+ GTEST_MESSAGE_AT_(file, line, "Failed", \
+ ::testing::TestPartResult::kNonFatalFailure)
+
+// Generates a fatal failure with a generic message.
+#define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed")
+
+// Define this macro to 1 to omit the definition of FAIL(), which is a
+// generic name and clashes with some other libraries.
+#if !GTEST_DONT_DEFINE_FAIL
+# define FAIL() GTEST_FAIL()
+#endif
+
+// Generates a success with a generic message.
+#define GTEST_SUCCEED() GTEST_SUCCESS_("Succeeded")
+
+// Define this macro to 1 to omit the definition of SUCCEED(), which
+// is a generic name and clashes with some other libraries.
+#if !GTEST_DONT_DEFINE_SUCCEED
+# define SUCCEED() GTEST_SUCCEED()
+#endif
+
+// Macros for testing exceptions.
+//
+// * {ASSERT|EXPECT}_THROW(statement, expected_exception):
+// Tests that the statement throws the expected exception.
+// * {ASSERT|EXPECT}_NO_THROW(statement):
+// Tests that the statement doesn't throw any exception.
+// * {ASSERT|EXPECT}_ANY_THROW(statement):
+// Tests that the statement throws an exception.
+
+#define EXPECT_THROW(statement, expected_exception) \
+ GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_NO_THROW(statement) \
+ GTEST_TEST_NO_THROW_(statement, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_ANY_THROW(statement) \
+ GTEST_TEST_ANY_THROW_(statement, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_THROW(statement, expected_exception) \
+ GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_)
+#define ASSERT_NO_THROW(statement) \
+ GTEST_TEST_NO_THROW_(statement, GTEST_FATAL_FAILURE_)
+#define ASSERT_ANY_THROW(statement) \
+ GTEST_TEST_ANY_THROW_(statement, GTEST_FATAL_FAILURE_)
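+
+// For example (a sketch; ThrowingFunc and SafeFunc are hypothetical):
+//
+//   EXPECT_THROW(ThrowingFunc(), std::out_of_range);
+//   EXPECT_NO_THROW(SafeFunc());
+//   EXPECT_ANY_THROW(ThrowingFunc());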
+
+// Boolean assertions. Condition can be either a Boolean expression or an
+// AssertionResult. For more information on how to use AssertionResult with
+// these macros see comments on that class.
+#define EXPECT_TRUE(condition) \
+ GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
+ GTEST_NONFATAL_FAILURE_)
+#define EXPECT_FALSE(condition) \
+ GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
+ GTEST_NONFATAL_FAILURE_)
+#define ASSERT_TRUE(condition) \
+ GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
+ GTEST_FATAL_FAILURE_)
+#define ASSERT_FALSE(condition) \
+ GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
+ GTEST_FATAL_FAILURE_)
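+
+// For example (a sketch; IsEven and queue are hypothetical):
+//
+//   EXPECT_TRUE(IsEven(2));
+//   ASSERT_FALSE(queue.Empty()) << "the queue is unexpectedly empty";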
+
+// Includes the auto-generated header that implements a family of
+// generic predicate assertion macros.
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is AUTOMATICALLY GENERATED on 10/31/2011 by command
+// 'gen_gtest_pred_impl.py 5'. DO NOT EDIT BY HAND!
+//
+// Implements a family of generic predicate assertion macros.
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+
+// Makes sure this header is not included before gtest.h.
+#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
+# error Do not include gtest_pred_impl.h directly. Include gtest.h instead.
+#endif // GTEST_INCLUDE_GTEST_GTEST_H_
+
+// This header implements a family of generic predicate assertion
+// macros:
+//
+// ASSERT_PRED_FORMAT1(pred_format, v1)
+// ASSERT_PRED_FORMAT2(pred_format, v1, v2)
+// ...
+//
+// where pred_format is a function or functor that takes n (in the
+// case of ASSERT_PRED_FORMATn) values and their source expression
+// text, and returns a testing::AssertionResult. See the definition
+// of ASSERT_EQ in gtest.h for an example.
+//
+// If you don't care about formatting, you can use the more
+// restrictive version:
+//
+// ASSERT_PRED1(pred, v1)
+// ASSERT_PRED2(pred, v1, v2)
+// ...
+//
+// where pred is an n-ary function or functor that returns bool,
+// and the values v1, v2, ..., must support the << operator for
+// streaming to std::ostream.
+//
+// We also define the EXPECT_* variations.
+//
+// For now we only support predicates whose arity is at most 5.
+// Please email googletestframework@googlegroups.com if you need
+// support for higher arities.
+
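+// A usage sketch (MutuallyPrime is a hypothetical bool(int, int)
+// predicate):
+//
+//   EXPECT_PRED2(MutuallyPrime, 3, 4);
+//
+// On failure this prints the predicate name, both argument
+// expressions, and their values.
+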
+// GTEST_ASSERT_ is the basic statement to which all of the assertions
+// in this file reduce. Don't use this in your code.
+
+#define GTEST_ASSERT_(expression, on_failure) \
+ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+ if (const ::testing::AssertionResult gtest_ar = (expression)) \
+ ; \
+ else \
+ on_failure(gtest_ar.failure_message())
+
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED1. Don't use
+// this in your code.
+template <typename Pred,
+ typename T1>
+AssertionResult AssertPred1Helper(const char* pred_text,
+ const char* e1,
+ Pred pred,
+ const T1& v1) {
+ if (pred(v1)) return AssertionSuccess();
+
+ return AssertionFailure() << pred_text << "("
+ << e1 << ") evaluates to false, where"
+ << "\n" << e1 << " evaluates to " << v1;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)\
+ GTEST_ASSERT_(pred_format(#v1, v1), \
+ on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED1. Don't use
+// this in your code.
+#define GTEST_PRED1_(pred, v1, on_failure)\
+ GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, \
+ #v1, \
+ pred, \
+ v1), on_failure)
+
+// Unary predicate assertion macros.
+#define EXPECT_PRED_FORMAT1(pred_format, v1) \
+ GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED1(pred, v1) \
+ GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT1(pred_format, v1) \
+ GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED1(pred, v1) \
+ GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_)
+
+
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED2. Don't use
+// this in your code.
+template <typename Pred,
+ typename T1,
+ typename T2>
+AssertionResult AssertPred2Helper(const char* pred_text,
+ const char* e1,
+ const char* e2,
+ Pred pred,
+ const T1& v1,
+ const T2& v2) {
+ if (pred(v1, v2)) return AssertionSuccess();
+
+ return AssertionFailure() << pred_text << "("
+ << e1 << ", "
+ << e2 << ") evaluates to false, where"
+ << "\n" << e1 << " evaluates to " << v1
+ << "\n" << e2 << " evaluates to " << v2;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)\
+ GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), \
+ on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED2. Don't use
+// this in your code.
+#define GTEST_PRED2_(pred, v1, v2, on_failure)\
+ GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, \
+ #v1, \
+ #v2, \
+ pred, \
+ v1, \
+ v2), on_failure)
+
+// Binary predicate assertion macros.
+#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \
+ GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED2(pred, v1, v2) \
+ GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \
+ GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED2(pred, v1, v2) \
+ GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_)
+
+
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED3. Don't use
+// this in your code.
+template <typename Pred,
+ typename T1,
+ typename T2,
+ typename T3>
+AssertionResult AssertPred3Helper(const char* pred_text,
+ const char* e1,
+ const char* e2,
+ const char* e3,
+ Pred pred,
+ const T1& v1,
+ const T2& v2,
+ const T3& v3) {
+ if (pred(v1, v2, v3)) return AssertionSuccess();
+
+ return AssertionFailure() << pred_text << "("
+ << e1 << ", "
+ << e2 << ", "
+ << e3 << ") evaluates to false, where"
+ << "\n" << e1 << " evaluates to " << v1
+ << "\n" << e2 << " evaluates to " << v2
+ << "\n" << e3 << " evaluates to " << v3;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure)\
+ GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), \
+ on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED3. Don't use
+// this in your code.
+#define GTEST_PRED3_(pred, v1, v2, v3, on_failure)\
+ GTEST_ASSERT_(::testing::AssertPred3Helper(#pred, \
+ #v1, \
+ #v2, \
+ #v3, \
+ pred, \
+ v1, \
+ v2, \
+ v3), on_failure)
+
+// Ternary predicate assertion macros.
+#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \
+ GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED3(pred, v1, v2, v3) \
+ GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \
+ GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED3(pred, v1, v2, v3) \
+ GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_)
+
+
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED4. Don't use
+// this in your code.
+template <typename Pred,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4>
+AssertionResult AssertPred4Helper(const char* pred_text,
+ const char* e1,
+ const char* e2,
+ const char* e3,
+ const char* e4,
+ Pred pred,
+ const T1& v1,
+ const T2& v2,
+ const T3& v3,
+ const T4& v4) {
+ if (pred(v1, v2, v3, v4)) return AssertionSuccess();
+
+ return AssertionFailure() << pred_text << "("
+ << e1 << ", "
+ << e2 << ", "
+ << e3 << ", "
+ << e4 << ") evaluates to false, where"
+ << "\n" << e1 << " evaluates to " << v1
+ << "\n" << e2 << " evaluates to " << v2
+ << "\n" << e3 << " evaluates to " << v3
+ << "\n" << e4 << " evaluates to " << v4;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\
+ GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), \
+ on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED4. Don't use
+// this in your code.
+#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\
+ GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \
+ #v1, \
+ #v2, \
+ #v3, \
+ #v4, \
+ pred, \
+ v1, \
+ v2, \
+ v3, \
+ v4), on_failure)
+
+// 4-ary predicate assertion macros.
+#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
+ GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED4(pred, v1, v2, v3, v4) \
+ GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \
+ GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED4(pred, v1, v2, v3, v4) \
+ GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_)
+
+
+
+// Helper function for implementing {EXPECT|ASSERT}_PRED5. Don't use
+// this in your code.
+template <typename Pred,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename T5>
+AssertionResult AssertPred5Helper(const char* pred_text,
+ const char* e1,
+ const char* e2,
+ const char* e3,
+ const char* e4,
+ const char* e5,
+ Pred pred,
+ const T1& v1,
+ const T2& v2,
+ const T3& v3,
+ const T4& v4,
+ const T5& v5) {
+ if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess();
+
+ return AssertionFailure() << pred_text << "("
+ << e1 << ", "
+ << e2 << ", "
+ << e3 << ", "
+ << e4 << ", "
+ << e5 << ") evaluates to false, where"
+ << "\n" << e1 << " evaluates to " << v1
+ << "\n" << e2 << " evaluates to " << v2
+ << "\n" << e3 << " evaluates to " << v3
+ << "\n" << e4 << " evaluates to " << v4
+ << "\n" << e5 << " evaluates to " << v5;
+}
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5.
+// Don't use this in your code.
+#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\
+ GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \
+ on_failure)
+
+// Internal macro for implementing {EXPECT|ASSERT}_PRED5. Don't use
+// this in your code.
+#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\
+ GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \
+ #v1, \
+ #v2, \
+ #v3, \
+ #v4, \
+ #v5, \
+ pred, \
+ v1, \
+ v2, \
+ v3, \
+ v4, \
+ v5), on_failure)
+
+// 5-ary predicate assertion macros.
+#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
+ GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \
+ GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \
+ GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \
+ GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_)
+
+
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+
+// Macros for testing equalities and inequalities.
+//
+// * {ASSERT|EXPECT}_EQ(expected, actual): Tests that expected == actual
+// * {ASSERT|EXPECT}_NE(v1, v2): Tests that v1 != v2
+// * {ASSERT|EXPECT}_LT(v1, v2): Tests that v1 < v2
+// * {ASSERT|EXPECT}_LE(v1, v2): Tests that v1 <= v2
+// * {ASSERT|EXPECT}_GT(v1, v2): Tests that v1 > v2
+// * {ASSERT|EXPECT}_GE(v1, v2): Tests that v1 >= v2
+//
+// When they are not, Google Test prints both the tested expressions and
+// their actual values. The values must be compatible built-in types,
+// or you will get a compiler error. By "compatible" we mean that the
+// values can be compared by the respective operator.
+//
+// Note:
+//
+// 1. It is possible to make a user-defined type work with
+// {ASSERT|EXPECT}_??(), but that requires overloading the
+// comparison operators and is thus discouraged by the Google C++
+// Usage Guide. Therefore, you are advised to use the
+// {ASSERT|EXPECT}_TRUE() macro to assert that two objects are
+// equal.
+//
+// 2. The {ASSERT|EXPECT}_??() macros do pointer comparisons on
+// pointers (in particular, C strings). Therefore, if you use them
+// with two C strings, you are testing how their locations in memory
+// are related, not how their content is related. To compare two C
+// strings by content, use {ASSERT|EXPECT}_STR*().
+//
+// 3. {ASSERT|EXPECT}_EQ(expected, actual) is preferred to
+// {ASSERT|EXPECT}_TRUE(expected == actual), as the former tells you
+// what the actual value is when it fails, and similarly for the
+// other comparisons.
+//
+// 4. Do not depend on the order in which {ASSERT|EXPECT}_??()
+// evaluate their arguments, which is undefined.
+//
+// 5. These macros evaluate their arguments exactly once.
+//
+// Examples:
+//
+// EXPECT_NE(5, Foo());
+// EXPECT_EQ(NULL, a_pointer);
+// ASSERT_LT(i, array_size);
+// ASSERT_GT(records.size(), 0) << "There is no record left.";
+
+#define EXPECT_EQ(expected, actual) \
+ EXPECT_PRED_FORMAT2(::testing::internal:: \
+ EqHelper<GTEST_IS_NULL_LITERAL_(expected)>::Compare, \
+ expected, actual)
+#define EXPECT_NE(expected, actual) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, expected, actual)
+#define EXPECT_LE(val1, val2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
+#define EXPECT_LT(val1, val2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
+#define EXPECT_GE(val1, val2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
+#define EXPECT_GT(val1, val2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)
+
+#define GTEST_ASSERT_EQ(expected, actual) \
+ ASSERT_PRED_FORMAT2(::testing::internal:: \
+ EqHelper<GTEST_IS_NULL_LITERAL_(expected)>::Compare, \
+ expected, actual)
+#define GTEST_ASSERT_NE(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2)
+#define GTEST_ASSERT_LE(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2)
+#define GTEST_ASSERT_LT(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2)
+#define GTEST_ASSERT_GE(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2)
+#define GTEST_ASSERT_GT(val1, val2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2)
+
+// Define macro GTEST_DONT_DEFINE_ASSERT_XY to 1 to omit the definition of
+// ASSERT_XY(), which clashes with some users' own code.
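+//
+// For example, a build could pass -DGTEST_DONT_DEFINE_ASSERT_EQ=1 and then
+// use the GTEST_ASSERT_EQ(val1, val2) spelling directly (illustrative usage,
+// not part of the upstream header).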
+
+#if !GTEST_DONT_DEFINE_ASSERT_EQ
+# define ASSERT_EQ(val1, val2) GTEST_ASSERT_EQ(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_NE
+# define ASSERT_NE(val1, val2) GTEST_ASSERT_NE(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_LE
+# define ASSERT_LE(val1, val2) GTEST_ASSERT_LE(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_LT
+# define ASSERT_LT(val1, val2) GTEST_ASSERT_LT(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_GE
+# define ASSERT_GE(val1, val2) GTEST_ASSERT_GE(val1, val2)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_GT
+# define ASSERT_GT(val1, val2) GTEST_ASSERT_GT(val1, val2)
+#endif
+
+// C-string Comparisons. All tests treat NULL and any non-NULL string
+// as different. Two NULLs are equal.
+//
+// * {ASSERT|EXPECT}_STREQ(s1, s2): Tests that s1 == s2
+// * {ASSERT|EXPECT}_STRNE(s1, s2): Tests that s1 != s2
+// * {ASSERT|EXPECT}_STRCASEEQ(s1, s2): Tests that s1 == s2, ignoring case
+// * {ASSERT|EXPECT}_STRCASENE(s1, s2): Tests that s1 != s2, ignoring case
+//
+// For wide or narrow string objects, you can use the
+// {ASSERT|EXPECT}_??() macros.
+//
+// Don't depend on the order in which the arguments are evaluated,
+// which is undefined.
+//
+// These macros evaluate their arguments exactly once.
+
+#define EXPECT_STREQ(expected, actual) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual)
+#define EXPECT_STRNE(s1, s2) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
+#define EXPECT_STRCASEEQ(expected, actual) \
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual)
+#define EXPECT_STRCASENE(s1, s2)\
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)
+
+#define ASSERT_STREQ(expected, actual) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual)
+#define ASSERT_STRNE(s1, s2) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2)
+#define ASSERT_STRCASEEQ(expected, actual) \
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual)
+#define ASSERT_STRCASENE(s1, s2)\
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2)
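+
+// Example (illustrative, not part of the upstream header); GetGreeting()
+// is a hypothetical function returning a C string:
+//
+//   EXPECT_STREQ("hello", GetGreeting());     // compares content, not pointers
+//   EXPECT_STRCASEEQ("HELLO", GetGreeting()); // content, ignoring case
+//   EXPECT_STRNE("goodbye", GetGreeting());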
+
+// Macros for comparing floating-point numbers.
+//
+// * {ASSERT|EXPECT}_FLOAT_EQ(expected, actual):
+// Tests that two float values are almost equal.
+// * {ASSERT|EXPECT}_DOUBLE_EQ(expected, actual):
+// Tests that two double values are almost equal.
+// * {ASSERT|EXPECT}_NEAR(v1, v2, abs_error):
+// Tests that v1 and v2 are within the given distance to each other.
+//
+// Google Test uses ULP-based comparison to automatically pick a default
+// error bound that is appropriate for the operands. See the
+// FloatingPoint template class in gtest-internal.h if you are
+// interested in the implementation details.
+
+#define EXPECT_FLOAT_EQ(expected, actual)\
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
+ expected, actual)
+
+#define EXPECT_DOUBLE_EQ(expected, actual)\
+ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
+ expected, actual)
+
+#define ASSERT_FLOAT_EQ(expected, actual)\
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \
+ expected, actual)
+
+#define ASSERT_DOUBLE_EQ(expected, actual)\
+ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \
+ expected, actual)
+
+#define EXPECT_NEAR(val1, val2, abs_error)\
+ EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
+ val1, val2, abs_error)
+
+#define ASSERT_NEAR(val1, val2, abs_error)\
+ ASSERT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \
+ val1, val2, abs_error)
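+
+// Example (illustrative, not part of the upstream header):
+//
+//   EXPECT_DOUBLE_EQ(0.3, 0.1 + 0.2);        // passes: results differ by 1 ULP
+//   EXPECT_NEAR(3.14159, 22.0/7.0, 1.3e-3);  // explicit absolute error bound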
+
+// These predicate format functions work on floating-point values, and
+// can be used in {ASSERT|EXPECT}_PRED_FORMAT2*(), e.g.
+//
+// EXPECT_PRED_FORMAT2(testing::DoubleLE, Foo(), 5.0);
+
+// Asserts that val1 is less than, or almost equal to, val2. Fails
+// otherwise. In particular, it fails if either val1 or val2 is NaN.
+GTEST_API_ AssertionResult FloatLE(const char* expr1, const char* expr2,
+ float val1, float val2);
+GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2,
+ double val1, double val2);
+
+
+#if GTEST_OS_WINDOWS
+
+// Macros that test for HRESULT failure and success. These are only useful
+// on Windows and rely on Windows SDK macros and APIs to compile.
+//
+// * {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}(expr)
+//
+// When expr unexpectedly fails or succeeds, Google Test prints the
+// expected result and the actual result with both a human-readable
+// string representation of the error, if available, as well as the
+// hex result code.
+# define EXPECT_HRESULT_SUCCEEDED(expr) \
+ EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))
+
+# define ASSERT_HRESULT_SUCCEEDED(expr) \
+ ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr))
+
+# define EXPECT_HRESULT_FAILED(expr) \
+ EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))
+
+# define ASSERT_HRESULT_FAILED(expr) \
+ ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr))
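+
+// Example (illustrative, not part of the upstream header):
+//
+//   EXPECT_HRESULT_SUCCEEDED(::CoInitialize(NULL));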
+
+#endif // GTEST_OS_WINDOWS
+
+// Macros that execute a statement and check that it doesn't generate new
+// fatal failures in the current thread.
+//
+// * {ASSERT|EXPECT}_NO_FATAL_FAILURE(statement);
+//
+// Examples:
+//
+// EXPECT_NO_FATAL_FAILURE(Process());
+// ASSERT_NO_FATAL_FAILURE(Process()) << "Process() failed";
+//
+#define ASSERT_NO_FATAL_FAILURE(statement) \
+ GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_FATAL_FAILURE_)
+#define EXPECT_NO_FATAL_FAILURE(statement) \
+ GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_NONFATAL_FAILURE_)
+
+// Causes a trace (including the source file path, the current line
+// number, and the given message) to be included in every test failure
+// message generated by code in the current scope. The effect is
+// undone when the control leaves the current scope.
+//
+// The message argument can be anything streamable to std::ostream.
+//
+// In the implementation, we include the current line number as part
+// of the dummy variable name, thus allowing multiple SCOPED_TRACE()s
+// to appear in the same block - as long as they are on different
+// lines.
+#define SCOPED_TRACE(message) \
+ ::testing::internal::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\
+ __FILE__, __LINE__, ::testing::Message() << (message))
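+
+// Example (illustrative, not part of the upstream header); Compute() and
+// CheckValue() are hypothetical:
+//
+//   TEST(FooTest, HandlesAllIterations) {
+//     for (int i = 0; i < 3; i++) {
+//       SCOPED_TRACE(::testing::Message() << "iteration " << i);
+//       CheckValue(Compute(i));  // a failure here reports the iteration
+//     }
+//   }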
+
+// Compile-time assertion for type equality.
+// StaticAssertTypeEq<type1, type2>() compiles iff type1 and type2 are
+// the same type. The value it returns is not interesting.
+//
+// Instead of making StaticAssertTypeEq a class template, we make it a
+// function template that invokes a helper class template. This
+// prevents a user from misusing StaticAssertTypeEq<T1, T2> by
+// defining objects of that type.
+//
+// CAVEAT:
+//
+// When used inside a method of a class template,
+// StaticAssertTypeEq<T1, T2>() is effective ONLY IF the method is
+// instantiated. For example, given:
+//
+// template <typename T> class Foo {
+// public:
+// void Bar() { testing::StaticAssertTypeEq<int, T>(); }
+// };
+//
+// the code:
+//
+// void Test1() { Foo<bool> foo; }
+//
+// will NOT generate a compiler error, as Foo<bool>::Bar() is never
+// actually instantiated. Instead, you need:
+//
+// void Test2() { Foo<bool> foo; foo.Bar(); }
+//
+// to cause a compiler error.
+template <typename T1, typename T2>
+bool StaticAssertTypeEq() {
+ (void)internal::StaticAssertTypeEqHelper<T1, T2>();
+ return true;
+}
+
+// Defines a test.
+//
+// The first parameter is the name of the test case, and the second
+// parameter is the name of the test within the test case.
+//
+// The convention is to end the test case name with "Test". For
+// example, a test case for the Foo class can be named FooTest.
+//
+// The user should put his test code between braces after using this
+// macro. Example:
+//
+// TEST(FooTest, InitializesCorrectly) {
+// Foo foo;
+// EXPECT_TRUE(foo.StatusIsOK());
+// }
+
+// Note that we call GetTestTypeId() instead of GetTypeId<
+// ::testing::Test>() here to get the type ID of testing::Test. This
+// is to work around a suspected linker bug when using Google Test as
+// a framework on Mac OS X. The bug causes GetTypeId<
+// ::testing::Test>() to return different values depending on whether
+// the call is from the Google Test framework itself or from user test
+// code. GetTestTypeId() is guaranteed to always return the same
+// value, as it always calls GetTypeId<>() from the Google Test
+// framework.
+#define GTEST_TEST(test_case_name, test_name)\
+ GTEST_TEST_(test_case_name, test_name, \
+ ::testing::Test, ::testing::internal::GetTestTypeId())
+
+// Define this macro to 1 to omit the definition of TEST(), which
+// is a generic name and clashes with some other libraries.
+#if !GTEST_DONT_DEFINE_TEST
+# define TEST(test_case_name, test_name) GTEST_TEST(test_case_name, test_name)
+#endif
+
+// Defines a test that uses a test fixture.
+//
+// The first parameter is the name of the test fixture class, which
+// also doubles as the test case name. The second parameter is the
+// name of the test within the test case.
+//
+// A test fixture class must be declared earlier. The user should put
+// his test code between braces after using this macro. Example:
+//
+// class FooTest : public testing::Test {
+// protected:
+// virtual void SetUp() { b_.AddElement(3); }
+//
+// Foo a_;
+// Foo b_;
+// };
+//
+// TEST_F(FooTest, InitializesCorrectly) {
+// EXPECT_TRUE(a_.StatusIsOK());
+// }
+//
+// TEST_F(FooTest, ReturnsElementCountCorrectly) {
+// EXPECT_EQ(0, a_.size());
+// EXPECT_EQ(1, b_.size());
+// }
+
+#define TEST_F(test_fixture, test_name)\
+ GTEST_TEST_(test_fixture, test_name, test_fixture, \
+ ::testing::internal::GetTypeId<test_fixture>())
+
+} // namespace testing
+
+// Use this function in main() to run all tests. It returns 0 if all
+// tests are successful, or 1 otherwise.
+//
+// RUN_ALL_TESTS() should be invoked after the command line has been
+// parsed by InitGoogleTest().
+//
+// This function was formerly a macro; thus, it is in the global
+// namespace and has an all-caps name.
+int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_;
+
+inline int RUN_ALL_TESTS() {
+ return ::testing::UnitTest::GetInstance()->Run();
+}
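+
+// A typical main() then looks like this (illustrative, not part of the
+// upstream header):
+//
+//   int main(int argc, char** argv) {
+//     ::testing::InitGoogleTest(&argc, argv);
+//     return RUN_ALL_TESTS();
+//   }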
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_H_
diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h
index 15d2eb552..564d346d6 100644
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@@ -1,748 +1,752 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_LMPTYPE_KOKKOS_H
#define LMP_LMPTYPE_KOKKOS_H
#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Vectorization.hpp>
#define MAX_TYPES_STACKPARAMS 12
#define NeighClusterSize 8
struct lmp_float3 {
float x,y,z;
KOKKOS_INLINE_FUNCTION
lmp_float3():x(0.0f),y(0.0f),z(0.0f) {}
KOKKOS_INLINE_FUNCTION
void operator += (const lmp_float3& tmp) {
x+=tmp.x;
y+=tmp.y;
z+=tmp.z;
}
KOKKOS_INLINE_FUNCTION
void operator += (const lmp_float3& tmp) volatile {
x+=tmp.x;
y+=tmp.y;
z+=tmp.z;
}
KOKKOS_INLINE_FUNCTION
void operator = (const lmp_float3& tmp) {
x=tmp.x;
y=tmp.y;
z=tmp.z;
}
KOKKOS_INLINE_FUNCTION
void operator = (const lmp_float3& tmp) volatile {
x=tmp.x;
y=tmp.y;
z=tmp.z;
}
};
struct lmp_double3 {
double x,y,z;
KOKKOS_INLINE_FUNCTION
lmp_double3():x(0.0),y(0.0),z(0.0) {}
KOKKOS_INLINE_FUNCTION
void operator += (const lmp_double3& tmp) {
x+=tmp.x;
y+=tmp.y;
z+=tmp.z;
}
KOKKOS_INLINE_FUNCTION
void operator += (const lmp_double3& tmp) volatile {
x+=tmp.x;
y+=tmp.y;
z+=tmp.z;
}
KOKKOS_INLINE_FUNCTION
void operator = (const lmp_double3& tmp) {
x=tmp.x;
y=tmp.y;
z=tmp.z;
}
KOKKOS_INLINE_FUNCTION
void operator = (const lmp_double3& tmp) volatile {
x=tmp.x;
y=tmp.y;
z=tmp.z;
}
};
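// Note: the volatile overloads above let lmp_float3/lmp_double3 serve as
// custom reduction value types in Kokkos::parallel_reduce, which in this
// Kokkos version combines partial results through volatile references.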
#if !defined(__CUDACC__) && !defined(__VECTOR_TYPES_H__)
struct double2 {
double x, y;
};
struct float2 {
float x, y;
};
struct float4 {
float x, y, z, w;
};
struct double4 {
double x, y, z, w;
};
#endif
// set LMPHostType and LMPDeviceType from Kokkos default types
typedef Kokkos::DefaultExecutionSpace LMPDeviceType;
typedef Kokkos::HostSpace::execution_space LMPHostType;
// set ExecutionSpace struct with variable "space"
template<class Device>
struct ExecutionSpaceFromDevice;
template<>
struct ExecutionSpaceFromDevice<LMPHostType> {
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
};
#ifdef KOKKOS_HAVE_CUDA
template<>
struct ExecutionSpaceFromDevice<Kokkos::Cuda> {
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Device;
};
#endif
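// Usage sketch (illustrative): map a compile-time device type to the
// runtime ExecutionSpace enum, e.g.
//   LAMMPS_NS::ExecutionSpace s = ExecutionSpaceFromDevice<LMPDeviceType>::space;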
// define precision
// handle global precision, force, energy, positions, kspace separately
#ifndef PRECISION
#define PRECISION 2
#endif
#if PRECISION==1
typedef float LMP_FLOAT;
typedef float2 LMP_FLOAT2;
typedef lmp_float3 LMP_FLOAT3;
typedef float4 LMP_FLOAT4;
#else
typedef double LMP_FLOAT;
typedef double2 LMP_FLOAT2;
typedef lmp_double3 LMP_FLOAT3;
typedef double4 LMP_FLOAT4;
#endif
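// For example, building with -DPRECISION=1 makes LMP_FLOAT a float:
//   LMP_FLOAT dt;   // float under PRECISION==1, double otherwise
// The PREC_* macros below override this default per quantity class.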
#ifndef PREC_FORCE
#define PREC_FORCE PRECISION
#endif
#if PREC_FORCE==1
typedef float F_FLOAT;
typedef float2 F_FLOAT2;
typedef lmp_float3 F_FLOAT3;
typedef float4 F_FLOAT4;
#else
typedef double F_FLOAT;
typedef double2 F_FLOAT2;
typedef lmp_double3 F_FLOAT3;
typedef double4 F_FLOAT4;
#endif
#ifndef PREC_ENERGY
#define PREC_ENERGY PRECISION
#endif
#if PREC_ENERGY==1
typedef float E_FLOAT;
typedef float2 E_FLOAT2;
typedef float4 E_FLOAT4;
#else
typedef double E_FLOAT;
typedef double2 E_FLOAT2;
typedef double4 E_FLOAT4;
#endif
struct s_EV_FLOAT {
E_FLOAT evdwl;
E_FLOAT ecoul;
E_FLOAT v[6];
KOKKOS_INLINE_FUNCTION
s_EV_FLOAT() {
evdwl = 0;
ecoul = 0;
v[0] = 0; v[1] = 0; v[2] = 0;
v[3] = 0; v[4] = 0; v[5] = 0;
}
KOKKOS_INLINE_FUNCTION
void operator+=(const s_EV_FLOAT &rhs) {
evdwl += rhs.evdwl;
ecoul += rhs.ecoul;
v[0] += rhs.v[0];
v[1] += rhs.v[1];
v[2] += rhs.v[2];
v[3] += rhs.v[3];
v[4] += rhs.v[4];
v[5] += rhs.v[5];
}
KOKKOS_INLINE_FUNCTION
void operator+=(const volatile s_EV_FLOAT &rhs) volatile {
evdwl += rhs.evdwl;
ecoul += rhs.ecoul;
v[0] += rhs.v[0];
v[1] += rhs.v[1];
v[2] += rhs.v[2];
v[3] += rhs.v[3];
v[4] += rhs.v[4];
v[5] += rhs.v[5];
}
};
typedef struct s_EV_FLOAT EV_FLOAT;
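// Usage sketch (illustrative): EV_FLOAT is the reduction value type when
// tallying energy/virial in kernels, e.g.
//   EV_FLOAT ev;
//   Kokkos::parallel_reduce(nlocal, pair_functor, ev);
// where pair_functor is a hypothetical functor that accumulates into an
// EV_FLOAT& and joins contributions via the volatile += above.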
#ifndef PREC_POS
#define PREC_POS PRECISION
#endif
#if PREC_POS==1
typedef float X_FLOAT;
typedef float2 X_FLOAT2;
typedef float4 X_FLOAT4;
#else
typedef double X_FLOAT;
typedef double2 X_FLOAT2;
typedef double4 X_FLOAT4;
#endif
#ifndef PREC_VELOCITIES
#define PREC_VELOCITIES PRECISION
#endif
#if PREC_VELOCITIES==1
typedef float V_FLOAT;
typedef float2 V_FLOAT2;
typedef float4 V_FLOAT4;
#else
typedef double V_FLOAT;
typedef double2 V_FLOAT2;
typedef double4 V_FLOAT4;
#endif
#ifndef PREC_KSPACE
#define PREC_KSPACE PRECISION
#endif
#if PREC_KSPACE==1
typedef float K_FLOAT;
typedef float2 K_FLOAT2;
typedef float4 K_FLOAT4;
#else
typedef double K_FLOAT;
typedef double2 K_FLOAT2;
typedef double4 K_FLOAT4;
#endif
// ------------------------------------------------------------------------
// LAMMPS types
template <class DeviceType>
struct ArrayTypes;
template <>
struct ArrayTypes<LMPDeviceType> {
// scalar types
typedef Kokkos::
DualView<int, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_scalar;
typedef tdual_int_scalar::t_dev t_int_scalar;
typedef tdual_int_scalar::t_dev_const t_int_scalar_const;
typedef tdual_int_scalar::t_dev_um t_int_scalar_um;
typedef tdual_int_scalar::t_dev_const_um t_int_scalar_const_um;
typedef Kokkos::
DualView<LMP_FLOAT, LMPDeviceType::array_layout, LMPDeviceType>
tdual_float_scalar;
typedef tdual_float_scalar::t_dev t_float_scalar;
typedef tdual_float_scalar::t_dev_const t_float_scalar_const;
typedef tdual_float_scalar::t_dev_um t_float_scalar_um;
typedef tdual_float_scalar::t_dev_const_um t_float_scalar_const_um;
// generic array types
typedef Kokkos::
DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_1d;
typedef tdual_int_1d::t_dev t_int_1d;
typedef tdual_int_1d::t_dev_const t_int_1d_const;
typedef tdual_int_1d::t_dev_um t_int_1d_um;
typedef tdual_int_1d::t_dev_const_um t_int_1d_const_um;
typedef tdual_int_1d::t_dev_const_randomread t_int_1d_randomread;
typedef Kokkos::
DualView<int*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_int_1d_3;
typedef tdual_int_1d_3::t_dev t_int_1d_3;
typedef tdual_int_1d_3::t_dev_const t_int_1d_3_const;
typedef tdual_int_1d_3::t_dev_um t_int_1d_3_um;
typedef tdual_int_1d_3::t_dev_const_um t_int_1d_3_const_um;
typedef tdual_int_1d_3::t_dev_const_randomread t_int_1d_3_randomread;
typedef Kokkos::
DualView<int**, Kokkos::LayoutRight, LMPDeviceType> tdual_int_2d;
typedef tdual_int_2d::t_dev t_int_2d;
typedef tdual_int_2d::t_dev_const t_int_2d_const;
typedef tdual_int_2d::t_dev_um t_int_2d_um;
typedef tdual_int_2d::t_dev_const_um t_int_2d_const_um;
typedef tdual_int_2d::t_dev_const_randomread t_int_2d_randomread;
typedef Kokkos::
DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType>
tdual_tagint_1d;
typedef tdual_tagint_1d::t_dev t_tagint_1d;
typedef tdual_tagint_1d::t_dev_const t_tagint_1d_const;
typedef tdual_tagint_1d::t_dev_um t_tagint_1d_um;
typedef tdual_tagint_1d::t_dev_const_um t_tagint_1d_const_um;
typedef tdual_tagint_1d::t_dev_const_randomread t_tagint_1d_randomread;
typedef Kokkos::
DualView<LAMMPS_NS::tagint**, Kokkos::LayoutRight, LMPDeviceType>
tdual_tagint_2d;
typedef tdual_tagint_2d::t_dev t_tagint_2d;
typedef tdual_tagint_2d::t_dev_const t_tagint_2d_const;
typedef tdual_tagint_2d::t_dev_um t_tagint_2d_um;
typedef tdual_tagint_2d::t_dev_const_um t_tagint_2d_const_um;
typedef tdual_tagint_2d::t_dev_const_randomread t_tagint_2d_randomread;
typedef Kokkos::
DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType>
tdual_imageint_1d;
typedef tdual_imageint_1d::t_dev t_imageint_1d;
typedef tdual_imageint_1d::t_dev_const t_imageint_1d_const;
typedef tdual_imageint_1d::t_dev_um t_imageint_1d_um;
typedef tdual_imageint_1d::t_dev_const_um t_imageint_1d_const_um;
typedef tdual_imageint_1d::t_dev_const_randomread t_imageint_1d_randomread;
typedef Kokkos::
DualView<double*, Kokkos::LayoutRight, LMPDeviceType> tdual_double_1d;
typedef tdual_double_1d::t_dev t_double_1d;
typedef tdual_double_1d::t_dev_const t_double_1d_const;
typedef tdual_double_1d::t_dev_um t_double_1d_um;
typedef tdual_double_1d::t_dev_const_um t_double_1d_const_um;
typedef tdual_double_1d::t_dev_const_randomread t_double_1d_randomread;
typedef Kokkos::
DualView<double**, Kokkos::LayoutRight, LMPDeviceType> tdual_double_2d;
typedef tdual_double_2d::t_dev t_double_2d;
typedef tdual_double_2d::t_dev_const t_double_2d_const;
typedef tdual_double_2d::t_dev_um t_double_2d_um;
typedef tdual_double_2d::t_dev_const_um t_double_2d_const_um;
typedef tdual_double_2d::t_dev_const_randomread t_double_2d_randomread;
// 1d float array n
typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
typedef tdual_float_1d::t_dev t_float_1d;
typedef tdual_float_1d::t_dev_const t_float_1d_const;
typedef tdual_float_1d::t_dev_um t_float_1d_um;
typedef tdual_float_1d::t_dev_const_um t_float_1d_const_um;
typedef tdual_float_1d::t_dev_const_randomread t_float_1d_randomread;
//2d float array n*m
typedef Kokkos::DualView<LMP_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_float_2d;
typedef tdual_float_2d::t_dev t_float_2d;
typedef tdual_float_2d::t_dev_const t_float_2d_const;
typedef tdual_float_2d::t_dev_um t_float_2d_um;
typedef tdual_float_2d::t_dev_const_um t_float_2d_const_um;
typedef tdual_float_2d::t_dev_const_randomread t_float_2d_randomread;
//Position Types
//1d X_FLOAT array n
typedef Kokkos::DualView<X_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_xfloat_1d;
typedef tdual_xfloat_1d::t_dev t_xfloat_1d;
typedef tdual_xfloat_1d::t_dev_const t_xfloat_1d_const;
typedef tdual_xfloat_1d::t_dev_um t_xfloat_1d_um;
typedef tdual_xfloat_1d::t_dev_const_um t_xfloat_1d_const_um;
typedef tdual_xfloat_1d::t_dev_const_randomread t_xfloat_1d_randomread;
//2d X_FLOAT array n*m
typedef Kokkos::DualView<X_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_xfloat_2d;
typedef tdual_xfloat_2d::t_dev t_xfloat_2d;
typedef tdual_xfloat_2d::t_dev_const t_xfloat_2d_const;
typedef tdual_xfloat_2d::t_dev_um t_xfloat_2d_um;
typedef tdual_xfloat_2d::t_dev_const_um t_xfloat_2d_const_um;
typedef tdual_xfloat_2d::t_dev_const_randomread t_xfloat_2d_randomread;
//2d X_FLOAT array n*3
#ifdef LMP_KOKKOS_NO_LEGACY
typedef Kokkos::DualView<X_FLOAT*[3], Kokkos::LayoutLeft, LMPDeviceType> tdual_x_array;
#else
typedef Kokkos::DualView<X_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_x_array;
#endif
typedef tdual_x_array::t_dev t_x_array;
typedef tdual_x_array::t_dev_const t_x_array_const;
typedef tdual_x_array::t_dev_um t_x_array_um;
typedef tdual_x_array::t_dev_const_um t_x_array_const_um;
typedef tdual_x_array::t_dev_const_randomread t_x_array_randomread;
//Velocity Types
//1d V_FLOAT array n
typedef Kokkos::DualView<V_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_vfloat_1d;
typedef tdual_vfloat_1d::t_dev t_vfloat_1d;
typedef tdual_vfloat_1d::t_dev_const t_vfloat_1d_const;
typedef tdual_vfloat_1d::t_dev_um t_vfloat_1d_um;
typedef tdual_vfloat_1d::t_dev_const_um t_vfloat_1d_const_um;
typedef tdual_vfloat_1d::t_dev_const_randomread t_vfloat_1d_randomread;
//2d V_FLOAT array n*m
typedef Kokkos::DualView<V_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_vfloat_2d;
typedef tdual_vfloat_2d::t_dev t_vfloat_2d;
typedef tdual_vfloat_2d::t_dev_const t_vfloat_2d_const;
typedef tdual_vfloat_2d::t_dev_um t_vfloat_2d_um;
typedef tdual_vfloat_2d::t_dev_const_um t_vfloat_2d_const_um;
typedef tdual_vfloat_2d::t_dev_const_randomread t_vfloat_2d_randomread;
//2d V_FLOAT array n*3
typedef Kokkos::DualView<V_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_v_array;
//typedef Kokkos::DualView<V_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_v_array;
typedef tdual_v_array::t_dev t_v_array;
typedef tdual_v_array::t_dev_const t_v_array_const;
typedef tdual_v_array::t_dev_um t_v_array_um;
typedef tdual_v_array::t_dev_const_um t_v_array_const_um;
typedef tdual_v_array::t_dev_const_randomread t_v_array_randomread;
//Force Types
//1d F_FLOAT array n
typedef Kokkos::DualView<F_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_ffloat_1d;
typedef tdual_ffloat_1d::t_dev t_ffloat_1d;
typedef tdual_ffloat_1d::t_dev_const t_ffloat_1d_const;
typedef tdual_ffloat_1d::t_dev_um t_ffloat_1d_um;
typedef tdual_ffloat_1d::t_dev_const_um t_ffloat_1d_const_um;
typedef tdual_ffloat_1d::t_dev_const_randomread t_ffloat_1d_randomread;
//2d F_FLOAT array n*m
typedef Kokkos::DualView<F_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_ffloat_2d;
typedef tdual_ffloat_2d::t_dev t_ffloat_2d;
typedef tdual_ffloat_2d::t_dev_const t_ffloat_2d_const;
typedef tdual_ffloat_2d::t_dev_um t_ffloat_2d_um;
typedef tdual_ffloat_2d::t_dev_const_um t_ffloat_2d_const_um;
typedef tdual_ffloat_2d::t_dev_const_randomread t_ffloat_2d_randomread;
//2d F_FLOAT array n*3
typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_f_array;
//typedef Kokkos::DualView<F_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_f_array;
typedef tdual_f_array::t_dev t_f_array;
typedef tdual_f_array::t_dev_const t_f_array_const;
typedef tdual_f_array::t_dev_um t_f_array_um;
typedef tdual_f_array::t_dev_const_um t_f_array_const_um;
typedef tdual_f_array::t_dev_const_randomread t_f_array_randomread;
//2d F_FLOAT array n*6 (for virial)
typedef Kokkos::DualView<F_FLOAT*[6], Kokkos::LayoutRight, LMPDeviceType> tdual_virial_array;
typedef tdual_virial_array::t_dev t_virial_array;
typedef tdual_virial_array::t_dev_const t_virial_array_const;
typedef tdual_virial_array::t_dev_um t_virial_array_um;
typedef tdual_virial_array::t_dev_const_um t_virial_array_const_um;
typedef tdual_virial_array::t_dev_const_randomread t_virial_array_randomread;
//Energy Types
//1d E_FLOAT array n
typedef Kokkos::DualView<E_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_efloat_1d;
typedef tdual_efloat_1d::t_dev t_efloat_1d;
typedef tdual_efloat_1d::t_dev_const t_efloat_1d_const;
typedef tdual_efloat_1d::t_dev_um t_efloat_1d_um;
typedef tdual_efloat_1d::t_dev_const_um t_efloat_1d_const_um;
typedef tdual_efloat_1d::t_dev_const_randomread t_efloat_1d_randomread;
//2d E_FLOAT array n*m
typedef Kokkos::DualView<E_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_efloat_2d;
typedef tdual_efloat_2d::t_dev t_efloat_2d;
typedef tdual_efloat_2d::t_dev_const t_efloat_2d_const;
typedef tdual_efloat_2d::t_dev_um t_efloat_2d_um;
typedef tdual_efloat_2d::t_dev_const_um t_efloat_2d_const_um;
typedef tdual_efloat_2d::t_dev_const_randomread t_efloat_2d_randomread;
//2d E_FLOAT array n*3
typedef Kokkos::DualView<E_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_e_array;
typedef tdual_e_array::t_dev t_e_array;
typedef tdual_e_array::t_dev_const t_e_array_const;
typedef tdual_e_array::t_dev_um t_e_array_um;
typedef tdual_e_array::t_dev_const_um t_e_array_const_um;
typedef tdual_e_array::t_dev_const_randomread t_e_array_randomread;
//Neighbor Types
typedef Kokkos::DualView<int**, LMPDeviceType::array_layout, LMPDeviceType> tdual_neighbors_2d;
typedef tdual_neighbors_2d::t_dev t_neighbors_2d;
typedef tdual_neighbors_2d::t_dev_const t_neighbors_2d_const;
typedef tdual_neighbors_2d::t_dev_um t_neighbors_2d_um;
typedef tdual_neighbors_2d::t_dev_const_um t_neighbors_2d_const_um;
typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread;
};
#ifdef KOKKOS_HAVE_CUDA
template <>
struct ArrayTypes<LMPHostType> {
//Scalar Types
typedef Kokkos::DualView<int, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_scalar;
typedef tdual_int_scalar::t_host t_int_scalar;
typedef tdual_int_scalar::t_host_const t_int_scalar_const;
typedef tdual_int_scalar::t_host_um t_int_scalar_um;
typedef tdual_int_scalar::t_host_const_um t_int_scalar_const_um;
typedef Kokkos::DualView<LMP_FLOAT, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_scalar;
typedef tdual_float_scalar::t_host t_float_scalar;
typedef tdual_float_scalar::t_host_const t_float_scalar_const;
typedef tdual_float_scalar::t_host_um t_float_scalar_um;
typedef tdual_float_scalar::t_host_const_um t_float_scalar_const_um;
//Generic ArrayTypes
typedef Kokkos::DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_1d;
typedef tdual_int_1d::t_host t_int_1d;
typedef tdual_int_1d::t_host_const t_int_1d_const;
typedef tdual_int_1d::t_host_um t_int_1d_um;
typedef tdual_int_1d::t_host_const_um t_int_1d_const_um;
typedef tdual_int_1d::t_host_const_randomread t_int_1d_randomread;
typedef Kokkos::DualView<int*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_int_1d_3;
typedef tdual_int_1d_3::t_host t_int_1d_3;
typedef tdual_int_1d_3::t_host_const t_int_1d_3_const;
typedef tdual_int_1d_3::t_host_um t_int_1d_3_um;
typedef tdual_int_1d_3::t_host_const_um t_int_1d_3_const_um;
typedef tdual_int_1d_3::t_host_const_randomread t_int_1d_3_randomread;
typedef Kokkos::DualView<int**, Kokkos::LayoutRight, LMPDeviceType> tdual_int_2d;
typedef tdual_int_2d::t_host t_int_2d;
typedef tdual_int_2d::t_host_const t_int_2d_const;
typedef tdual_int_2d::t_host_um t_int_2d_um;
typedef tdual_int_2d::t_host_const_um t_int_2d_const_um;
typedef tdual_int_2d::t_host_const_randomread t_int_2d_randomread;
typedef Kokkos::DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_1d;
typedef tdual_tagint_1d::t_host t_tagint_1d;
typedef tdual_tagint_1d::t_host_const t_tagint_1d_const;
typedef tdual_tagint_1d::t_host_um t_tagint_1d_um;
typedef tdual_tagint_1d::t_host_const_um t_tagint_1d_const_um;
typedef tdual_tagint_1d::t_host_const_randomread t_tagint_1d_randomread;
typedef Kokkos::
DualView<LAMMPS_NS::tagint**, Kokkos::LayoutRight, LMPDeviceType>
tdual_tagint_2d;
typedef tdual_tagint_2d::t_host t_tagint_2d;
typedef tdual_tagint_2d::t_host_const t_tagint_2d_const;
typedef tdual_tagint_2d::t_host_um t_tagint_2d_um;
typedef tdual_tagint_2d::t_host_const_um t_tagint_2d_const_um;
typedef tdual_tagint_2d::t_host_const_randomread t_tagint_2d_randomread;
typedef Kokkos::
DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType>
tdual_imageint_1d;
typedef tdual_imageint_1d::t_host t_imageint_1d;
typedef tdual_imageint_1d::t_host_const t_imageint_1d_const;
typedef tdual_imageint_1d::t_host_um t_imageint_1d_um;
typedef tdual_imageint_1d::t_host_const_um t_imageint_1d_const_um;
typedef tdual_imageint_1d::t_host_const_randomread t_imageint_1d_randomread;
typedef Kokkos::
DualView<double*, Kokkos::LayoutRight, LMPDeviceType> tdual_double_1d;
typedef tdual_double_1d::t_host t_double_1d;
typedef tdual_double_1d::t_host_const t_double_1d_const;
typedef tdual_double_1d::t_host_um t_double_1d_um;
typedef tdual_double_1d::t_host_const_um t_double_1d_const_um;
typedef tdual_double_1d::t_host_const_randomread t_double_1d_randomread;
typedef Kokkos::
DualView<double**, Kokkos::LayoutRight, LMPDeviceType> tdual_double_2d;
typedef tdual_double_2d::t_host t_double_2d;
typedef tdual_double_2d::t_host_const t_double_2d_const;
typedef tdual_double_2d::t_host_um t_double_2d_um;
typedef tdual_double_2d::t_host_const_um t_double_2d_const_um;
typedef tdual_double_2d::t_host_const_randomread t_double_2d_randomread;
//1d float array n
typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
typedef tdual_float_1d::t_host t_float_1d;
typedef tdual_float_1d::t_host_const t_float_1d_const;
typedef tdual_float_1d::t_host_um t_float_1d_um;
typedef tdual_float_1d::t_host_const_um t_float_1d_const_um;
typedef tdual_float_1d::t_host_const_randomread t_float_1d_randomread;
//2d float array n*m
typedef Kokkos::DualView<LMP_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_float_2d;
typedef tdual_float_2d::t_host t_float_2d;
typedef tdual_float_2d::t_host_const t_float_2d_const;
typedef tdual_float_2d::t_host_um t_float_2d_um;
typedef tdual_float_2d::t_host_const_um t_float_2d_const_um;
typedef tdual_float_2d::t_host_const_randomread t_float_2d_randomread;
//Position Types
//1d X_FLOAT array n
typedef Kokkos::DualView<X_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_xfloat_1d;
typedef tdual_xfloat_1d::t_host t_xfloat_1d;
typedef tdual_xfloat_1d::t_host_const t_xfloat_1d_const;
typedef tdual_xfloat_1d::t_host_um t_xfloat_1d_um;
typedef tdual_xfloat_1d::t_host_const_um t_xfloat_1d_const_um;
typedef tdual_xfloat_1d::t_host_const_randomread t_xfloat_1d_randomread;
//2d X_FLOAT array n*m
typedef Kokkos::DualView<X_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_xfloat_2d;
typedef tdual_xfloat_2d::t_host t_xfloat_2d;
typedef tdual_xfloat_2d::t_host_const t_xfloat_2d_const;
typedef tdual_xfloat_2d::t_host_um t_xfloat_2d_um;
typedef tdual_xfloat_2d::t_host_const_um t_xfloat_2d_const_um;
typedef tdual_xfloat_2d::t_host_const_randomread t_xfloat_2d_randomread;
//2d X_FLOAT array n*3
typedef Kokkos::DualView<X_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_x_array;
typedef tdual_x_array::t_host t_x_array;
typedef tdual_x_array::t_host_const t_x_array_const;
typedef tdual_x_array::t_host_um t_x_array_um;
typedef tdual_x_array::t_host_const_um t_x_array_const_um;
typedef tdual_x_array::t_host_const_randomread t_x_array_randomread;
//Velocity Types
//1d V_FLOAT array n
typedef Kokkos::DualView<V_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_vfloat_1d;
typedef tdual_vfloat_1d::t_host t_vfloat_1d;
typedef tdual_vfloat_1d::t_host_const t_vfloat_1d_const;
typedef tdual_vfloat_1d::t_host_um t_vfloat_1d_um;
typedef tdual_vfloat_1d::t_host_const_um t_vfloat_1d_const_um;
typedef tdual_vfloat_1d::t_host_const_randomread t_vfloat_1d_randomread;
//2d V_FLOAT array n*m
typedef Kokkos::DualView<V_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_vfloat_2d;
typedef tdual_vfloat_2d::t_host t_vfloat_2d;
typedef tdual_vfloat_2d::t_host_const t_vfloat_2d_const;
typedef tdual_vfloat_2d::t_host_um t_vfloat_2d_um;
typedef tdual_vfloat_2d::t_host_const_um t_vfloat_2d_const_um;
typedef tdual_vfloat_2d::t_host_const_randomread t_vfloat_2d_randomread;
//2d V_FLOAT array n*3
typedef Kokkos::DualView<V_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_v_array;
//typedef Kokkos::DualView<V_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_v_array;
typedef tdual_v_array::t_host t_v_array;
typedef tdual_v_array::t_host_const t_v_array_const;
typedef tdual_v_array::t_host_um t_v_array_um;
typedef tdual_v_array::t_host_const_um t_v_array_const_um;
typedef tdual_v_array::t_host_const_randomread t_v_array_randomread;
//Force Types
//1d F_FLOAT array n
typedef Kokkos::DualView<F_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_ffloat_1d;
typedef tdual_ffloat_1d::t_host t_ffloat_1d;
typedef tdual_ffloat_1d::t_host_const t_ffloat_1d_const;
typedef tdual_ffloat_1d::t_host_um t_ffloat_1d_um;
typedef tdual_ffloat_1d::t_host_const_um t_ffloat_1d_const_um;
typedef tdual_ffloat_1d::t_host_const_randomread t_ffloat_1d_randomread;
//2d F_FLOAT array n*m
typedef Kokkos::DualView<F_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_ffloat_2d;
typedef tdual_ffloat_2d::t_host t_ffloat_2d;
typedef tdual_ffloat_2d::t_host_const t_ffloat_2d_const;
typedef tdual_ffloat_2d::t_host_um t_ffloat_2d_um;
typedef tdual_ffloat_2d::t_host_const_um t_ffloat_2d_const_um;
typedef tdual_ffloat_2d::t_host_const_randomread t_ffloat_2d_randomread;
//2d F_FLOAT array n*3
typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_f_array;
//typedef Kokkos::DualView<F_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_f_array;
typedef tdual_f_array::t_host t_f_array;
typedef tdual_f_array::t_host_const t_f_array_const;
typedef tdual_f_array::t_host_um t_f_array_um;
typedef tdual_f_array::t_host_const_um t_f_array_const_um;
typedef tdual_f_array::t_host_const_randomread t_f_array_randomread;
//2d F_FLOAT array n*6 (for virial)
typedef Kokkos::DualView<F_FLOAT*[6], Kokkos::LayoutRight, LMPDeviceType> tdual_virial_array;
typedef tdual_virial_array::t_host t_virial_array;
typedef tdual_virial_array::t_host_const t_virial_array_const;
typedef tdual_virial_array::t_host_um t_virial_array_um;
typedef tdual_virial_array::t_host_const_um t_virial_array_const_um;
typedef tdual_virial_array::t_host_const_randomread t_virial_array_randomread;
//Energy Types
//1d E_FLOAT array n
typedef Kokkos::DualView<E_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_efloat_1d;
typedef tdual_efloat_1d::t_host t_efloat_1d;
typedef tdual_efloat_1d::t_host_const t_efloat_1d_const;
typedef tdual_efloat_1d::t_host_um t_efloat_1d_um;
typedef tdual_efloat_1d::t_host_const_um t_efloat_1d_const_um;
typedef tdual_efloat_1d::t_host_const_randomread t_efloat_1d_randomread;
//2d E_FLOAT array n*m
typedef Kokkos::DualView<E_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_efloat_2d;
typedef tdual_efloat_2d::t_host t_efloat_2d;
typedef tdual_efloat_2d::t_host_const t_efloat_2d_const;
typedef tdual_efloat_2d::t_host_um t_efloat_2d_um;
typedef tdual_efloat_2d::t_host_const_um t_efloat_2d_const_um;
typedef tdual_efloat_2d::t_host_const_randomread t_efloat_2d_randomread;
//2d E_FLOAT array n*3
typedef Kokkos::DualView<E_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_e_array;
typedef tdual_e_array::t_host t_e_array;
typedef tdual_e_array::t_host_const t_e_array_const;
typedef tdual_e_array::t_host_um t_e_array_um;
typedef tdual_e_array::t_host_const_um t_e_array_const_um;
typedef tdual_e_array::t_host_const_randomread t_e_array_randomread;
//Neighbor Types
typedef Kokkos::DualView<int**, LMPDeviceType::array_layout, LMPDeviceType> tdual_neighbors_2d;
typedef tdual_neighbors_2d::t_host t_neighbors_2d;
typedef tdual_neighbors_2d::t_host_const t_neighbors_2d_const;
typedef tdual_neighbors_2d::t_host_um t_neighbors_2d_um;
typedef tdual_neighbors_2d::t_host_const_um t_neighbors_2d_const_um;
typedef tdual_neighbors_2d::t_host_const_randomread t_neighbors_2d_randomread;
};
#endif
//default LAMMPS Types
typedef struct ArrayTypes<LMPDeviceType> DAT;
typedef struct ArrayTypes<LMPHostType> HAT;
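// Usage sketch (illustrative): DAT names the device-side view types and HAT
// the host-side mirrors of the same DualViews, e.g.
//   DAT::t_x_array d_x = atomKK->k_x.view<LMPDeviceType>();
//   HAT::t_x_array h_x = atomKK->k_x.view<LMPHostType>();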
template<class DeviceType, class BufferView, class DualView>
void buffer_view(BufferView &buf, DualView &view,
const size_t n0,
const size_t n1 = 0,
const size_t n2 = 0,
const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
const size_t n6 = 0,
const size_t n7 = 0) {
buf = BufferView(
view.template view<DeviceType>().ptr_on_device(),
n0,n1,n2,n3,n4,n5,n6,n7);
}
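// Usage sketch (illustrative): wrap the device side of a DualView in an
// unmanaged view with explicit extents, e.g.
//   DAT::t_xfloat_2d_um buf;
//   buffer_view<LMPDeviceType>(buf, atomKK->k_x, nmax, 3);
// where nmax is a hypothetical row count.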
template<class DeviceType>
struct MemsetZeroFunctor {
typedef DeviceType execution_space ;
void* ptr;
KOKKOS_INLINE_FUNCTION void operator()(const int i) const {
((int*)ptr)[i] = 0;
}
};
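// Zero a view's allocation by running MemsetZeroFunctor over it in 4-byte
// words; assumes the allocation size is a multiple of 4 bytes.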
template<class ViewType>
void memset_kokkos (ViewType &view) {
static MemsetZeroFunctor<typename ViewType::execution_space> f;
f.ptr = view.ptr_on_device();
Kokkos::parallel_for(view.capacity()*sizeof(typename ViewType::value_type)/4, f);
ViewType::execution_space::fence();
}
+#if defined(KOKKOS_HAVE_CXX11)
+#undef ISFINITE
+#define ISFINITE(x) std::isfinite(x)
+#endif
#endif
diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp
index 67e852a7d..527e10add 100644
--- a/src/KOKKOS/verlet_kokkos.cpp
+++ b/src/KOKKOS/verlet_kokkos.cpp
@@ -1,508 +1,508 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <string.h>
#include "verlet_kokkos.h"
#include "neighbor.h"
#include "domain.h"
#include "comm.h"
#include "atom.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "dihedral.h"
#include "improper.h"
#include "kspace.h"
#include "output.h"
#include "update.h"
#include "modify.h"
#include "compute.h"
#include "fix.h"
#include "timer.h"
#include "memory.h"
#include "error.h"
#include <ctime>
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) :
Verlet(lmp, narg, arg)
{
atomKK = (AtomKokkos *) atom;
}
/* ----------------------------------------------------------------------
setup before run
------------------------------------------------------------------------- */
void VerletKokkos::setup()
{
if (comm->me == 0 && screen) {
fprintf(screen,"Setting up Verlet run ...\n");
fprintf(screen," Unit style : %s\n", update->unit_style);
fprintf(screen," Current step: " BIGINT_FORMAT "\n", update->ntimestep);
fprintf(screen," Time step : %g\n", update->dt);
}
update->setupflag = 1;
// setup domain, communication and neighboring
// acquire ghosts
// build neighbor lists
atomKK->modified(Host,ALL_MASK);
atomKK->setup();
modify->setup_pre_exchange();
// debug
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
if (triclinic) domain->x2lamda(atomKK->nlocal);
domain->pbc();
atomKK->sync(Host,ALL_MASK);
domain->reset_box();
comm->setup();
if (neighbor->style) neighbor->setup_bins();
comm->exchange();
if (atomKK->sortfreq > 0) atomKK->sort();
comm->borders();
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
atomKK->sync(Host,ALL_MASK);
domain->image_check();
domain->box_too_small_check();
modify->setup_pre_neighbor();
atomKK->modified(Host,ALL_MASK);
neighbor->build();
neighbor->ncalls = 0;
// compute all forces
ev_set(update->ntimestep);
force_clear();
modify->setup_pre_force(vflag);
if (pair_compute_flag) {
atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
force->pair->compute(eflag,vflag);
timer->stamp(Timer::PAIR);
}
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
if (atomKK->molecular) {
if (force->bond) {
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
force->bond->compute(eflag,vflag);
}
if (force->angle) {
atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
force->angle->compute(eflag,vflag);
}
if (force->dihedral) {
atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
force->dihedral->compute(eflag,vflag);
}
if (force->improper) {
atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
force->improper->compute(eflag,vflag);
}
timer->stamp(Timer::BOND);
}
if(force->kspace) {
force->kspace->setup();
if (kspace_compute_flag) {
atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
force->kspace->compute(eflag,vflag);
timer->stamp(Timer::KSPACE);
} else force->kspace->compute_dummy(eflag,vflag);
}
if (force->newton) comm->reverse_comm();
modify->setup(vflag);
output->setup();
update->setupflag = 0;
}
/* ----------------------------------------------------------------------
setup without output
flag = 0 = just force calculation
flag = 1 = reneighbor and force calculation
------------------------------------------------------------------------- */
void VerletKokkos::setup_minimal(int flag)
{
update->setupflag = 1;
// setup domain, communication and neighboring
// acquire ghosts
// build neighbor lists
if (flag) {
atomKK->modified(Host,ALL_MASK);
modify->setup_pre_exchange();
// debug
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
if (triclinic) domain->x2lamda(atomKK->nlocal);
domain->pbc();
atomKK->sync(Host,ALL_MASK);
domain->reset_box();
comm->setup();
if (neighbor->style) neighbor->setup_bins();
comm->exchange();
comm->borders();
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
atomKK->sync(Host,ALL_MASK);
domain->image_check();
domain->box_too_small_check();
modify->setup_pre_neighbor();
atomKK->modified(Host,ALL_MASK);
neighbor->build();
neighbor->ncalls = 0;
}
// compute all forces
ev_set(update->ntimestep);
force_clear();
modify->setup_pre_force(vflag);
if (pair_compute_flag) {
atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
force->pair->compute(eflag,vflag);
timer->stamp(Timer::PAIR);
}
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
if (atomKK->molecular) {
if (force->bond) {
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
force->bond->compute(eflag,vflag);
}
if (force->angle) {
atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
force->angle->compute(eflag,vflag);
}
if (force->dihedral) {
atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
force->dihedral->compute(eflag,vflag);
}
if (force->improper) {
atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
force->improper->compute(eflag,vflag);
}
timer->stamp(Timer::BOND);
}
if(force->kspace) {
force->kspace->setup();
if (kspace_compute_flag) {
atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
force->kspace->compute(eflag,vflag);
timer->stamp(Timer::KSPACE);
} else force->kspace->compute_dummy(eflag,vflag);
}
if (force->newton) comm->reverse_comm();
modify->setup(vflag);
update->setupflag = 0;
}
/* ----------------------------------------------------------------------
run for N steps
------------------------------------------------------------------------- */
void VerletKokkos::run(int n)
{
bigint ntimestep;
int nflag,sortflag;
int n_post_integrate = modify->n_post_integrate;
int n_pre_exchange = modify->n_pre_exchange;
int n_pre_neighbor = modify->n_pre_neighbor;
int n_pre_force = modify->n_pre_force;
int n_post_force = modify->n_post_force;
int n_end_of_step = modify->n_end_of_step;
if (atomKK->sortfreq > 0) sortflag = 1;
else sortflag = 0;
static double time = 0.0;
static int count = 0;
atomKK->sync(Device,ALL_MASK);
Kokkos::Impl::Timer ktimer;
for (int i = 0; i < n; i++) {
ntimestep = ++update->ntimestep;
ev_set(ntimestep);
// initial time integration
ktimer.reset();
timer->stamp();
modify->initial_integrate(vflag);
time += ktimer.seconds();
if (n_post_integrate) modify->post_integrate();
timer->stamp(Timer::MODIFY);
// regular communication vs neighbor list rebuild
nflag = neighbor->decide();
if (nflag == 0) {
timer->stamp();
comm->forward_comm();
timer->stamp(Timer::COMM);
} else {
// added debug
//atomKK->sync(Host,ALL_MASK);
//atomKK->modified(Host,ALL_MASK);
if (n_pre_exchange) {
timer->stamp();
modify->pre_exchange();
timer->stamp(Timer::MODIFY);
}
// debug
//atomKK->sync(Host,ALL_MASK);
//atomKK->modified(Host,ALL_MASK);
if (triclinic) domain->x2lamda(atomKK->nlocal);
domain->pbc();
if (domain->box_change) {
domain->reset_box();
comm->setup();
if (neighbor->style) neighbor->setup_bins();
}
timer->stamp();
// added debug
//atomKK->sync(Device,ALL_MASK);
//atomKK->modified(Device,ALL_MASK);
comm->exchange();
if (sortflag && ntimestep >= atomKK->nextsort) atomKK->sort();
comm->borders();
// added debug
//atomKK->sync(Host,ALL_MASK);
//atomKK->modified(Host,ALL_MASK);
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
timer->stamp(Timer::COMM);
if (n_pre_neighbor) {
modify->pre_neighbor();
timer->stamp(Timer::MODIFY);
}
neighbor->build();
timer->stamp(Timer::NEIGH);
}
// force computations
// important for pair to come before bonded contributions
// since some bonded potentials tally pairwise energy/virial
// and Pair::ev_tally() needs to be called before any tallying
force_clear();
timer->stamp();
// added for debug
//atomKK->k_x.sync<LMPHostType>();
//atomKK->k_f.sync<LMPHostType>();
//atomKK->k_f.modify<LMPHostType>();
if (n_pre_force) {
modify->pre_force(vflag);
timer->stamp(Timer::MODIFY);
}
if (pair_compute_flag) {
atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
force->pair->compute(eflag,vflag);
timer->stamp(Timer::PAIR);
}
if (atomKK->molecular) {
if (force->bond) {
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
force->bond->compute(eflag,vflag);
}
if (force->angle) {
atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
force->angle->compute(eflag,vflag);
}
if (force->dihedral) {
atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
force->dihedral->compute(eflag,vflag);
}
if (force->improper) {
atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
force->improper->compute(eflag,vflag);
}
timer->stamp(Timer::BOND);
}
if (kspace_compute_flag) {
atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
force->kspace->compute(eflag,vflag);
timer->stamp(Timer::KSPACE);
}
// reverse communication of forces
if (force->newton) comm->reverse_comm();
timer->stamp(Timer::COMM);
// force modifications, final time integration, diagnostics
ktimer.reset();
if (n_post_force) modify->post_force(vflag);
modify->final_integrate();
if (n_end_of_step) modify->end_of_step();
timer->stamp(Timer::MODIFY);
time += ktimer.seconds();
// all output
if (ntimestep == output->next) {
atomKK->sync(Host,ALL_MASK);
timer->stamp();
output->write(ntimestep);
timer->stamp(Timer::OUTPUT);
}
}
}
/* ----------------------------------------------------------------------
clear force on own & ghost atoms
clear other arrays as needed
------------------------------------------------------------------------- */
void VerletKokkos::force_clear()
{
int i;
if (external_force_clear) return;
// clear force on all particles
// if either newton flag is set, also include ghosts
// when using threads, always clear all forces
if (neighbor->includegroup == 0) {
int nall;
if (force->newton) nall = atomKK->nlocal + atomKK->nghost;
else nall = atomKK->nlocal;
size_t nbytes = sizeof(double) * nall;
if (nbytes) {
- if (atomKK->k_f.modified_host > atomKK->k_f.modified_device) {
+ if (atomKK->k_f.modified_host() > atomKK->k_f.modified_device()) {
memset_kokkos(atomKK->k_f.view<LMPHostType>());
atomKK->modified(Host,F_MASK);
} else {
memset_kokkos(atomKK->k_f.view<LMPDeviceType>());
atomKK->modified(Device,F_MASK);
}
if (torqueflag) memset(&(atomKK->torque[0][0]),0,3*nbytes);
}
// neighbor includegroup flag is set
// clear force only on initial nfirst particles
// if either newton flag is set, also include ghosts
} else {
int nall = atomKK->nfirst;
- if (atomKK->k_f.modified_host > atomKK->k_f.modified_device) {
+ if (atomKK->k_f.modified_host() > atomKK->k_f.modified_device()) {
memset_kokkos(atomKK->k_f.view<LMPHostType>());
atomKK->modified(Host,F_MASK);
} else {
memset_kokkos(atomKK->k_f.view<LMPDeviceType>());
atomKK->modified(Device,F_MASK);
}
if (torqueflag) {
double **torque = atomKK->torque;
for (i = 0; i < nall; i++) {
torque[i][0] = 0.0;
torque[i][1] = 0.0;
torque[i][2] = 0.0;
}
}
if (force->newton) {
nall = atomKK->nlocal + atomKK->nghost;
if (torqueflag) {
double **torque = atomKK->torque;
for (i = atomKK->nlocal; i < nall; i++) {
torque[i][0] = 0.0;
torque[i][1] = 0.0;
torque[i][2] = 0.0;
}
}
}
}
}
diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp
index 92ed86338..7341688bb 100644
--- a/src/KSPACE/msm.cpp
+++ b/src/KSPACE/msm.cpp
@@ -1,3404 +1,3404 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Paul Crozier, Stan Moore, Stephen Bond, (all SNL)
------------------------------------------------------------------------- */
#include <mpi.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "msm.h"
#include "atom.h"
#include "comm.h"
#include "gridcomm.h"
#include "neighbor.h"
#include "force.h"
#include "pair.h"
#include "domain.h"
#include "memory.h"
#include "error.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define MAX_LEVELS 10
#define OFFSET 16384
#define SMALL 0.00001
enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM};
enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM};
/* ---------------------------------------------------------------------- */
MSM::MSM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
{
if (narg < 1) error->all(FLERR,"Illegal kspace_style msm command");
msmflag = 1;
accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
nfactors = 1;
factors = new int[nfactors];
factors[0] = 2;
MPI_Comm_rank(world,&me);
procneigh_levels = NULL;
world_levels = NULL;
active_flag = NULL;
phi1d = dphi1d = NULL;
nmax = 0;
part2grid = NULL;
g_direct = NULL;
g_direct_top = NULL;
v0_direct = v1_direct = v2_direct = NULL;
v3_direct = v4_direct = v5_direct = NULL;
v0_direct_top = v1_direct_top = v2_direct_top = NULL;
v3_direct_top = v4_direct_top = v5_direct_top = NULL;
cg_all = cg_peratom_all = NULL;
cg = cg_peratom = NULL;
ngrid = NULL;
cg = NULL;
cg_peratom = NULL;
procneigh_levels = NULL;
world_levels = NULL;
active_flag = NULL;
alpha = betax = betay = betaz = NULL;
nx_msm = ny_msm = nz_msm = NULL;
nxlo_in = nylo_in = nzlo_in = NULL;
nxhi_in = nyhi_in = nzhi_in = NULL;
nxlo_out = nylo_out = nzlo_out = NULL;
nxhi_out = nyhi_out = nzhi_out = NULL;
delxinv = delyinv = delzinv = NULL;
qgrid = NULL;
egrid = NULL;
v0grid = v1grid = v2grid = v3grid = v4grid = v5grid = NULL;
levels = 0;
peratom_allocate_flag = 0;
scalar_pressure_flag = 1;
warn_nonneutral = 0;
order = 10;
}
/* ----------------------------------------------------------------------
free all memory
------------------------------------------------------------------------- */
MSM::~MSM()
{
delete [] factors;
deallocate();
if (peratom_allocate_flag) deallocate_peratom();
memory->destroy(part2grid);
memory->destroy(g_direct);
memory->destroy(g_direct_top);
memory->destroy(v0_direct);
memory->destroy(v1_direct);
memory->destroy(v2_direct);
memory->destroy(v3_direct);
memory->destroy(v4_direct);
memory->destroy(v5_direct);
memory->destroy(v0_direct_top);
memory->destroy(v1_direct_top);
memory->destroy(v2_direct_top);
memory->destroy(v3_direct_top);
memory->destroy(v4_direct_top);
memory->destroy(v5_direct_top);
deallocate_levels();
}
/* ----------------------------------------------------------------------
called once before run
------------------------------------------------------------------------- */
void MSM::init()
{
if (me == 0) {
if (screen) fprintf(screen,"MSM initialization ...\n");
if (logfile) fprintf(logfile,"MSM initialization ...\n");
}
// error check
triclinic_check();
if (domain->dimension == 2)
error->all(FLERR,"Cannot (yet) use MSM with 2d simulation");
if (comm->style != 0)
error->universe_all(FLERR,"MSM can only currently be used with "
"comm_style brick");
if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
if ((slabflag == 1) && (me == 0))
error->warning(FLERR,"Slab correction not needed for MSM");
if (order < 4 || order > 10) {
char str[128];
sprintf(str,"MSM order must be 4, 6, 8, or 10");
error->all(FLERR,str);
}
if (order%2 != 0) error->all(FLERR,"MSM order must be 4, 6, 8, or 10");
if (sizeof(FFT_SCALAR) != 8)
error->all(FLERR,"Cannot (yet) use single precision with MSM "
"(remove -DFFT_SINGLE from Makefile and recompile)");
// extract short-range Coulombic cutoff from pair style
triclinic = domain->triclinic;
pair_check();
int itmp;
double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
if (p_cutoff == NULL)
error->all(FLERR,"KSpace style is incompatible with Pair style");
cutoff = *p_cutoff;
// compute qsum & qsqsum and error if not charge-neutral
scale = 1.0;
qqrd2e = force->qqrd2e;
qsum_qsq();
natoms_original = atom->natoms;
// set accuracy (force units) from accuracy_relative or accuracy_absolute
if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
else accuracy = accuracy_relative * two_charge_force;
// setup MSM grid resolution
set_grid_global();
setup();
double estimated_error = estimate_total_error();
// output grid stats
int ngrid_max;
MPI_Allreduce(&ngrid[0],&ngrid_max,1,MPI_INT,MPI_MAX,world);
if (me == 0) {
if (screen) {
fprintf(screen," 3d grid size/proc = %d\n",
ngrid_max);
fprintf(screen," estimated absolute RMS force accuracy = %g\n",
estimated_error);
fprintf(screen," estimated relative force accuracy = %g\n",
estimated_error/two_charge_force);
}
if (logfile) {
fprintf(logfile," 3d grid size/proc = %d\n",
ngrid_max);
fprintf(logfile," estimated absolute RMS force accuracy = %g\n",
estimated_error);
fprintf(logfile," estimated relative force accuracy = %g\n",
estimated_error/two_charge_force);
}
}
if (me == 0) {
if (screen) {
fprintf(screen," grid = %d %d %d\n",nx_msm[0],ny_msm[0],nz_msm[0]);
fprintf(screen," order = %d\n",order);
}
if (logfile) {
fprintf(logfile," grid = %d %d %d\n",nx_msm[0],ny_msm[0],nz_msm[0]);
fprintf(logfile," order = %d\n",order);
}
}
}
/* ----------------------------------------------------------------------
estimate 1d grid RMS force error for MSM
------------------------------------------------------------------------- */
double MSM::estimate_1d_error(double h, double prd)
{
double a = cutoff;
int p = order - 1;
double Mp,cprime,error_scaling;
Mp = cprime = error_scaling = 1;
// Mp values from Table 5.1 of Hardy's thesis
// cprime values from equation 4.17 of Hardy's thesis
// error scaling from empirical fitting to convert to rms force errors
if (p == 3) {
Mp = 9;
cprime = 1.0/6.0;
error_scaling = 0.39189561;
} else if (p == 5) {
Mp = 825;
cprime = 1.0/30.0;
error_scaling = 0.150829428;
} else if (p == 7) {
Mp = 130095;
cprime = 1.0/140.0;
error_scaling = 0.049632967;
} else if (p == 9) {
Mp = 34096545;
cprime = 1.0/630.0;
error_scaling = 0.013520855;
} else {
error->all(FLERR,"MSM order must be 4, 6, 8, or 10");
}
// equation 4.1 from Hardy's thesis
C_p = 4.0*cprime*Mp/3.0;
// use empirical parameters to convert to rms force errors
C_p *= error_scaling;
// equation 3.197 from Hardy's thesis
double error_1d = C_p*pow(h,(p-1))/pow(a,(p+1));
// include dependency of error on other terms
error_1d *= q2*a/(prd*sqrt(double(atom->natoms)));
return error_1d;
}
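The estimate above multiplies Hardy's interpolation-error bound by a normalization that converts it to an RMS force error. A minimal standalone sketch for order 10 (p = 9); the helper name msm_error_1d_sketch and the sample inputs are illustrative assumptions, not part of msm.cpp:
#include <cmath>
#include <cstdio>
// Hardy Table 5.1 / eq. 4.17 constants and eq. 3.197 bound, specialized
// to p = 9 (order 10), with the empirical RMS scaling folded in.
static double msm_error_1d_sketch(double h, double a, double prd,
                                  double q2, double natoms) {
  const int p = 9;
  const double Mp = 34096545.0;        // Table 5.1
  const double cprime = 1.0/630.0;     // eq. 4.17
  const double scaling = 0.013520855;  // empirical RMS fit
  double C_p = 4.0*cprime*Mp/3.0*scaling;        // eq. 4.1 + scaling
  double error_1d = C_p*pow(h,p-1)/pow(a,p+1);   // eq. 3.197
  return error_1d*q2*a/(prd*sqrt(natoms));       // other dependencies
}
int main() {
  // e.g. grid spacing 1.0, cutoff 10.0, box length 30.0, q2 = 1, N = 1000
  printf("%g\n", msm_error_1d_sketch(1.0,10.0,30.0,1.0,1000.0));
  return 0;
}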
/* ----------------------------------------------------------------------
estimate 3d grid RMS force error
------------------------------------------------------------------------- */
double MSM::estimate_3d_error()
{
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
double error_x = estimate_1d_error(h_x,xprd);
double error_y = estimate_1d_error(h_y,yprd);
double error_z = estimate_1d_error(h_z,zprd);
double error_3d =
sqrt(error_x*error_x + error_y*error_y + error_z*error_z) / sqrt(3.0);
return error_3d;
}
/* ----------------------------------------------------------------------
estimate total RMS force error
------------------------------------------------------------------------- */
double MSM::estimate_total_error()
{
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
bigint natoms = atom->natoms;
double grid_error = estimate_3d_error();
double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd);
double short_range_error = 0.0;
double table_error =
estimate_table_accuracy(q2_over_sqrt,short_range_error);
double estimated_total_error = sqrt(grid_error*grid_error +
short_range_error*short_range_error + table_error*table_error);
return estimated_total_error;
}
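Because the grid, short-range, and table errors are treated as independent, they combine in quadrature. A one-line sketch with assumed magnitudes:
#include <cmath>
#include <cstdio>
int main() {
  // assumed error magnitudes, for illustration only
  double grid = 3.0e-4, short_range = 0.0, table = 1.0e-4;
  double total = sqrt(grid*grid + short_range*short_range + table*table);
  printf("estimated total error = %g\n", total);  // ~3.16e-4
  return 0;
}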
/* ----------------------------------------------------------------------
adjust MSM coeffs, called initially and whenever volume has changed
------------------------------------------------------------------------- */
void MSM::setup()
{
double *prd;
double a = cutoff;
// volume-dependent factors
prd = domain->prd;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
volume = xprd * yprd * zprd;
// loop over grid levels and compute grid spacing
for (int n=0; n<levels; n++) {
if (triclinic == 0) {
delxinv[n] = nx_msm[n]/xprd;
delyinv[n] = ny_msm[n]/yprd;
delzinv[n] = nz_msm[n]/zprd;
} else { // use lamda (0-1) coordinates
delxinv[n] = nx_msm[n];
delyinv[n] = ny_msm[n];
delzinv[n] = nz_msm[n];
}
}
double ax = a;
double ay = a;
double az = a;
// transform the interaction sphere in box coords to an
// ellipsoid in lamda (0-1) coords to
// get the direct sum interaction limits for a triclinic system
if (triclinic) {
double tmp[3];
kspacebbox(a,&tmp[0]);
ax = tmp[0];
ay = tmp[1];
az = tmp[2];
}
// direct sum interaction limits
nxhi_direct = static_cast<int> (2.0*ax*delxinv[0]);
nxlo_direct = -nxhi_direct;
nyhi_direct = static_cast<int> (2.0*ay*delyinv[0]);
nylo_direct = -nyhi_direct;
nzhi_direct = static_cast<int> (2.0*az*delzinv[0]);
nzlo_direct = -nzhi_direct;
nmax_direct = 8*(nxhi_direct+1)*(nyhi_direct+1)*(nzhi_direct+1);
deallocate();
if (peratom_allocate_flag) deallocate_peratom();
// compute direct sum interaction weights
if (!peratom_allocate_flag) { // Timestep 0
get_g_direct();
get_virial_direct();
if (domain->nonperiodic) {
get_g_direct_top(levels-1);
get_virial_direct_top(levels-1);
}
} else {
get_g_direct();
if (domain->nonperiodic) get_g_direct_top(levels-1);
if (vflag_either && !scalar_pressure_flag) {
get_virial_direct();
if (domain->nonperiodic) get_virial_direct_top(levels-1);
}
}
if (!triclinic)
boxlo = domain->boxlo;
else
boxlo = domain->boxlo_lamda;
// ghost grid points depend on direct sum interaction limits,
// so need to recompute local grid
set_grid_local();
// allocate K-space dependent memory
// don't invoke allocate_peratom(), compute() will allocate when needed
allocate();
// setup commgrid
cg_all->ghost_notify();
cg_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg[n]->ghost_notify();
cg[n]->setup();
}
}
/* ----------------------------------------------------------------------
compute the MSM long-range force, energy, virial
------------------------------------------------------------------------- */
void MSM::compute(int eflag, int vflag)
{
int i,j;
// set energy/virial flags
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = eflag_either = vflag_either = 0;
if (scalar_pressure_flag && vflag_either) {
if (vflag_atom)
error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' to obtain "
"per-atom virial with kspace_style MSM");
// must switch on global energy computation if not already on
if (eflag == 0 || eflag == 2) {
eflag++;
ev_setup(eflag,vflag);
}
}
// if atom count has changed, update qsum and qsqsum
if (atom->natoms != natoms_original) {
qsum_qsq();
natoms_original = atom->natoms;
}
// return if there are no charges
if (qsqsum == 0.0) return;
// invoke allocate_peratom() if needed for first time
if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
}
// convert atoms from box to lamda coords
if (triclinic)
domain->x2lamda(atom->nlocal);
// extend size of per-atom arrays if necessary
if (atom->nlocal > nmax) {
memory->destroy(part2grid);
nmax = atom->nmax;
memory->create(part2grid,nmax,3,"msm:part2grid");
}
// find grid points for all my particles
// map my particle charge onto my local 3d density grid (anterpolation)
particle_map();
make_rho();
// all procs reverse communicate charge density values from
// their ghost grid points
// to fully sum contribution in their 3d grid
current_level = 0;
cg_all->reverse_comm(this,REVERSE_RHO);
// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid
for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);
direct(n);
restriction(n);
}
// compute direct interaction for top grid level for nonperiodic
// and for second from top grid level for periodic
if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);
direct(levels-1);
grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
}
}
// prolongate energy/virial from coarser grid to finer grid
// reverse communicate from ghost grid points to get full sum
for (int n=levels-2; n>=0; n--) {
if (!active_flag[n]) continue;
prolongation(n);
current_level = n;
cg[n]->reverse_comm(this,REVERSE_AD);
// extra per-atom virial communication
if (vflag_atom)
cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
}
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
current_level = 0;
cg_all->forward_comm(this,FORWARD_AD);
// extra per-atom energy/virial communication
if (vflag_atom)
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
// calculate the force on my particles (interpolation)
fieldforce();
// calculate the per-atom energy/virial for my particles
if (evflag_atom) fieldforce_peratom();
// sum global energy across procs and add in self-energy term
const double qscale = qqrd2e * scale;
if (eflag_global) {
double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all;
double e_self = qsqsum*gamma(0.0)/cutoff;
energy -= e_self;
energy *= 0.5*qscale;
}
// total long-range virial
if (vflag_global && !scalar_pressure_flag) {
double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*virial_all[i];
}
// fast compute of scalar pressure (if requested)
if (scalar_pressure_flag && vflag_global)
for (i = 0; i < 3; i++) virial[i] = energy/3.0;
// per-atom energy/virial
// energy includes self-energy correction
if (evflag_atom) {
double *q = atom->q;
int nlocal = atom->nlocal;
if (eflag_atom) {
for (i = 0; i < nlocal; i++) {
eatom[i] -= q[i]*q[i]*gamma(0.0)/cutoff;
eatom[i] *= 0.5*qscale;
}
}
if (vflag_atom) {
for (i = 0; i < nlocal; i++)
for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
}
}
// convert atoms back from lamda to box coords
if (triclinic)
domain->lamda2x(atom->nlocal);
}
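compute() above performs one full multilevel cycle: anterpolation onto the finest grid, a downward pass of per-level direct sums plus restriction, a direct sum on the coarsest active level, an upward prolongation pass, and finally force interpolation. A hypothetical trace of that call order (not the real API), assuming 4 levels:
#include <cstdio>
int main() {
  const int levels = 4;   // assumed level count
  puts("make_rho (anterpolation: particles -> finest grid)");
  for (int n = 0; n <= levels-2; n++)
    printf("direct(%d); restriction(%d -> %d)\n", n, n, n+1);
  printf("direct(%d) or direct_top(%d) on the coarsest level\n",
         levels-1, levels-1);
  for (int n = levels-2; n >= 0; n--)
    printf("prolongation(%d -> %d)\n", n+1, n);
  puts("fieldforce (interpolation: finest grid -> forces)");
  return 0;
}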
/* ----------------------------------------------------------------------
allocate memory that depends on # of grid points
------------------------------------------------------------------------- */
void MSM::allocate()
{
// interpolation coeffs
order_allocated = order;
memory->create2d_offset(phi1d,3,-order,order,"msm:phi1d");
memory->create2d_offset(dphi1d,3,-order,order,"msm:dphi1d");
// commgrid using all processors for finest grid level
int (*procneigh_all)[2] = comm->procneigh;
cg_all = new GridComm(lmp,world,1,1,
nxlo_in[0],nxhi_in[0],nylo_in[0],nyhi_in[0],nzlo_in[0],nzhi_in[0],
nxlo_out_all,nxhi_out_all,nylo_out_all,nyhi_out_all,nzlo_out_all,nzhi_out_all,
nxlo_out[0],nxhi_out[0],nylo_out[0],nyhi_out[0],nzlo_out[0],nzhi_out[0],
procneigh_all[0][0],procneigh_all[0][1],procneigh_all[1][0],
procneigh_all[1][1],procneigh_all[2][0],procneigh_all[2][1]);
// allocate memory for each grid level
for (int n=0; n<levels; n++) {
memory->create3d_offset(qgrid[n],nzlo_out[n],nzhi_out[n],
nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:qgrid");
memory->create3d_offset(egrid[n],nzlo_out[n],nzhi_out[n],
nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:egrid");
// create commgrid object for rho and electric field communication
if (active_flag[n]) {
int **procneigh = procneigh_levels[n];
cg[n] = new GridComm(lmp,world_levels[n],1,1,
nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n],
nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n],
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
}
}
}
/* ----------------------------------------------------------------------
deallocate memory that depends on # of grid points
------------------------------------------------------------------------- */
void MSM::deallocate()
{
memory->destroy2d_offset(phi1d,-order_allocated);
memory->destroy2d_offset(dphi1d,-order_allocated);
if (cg_all) delete cg_all;
for (int n=0; n<levels; n++) {
if (qgrid[n])
memory->destroy3d_offset(qgrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (egrid[n])
memory->destroy3d_offset(egrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (world_levels)
if (world_levels[n] != MPI_COMM_NULL)
MPI_Comm_free(&world_levels[n]);
if (cg)
if (cg[n]) delete cg[n];
}
}
/* ----------------------------------------------------------------------
allocate per-atom virial memory that depends on # of grid points
------------------------------------------------------------------------- */
void MSM::allocate_peratom()
{
peratom_allocate_flag = 1;
// create commgrid object for per-atom virial using all processors
int (*procneigh_all)[2] = comm->procneigh;
cg_peratom_all =
new GridComm(lmp,world,6,6,
nxlo_in[0],nxhi_in[0],nylo_in[0],nyhi_in[0],nzlo_in[0],nzhi_in[0],
nxlo_out_all,nxhi_out_all,nylo_out_all,nyhi_out_all,nzlo_out_all,nzhi_out_all,
nxlo_out[0],nxhi_out[0],nylo_out[0],nyhi_out[0],nzlo_out[0],nzhi_out[0],
procneigh_all[0][0],procneigh_all[0][1],procneigh_all[1][0],
procneigh_all[1][1],procneigh_all[2][0],procneigh_all[2][1]);
// allocate memory for each grid level
for (int n=0; n<levels; n++) {
memory->create3d_offset(v0grid[n],nzlo_out[n],nzhi_out[n],
nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v0grid");
memory->create3d_offset(v1grid[n],nzlo_out[n],nzhi_out[n],
nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v1grid");
memory->create3d_offset(v2grid[n],nzlo_out[n],nzhi_out[n],
nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v2grid");
memory->create3d_offset(v3grid[n],nzlo_out[n],nzhi_out[n],
nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v3grid");
memory->create3d_offset(v4grid[n],nzlo_out[n],nzhi_out[n],
nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v4grid");
memory->create3d_offset(v5grid[n],nzlo_out[n],nzhi_out[n],
nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v5grid");
// create commgrid object for per-atom virial
if (active_flag[n]) {
int **procneigh = procneigh_levels[n];
cg_peratom[n] =
new GridComm(lmp,world_levels[n],6,6,
nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n],
nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n],
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
}
}
}
/* ----------------------------------------------------------------------
deallocate per-atom virial memory that depends on # of grid points
------------------------------------------------------------------------- */
void MSM::deallocate_peratom()
{
peratom_allocate_flag = 0;
if (cg_peratom_all) delete cg_peratom_all;
for (int n=0; n<levels; n++) {
if (v0grid[n])
memory->destroy3d_offset(v0grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (v1grid[n])
memory->destroy3d_offset(v1grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (v2grid[n])
memory->destroy3d_offset(v2grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (v3grid[n])
memory->destroy3d_offset(v3grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (v4grid[n])
memory->destroy3d_offset(v4grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (v5grid[n])
memory->destroy3d_offset(v5grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (cg_peratom)
if (cg_peratom[n]) delete cg_peratom[n];
}
}
/* ----------------------------------------------------------------------
allocate memory that depends on # of grid levels
------------------------------------------------------------------------- */
void MSM::allocate_levels()
{
ngrid = new int[levels];
cg = new GridComm*[levels];
cg_peratom = new GridComm*[levels];
memory->create(procneigh_levels,levels,3,2,"msm:procneigh_levels");
world_levels = new MPI_Comm[levels];
active_flag = new int[levels];
alpha = new int[levels];
betax = new int[levels];
betay = new int[levels];
betaz = new int[levels];
nx_msm = new int[levels];
ny_msm = new int[levels];
nz_msm = new int[levels];
nxlo_in = new int[levels];
nylo_in = new int[levels];
nzlo_in = new int[levels];
nxhi_in = new int[levels];
nyhi_in = new int[levels];
nzhi_in = new int[levels];
nxlo_out = new int[levels];
nylo_out = new int[levels];
nzlo_out = new int[levels];
nxhi_out = new int[levels];
nyhi_out = new int[levels];
nzhi_out = new int[levels];
delxinv = new double[levels];
delyinv = new double[levels];
delzinv = new double[levels];
qgrid = new double***[levels];
egrid = new double***[levels];
v0grid = new double***[levels];
v1grid = new double***[levels];
v2grid = new double***[levels];
v3grid = new double***[levels];
v4grid = new double***[levels];
v5grid = new double***[levels];
for (int n=0; n<levels; n++) {
cg[n] = NULL;
world_levels[n] = MPI_COMM_NULL;
cg_peratom[n] = NULL;
qgrid[n] = NULL;
egrid[n] = NULL;
v0grid[n] = NULL;
v1grid[n] = NULL;
v2grid[n] = NULL;
v3grid[n] = NULL;
v4grid[n] = NULL;
v5grid[n] = NULL;
}
}
/* ----------------------------------------------------------------------
deallocate memory that depends on # of grid levels
------------------------------------------------------------------------- */
void MSM::deallocate_levels()
{
delete [] ngrid;
memory->destroy(procneigh_levels);
delete [] world_levels;
delete [] active_flag;
delete [] cg;
delete [] cg_peratom;
delete [] alpha;
delete [] betax;
delete [] betay;
delete [] betaz;
delete [] nx_msm;
delete [] ny_msm;
delete [] nz_msm;
delete [] nxlo_in;
delete [] nylo_in;
delete [] nzlo_in;
delete [] nxhi_in;
delete [] nyhi_in;
delete [] nzhi_in;
delete [] nxlo_out;
delete [] nylo_out;
delete [] nzlo_out;
delete [] nxhi_out;
delete [] nyhi_out;
delete [] nzhi_out;
delete [] delxinv;
delete [] delyinv;
delete [] delzinv;
delete [] qgrid;
delete [] egrid;
delete [] v0grid;
delete [] v1grid;
delete [] v2grid;
delete [] v3grid;
delete [] v4grid;
delete [] v5grid;
}
/* ----------------------------------------------------------------------
set total size of MSM grids
------------------------------------------------------------------------- */
void MSM::set_grid_global()
{
if (accuracy_relative <= 0.0)
error->all(FLERR,"KSpace accuracy must be > 0");
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
int nx_max,ny_max,nz_max;
double hx,hy,hz;
if (adjust_cutoff_flag && !gridflag) {
// seek to choose optimal Coulombic cutoff and number of grid levels
// (based on a cost estimate in Hardy's thesis)
int p = order - 1;
double hmin = 3072.0*(p+1)/(p-1)/
(448.0*MY_PI + 56.0*MY_PI*order/2 + 1701.0);
hmin = pow(hmin,1.0/6.0)*pow(xprd*yprd*zprd/atom->natoms,1.0/3.0);
nx_max = static_cast<int>(xprd/hmin);
ny_max = static_cast<int>(yprd/hmin);
nz_max = static_cast<int>(zprd/hmin);
nx_max = MAX(nx_max,2);
ny_max = MAX(ny_max,2);
nz_max = MAX(nz_max,2);
} else if (!gridflag) {
// Coulombic cutoff is set by user, choose grid to give requested error
nx_max = ny_max = nz_max = 2;
hx = xprd/nx_max;
hy = yprd/ny_max;
hz = zprd/nz_max;
double x_error = 2.0*accuracy;
double y_error = 2.0*accuracy;
double z_error = 2.0*accuracy;
while (x_error > accuracy) {
nx_max *= 2;
hx = xprd/nx_max;
x_error = estimate_1d_error(hx,xprd);
}
while (y_error > accuracy) {
ny_max *= 2;
hy = yprd/ny_max;
y_error = estimate_1d_error(hy,yprd);
}
while (z_error > accuracy) {
nz_max *= 2;
hz = zprd/nz_max;
z_error = estimate_1d_error(hz,zprd);
}
} else {
// cutoff and grid are set by user
nx_max = nx_msm_max;
ny_max = ny_msm_max;
nz_max = nz_msm_max;
}
// scale grid for triclinic skew
if (triclinic && !gridflag) {
double tmp[3];
tmp[0] = nx_max/xprd;
tmp[1] = ny_max/yprd;
tmp[2] = nz_max/zprd;
lamda2xT(&tmp[0],&tmp[0]);
nx_max = static_cast<int>(tmp[0]);
ny_max = static_cast<int>(tmp[1]);
nz_max = static_cast<int>(tmp[2]);
}
// boost grid size until it is factorable by 2
// round up or down, depending on which is closer
int flag = 0;
int xlevels,ylevels,zlevels;
while (!factorable(nx_max,flag,xlevels)) {
double k = log(nx_max)/log(2.0);
double r = k - floor(k);
if (r > 0.5) nx_max++;
else nx_max--;
}
while (!factorable(ny_max,flag,ylevels)) {
double k = log(ny_max)/log(2.0);
double r = k - floor(k);
if (r > 0.5) ny_max++;
else ny_max--;
}
while (!factorable(nz_max,flag,zlevels)) {
double k = log(nz_max)/log(2.0);
double r = k - floor(k);
if (r > 0.5) nz_max++;
else nz_max--;
}
if (flag && gridflag && me == 0)
error->warning(FLERR,
"Number of MSM mesh points changed to be a multiple of 2");
// adjust Coulombic cutoff to give desired error (if requested)
if (adjust_cutoff_flag) {
hx = xprd/nx_max;
hy = yprd/ny_max;
hz = zprd/nz_max;
int p = order - 1;
double Lx2 = xprd*xprd;
double Ly2 = yprd*yprd;
double Lz2 = zprd*zprd;
double hx2pm2 = pow(hx,2.0*p-2.0);
double hy2pm2 = pow(hy,2.0*p-2.0);
double hz2pm2 = pow(hz,2.0*p-2.0);
estimate_1d_error(1.0,1.0); // make sure that C_p is defined
double k = q2*C_p/accuracy/sqrt(double(atom->natoms));
double sum = hx2pm2/Lx2 + hy2pm2/Ly2 + hz2pm2/Lz2;
cutoff = pow(k*k*sum/3.0,1.0/(2.0*p));
int itmp;
double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
*p_cutoff = cutoff;
char str[128];
sprintf(str,"Adjusting Coulombic cutoff for MSM, new cutoff = %g",cutoff);
if (me == 0) error->warning(FLERR,str);
}
if (triclinic == 0) {
h_x = xprd/nx_max;
h_y = yprd/ny_max;
h_z = zprd/nz_max;
} else {
double tmp[3];
tmp[0] = nx_max;
tmp[1] = ny_max;
tmp[2] = nz_max;
x2lamdaT(&tmp[0],&tmp[0]);
h_x = 1.0/tmp[0];
h_y = 1.0/tmp[1];
h_z = 1.0/tmp[2];
}
// find maximum number of levels
levels = MAX(xlevels,ylevels);
levels = MAX(levels,zlevels);
if (levels > MAX_LEVELS) error->all(FLERR,"Too many MSM grid levels");
// need at least 2 MSM levels for periodic systems
if (levels <= 1) {
levels = xlevels = ylevels = zlevels = 2;
nx_max = ny_max = nz_max = 2;
if (gridflag)
error->warning(FLERR,
"MSM mesh too small, increasing to 2 points in each direction");
}
// omit top grid level for periodic systems
if (!domain->nonperiodic) levels -= 1;
deallocate_levels();
allocate_levels();
// set number of grid points in each direction for each grid level
for (int n = 0; n < levels; n++) {
if (xlevels-n-1 > 0)
nx_msm[n] = static_cast<int> (pow(2.0,xlevels-n-1));
else
nx_msm[n] = 1;
if (ylevels-n-1 > 0)
ny_msm[n] = static_cast<int> (pow(2.0,ylevels-n-1));
else
ny_msm[n] = 1;
if (zlevels-n-1 > 0)
nz_msm[n] = static_cast<int> (pow(2.0,zlevels-n-1));
else
nz_msm[n] = 1;
}
if (nx_msm[0] >= OFFSET || ny_msm[0] >= OFFSET || nz_msm[0] >= OFFSET)
error->all(FLERR,"MSM grid is too large");
// compute number of extra grid points needed for nonperiodic boundary conditions
if (domain->nonperiodic) {
alpha[0] = -(order/2 - 1);
betax[0] = nx_msm[0] + (order/2 - 1);
betay[0] = ny_msm[0] + (order/2 - 1);
betaz[0] = nz_msm[0] + (order/2 - 1);
for (int n = 1; n < levels; n++) {
alpha[n] = -((-alpha[n-1]+1)/2) - (order/2 - 1);
betax[n] = ((betax[n-1]+1)/2) + (order/2 - 1);
betay[n] = ((betay[n-1]+1)/2) + (order/2 - 1);
betaz[n] = ((betaz[n-1]+1)/2) + (order/2 - 1);
}
}
}
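The while loops above nudge each dimension up or down one point at a time until it is a power of two (the only entry in factors). For typical sizes this lands on the nearest power of two, which a closed-form sketch (hypothetical helper, not part of msm.cpp) reaches directly:
#include <cmath>
#include <cstdio>
static int round_to_pow2(int n) {
  double k = log((double)n)/log(2.0);
  double r = k - floor(k);
  int lo = 1 << (int)floor(k);     // round down
  return (r > 0.5) ? 2*lo : lo;    // round up or down, whichever is closer
}
int main() {
  int dims[] = {5,6,11,48};
  for (int n : dims)
    printf("%d -> %d\n", n, round_to_pow2(n));  // 5->4, 6->8, 11->8, 48->64
  return 0;
}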
/* ----------------------------------------------------------------------
set local subset of MSM grid that I own
n xyz lo/hi in = 3d grid that I own (inclusive)
n xyz lo/hi out = 3d grid + ghost cells in 6 directions (inclusive)
------------------------------------------------------------------------- */
void MSM::set_grid_local()
{
// loop over grid levels
for (int n=0; n<levels; n++) {
// global indices of MSM grid range from 0 to N-1
// nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
// global MSM grid that I own without ghost cells
nxlo_in[n] = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_msm[n]);
nxhi_in[n] = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_msm[n]) - 1;
nylo_in[n] = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_msm[n]);
nyhi_in[n] = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_msm[n]) - 1;
nzlo_in[n] = static_cast<int> (comm->zsplit[comm->myloc[2]] * nz_msm[n]);
nzhi_in[n] = static_cast<int> (comm->zsplit[comm->myloc[2]+1] * nz_msm[n]) - 1;
// nlower,nupper = stencil size for mapping (interpolating) particles to MSM grid
nlower = -(order-1)/2;
nupper = order/2;
// lengths of box and processor sub-domains
double *prd,*sublo,*subhi;
if (!triclinic) {
prd = domain->prd;
sublo = domain->sublo;
subhi = domain->subhi;
} else {
prd = domain->prd_lamda;
sublo = domain->sublo_lamda;
subhi = domain->subhi_lamda;
}
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
// shift values for particle <-> grid mapping
// add/subtract OFFSET to avoid int(-0.75) = 0 when we want it to be -1
// nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
// global MSM grid that my particles can contribute charge to
// effectively nlo_in,nhi_in + ghost cells
// nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
// position a particle in my box can be at
// dist[3] = particle position bound = subbox + skin/2.0
// nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
double dist[3];
double cuthalf = 0.0;
if (n == 0) cuthalf = 0.5*neighbor->skin; // only applies to finest grid
dist[0] = dist[1] = dist[2] = cuthalf;
if (triclinic) kspacebbox(cuthalf,&dist[0]);
int nlo,nhi;
nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
nx_msm[n]/xprd + OFFSET) - OFFSET;
nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
nx_msm[n]/xprd + OFFSET) - OFFSET;
if (n == 0) {
// use a smaller ghost region for interpolation
nxlo_out_all = nlo + nlower;
nxhi_out_all = nhi + nupper;
}
// a larger ghost region is needed for the direct sum and for restriction/prolongation
nxlo_out[n] = nlo + MIN(-order,nxlo_direct);
nxhi_out[n] = nhi + MAX(order,nxhi_direct);
nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
ny_msm[n]/yprd + OFFSET) - OFFSET;
nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
ny_msm[n]/yprd + OFFSET) - OFFSET;
if (n == 0) {
nylo_out_all = nlo + nlower;
nyhi_out_all = nhi + nupper;
}
nylo_out[n] = nlo + MIN(-order,nylo_direct);
nyhi_out[n] = nhi + MAX(order,nyhi_direct);
nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
nz_msm[n]/zprd + OFFSET) - OFFSET;
nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
nz_msm[n]/zprd + OFFSET) - OFFSET;
if (n == 0) {
nzlo_out_all = nlo + nlower;
nzhi_out_all = nhi + nupper;
}
// a hemisphere is used for direct sum interactions,
// so no ghosting is needed for direct sum in the -z direction
nzlo_out[n] = nlo - order;
nzhi_out[n] = nhi + MAX(order,nzhi_direct);
// add extra grid points for nonperiodic boundary conditions
if (domain->nonperiodic) {
if (!domain->xperiodic) {
if (nxlo_in[n] == 0)
nxlo_in[n] = alpha[n];
nxlo_out[n] = MAX(nxlo_out[n],alpha[n]);
if (n == 0) nxlo_out_all = MAX(nxlo_out_all,alpha[0]);
if (nxhi_in[n] == nx_msm[n] - 1)
nxhi_in[n] = betax[n];
nxhi_out[n] = MIN(nxhi_out[n],betax[n]);
if (n == 0) nxhi_out_all = MIN(nxhi_out_all,betax[0]);
if (nxhi_in[n] < 0)
nxhi_in[n] = alpha[n] - 1;
}
if (!domain->yperiodic) {
if (nylo_in[n] == 0)
nylo_in[n] = alpha[n];
nylo_out[n] = MAX(nylo_out[n],alpha[n]);
if (n == 0) nylo_out_all = MAX(nylo_out_all,alpha[0]);
if (nyhi_in[n] == ny_msm[n] - 1)
nyhi_in[n] = betay[n];
nyhi_out[n] = MIN(nyhi_out[n],betay[n]);
if (n == 0) nyhi_out_all = MIN(nyhi_out_all,betay[0]);
if (nyhi_in[n] < 0)
nyhi_in[n] = alpha[n] - 1;
}
if (!domain->zperiodic) {
if (nzlo_in[n] == 0)
nzlo_in[n] = alpha[n];
nzlo_out[n] = MAX(nzlo_out[n],alpha[n]);
if (n == 0) nzlo_out_all = MAX(nzlo_out_all,alpha[0]);
if (nzhi_in[n] == nz_msm[n] - 1)
nzhi_in[n] = betaz[n];
nzhi_out[n] = MIN(nzhi_out[n],betaz[n]);
if (n == 0) nzhi_out_all = MIN(nzhi_out_all,betaz[0]);
if (nzhi_in[n] < 0)
nzhi_in[n] = alpha[n] - 1;
}
}
// prevent inactive processors from participating in MPI communication routines
set_proc_grid(n);
// MSM grids for this proc, including ghosts
ngrid[n] = (nxhi_out[n]-nxlo_out[n]+1) * (nyhi_out[n]-nylo_out[n]+1) *
(nzhi_out[n]-nzlo_out[n]+1);
}
}
/* ----------------------------------------------------------------------
find active procs and neighbor procs for each grid level
------------------------------------------------------------------------- */
void MSM::set_proc_grid(int n)
{
for (int i=0; i<3; i++)
myloc[i] = comm->myloc[i];
// size of inner MSM grid owned by this proc
int nxgrid_in = nxhi_in[n]-nxlo_in[n]+1;
int nygrid_in = nyhi_in[n]-nylo_in[n]+1;
int nzgrid_in = nzhi_in[n]-nzlo_in[n]+1;
int ngrid_in = nxgrid_in * nygrid_in * nzgrid_in;
// check to see if this proc owns any inner grid points on this grid level
// if not, deactivate by setting active_flag = 0
int cnt[3];
cnt[0] = 0;
if (myloc[1] == 0 && myloc[2] == 0)
if (nxgrid_in > 0)
cnt[0] = 1;
cnt[1] = 0;
if (myloc[0] == 0 && myloc[2] == 0)
if (nygrid_in > 0)
cnt[1] = 1;
cnt[2] = 0;
if (myloc[0] == 0 && myloc[1] == 0)
if (nzgrid_in > 0)
cnt[2] = 1;
MPI_Allreduce(&cnt[0],&procgrid[0],3,MPI_INT,MPI_SUM,world);
int color;
if (ngrid_in > 0) {
active_flag[n] = 1;
color = 0;
} else {
active_flag[n] = 0;
color = MPI_UNDEFINED;
}
// define a new MPI communicator for this grid level that only includes active procs
MPI_Comm_split(world,color,me,&world_levels[n]);
if (!active_flag[n]) return;
int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right
// map processor IDs to new 3d processor grid
// produces myloc, procneigh
int periods[3];
periods[0] = periods[1] = periods[2] = 1;
MPI_Comm cartesian;
MPI_Cart_create(world_levels[n],3,procgrid,periods,0,&cartesian);
MPI_Cart_get(cartesian,3,procgrid,periods,myloc);
MPI_Cart_shift(cartesian,0,1,&procneigh[0][0],&procneigh[0][1]);
MPI_Cart_shift(cartesian,1,1,&procneigh[1][0],&procneigh[1][1]);
MPI_Cart_shift(cartesian,2,1,&procneigh[2][0],&procneigh[2][1]);
MPI_Comm_free(&cartesian);
for (int i=0; i<3; i++)
for (int j=0; j<2; j++)
procneigh_levels[n][i][j] = procneigh[i][j];
}
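The color argument is what deactivates idle ranks: MPI_Comm_split hands every rank that passes MPI_UNDEFINED an MPI_COMM_NULL communicator, so those ranks simply skip all communication on that level. A minimal sketch of the same pattern, with a toy activity criterion in place of the real grid-ownership test:
#include <mpi.h>
#include <cstdio>
int main(int argc, char **argv) {
  MPI_Init(&argc,&argv);
  int me;
  MPI_Comm_rank(MPI_COMM_WORLD,&me);
  int active = (me % 2 == 0);            // toy criterion: even ranks active
  int color = active ? 0 : MPI_UNDEFINED;
  MPI_Comm level;
  MPI_Comm_split(MPI_COMM_WORLD,color,me,&level);
  if (level != MPI_COMM_NULL) {          // inactive ranks get MPI_COMM_NULL
    int nprocs;
    MPI_Comm_size(level,&nprocs);
    printf("rank %d active in level comm of size %d\n",me,nprocs);
    MPI_Comm_free(&level);
  }
  MPI_Finalize();
  return 0;
}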
/* ----------------------------------------------------------------------
reset local grid arrays and communication stencils
called by fix balance b/c it changed sizes of processor sub-domains
------------------------------------------------------------------------- */
void MSM::setup_grid()
{
// free all arrays previously allocated
// pre-compute volume-dependent coeffs
// reset portion of global grid that each proc owns
// reallocate MSM long-range dependent memory
// don't invoke allocate_peratom(), compute() will allocate when needed
setup();
}
/* ----------------------------------------------------------------------
check if all factors of n are in list of factors
return 1 if yes, 0 if no
------------------------------------------------------------------------- */
int MSM::factorable(int n, int &flag, int &levels)
{
int i;
levels = 1;
while (n > 1) {
for (i = 0; i < nfactors; i++) {
if (n % factors[i] == 0) {
n /= factors[i];
levels++;
break;
}
}
if (i == nfactors) {
flag = 1;
return 0;
}
}
return 1;
}
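With factors = {2}, factorable() both validates a dimension and counts how many grid levels a power of two supports: n = 16 halves four times, so levels = 5. A standalone reimplementation for illustration (not the class method):
#include <cstdio>
static int factorable2(int n, int &flag, int &levels) {
  levels = 1;
  while (n > 1) {
    if (n % 2) { flag = 1; return 0; }  // a factor other than 2 remains
    n /= 2;
    levels++;
  }
  return 1;
}
int main() {
  int flag = 0, levels = 0;
  int ok = factorable2(16,flag,levels);
  printf("16: ok=%d levels=%d\n",ok,levels);  // ok=1 levels=5
  ok = factorable2(12,flag,levels);
  printf("12: ok=%d flag=%d\n",ok,flag);      // ok=0 flag=1 (12 = 2*2*3)
  return 0;
}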
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void MSM::particle_map()
{
int nx,ny,nz;
double **x = atom->x;
int nlocal = atom->nlocal;
int flag = 0;
- if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
+ if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
for (int i = 0; i < nlocal; i++) {
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
// add/subtract OFFSET to avoid int(-0.75) = 0 when we want it to be -1
nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv[0]+OFFSET) - OFFSET;
ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv[0]+OFFSET) - OFFSET;
nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv[0]+OFFSET) - OFFSET;
part2grid[i][0] = nx;
part2grid[i][1] = ny;
part2grid[i][2] = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlower < nxlo_out[0] || nx+nupper > nxhi_out[0] ||
ny+nlower < nylo_out[0] || ny+nupper > nyhi_out[0] ||
nz+nlower < nzlo_out[0] || nz+nupper > nzhi_out[0]) flag = 1;
}
if (flag) error->one(FLERR,"Out of range atoms - cannot compute MSM");
}
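The OFFSET shift above works around the fact that an int cast truncates toward zero: int(-0.75) is 0, but the stencil mapping needs -1. Adding a large positive constant before truncating and subtracting it afterwards yields floor() behavior for any coordinate a particle can reach. A short demonstration:
#include <cstdio>
int main() {
  const int OFFSET = 16384;
  double xs[] = {-0.75,-0.25,0.25,1.75};
  for (double x : xs) {
    int bare  = static_cast<int>(x);                  // truncates toward zero
    int fixed = static_cast<int>(x+OFFSET) - OFFSET;  // floor-like
    printf("x=%5.2f  bare=%2d  floor-like=%2d\n",x,bare,fixed);
  }
  return 0;
}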
/* ----------------------------------------------------------------------
aninterpolation: interpolate charges from particles to grid
------------------------------------------------------------------------- */
void MSM::make_rho()
{
//fprintf(screen,"MSM aninterpolation\n\n");
int i,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz,x0,y0,z0;
// clear 3d density array
double ***qgridn = qgrid[0];
memset(&(qgridn[nzlo_out[0]][nylo_out[0]][nxlo_out[0]]),0,ngrid[0]*sizeof(double));
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
double *q = atom->q;
double **x = atom->x;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
compute_phis(dx,dy,dz);
z0 = q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*phi1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*phi1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
qgridn[mz][my][mx] += x0*phi1d[0][l];
}
}
}
}
}
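The stencil weight in make_rho() is separable: the 3d weight is the product of three 1d basis values, which is why the partial products z0 = q*phi_z and y0 = z0*phi_y can be hoisted out of the inner loops. A toy sketch with a hypothetical stand-in for compute_phi() (weights 0.25/0.5/0.25, a partition of unity):
#include <cstdio>
static double phi(int nu) { return (nu == 0) ? 0.5 : 0.25; }  // toy basis
int main() {
  double q = 1.0, total = 0.0;
  for (int n = -1; n <= 1; n++) {       // z stencil
    double y0 = q*phi(n);
    for (int m = -1; m <= 1; m++) {     // y stencil
      double x0 = y0*phi(m);
      for (int l = -1; l <= 1; l++)     // x stencil
        total += x0*phi(l);             // qgrid[nz+n][ny+m][nx+l] += x0*phi(l)
    }
  }
  printf("total spread charge = %g\n",total);  // 1.0: all of q lands on grid
  return 0;
}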
/* ----------------------------------------------------------------------
MSM direct sum procedure for intermediate grid levels, solve Poisson's
equation to get energy, virial, etc.
------------------------------------------------------------------------- */
void MSM::direct(int n)
{
//fprintf(screen,"Direct contribution on level %i\n\n",n);
double ***qgridn = qgrid[n];
double ***egridn = egrid[n];
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
double *g_directn = g_direct[n];
double *v0_directn = v0_direct[n];
double *v1_directn = v1_direct[n];
double *v2_directn = v2_direct[n];
double *v3_directn = v3_direct[n];
double *v4_directn = v4_direct[n];
double *v5_directn = v5_direct[n];
// zero out electric potential
memset(&(egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
// zero out virial
if (vflag_atom) {
memset(&(v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
memset(&(v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
memset(&(v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
memset(&(v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
memset(&(v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
memset(&(v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
}
int icx,icy,icz,ix,iy,iz,zk,zyk,k;
int ii,jj,kk;
int imin,imax,jmin,jmax,kmax;
double qtmp,qtmp2,gtmp;
double esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum;
double **qk,**ek;
double *qkj,*ekj;
int nx = nxhi_direct - nxlo_direct + 1;
int ny = nyhi_direct - nylo_direct + 1;
// loop over inner grid points
for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
if (domain->zperiodic) {
kmax = nzhi_direct;
} else {
kmax = MIN(nzhi_direct,betaz[n] - icz);
}
for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
if (domain->yperiodic) {
jmin = nylo_direct;
jmax = nyhi_direct;
} else {
jmin = MAX(nylo_direct,alpha[n] - icy);
jmax = MIN(nyhi_direct,betay[n] - icy);
}
for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
if (domain->xperiodic) {
imin = nxlo_direct;
imax = nxhi_direct;
} else {
imin = MAX(nxlo_direct,alpha[n] - icx);
imax = MIN(nxhi_direct,betax[n] - icx);
}
qtmp = qgridn[icz][icy][icx]; // charge on center grid point
esum = 0.0;
if (vflag_either && !scalar_pressure_flag)
v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0;
// use hemisphere to avoid double computation of pair-wise
// interactions in direct sum (no computations in -z direction)
for (iz = 1; iz <= kmax; iz++) {
kk = icz+iz;
qk = qgridn[kk];
ek = egridn[kk];
zk = (iz + nzhi_direct)*ny;
for (iy = jmin; iy <= jmax; iy++) {
jj = icy+iy;
qkj = qk[jj];
ekj = ek[jj];
zyk = (zk + iy + nyhi_direct)*nx;
for (ix = imin; ix <= imax; ix++) {
ii = icx+ix;
qtmp2 = qkj[ii]; // charge on outer grid point
k = zyk + ix + nxhi_direct;
gtmp = g_directn[k];
esum += gtmp * qtmp2;
ekj[ii] += gtmp * qtmp;
if (vflag_either && !scalar_pressure_flag) {
v0sum += v0_directn[k] * qtmp2;
v1sum += v1_directn[k] * qtmp2;
v2sum += v2_directn[k] * qtmp2;
v3sum += v3_directn[k] * qtmp2;
v4sum += v4_directn[k] * qtmp2;
v5sum += v5_directn[k] * qtmp2;
}
}
}
}
// iz=0
iz = 0;
kk = icz+iz;
qk = qgridn[kk];
ek = egridn[kk];
zk = (iz + nzhi_direct)*ny;
for (iy = 1; iy <= jmax; iy++) {
jj = icy+iy;
qkj = qk[jj];
ekj = ek[jj];
zyk = (zk + iy + nyhi_direct)*nx;
for (ix = imin; ix <= imax; ix++) {
ii = icx+ix;
qtmp2 = qkj[ii];
k = zyk + ix + nxhi_direct;
gtmp = g_directn[k];
esum += gtmp * qtmp2;
ekj[ii] += gtmp * qtmp;
if (vflag_either && !scalar_pressure_flag) {
v0sum += v0_directn[k] * qtmp2;
v1sum += v1_directn[k] * qtmp2;
v2sum += v2_directn[k] * qtmp2;
v3sum += v3_directn[k] * qtmp2;
v4sum += v4_directn[k] * qtmp2;
v5sum += v5_directn[k] * qtmp2;
}
}
}
// iz=0, iy=0
iz = 0;
kk = icz+iz;
qk = qgridn[kk];
ek = egridn[kk];
zk = (iz + nzhi_direct)*ny;
iy = 0;
jj = icy+iy;
qkj = qk[jj];
ekj = ek[jj];
zyk = (zk + iy + nyhi_direct)*nx;
for (ix = 1; ix <= imax; ix++) {
ii = icx+ix;
qtmp2 = qkj[ii];
k = zyk + ix + nxhi_direct;
gtmp = g_directn[k];
esum += gtmp * qtmp2;
ekj[ii] += gtmp * qtmp;
if (vflag_either && !scalar_pressure_flag) {
v0sum += v0_directn[k] * qtmp2;
v1sum += v1_directn[k] * qtmp2;
v2sum += v2_directn[k] * qtmp2;
v3sum += v3_directn[k] * qtmp2;
v4sum += v4_directn[k] * qtmp2;
v5sum += v5_directn[k] * qtmp2;
}
}
// iz=0, iy=0, ix=0
iz = 0;
zk = (iz + nzhi_direct)*ny;
iy = 0;
zyk = (zk + iy + nyhi_direct)*nx;
ix = 0;
k = zyk + ix + nxhi_direct;
gtmp = g_directn[k];
esum += 0.5 * gtmp * qtmp;
egridn[icz][icy][icx] += 0.5 * gtmp * qtmp;
// virial is zero for iz=0, iy=0, ix=0
// accumulate per-atom energy/virial
egridn[icz][icy][icx] += esum;
if (vflag_atom && !scalar_pressure_flag) {
v0gridn[icz][icy][icx] += v0sum;
v1gridn[icz][icy][icx] += v1sum;
v2gridn[icz][icy][icx] += v2sum;
v3gridn[icz][icy][icx] += v3sum;
v4gridn[icz][icy][icx] += v4sum;
v5gridn[icz][icy][icx] += v5sum;
}
// accumulate total energy/virial
if (evflag) {
qtmp = qgridn[icz][icy][icx];
if (eflag_global) energy += 2.0 * esum * qtmp;
if (vflag_global && !scalar_pressure_flag) {
virial[0] += 2.0 * v0sum * qtmp;
virial[1] += 2.0 * v1sum * qtmp;
virial[2] += 2.0 * v2sum * qtmp;
virial[3] += 2.0 * v3sum * qtmp;
virial[4] += 2.0 * v4sum * qtmp;
virial[5] += 2.0 * v5sum * qtmp;
}
}
}
}
}
// compute per-atom virial (if requested)
if (vflag_atom)
direct_peratom(n);
}
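The hemisphere loop visits each grid-point pair once and exploits the symmetry of the kernel: g*q_j is accumulated into the center point's esum while g*q_i goes into the outer point's egrid entry, so half the sum of q_i*e_i (folded here into the 2.0*esum*qtmp global accumulation, with the self term counted half twice) recovers the full double sum. A toy energy check of the same bookkeeping:
#include <cstdio>
int main() {
  const int N = 3;
  double q[N] = {1.0,-2.0,1.0};
  double g[N][N] = {{0,0.5,0.2},{0.5,0,0.5},{0.2,0.5,0}};  // symmetric toy kernel
  double e[N] = {0,0,0}, E = 0.0;
  for (int i = 0; i < N; i++)
    for (int j = i+1; j < N; j++) {  // "hemisphere": each pair visited once
      e[i] += g[i][j]*q[j];
      e[j] += g[i][j]*q[i];
    }
  for (int i = 0; i < N; i++) E += 0.5*q[i]*e[i];
  printf("pair energy = %g\n",E);    // -1.8, same as the full double sum
  return 0;
}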
/* ----------------------------------------------------------------------
MSM direct sum procedure for intermediate grid levels, solve Poisson's
equation to get per-atom virial, separate method used for performance
reasons
------------------------------------------------------------------------- */
void MSM::direct_peratom(int n)
{
//fprintf(screen,"Direct contribution on level %i\n\n",n);
double ***qgridn = qgrid[n];
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int icx,icy,icz,ix,iy,iz,zk,zyk,k;
int ii,jj,kk;
int imin,imax,jmin,jmax,kmax;
double qtmp;
int nx = nxhi_direct - nxlo_direct + 1;
int ny = nyhi_direct - nylo_direct + 1;
// loop over inner grid points
for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
if (domain->zperiodic) {
kmax = nzhi_direct;
} else {
kmax = MIN(nzhi_direct,betaz[n] - icz);
}
for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
if (domain->yperiodic) {
jmin = nylo_direct;
jmax = nyhi_direct;
} else {
jmin = MAX(nylo_direct,alpha[n] - icy);
jmax = MIN(nyhi_direct,betay[n] - icy);
}
for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
if (domain->xperiodic) {
imin = nxlo_direct;
imax = nxhi_direct;
} else {
imin = MAX(nxlo_direct,alpha[n] - icx);
imax = MIN(nxhi_direct,betax[n] - icx);
}
qtmp = qgridn[icz][icy][icx]; // center grid point
// use hemisphere to avoid double computation of pair-wise
// interactions in direct sum (no computations in -z direction)
for (iz = 1; iz <= kmax; iz++) {
kk = icz+iz;
zk = (iz + nzhi_direct)*ny;
for (iy = jmin; iy <= jmax; iy++) {
jj = icy+iy;
zyk = (zk + iy + nyhi_direct)*nx;
for (ix = imin; ix <= imax; ix++) {
ii = icx+ix;
k = zyk + ix + nxhi_direct;
v0gridn[kk][jj][ii] += v0_direct[n][k] * qtmp;
v1gridn[kk][jj][ii] += v1_direct[n][k] * qtmp;
v2gridn[kk][jj][ii] += v2_direct[n][k] * qtmp;
v3gridn[kk][jj][ii] += v3_direct[n][k] * qtmp;
v4gridn[kk][jj][ii] += v4_direct[n][k] * qtmp;
v5gridn[kk][jj][ii] += v5_direct[n][k] * qtmp;
}
}
}
// iz=0
iz = 0;
kk = icz+iz;
zk = (iz + nzhi_direct)*ny;
for (iy = 1; iy <= jmax; iy++) {
jj = icy+iy;
zyk = (zk + iy + nyhi_direct)*nx;
for (ix = imin; ix <= imax; ix++) {
ii = icx+ix;
k = zyk + ix + nxhi_direct;
v0gridn[kk][jj][ii] += v0_direct[n][k] * qtmp;
v1gridn[kk][jj][ii] += v1_direct[n][k] * qtmp;
v2gridn[kk][jj][ii] += v2_direct[n][k] * qtmp;
v3gridn[kk][jj][ii] += v3_direct[n][k] * qtmp;
v4gridn[kk][jj][ii] += v4_direct[n][k] * qtmp;
v5gridn[kk][jj][ii] += v5_direct[n][k] * qtmp;
}
}
// iz=0, iy=0
iz = 0;
kk = icz+iz;
zk = (iz + nzhi_direct)*ny;
iy = 0;
jj = icy+iy;
zyk = (zk + iy + nyhi_direct)*nx;
for (ix = 1; ix <= imax; ix++) {
ii = icx+ix;
k = zyk + ix + nxhi_direct;
v0gridn[kk][jj][ii] += v0_direct[n][k] * qtmp;
v1gridn[kk][jj][ii] += v1_direct[n][k] * qtmp;
v2gridn[kk][jj][ii] += v2_direct[n][k] * qtmp;
v3gridn[kk][jj][ii] += v3_direct[n][k] * qtmp;
v4gridn[kk][jj][ii] += v4_direct[n][k] * qtmp;
v5gridn[kk][jj][ii] += v5_direct[n][k] * qtmp;
}
// virial is zero for iz=0, iy=0, ix=0
}
}
}
}
/* ----------------------------------------------------------------------
MSM direct sum procedure for top grid level (nonperiodic systems only)
------------------------------------------------------------------------- */
void MSM::direct_top(int n)
{
//fprintf(screen,"Direct contribution on level %i\n\n",n);
double ***qgridn = qgrid[n];
double ***egridn = egrid[n];
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
// zero out electric potential
memset(&(egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
// zero out virial
if (vflag_atom) {
memset(&(v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
memset(&(v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
memset(&(v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
memset(&(v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
memset(&(v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
memset(&(v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
}
int icx,icy,icz,ix,iy,iz,zk,zyk,k;
int ii,jj,kk;
int imin,imax,jmin,jmax,kmax;
double qtmp,qtmp2,gtmp;
double esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum;
double **qk,**ek;
double *qkj,*ekj;
int nx_top = betax[n] - alpha[n];
int ny_top = betay[n] - alpha[n];
int nz_top = betaz[n] - alpha[n];
int nx = 2*nx_top + 1;
int ny = 2*ny_top + 1;
// loop over inner grid points
for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
if (domain->zperiodic) {
kmax = nz_msm[n]-1;
} else {
kmax = betaz[n] - icz;
}
for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
if (domain->yperiodic) {
jmin = 0;
jmax = ny_msm[n]-1;
} else {
jmin = alpha[n] - icy;
jmax = betay[n] - icy;
}
for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
if (domain->xperiodic) {
imin = 0;
imax = nx_msm[n]-1;
} else {
imin = alpha[n] - icx;
imax = betax[n] - icx;
}
qtmp = qgridn[icz][icy][icx];
esum = 0.0;
if (vflag_either && !scalar_pressure_flag)
v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0;
// use hemisphere to avoid double computation of pair-wise
// interactions in direct sum (no computations in -z direction)
for (iz = 1; iz <= kmax; iz++) {
kk = icz+iz;
qk = qgridn[kk];
ek = egridn[kk];
zk = (iz + nz_top)*ny;
for (iy = jmin; iy <= jmax; iy++) {
jj = icy+iy;
qkj = qk[jj];
ekj = ek[jj];
zyk = (zk + iy + ny_top)*nx;
for (ix = imin; ix <= imax; ix++) {
ii = icx+ix;
qtmp2 = qkj[ii];
k = zyk + ix + nx_top;
gtmp = g_direct_top[k];
esum += gtmp * qtmp2;
ekj[ii] += gtmp * qtmp;
if (vflag_either && !scalar_pressure_flag) {
v0sum += v0_direct_top[k] * qtmp2;
v1sum += v1_direct_top[k] * qtmp2;
v2sum += v2_direct_top[k] * qtmp2;
v3sum += v3_direct_top[k] * qtmp2;
v4sum += v4_direct_top[k] * qtmp2;
v5sum += v5_direct_top[k] * qtmp2;
}
}
}
}
// iz=0
iz = 0;
kk = icz+iz;
qk = qgridn[kk];
ek = egridn[kk];
zk = (iz + nz_top)*ny;
for (iy = 1; iy <= jmax; iy++) {
jj = icy+iy;
qkj = qk[jj];
ekj = ek[jj];
zyk = (zk + iy + ny_top)*nx;
for (ix = imin; ix <= imax; ix++) {
ii = icx+ix;
qtmp2 = qkj[ii];
k = zyk + ix + nx_top;
gtmp = g_direct_top[k];
esum += gtmp * qtmp2;
ekj[ii] += gtmp * qtmp;
if (vflag_either && !scalar_pressure_flag) {
v0sum += v0_direct_top[k] * qtmp2;
v1sum += v1_direct_top[k] * qtmp2;
v2sum += v2_direct_top[k] * qtmp2;
v3sum += v3_direct_top[k] * qtmp2;
v4sum += v4_direct_top[k] * qtmp2;
v5sum += v5_direct_top[k] * qtmp2;
}
}
}
// iz=0, iy=0
iz = 0;
kk = icz+iz;
qk = qgridn[kk];
ek = egridn[kk];
zk = (iz + nz_top)*ny;
iy = 0;
jj = icy+iy;
qkj = qk[jj];
ekj = ek[jj];
zyk = (zk + iy + ny_top)*nx;
for (ix = 1; ix <= imax; ix++) {
ii = icx+ix;
qtmp2 = qkj[ii];
k = zyk + ix + nx_top;
gtmp = g_direct_top[k];
esum += gtmp * qtmp2;
ekj[ii] += gtmp * qtmp;
if (vflag_either && !scalar_pressure_flag) {
v0sum += v0_direct_top[k] * qtmp2;
v1sum += v1_direct_top[k] * qtmp2;
v2sum += v2_direct_top[k] * qtmp2;
v3sum += v3_direct_top[k] * qtmp2;
v4sum += v4_direct_top[k] * qtmp2;
v5sum += v5_direct_top[k] * qtmp2;
}
}
// iz=0, iy=0, ix=0
iz = 0;
zk = (iz + nz_top)*ny;
iy = 0;
zyk = (zk + iy + ny_top)*nx;
ix = 0;
ii = icx+ix;
k = zyk + ix + nx_top;
gtmp = g_direct_top[k];
esum += 0.5 * gtmp * qtmp;
egridn[icz][icy][icx] += 0.5 * gtmp * qtmp;
if (vflag_either && !scalar_pressure_flag) {
v0sum += v0_direct_top[k] * qtmp;
v1sum += v1_direct_top[k] * qtmp;
v2sum += v2_direct_top[k] * qtmp;
v3sum += v3_direct_top[k] * qtmp;
v4sum += v4_direct_top[k] * qtmp;
v5sum += v5_direct_top[k] * qtmp;
}
// accumulate per-atom energy/virial
egridn[icz][icy][icx] += esum;
if (vflag_atom && !scalar_pressure_flag) {
v0gridn[icz][icy][icx] += v0sum;
v1gridn[icz][icy][icx] += v1sum;
v2gridn[icz][icy][icx] += v2sum;
v3gridn[icz][icy][icx] += v3sum;
v4gridn[icz][icy][icx] += v4sum;
v5gridn[icz][icy][icx] += v5sum;
}
// accumulate total energy/virial
if (evflag) {
qtmp = qgridn[icz][icy][icx];
if (eflag_global) energy += 2.0 * esum * qtmp;
if (vflag_global && !scalar_pressure_flag) {
virial[0] += 2.0 * v0sum * qtmp;
virial[1] += 2.0 * v1sum * qtmp;
virial[2] += 2.0 * v2sum * qtmp;
virial[3] += 2.0 * v3sum * qtmp;
virial[4] += 2.0 * v4sum * qtmp;
virial[5] += 2.0 * v5sum * qtmp;
}
}
}
}
}
// compute per-atom virial (if requested)
if (vflag_atom)
direct_peratom_top(n);
}
/* ----------------------------------------------------------------------
MSM direct sum procedure for top grid level, solve Poisson's
equation to get per-atom virial, separate method used for performance
reasons
------------------------------------------------------------------------- */
void MSM::direct_peratom_top(int n)
{
double ***qgridn = qgrid[n];
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int icx,icy,icz,ix,iy,iz,zk,zyk,k;
int ii,jj,kk;
int imin,imax,jmin,jmax,kmax;
double qtmp;
int nx_top = betax[n] - alpha[n];
int ny_top = betay[n] - alpha[n];
int nz_top = betaz[n] - alpha[n];
int nx = 2*nx_top + 1;
int ny = 2*ny_top + 1;
// loop over inner grid points
for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
if (domain->zperiodic) {
kmax = nz_msm[n]-1;
} else {
kmax = betaz[n] - icz;
}
for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
if (domain->yperiodic) {
jmin = 0;
jmax = ny_msm[n]-1;
} else {
jmin = alpha[n] - icy;
jmax = betay[n] - icy;
}
for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
if (domain->xperiodic) {
imin = 0;
imax = nx_msm[n]-1;
} else {
imin = alpha[n] - icx;
imax = betax[n] - icx;
}
qtmp = qgridn[icz][icy][icx]; // center grid point
// use hemisphere to avoid double computation of pair-wise
// interactions in direct sum (no computations in -z direction)
for (iz = 1; iz <= kmax; iz++) {
kk = icz+iz;
zk = (iz + nz_top)*ny;
for (iy = jmin; iy <= jmax; iy++) {
jj = icy+iy;
zyk = (zk + iy + ny_top)*nx;
for (ix = imin; ix <= imax; ix++) {
ii = icx+ix;
k = zyk + ix + nx_top;
v0gridn[kk][jj][ii] += v0_direct_top[k] * qtmp;
v1gridn[kk][jj][ii] += v1_direct_top[k] * qtmp;
v2gridn[kk][jj][ii] += v2_direct_top[k] * qtmp;
v3gridn[kk][jj][ii] += v3_direct_top[k] * qtmp;
v4gridn[kk][jj][ii] += v4_direct_top[k] * qtmp;
v5gridn[kk][jj][ii] += v5_direct_top[k] * qtmp;
}
}
}
// iz=0
iz = 0;
kk = icz+iz;
zk = (iz + nz_top)*ny;
for (iy = 1; iy <= jmax; iy++) {
jj = icy+iy;
zyk = (zk + iy + ny_top)*nx;
for (ix = imin; ix <= imax; ix++) {
ii = icx+ix;
k = zyk + ix + nx_top;
v0gridn[kk][jj][ii] += v0_direct_top[k] * qtmp;
v1gridn[kk][jj][ii] += v1_direct_top[k] * qtmp;
v2gridn[kk][jj][ii] += v2_direct_top[k] * qtmp;
v3gridn[kk][jj][ii] += v3_direct_top[k] * qtmp;
v4gridn[kk][jj][ii] += v4_direct_top[k] * qtmp;
v5gridn[kk][jj][ii] += v5_direct_top[k] * qtmp;
}
}
// iz=0, iy=0
iz = 0;
kk = icz+iz;
zk = (iz + nz_top)*ny;
iy = 0;
jj = icy+iy;
zyk = (zk + iy + ny_top)*nx;
for (ix = 1; ix <= imax; ix++) {
ii = icx+ix;
k = zyk + ix + nx_top;
v0gridn[kk][jj][ii] += v0_direct_top[k] * qtmp;
v1gridn[kk][jj][ii] += v1_direct_top[k] * qtmp;
v2gridn[kk][jj][ii] += v2_direct_top[k] * qtmp;
v3gridn[kk][jj][ii] += v3_direct_top[k] * qtmp;
v4gridn[kk][jj][ii] += v4_direct_top[k] * qtmp;
v5gridn[kk][jj][ii] += v5_direct_top[k] * qtmp;
}
// virial is zero for iz=0, iy=0, ix=0
}
}
}
}
/* ----------------------------------------------------------------------
MSM restriction procedure for intermediate grid levels, interpolate
charges from finer grid to coarser grid
------------------------------------------------------------------------- */
void MSM::restriction(int n)
{
//fprintf(screen,"Restricting from level %i to %i\n\n",n,n+1);
const int p = order-1;
double ***qgrid1 = qgrid[n];
double ***qgrid2 = qgrid[n+1];
int k = 0;
int index[p+2];
for (int nu=-p; nu<=p; nu++) {
if (nu%2 == 0 && nu != 0) continue;
phi1d[0][k] = compute_phi(nu*delxinv[n+1]/delxinv[n]);
phi1d[1][k] = compute_phi(nu*delyinv[n+1]/delyinv[n]);
phi1d[2][k] = compute_phi(nu*delzinv[n+1]/delzinv[n]);
index[k] = nu;
k++;
}
int ip,jp,kp,ic,jc,kc,i,j;
int ii,jj,kk;
double phiz,phizy,q2sum;
// zero out charge on coarser grid
memset(&(qgrid2[nzlo_out[n+1]][nylo_out[n+1]][nxlo_out[n+1]]),0,
ngrid[n+1]*sizeof(double));
for (kp = nzlo_in[n+1]; kp <= nzhi_in[n+1]; kp++)
for (jp = nylo_in[n+1]; jp <= nyhi_in[n+1]; jp++)
for (ip = nxlo_in[n+1]; ip <= nxhi_in[n+1]; ip++) {
ic = ip * static_cast<int> (delxinv[n]/delxinv[n+1]);
jc = jp * static_cast<int> (delyinv[n]/delyinv[n+1]);
kc = kp * static_cast<int> (delzinv[n]/delzinv[n+1]);
q2sum = 0.0;
for (k=0; k<=p+1; k++) {
kk = kc+index[k];
if (!domain->zperiodic) {
if (kk < alpha[n]) continue;
if (kk > betaz[n]) break;
}
phiz = phi1d[2][k];
for (j=0; j<=p+1; j++) {
jj = jc+index[j];
if (!domain->yperiodic) {
if (jj < alpha[n]) continue;
if (jj > betay[n]) break;
}
phizy = phi1d[1][j]*phiz;
for (i=0; i<=p+1; i++) {
ii = ic+index[i];
if (!domain->xperiodic) {
if (ii < alpha[n]) continue;
if (ii > betax[n]) break;
}
q2sum += qgrid1[kk][jj][ii] *
phi1d[0][i]*phizy;
}
}
}
qgrid2[kp][jp][ip] += q2sum;
}
}
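Because the grids are nested 2:1, a coarse-grid point coincides with every even fine-grid point; only nu = 0 and the odd offsets in [-p,p] contribute distinct weights, giving the p+2 stencil entries allocated above. A quick check of that index set:
#include <cstdio>
int main() {
  const int p = 9;                 // order 10
  int count = 0;
  for (int nu = -p; nu <= p; nu++) {
    if (nu%2 == 0 && nu != 0) continue;
    printf("%d ",nu);              // -9 -7 -5 -3 -1 0 1 3 5 7 9
    count++;
  }
  printf("\ncount = %d (p+2 = %d)\n",count,p+2);
  return 0;
}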
/* ----------------------------------------------------------------------
MSM prolongation procedure for intermediate grid levels, interpolate
per-atom energy/virial from coarser grid to finer grid
------------------------------------------------------------------------- */
void MSM::prolongation(int n)
{
//fprintf(screen,"Prolongating from level %i to %i\n\n",n+1,n);
const int p = order-1;
double ***egrid1 = egrid[n];
double ***egrid2 = egrid[n+1];
double ***v0grid1 = v0grid[n];
double ***v0grid2 = v0grid[n+1];
double ***v1grid1 = v1grid[n];
double ***v1grid2 = v1grid[n+1];
double ***v2grid1 = v2grid[n];
double ***v2grid2 = v2grid[n+1];
double ***v3grid1 = v3grid[n];
double ***v3grid2 = v3grid[n+1];
double ***v4grid1 = v4grid[n];
double ***v4grid2 = v4grid[n+1];
double ***v5grid1 = v5grid[n];
double ***v5grid2 = v5grid[n+1];
int k = 0;
int index[p+2];
for (int nu=-p; nu<=p; nu++) {
if (nu%2 == 0 && nu != 0) continue;
phi1d[0][k] = compute_phi(nu*delxinv[n+1]/delxinv[n]);
phi1d[1][k] = compute_phi(nu*delyinv[n+1]/delyinv[n]);
phi1d[2][k] = compute_phi(nu*delzinv[n+1]/delzinv[n]);
index[k] = nu;
k++;
}
int ip,jp,kp,ic,jc,kc,i,j;
int ii,jj,kk;
double phiz,phizy,phi3d;
double etmp2,v0tmp2,v1tmp2,v2tmp2,v3tmp2,v4tmp2,v5tmp2;
for (kp = nzlo_in[n+1]; kp <= nzhi_in[n+1]; kp++)
for (jp = nylo_in[n+1]; jp <= nyhi_in[n+1]; jp++)
for (ip = nxlo_in[n+1]; ip <= nxhi_in[n+1]; ip++) {
ic = ip * static_cast<int> (delxinv[n]/delxinv[n+1]);
jc = jp * static_cast<int> (delyinv[n]/delyinv[n+1]);
kc = kp * static_cast<int> (delzinv[n]/delzinv[n+1]);
etmp2 = egrid2[kp][jp][ip];
if (vflag_atom) {
v0tmp2 = v0grid2[kp][jp][ip];
v1tmp2 = v1grid2[kp][jp][ip];
v2tmp2 = v2grid2[kp][jp][ip];
v3tmp2 = v3grid2[kp][jp][ip];
v4tmp2 = v4grid2[kp][jp][ip];
v5tmp2 = v5grid2[kp][jp][ip];
}
for (k=0; k<=p+1; k++) {
kk = kc+index[k];
if (!domain->zperiodic) {
if (kk < alpha[n]) continue;
if (kk > betaz[n]) break;
}
phiz = phi1d[2][k];
for (j=0; j<=p+1; j++) {
jj = jc+index[j];
if (!domain->yperiodic) {
if (jj < alpha[n]) continue;
if (jj > betay[n]) break;
}
phizy = phi1d[1][j]*phiz;
for (i=0; i<=p+1; i++) {
ii = ic+index[i];
if (!domain->xperiodic) {
if (ii < alpha[n]) continue;
if (ii > betax[n]) break;
}
phi3d = phi1d[0][i]*phizy;
egrid1[kk][jj][ii] += etmp2 * phi3d;
if (vflag_atom) {
v0grid1[kk][jj][ii] += v0tmp2 * phi3d;
v1grid1[kk][jj][ii] += v1tmp2 * phi3d;
v2grid1[kk][jj][ii] += v2tmp2 * phi3d;
v3grid1[kk][jj][ii] += v3tmp2 * phi3d;
v4grid1[kk][jj][ii] += v4tmp2 * phi3d;
v5grid1[kk][jj][ii] += v5tmp2 * phi3d;
}
}
}
}
}
}
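// NOTE: prolongation rebuilds exactly the same 1d weights (phi1d, index) as
// restriction(), so the two grid-transfer operators are transposes of each
// other: restriction gathers q_coarse[J] += w_JI * q_fine[I] while the loops
// above scatter e_fine[I] += w_JI * e_coarse[J]. Keeping the prolongation
// equal to the transpose of the restriction is what makes the energy carried
// back down consistent with the charge that was pushed up the hierarchy.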
/* ----------------------------------------------------------------------
 Use MPI_Allreduce to fill ghost grid values; for coarse grids this may
 be cheaper than using nearest-neighbor communication (commgrid). Right
 now this only works for periodic boundary conditions
------------------------------------------------------------------------- */
void MSM::grid_swap_forward(int n, double*** &gridn)
{
double ***gridn_tmp;
memory->create(gridn_tmp,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_tmp");
double ***gridn_all;
memory->create(gridn_all,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_all");
int ngrid_in = nx_msm[n] * ny_msm[n] * nz_msm[n];
memset(&(gridn_tmp[0][0][0]),0,ngrid_in*sizeof(double));
memset(&(gridn_all[0][0][0]),0,ngrid_in*sizeof(double));
// copy inner grid cell values from gridn into gridn_tmp
int icx,icy,icz;
for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++)
for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++)
for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++)
gridn_tmp[icz][icy][icx] = gridn[icz][icy][icx];
MPI_Allreduce(&(gridn_tmp[0][0][0]),
&(gridn_all[0][0][0]),
ngrid_in,MPI_DOUBLE,MPI_SUM,world_levels[n]);
// bitmask for PBCs (only works when grid dimensions are powers of 2)
int PBCx,PBCy,PBCz;
PBCx = nx_msm[n]-1;
PBCy = ny_msm[n]-1;
PBCz = nz_msm[n]-1;
// copy from gridn_all into gridn to fill ghost grid cell values
for (icz = nzlo_out[n]; icz <= nzhi_out[n]; icz++)
for (icy = nylo_out[n]; icy <= nyhi_out[n]; icy++)
for (icx = nxlo_out[n]; icx <= nxhi_out[n]; icx++)
gridn[icz][icy][icx] = gridn_all[icz&PBCz][icy&PBCy][icx&PBCx];
memory->destroy(gridn_tmp);
memory->destroy(gridn_all);
}
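// A minimal standalone sketch (hypothetical helper, not called anywhere) of
// the bitmask wrap used by both grid_swap routines: when a grid dimension n
// is a power of two, i & (n-1) equals the non-negative remainder of i mod n,
// including for the negative ghost indices produced by nzlo_out etc., via
// two's complement wraparound. E.g. with n = 8: wrap_pow2(9,8) == 1 and
// wrap_pow2(-1,8) == 7.
static inline int wrap_pow2(int i, int n)
{
  return i & (n - 1);
}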
/* ----------------------------------------------------------------------
 Use MPI_Allreduce to get contribution from ghost grid cells; for coarse
 grids this may be cheaper than using nearest-neighbor communication
 (commgrid). Right now this only works for periodic boundary conditions
------------------------------------------------------------------------- */
void MSM::grid_swap_reverse(int n, double*** &gridn)
{
double ***gridn_tmp;
memory->create(gridn_tmp,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_tmp");
double ***gridn_all;
memory->create(gridn_all,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_all");
int ngrid_in = nx_msm[n] * ny_msm[n] * nz_msm[n];
memset(&(gridn_tmp[0][0][0]),0,ngrid_in*sizeof(double));
memset(&(gridn_all[0][0][0]),0,ngrid_in*sizeof(double));
// bitmask for PBCs (only works for power of 2 numbers)
int icx,icy,icz;
int PBCx,PBCy,PBCz;
PBCx = nx_msm[n]-1;
PBCy = ny_msm[n]-1;
PBCz = nz_msm[n]-1;
// copy ghost grid cell values from gridn into inner portion of gridn_tmp
for (icz = nzlo_out[n]; icz <= nzhi_out[n]; icz++)
for (icy = nylo_out[n]; icy <= nyhi_out[n]; icy++)
for (icx = nxlo_out[n]; icx <= nxhi_out[n]; icx++)
gridn_tmp[icz&PBCz][icy&PBCy][icx&PBCx] += gridn[icz][icy][icx];
MPI_Allreduce(&(gridn_tmp[0][0][0]),
&(gridn_all[0][0][0]),
ngrid_in,MPI_DOUBLE,MPI_SUM,world_levels[n]);
// copy inner grid cell values from gridn_all into gridn
for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++)
for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++)
for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++)
gridn[icz][icy][icx] = gridn_all[icz][icy][icx];
memory->destroy(gridn_tmp);
memory->destroy(gridn_all);
}
/* ----------------------------------------------------------------------
pack own values to buf to send to another proc (used by commgrid)
------------------------------------------------------------------------- */
void MSM::pack_forward(int flag, double *buf, int nlist, int *list)
{
int n = current_level;
double ***qgridn = qgrid[n];
double ***egridn = egrid[n];
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int k = 0;
if (flag == FORWARD_RHO) {
double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
buf[k++] = qsrc[list[i]];
}
} else if (flag == FORWARD_AD) {
double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
      buf[k++] = src[list[i]];
} else if (flag == FORWARD_AD_PERATOM) {
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
buf[k++] = v0src[list[i]];
buf[k++] = v1src[list[i]];
buf[k++] = v2src[list[i]];
buf[k++] = v3src[list[i]];
buf[k++] = v4src[list[i]];
buf[k++] = v5src[list[i]];
}
}
}
/* ----------------------------------------------------------------------
unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */
void MSM::unpack_forward(int flag, double *buf, int nlist, int *list)
{
int n = current_level;
double ***qgridn = qgrid[n];
double ***egridn = egrid[n];
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int k = 0;
if (flag == FORWARD_RHO) {
double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
dest[list[i]] = buf[k++];
}
} else if (flag == FORWARD_AD) {
double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
dest[list[i]] = buf[k++];
} else if (flag == FORWARD_AD_PERATOM) {
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
v0src[list[i]] = buf[k++];
v1src[list[i]] = buf[k++];
v2src[list[i]] = buf[k++];
v3src[list[i]] = buf[k++];
v4src[list[i]] = buf[k++];
v5src[list[i]] = buf[k++];
}
}
}
/* ----------------------------------------------------------------------
pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */
void MSM::pack_reverse(int flag, double *buf, int nlist, int *list)
{
int n = current_level;
double ***qgridn = qgrid[n];
double ***egridn = egrid[n];
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int k = 0;
if (flag == REVERSE_RHO) {
double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
buf[k++] = qsrc[list[i]];
}
} else if (flag == REVERSE_AD) {
double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
      buf[k++] = src[list[i]];
} else if (flag == REVERSE_AD_PERATOM) {
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
buf[k++] = v0src[list[i]];
buf[k++] = v1src[list[i]];
buf[k++] = v2src[list[i]];
buf[k++] = v3src[list[i]];
buf[k++] = v4src[list[i]];
buf[k++] = v5src[list[i]];
}
}
}
/* ----------------------------------------------------------------------
unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */
void MSM::unpack_reverse(int flag, double *buf, int nlist, int *list)
{
int n = current_level;
double ***qgridn = qgrid[n];
double ***egridn = egrid[n];
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int k = 0;
if (flag == REVERSE_RHO) {
double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
dest[list[i]] += buf[k++];
}
} else if (flag == REVERSE_AD) {
double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
dest[list[i]] += buf[k++];
} else if (flag == REVERSE_AD_PERATOM) {
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
v0src[list[i]] += buf[k++];
v1src[list[i]] += buf[k++];
v2src[list[i]] += buf[k++];
v3src[list[i]] += buf[k++];
v4src[list[i]] += buf[k++];
v5src[list[i]] += buf[k++];
}
}
}
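// NOTE: each pack_*/unpack_* pair above must agree on buffer layout. The
// RHO and AD flags move one double per listed grid cell; the *_AD_PERATOM
// flags move six consecutive doubles per cell, so for nlist cells the
// buffer reads
//   buf = [ v0(c0),v1(c0),...,v5(c0), v0(c1),...,v5(c1), ... ]
// with the index k advancing identically on the packing and unpacking side.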
/* ----------------------------------------------------------------------
interpolate from grid to get force on my particles
------------------------------------------------------------------------- */
void MSM::fieldforce()
{
//fprintf(screen,"MSM interpolation\n\n");
double ***egridn = egrid[0];
int i,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz;
double phi_x,phi_y,phi_z;
double dphi_x,dphi_y,dphi_z;
double ekx,eky,ekz,etmp;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
compute_phis_and_dphis(dx,dy,dz);
ekx = eky = ekz = 0.0;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
phi_z = phi1d[2][n];
dphi_z = dphi1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
phi_y = phi1d[1][m];
dphi_y = dphi1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
phi_x = phi1d[0][l];
dphi_x = dphi1d[0][l];
etmp = egridn[mz][my][mx];
ekx += dphi_x*phi_y*phi_z*etmp;
eky += phi_x*dphi_y*phi_z*etmp;
ekz += phi_x*phi_y*dphi_z*etmp;
}
}
}
ekx *= delxinv[0];
eky *= delyinv[0];
ekz *= delzinv[0];
// effectively divide by length for a triclinic system
if (triclinic) {
double tmp[3];
tmp[0] = ekx;
tmp[1] = eky;
tmp[2] = ekz;
x2lamdaT(&tmp[0],&tmp[0]);
ekx = tmp[0];
eky = tmp[1];
ekz = tmp[2];
}
// convert E-field to force
const double qfactor = qqrd2e*scale*q[i];
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
f[i][2] += qfactor*ekz;
}
}
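// NOTE: the loop above differentiates the interpolant instead of finite-
// differencing the grid. For the tensor-product weight
//   w(x,y,z) = phi((x - xl)/hx) * phi((y - ym)/hy) * phi((z - zn)/hz)
// the x-derivative replaces one phi factor by dphi and brings down a
// chain-rule factor 1/hx, which appears as the trailing ekx *= delxinv[0]
// (and likewise for y and z). This is why compute_phis_and_dphis() fills
// both phi1d and dphi1d.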
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial
------------------------------------------------------------------------- */
void MSM::fieldforce_peratom()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz,x0,y0,z0;
double u,v0,v1,v2,v3,v4,v5;
double ***egridn = egrid[0];
double ***v0gridn = v0grid[0];
double ***v1gridn = v1grid[0];
double ***v2gridn = v2grid[0];
double ***v3gridn = v3grid[0];
double ***v4gridn = v4grid[0];
double ***v5gridn = v5grid[0];
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
double *q = atom->q;
double **x = atom->x;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
compute_phis_and_dphis(dx,dy,dz);
u = v0 = v1 = v2 = v3 = v4 = v5 = 0.0;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = phi1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*phi1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*phi1d[0][l];
if (eflag_atom) u += x0*egridn[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0gridn[mz][my][mx];
v1 += x0*v1gridn[mz][my][mx];
v2 += x0*v2gridn[mz][my][mx];
v3 += x0*v3gridn[mz][my][mx];
v4 += x0*v4gridn[mz][my][mx];
v5 += x0*v5gridn[mz][my][mx];
}
}
}
}
if (eflag_atom) eatom[i] += q[i]*u;
if (vflag_atom) {
vatom[i][0] += q[i]*v0;
vatom[i][1] += q[i]*v1;
vatom[i][2] += q[i]*v2;
vatom[i][3] += q[i]*v3;
vatom[i][4] += q[i]*v4;
vatom[i][5] += q[i]*v5;
}
}
}
/* ----------------------------------------------------------------------
charge assignment into phi1d (interpolation coefficients)
------------------------------------------------------------------------- */
void MSM::compute_phis(const double &dx, const double &dy,
const double &dz)
{
double delx,dely,delz;
for (int nu = nlower; nu <= nupper; nu++) {
delx = dx + double(nu);
dely = dy + double(nu);
delz = dz + double(nu);
phi1d[0][nu] = compute_phi(delx);
phi1d[1][nu] = compute_phi(dely);
phi1d[2][nu] = compute_phi(delz);
}
}
/* ----------------------------------------------------------------------
charge assignment into phi1d and dphi1d (interpolation coefficients)
------------------------------------------------------------------------- */
void MSM::compute_phis_and_dphis(const double &dx, const double &dy,
const double &dz)
{
double delx,dely,delz;
for (int nu = nlower; nu <= nupper; nu++) {
delx = dx + double(nu);
dely = dy + double(nu);
delz = dz + double(nu);
phi1d[0][nu] = compute_phi(delx);
phi1d[1][nu] = compute_phi(dely);
phi1d[2][nu] = compute_phi(delz);
dphi1d[0][nu] = compute_dphi(delx);
dphi1d[1][nu] = compute_dphi(dely);
dphi1d[2][nu] = compute_dphi(delz);
}
}
/* ----------------------------------------------------------------------
compute phi using interpolating polynomial
see Eq 7 from Parallel Computing 35 (2009) 164–177
and Hardy's thesis
------------------------------------------------------------------------- */
inline double MSM::compute_phi(const double &xi)
{
double phi = 0.0;
double abs_xi = fabs(xi);
double xi2 = xi*xi;
if (order == 4) {
if (abs_xi <= 1) {
phi = (1.0 - abs_xi)*(1.0 + abs_xi - 1.5*xi2);
} else if (abs_xi <= 2) {
phi = -0.5*(abs_xi - 1.0)*(2.0 - abs_xi)*(2.0 - abs_xi);
} else {
phi = 0.0;
}
} else if (order == 6) {
if (abs_xi <= 1) {
phi = (1.0 - xi2)*(2.0 - abs_xi)*(6.0 + 3.0*abs_xi -
5.0*xi2)/12.0;
} else if (abs_xi <= 2) {
phi = -(abs_xi - 1.0)*(2.0 - abs_xi)*(3.0 - abs_xi)*
(4.0 + 9.0*abs_xi - 5.0*xi2)/24.0;
} else if (abs_xi <= 3) {
phi = (abs_xi - 1.0)*(abs_xi - 2.0)*(3.0 - abs_xi)*
(3.0 - abs_xi)*(4.0 - abs_xi)/24.0;
} else {
phi = 0.0;
}
} else if (order == 8) {
if (abs_xi <= 1) {
phi = (1.0 - xi2)*(4.0 - xi2)*(3.0 - abs_xi)*
(12.0 + 4.0*abs_xi - 7.0*xi2)/144.0;
} else if (abs_xi <= 2) {
phi = -(xi2 - 1.0)*(2.0 - abs_xi)*(3.0 - abs_xi)*
(4.0 - abs_xi)*(10.0 + 12.0*abs_xi - 7.0*xi2)/240.0;
} else if (abs_xi <= 3) {
phi = (abs_xi - 1.0)*(abs_xi - 2.0)*(3.0 - abs_xi)*(4.0 - abs_xi)*
(5.0 - abs_xi)*(6.0 + 20.0*abs_xi - 7.0*xi2)/720.0;
} else if (abs_xi <= 4) {
phi = -(abs_xi - 1.0)*(abs_xi - 2.0)*(abs_xi - 3.0)*(4.0 - abs_xi)*
(4.0 - abs_xi)*(5.0 - abs_xi)*(6.0 - abs_xi)/720.0;
} else {
phi = 0.0;
}
} else if (order == 10) {
if (abs_xi <= 1) {
phi = (1.0 - xi2)*(4.0 - xi2)*(9.0 - xi2)*
(4.0 - abs_xi)*(20.0 + 5.0*abs_xi - 9.0*xi2)/2880.0;
} else if (abs_xi <= 2) {
phi = -(xi2 - 1.0)*(4.0 - xi2)*(3.0 - abs_xi)*(4.0 - abs_xi)*
(5.0 - abs_xi)*(6.0 + 5.0*abs_xi - 3.0*xi2)/1440.0;
} else if (abs_xi <= 3) {
phi = (xi2 - 1.0)*(abs_xi - 2.0)*(3.0 - abs_xi)*(4.0 - abs_xi)*
(5.0 - abs_xi)*(6.0 - abs_xi)*(14.0 + 25.0*abs_xi - 9.0*xi2)/10080.0;
} else if (abs_xi <= 4) {
phi = -(abs_xi - 1.0)*(abs_xi - 2.0)*(abs_xi - 3.0)*(4.0 - abs_xi)*
(5.0 - abs_xi)*(6.0 - abs_xi)*(7.0 - abs_xi)*
(8.0 + 35.0*abs_xi - 9.0*xi2)/40320.0;
} else if (abs_xi <= 5) {
phi = (abs_xi - 1.0)*(abs_xi - 2.0)*(abs_xi - 3.0)*
(abs_xi - 4.0)*(5.0 - abs_xi)*(5.0 - abs_xi)*(6.0 - abs_xi)*
(7.0 - abs_xi)*(8.0 - abs_xi)/40320.0;
} else {
phi = 0.0;
}
}
return phi;
}
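// NOTE: each order gives a piecewise polynomial with compact support
// |xi| <= order/2 that is C1 at the knots, so interpolated forces stay
// continuous as a particle crosses a grid plane. A quick check for
// order = 4 at xi = 1: both branches give phi(1) = 0, and differentiating
//   (1 - x)*(1 + x - 1.5*x^2)   gives phi'(1-) = -0.5
//   -0.5*(x - 1)*(2 - x)^2      gives phi'(1+) = -0.5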
/* ----------------------------------------------------------------------
compute the derivative of phi
phi is an interpolating polynomial
see Eq 7 from Parallel Computing 35 (2009) 164–177
and Hardy's thesis
------------------------------------------------------------------------- */
inline double MSM::compute_dphi(const double &xi)
{
double dphi = 0.0;
double abs_xi = fabs(xi);
if (order == 4) {
double xi2 = xi*xi;
double abs_xi2 = abs_xi*abs_xi;
if (abs_xi == 0.0) {
dphi = 0.0;
} else if (abs_xi <= 1) {
dphi = xi*(3*xi2 + 6*abs_xi2 - 10*abs_xi)/2.0/abs_xi;
} else if (abs_xi <= 2) {
dphi = xi*(2 - abs_xi)*(3*abs_xi - 4)/2.0/abs_xi;
} else {
dphi = 0.0;
}
} else if (order == 6) {
double xi2 = xi*xi;
double xi4 = xi2*xi2;
double abs_xi2 = abs_xi*abs_xi;
double abs_xi3 = abs_xi2*abs_xi;
double abs_xi4 = abs_xi2*abs_xi2;
if (abs_xi == 0.0) {
dphi = 0.0;
} else if (abs_xi <= 1) {
dphi = xi*(46*xi2*abs_xi - 20*xi2*abs_xi2 - 5*xi4 + 5*xi2 +
6*abs_xi3 + 10*abs_xi2 - 50*abs_xi)/12.0/abs_xi;
} else if (abs_xi <= 2) {
dphi = xi*(15*xi2*abs_xi2 - 60*xi2*abs_xi + 55*xi2 +
10*abs_xi4 - 96*abs_xi3 + 260*abs_xi2 - 210*abs_xi + 10)/
24.0/abs_xi;
} else if (abs_xi <= 3) {
dphi = -xi*(abs_xi - 3)*(5*abs_xi3 - 37*abs_xi2 +
84*abs_xi - 58)/24.0/abs_xi;
} else {
dphi = 0.0;
}
} else if (order == 8) {
double xi2 = xi*xi;
double xi4 = xi2*xi2;
double xi6 = xi4*xi2;
double abs_xi3 = xi2*abs_xi;
double abs_xi5 = xi4*abs_xi;
if (abs_xi == 0.0) {
dphi = 0.0;
} else if (abs_xi <= 1) {
dphi = xi*(49*xi6 - 175*xi4 + 84*xi2 - 150*abs_xi5 +
644*abs_xi3 - 560*abs_xi)/144.0/abs_xi;
} else if (abs_xi <= 2) {
dphi = xi*(-49*xi6 - 1365*xi4 + 756*xi2 +
450*abs_xi5 + 1260*abs_xi3 - 1260*abs_xi + 28)/240.0/abs_xi;
} else if (abs_xi <= 3) {
dphi = xi*(49*xi6 + 4445*xi4 + 17724*xi2 -
750*abs_xi5 - 12740*abs_xi3 - 9940*abs_xi + 756)/720.0/abs_xi;
} else if (abs_xi <= 4) {
dphi = -xi*(abs_xi - 4)*(7*abs_xi5 - 122*xi4 +
807*abs_xi3 - 2512*xi2 + 3644*abs_xi - 1944)/720.0/abs_xi;
} else {
dphi = 0.0;
}
} else if (order == 10) {
double xi2 = xi*xi;
double xi4 = xi2*xi2;
double xi6 = xi4*xi2;
double xi8 = xi6*xi2;
double abs_xi2 = abs_xi*abs_xi;
double abs_xi3 = abs_xi2*abs_xi;
double abs_xi4 = abs_xi2*abs_xi2;
double abs_xi5 = abs_xi4*abs_xi;
double abs_xi6 = abs_xi5*abs_xi;
double abs_xi7 = abs_xi6*abs_xi;
double abs_xi8 = abs_xi7*abs_xi;
if (abs_xi == 0.0) {
dphi = 0.0;
} else if (abs_xi <= 1) {
dphi = xi*(298*xi6*abs_xi - 72*xi6*abs_xi2 - 9*xi8 +
126*xi6 + 30*xi4*abs_xi3 + 756*xi4*abs_xi2 - 3644*xi4*abs_xi -
441*xi4 - 280*xi2*abs_xi3 - 1764*xi2*abs_xi2 + 12026*xi2*abs_xi +
324*xi2 + 490*abs_xi3 + 648*abs_xi2 - 10792*abs_xi)/2880.0/abs_xi;
} else if (abs_xi <= 2) {
dphi = xi*(9*xi6*abs_xi2 - 72*xi6*abs_xi + 141*xi6 +
18*xi4*abs_xi4 - 236*xi4*abs_xi3 + 963*xi4*abs_xi2 -
1046*xi4*abs_xi - 687*xi4 - 20*xi2*abs_xi5 + 156*xi2*abs_xi4 +
168*xi2*abs_xi3 - 3522*xi2*abs_xi2 + 6382*xi2*abs_xi + 474*xi2 +
50*abs_xi5 - 516*abs_xi4 + 1262*abs_xi3 + 1596*abs_xi2 -
6344*abs_xi + 72)/1440.0/abs_xi;
} else if (abs_xi <= 3) {
dphi = xi*(720*xi4*abs_xi3 - 45*xi4*abs_xi4 - 4185*xi4*abs_xi2 +
10440*xi4*abs_xi - 9396*xi4 - 36*xi2*abs_xi6 + 870*xi2*abs_xi5 -
7965*xi2*abs_xi4 + 34540*xi2*abs_xi3 - 70389*xi2*abs_xi2 +
51440*xi2*abs_xi + 6012*xi2 + 50*abs_xi7 - 954*abs_xi6 +
6680*abs_xi5 - 19440*abs_xi4 + 11140*abs_xi3 + 49014*abs_xi2 -
69080*abs_xi + 3384)/10080.0/abs_xi;
} else if (abs_xi <= 4) {
dphi = xi*(63*xi2*abs_xi6 - 1512*xi2*abs_xi5 + 14490*xi2*abs_xi4 -
70560*xi2*abs_xi3 + 182763*xi2*abs_xi2 - 236376*xi2*abs_xi +
117612*xi2 + 18*abs_xi8 - 784*abs_xi7 + 12600*abs_xi6 -
101556*abs_xi5 + 451962*abs_xi4 - 1121316*abs_xi3 +
1451628*abs_xi2 - 795368*abs_xi + 71856)/40320.0/abs_xi;
} else if (abs_xi <= 5) {
dphi = -xi*(abs_xi - 5)*(9*abs_xi7 - 283*abs_xi6 +
3667*abs_xi5 - 25261*abs_xi4 + 99340*abs_xi3 -
221416*abs_xi2 + 256552*abs_xi - 117648)/40320.0/abs_xi;
} else {
dphi = 0.0;
}
}
return dphi;
}
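// NOTE: phi is even, so dphi is odd; the xi/abs_xi factor in every branch
// supplies the sign for negative arguments while the polynomials are written
// in abs_xi, and the abs_xi == 0 special case avoids the resulting 0/0.
// Worked check for order = 4 on 0 <= x <= 1, where phi(x) expands to
// 1 - 2.5*x^2 + 1.5*x^3: the analytic dphi(0.5) = -5*(0.5) + 4.5*(0.25)
// = -1.375, and the branch above gives 0.5*(0.75 + 1.5 - 5.0)/2.0/0.5
// = -1.375 as well; dphi(-0.5) = +1.375 by oddness.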
/* ----------------------------------------------------------------------
Compute direct interaction (energy) weights for intermediate grid levels
------------------------------------------------------------------------- */
void MSM::get_g_direct()
{
if (g_direct) memory->destroy(g_direct);
memory->create(g_direct,levels,nmax_direct,"msm:g_direct");
double a = cutoff;
int n,zk,zyk,k,ix,iy,iz;
double xdiff,ydiff,zdiff;
double dx,dy,dz;
double tmp[3];
double rsq,rho,two_n;
two_n = 1.0;
int nx = nxhi_direct - nxlo_direct + 1;
int ny = nyhi_direct - nylo_direct + 1;
for (n=0; n<levels; n++) {
for (iz = nzlo_direct; iz <= nzhi_direct; iz++) {
zdiff = iz/delzinv[n];
zk = (iz + nzhi_direct)*ny;
for (iy = nylo_direct; iy <= nyhi_direct; iy++) {
ydiff = iy/delyinv[n];
zyk = (zk + iy + nyhi_direct)*nx;
for (ix = nxlo_direct; ix <= nxhi_direct; ix++) {
xdiff = ix/delxinv[n];
// transform grid point pair-wise distance from lamda (0-1) coords to box coords
if (triclinic) {
tmp[0] = xdiff;
tmp[1] = ydiff;
tmp[2] = zdiff;
lamda2xvector(&tmp[0],&tmp[0]);
dx = tmp[0];
dy = tmp[1];
dz = tmp[2];
} else {
dx = xdiff;
dy = ydiff;
dz = zdiff;
}
rsq = dx*dx + dy*dy + dz*dz;
rho = sqrt(rsq)/(two_n*a);
k = zyk + ix + nxhi_direct;
g_direct[n][k] = gamma(rho)/(two_n*a) - gamma(rho/2.0)/(2.0*two_n*a);
}
}
}
two_n *= 2.0;
}
}
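// NOTE: the weight above is the two-level split of the softened kernel.
// Writing G_n(r) = gamma(r/(2^n * a))/(2^n * a), the assignment is
//   g_direct[n] = G_n(r) - G_(n+1)(r)
// (the gamma(rho/2.0)/(2.0*two_n*a) term is G_(n+1), since rho/2 =
// r/(2^(n+1)*a)). Summed over levels the series telescopes to
// G_0(r) - G_levels(r), so the hierarchy reproduces the smoothed kernel
// gamma(r/a)/a up to a coarsest-level remainder; get_g_direct_top() below
// keeps that full G_n term for the nonperiodic top grid.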
/* ----------------------------------------------------------------------
Compute direct interaction (virial) weights for intermediate grid levels
------------------------------------------------------------------------- */
void MSM::get_virial_direct()
{
if (v0_direct) memory->destroy(v0_direct);
memory->create(v0_direct,levels,nmax_direct,"msm:v0_direct");
if (v1_direct) memory->destroy(v1_direct);
memory->create(v1_direct,levels,nmax_direct,"msm:v1_direct");
if (v2_direct) memory->destroy(v2_direct);
memory->create(v2_direct,levels,nmax_direct,"msm:v2_direct");
if (v3_direct) memory->destroy(v3_direct);
memory->create(v3_direct,levels,nmax_direct,"msm:v3_direct");
if (v4_direct) memory->destroy(v4_direct);
memory->create(v4_direct,levels,nmax_direct,"msm:v4_direct");
if (v5_direct) memory->destroy(v5_direct);
memory->create(v5_direct,levels,nmax_direct,"msm:v5_direct");
double a = cutoff;
double a_sq = cutoff*cutoff;
int n,zk,zyk,k,ix,iy,iz;
double xdiff,ydiff,zdiff;
double dx,dy,dz;
double tmp[3];
double rsq,r,rho,two_n,two_nsq,dg;
two_n = 1.0;
int nx = nxhi_direct - nxlo_direct + 1;
int ny = nyhi_direct - nylo_direct + 1;
for (n=0; n<levels; n++) {
two_nsq = two_n * two_n;
for (iz = nzlo_direct; iz <= nzhi_direct; iz++) {
zdiff = iz/delzinv[n];
zk = (iz + nzhi_direct)*ny;
for (iy = nylo_direct; iy <= nyhi_direct; iy++) {
ydiff = iy/delyinv[n];
zyk = (zk + iy + nyhi_direct)*nx;
for (ix = nxlo_direct; ix <= nxhi_direct; ix++) {
xdiff = ix/delxinv[n];
if (triclinic) {
tmp[0] = xdiff;
tmp[1] = ydiff;
tmp[2] = zdiff;
lamda2xvector(&tmp[0],&tmp[0]);
dx = tmp[0];
dy = tmp[1];
dz = tmp[2];
} else {
dx = xdiff;
dy = ydiff;
dz = zdiff;
}
rsq = dx*dx + dy*dy + dz*dz;
k = zyk + ix + nxhi_direct;
r = sqrt(rsq);
if (r == 0) {
v0_direct[n][k] = 0.0;
v1_direct[n][k] = 0.0;
v2_direct[n][k] = 0.0;
v3_direct[n][k] = 0.0;
v4_direct[n][k] = 0.0;
v5_direct[n][k] = 0.0;
} else {
rho = r/(two_n*a);
dg = -(dgamma(rho)/(two_nsq*a_sq) -
dgamma(rho/2.0)/(4.0*two_nsq*a_sq))/r;
v0_direct[n][k] = dg * dx * dx;
v1_direct[n][k] = dg * dy * dy;
v2_direct[n][k] = dg * dz * dz;
v3_direct[n][k] = dg * dx * dy;
v4_direct[n][k] = dg * dx * dz;
v5_direct[n][k] = dg * dy * dz;
}
}
}
}
two_n *= 2.0;
}
}
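// NOTE: these weights are the derivatives of the energy weights in
// get_g_direct(). With G_n(r) = gamma(r/(2^n * a))/(2^n * a), the chain
// rule gives dG_n/dr = dgamma(r/(2^n * a))/(2^n * a)^2, which is the
// two_nsq*a_sq denominator above, and dg = -(1/r)*d(G_n - G_(n+1))/dr
// converts the radial derivative into the pairwise tensor
// v_ab = dg * d_a * d_b for ab in {xx,yy,zz,xy,xz,yz}. The r == 0 self
// term contributes no virial, hence the explicit zero branch.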
/* ----------------------------------------------------------------------
Compute direct interaction (energy) weights for top grid level
(nonperiodic systems only)
------------------------------------------------------------------------- */
void MSM::get_g_direct_top(int n)
{
int nx_top = betax[n] - alpha[n];
int ny_top = betay[n] - alpha[n];
int nz_top = betaz[n] - alpha[n];
int nx = 2*nx_top + 1;
int ny = 2*ny_top + 1;
int nz = 2*nz_top + 1;
  int nmax_top = 8*(nx+1)*(ny+1)*(nz+1);
if (g_direct_top) memory->destroy(g_direct_top);
memory->create(g_direct_top,nmax_top,"msm:g_direct_top");
double a = cutoff;
int zk,zyk,k,ix,iy,iz;
double xdiff,ydiff,zdiff;
double dx,dy,dz;
double tmp[3];
double rsq,rho,two_n;
two_n = pow(2.0,n);
for (iz = -nz_top; iz <= nz_top; iz++) {
zdiff = iz/delzinv[n];
zk = (iz + nz_top)*ny;
for (iy = -ny_top; iy <= ny_top; iy++) {
ydiff = iy/delyinv[n];
zyk = (zk + iy + ny_top)*nx;
for (ix = -nx_top; ix <= nx_top; ix++) {
xdiff = ix/delxinv[n];
if (triclinic) {
tmp[0] = xdiff;
tmp[1] = ydiff;
tmp[2] = zdiff;
lamda2xvector(&tmp[0],&tmp[0]);
dx = tmp[0];
dy = tmp[1];
dz = tmp[2];
} else {
dx = xdiff;
dy = ydiff;
dz = zdiff;
}
rsq = dx*dx + dy*dy + dz*dz;
rho = sqrt(rsq)/(two_n*a);
k = zyk + ix + nx_top;
g_direct_top[k] = gamma(rho)/(two_n*a);
}
}
}
}
/* ----------------------------------------------------------------------
Compute direct interaction (virial) weights for top grid level
(nonperiodic systems only)
------------------------------------------------------------------------- */
void MSM::get_virial_direct_top(int n)
{
int nx_top = betax[n] - alpha[n];
int ny_top = betay[n] - alpha[n];
int nz_top = betaz[n] - alpha[n];
int nx = 2*nx_top + 1;
int ny = 2*ny_top + 1;
int nz = 2*nz_top + 1;
  int nmax_top = 8*(nx+1)*(ny+1)*(nz+1);
if (v0_direct_top) memory->destroy(v0_direct_top);
memory->create(v0_direct_top,nmax_top,"msm:v0_direct_top");
if (v1_direct_top) memory->destroy(v1_direct_top);
memory->create(v1_direct_top,nmax_top,"msm:v1_direct_top");
if (v2_direct_top) memory->destroy(v2_direct_top);
memory->create(v2_direct_top,nmax_top,"msm:v2_direct_top");
if (v3_direct_top) memory->destroy(v3_direct_top);
memory->create(v3_direct_top,nmax_top,"msm:v3_direct_top");
if (v4_direct_top) memory->destroy(v4_direct_top);
memory->create(v4_direct_top,nmax_top,"msm:v4_direct_top");
if (v5_direct_top) memory->destroy(v5_direct_top);
memory->create(v5_direct_top,nmax_top,"msm:v5_direct_top");
double a = cutoff;
double a_sq = cutoff*cutoff;
int zk,zyk,k,ix,iy,iz;
double xdiff,ydiff,zdiff;
double dx,dy,dz;
double tmp[3];
double rsq,r,rho,two_n,two_nsq,dg;
two_n = pow(2.0,n);
two_nsq = two_n * two_n;
for (iz = -nz_top; iz <= nz_top; iz++) {
zdiff = iz/delzinv[n];
zk = (iz + nz_top)*ny;
for (iy = -ny_top; iy <= ny_top; iy++) {
ydiff = iy/delyinv[n];
zyk = (zk + iy + ny_top)*nx;
for (ix = -nx_top; ix <= nx_top; ix++) {
xdiff = ix/delxinv[n];
if (triclinic) {
tmp[0] = xdiff;
tmp[1] = ydiff;
tmp[2] = zdiff;
lamda2xvector(&tmp[0],&tmp[0]);
dx = tmp[0];
dy = tmp[1];
dz = tmp[2];
} else {
dx = xdiff;
dy = ydiff;
dz = zdiff;
}
rsq = dx*dx + dy*dy + dz*dz;
k = zyk + ix + nx_top;
r = sqrt(rsq);
if (r == 0) {
v0_direct_top[k] = 0.0;
v1_direct_top[k] = 0.0;
v2_direct_top[k] = 0.0;
v3_direct_top[k] = 0.0;
v4_direct_top[k] = 0.0;
v5_direct_top[k] = 0.0;
} else {
rho = r/(two_n*a);
dg = -(dgamma(rho)/(two_nsq*a_sq))/r;
v0_direct_top[k] = dg * dx * dx;
v1_direct_top[k] = dg * dy * dy;
v2_direct_top[k] = dg * dz * dz;
v3_direct_top[k] = dg * dx * dy;
v4_direct_top[k] = dg * dx * dz;
v5_direct_top[k] = dg * dy * dz;
}
}
}
}
}
diff --git a/src/KSPACE/msm_cg.cpp b/src/KSPACE/msm_cg.cpp
index e7000c26d..c7af52ef8 100644
--- a/src/KSPACE/msm_cg.cpp
+++ b/src/KSPACE/msm_cg.cpp
@@ -1,549 +1,549 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Paul Crozier, Stan Moore, Stephen Bond, (all SNL)
------------------------------------------------------------------------- */
#include <mpi.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "atom.h"
#include "gridcomm.h"
#include "domain.h"
#include "error.h"
#include "force.h"
#include "neighbor.h"
#include "memory.h"
#include "msm_cg.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define OFFSET 16384
#define SMALLQ 0.00001
enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM};
enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM};
/* ---------------------------------------------------------------------- */
MSMCG::MSMCG(LAMMPS *lmp, int narg, char **arg) : MSM(lmp, narg, arg)
{
if ((narg < 1) || (narg > 2))
error->all(FLERR,"Illegal kspace_style msm/cg command");
triclinic_support = 0;
if (narg == 2) smallq = fabs(force->numeric(FLERR,arg[1]));
else smallq = SMALLQ;
num_charged = -1;
is_charged = NULL;
}
/* ----------------------------------------------------------------------
free all memory
------------------------------------------------------------------------- */
MSMCG::~MSMCG()
{
memory->destroy(is_charged);
}
/* ----------------------------------------------------------------------
compute the MSM long-range force, energy, virial
------------------------------------------------------------------------- */
void MSMCG::compute(int eflag, int vflag)
{
if (scalar_pressure_flag)
error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' with "
"kspace_style msm/cg");
const double * const q = atom->q;
const int nlocal = atom->nlocal;
int i,j,n;
// set energy/virial flags
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = eflag_either = vflag_either = 0;
// invoke allocate_peratom() if needed for first time
if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
peratom_allocate_flag = 1;
}
// extend size of per-atom arrays if necessary
if (nlocal > nmax) {
memory->destroy(part2grid);
memory->destroy(is_charged);
nmax = atom->nmax;
memory->create(part2grid,nmax,3,"msm:part2grid");
memory->create(is_charged,nmax,"msm/cg:is_charged");
}
// one time setup message
if (num_charged < 0) {
bigint charged_all, charged_num;
double charged_frac, charged_fmax, charged_fmin;
num_charged=0;
for (i=0; i < nlocal; ++i)
if (fabs(q[i]) > smallq)
++num_charged;
// get fraction of charged particles per domain
if (nlocal > 0)
charged_frac = static_cast<double>(num_charged) * 100.0
/ static_cast<double>(nlocal);
else
charged_frac = 0.0;
MPI_Reduce(&charged_frac,&charged_fmax,1,MPI_DOUBLE,MPI_MAX,0,world);
MPI_Reduce(&charged_frac,&charged_fmin,1,MPI_DOUBLE,MPI_MIN,0,world);
// get fraction of charged particles overall
charged_num = num_charged;
MPI_Reduce(&charged_num,&charged_all,1,MPI_LMP_BIGINT,MPI_SUM,0,world);
charged_frac = static_cast<double>(charged_all) * 100.0
/ static_cast<double>(atom->natoms);
if (me == 0) {
if (screen)
fprintf(screen,
" MSM/cg optimization cutoff: %g\n"
" Total charged atoms: %.1f%%\n"
" Min/max charged atoms/proc: %.1f%% %.1f%%\n",
smallq,charged_frac,charged_fmin,charged_fmax);
if (logfile)
fprintf(logfile,
" MSM/cg optimization cutoff: %g\n"
" Total charged atoms: %.1f%%\n"
" Min/max charged atoms/proc: %.1f%% %.1f%%\n",
smallq,charged_frac,charged_fmin,charged_fmax);
}
}
// only need to rebuild this list after a neighbor list update
if (neighbor->ago == 0) {
num_charged = 0;
for (i = 0; i < nlocal; ++i) {
if (fabs(q[i]) > smallq) {
is_charged[num_charged] = i;
++num_charged;
}
}
}
// find grid points for all my particles
  // map my particle charge onto my local 3d density grid (an interpolation)
particle_map();
make_rho();
// all procs reverse communicate charge density values from their ghost grid points
// to fully sum contribution in their 3d grid
current_level = 0;
cg_all->reverse_comm(this,REVERSE_RHO);
// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid
for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);
direct(n);
restriction(n);
}
  // compute direct interaction for top grid level for nonperiodic
// and for second from top grid level for periodic
if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);
direct(levels-1);
grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
}
}
// prolongate energy/virial from coarser grid to finer grid
// reverse communicate from ghost grid points to get full sum
for (int n=levels-2; n>=0; n--) {
if (!active_flag[n]) continue;
prolongation(n);
current_level = n;
cg[n]->reverse_comm(this,REVERSE_AD);
// extra per-atom virial communication
if (vflag_atom)
cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
}
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
current_level = 0;
cg_all->forward_comm(this,FORWARD_AD);
// extra per-atom energy/virial communication
if (vflag_atom)
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
// calculate the force on my particles (interpolation)
fieldforce();
// calculate the per-atom energy/virial for my particles
if (evflag_atom) fieldforce_peratom();
// update qsum and qsqsum, if atom count has changed and energy needed
if ((eflag_global || eflag_atom) && atom->natoms != natoms_original) {
qsum_qsq();
natoms_original = atom->natoms;
}
// sum global energy across procs and add in self-energy term
const double qscale = force->qqrd2e * scale;
if (eflag_global) {
double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all;
double e_self = qsqsum*gamma(0.0)/cutoff;
energy -= e_self;
energy *= 0.5*qscale;
}
// total long-range virial
if (vflag_global) {
double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*virial_all[i];
}
// per-atom energy/virial
// energy includes self-energy correction
if (evflag_atom) {
const double qs = 0.5*qscale;
if (eflag_atom) {
const double sf = gamma(0.0)/cutoff;
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
eatom[i] -= q[i]*q[i]*sf;
eatom[i] *= qs;
}
}
if (vflag_atom) {
for (n = 0; n < num_charged; n++) {
i = is_charged[n];
for (j = 0; j < 6; j++)
vatom[i][j] *= qs;
}
}
}
}
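// NOTE: the key difference from the base class is that every per-particle
// loop in this file runs over the compressed charged-atom list,
//   for (j = 0; j < num_charged; j++) { i = is_charged[j]; ... }
// instead of over all nlocal atoms. For systems where most atoms satisfy
// fabs(q[i]) <= smallq (the coarse-grained case this style targets), the
// interpolation work for neutral atoms is skipped entirely at the cost of
// one extra int per atom (see memory_usage() below).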
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void MSMCG::particle_map()
{
const double * const * const x = atom->x;
int flag = 0;
int i;
- if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
+ if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
for (int j = 0; j < num_charged; j++) {
i = is_charged[j];
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
    // add/subtract OFFSET to avoid int(-0.75) = 0 when we want it to be -1
const int nx=static_cast<int>((x[i][0]-boxlo[0])*delxinv[0]+OFFSET)-OFFSET;
const int ny=static_cast<int>((x[i][1]-boxlo[1])*delyinv[0]+OFFSET)-OFFSET;
const int nz=static_cast<int>((x[i][2]-boxlo[2])*delzinv[0]+OFFSET)-OFFSET;
part2grid[i][0] = nx;
part2grid[i][1] = ny;
part2grid[i][2] = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlower < nxlo_out[0] || nx+nupper > nxhi_out[0] ||
ny+nlower < nylo_out[0] || ny+nupper > nyhi_out[0] ||
nz+nlower < nzlo_out[0] || nz+nupper > nzhi_out[0])
flag = 1;
}
if (flag) error->one(FLERR,"Out of range atoms - cannot compute MSM");
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void MSMCG::make_rho()
{
const double * const q = atom->q;
const double * const * const x = atom->x;
// clear 3d density array
double * const * const * const qgridn = qgrid[0];
memset(&(qgridn[nzlo_out[0]][nylo_out[0]][nxlo_out[0]]),0,ngrid[0]*sizeof(double));
double dx,dy,dz,x0,y0,z0;
int i,j,l,m,n,nx,ny,nz,mx,my,mz;
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
compute_phis(dx,dy,dz);
z0 = q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*phi1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*phi1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
qgridn[mz][my][mx] += x0*phi1d[0][l];
}
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get force on my particles
------------------------------------------------------------------------- */
void MSMCG::fieldforce()
{
const double * const * const * const egridn = egrid[0];
const double * const * const x = atom->x;
double * const * const f = atom->f;
const double * const q = atom->q;
int i,j,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz;
double phi_x,phi_y,phi_z;
double dphi_x,dphi_y,dphi_z;
double ekx,eky,ekz;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
compute_phis_and_dphis(dx,dy,dz);
ekx = eky = ekz = 0.0;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
phi_z = phi1d[2][n];
dphi_z = dphi1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
phi_y = phi1d[1][m];
dphi_y = dphi1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
phi_x = phi1d[0][l];
dphi_x = dphi1d[0][l];
ekx += dphi_x*phi_y*phi_z*egridn[mz][my][mx];
eky += phi_x*dphi_y*phi_z*egridn[mz][my][mx];
ekz += phi_x*phi_y*dphi_z*egridn[mz][my][mx];
}
}
}
ekx *= delxinv[0];
eky *= delyinv[0];
ekz *= delzinv[0];
// convert E-field to force
const double qfactor = force->qqrd2e*scale*q[i];
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
f[i][2] += qfactor*ekz;
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial
------------------------------------------------------------------------- */
void MSMCG::fieldforce_peratom()
{
const double * const q = atom->q;
const double * const * const x = atom->x;
double ***egridn = egrid[0];
double ***v0gridn = v0grid[0];
double ***v1gridn = v1grid[0];
double ***v2gridn = v2grid[0];
double ***v3gridn = v3grid[0];
double ***v4gridn = v4grid[0];
double ***v5gridn = v5grid[0];
int i,j,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz,x0,y0,z0;
double u,v0,v1,v2,v3,v4,v5;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
compute_phis_and_dphis(dx,dy,dz);
u = v0 = v1 = v2 = v3 = v4 = v5 = 0.0;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = phi1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*phi1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*phi1d[0][l];
if (eflag_atom) u += x0*egridn[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0gridn[mz][my][mx];
v1 += x0*v1gridn[mz][my][mx];
v2 += x0*v2gridn[mz][my][mx];
v3 += x0*v3gridn[mz][my][mx];
v4 += x0*v4gridn[mz][my][mx];
v5 += x0*v5gridn[mz][my][mx];
}
}
}
}
if (eflag_atom) eatom[i] += q[i]*u;
if (vflag_atom) {
vatom[i][0] += q[i]*v0;
vatom[i][1] += q[i]*v1;
vatom[i][2] += q[i]*v2;
vatom[i][3] += q[i]*v3;
vatom[i][4] += q[i]*v4;
vatom[i][5] += q[i]*v5;
}
}
}
double MSMCG::memory_usage()
{
double bytes = MSM::memory_usage();
bytes += nmax * sizeof(int);
return bytes;
}
diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp
index 99bae3f41..a4cc6f89d 100644
--- a/src/KSPACE/pppm.cpp
+++ b/src/KSPACE/pppm.cpp
@@ -1,3514 +1,3514 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
per-atom energy/virial & group/group energy/force added by Stan Moore (BYU)
analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University)
triclinic added by Stan Moore (SNL)
------------------------------------------------------------------------- */
#include <mpi.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "pppm.h"
#include "atom.h"
#include "comm.h"
#include "gridcomm.h"
#include "neighbor.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "domain.h"
#include "fft3d_wrap.h"
#include "remap_wrap.h"
#include "memory.h"
#include "error.h"
#include "math_const.h"
#include "math_special.h"
using namespace LAMMPS_NS;
using namespace MathConst;
using namespace MathSpecial;
#define MAXORDER 7
#define OFFSET 16384
#define LARGE 10000.0
#define SMALL 0.00001
#define EPS_HOC 1.0e-7
enum{REVERSE_RHO};
enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#define ONEF 1.0f
#else
#define ZEROF 0.0
#define ONEF 1.0
#endif
/* ---------------------------------------------------------------------- */
PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
{
if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command");
pppmflag = 1;
group_group_enable = 1;
accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
nfactors = 3;
factors = new int[nfactors];
factors[0] = 2;
factors[1] = 3;
factors[2] = 5;
MPI_Comm_rank(world,&me);
MPI_Comm_size(world,&nprocs);
density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
density_fft = NULL;
u_brick = NULL;
v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
greensfn = NULL;
work1 = work2 = NULL;
vg = NULL;
fkx = fky = fkz = NULL;
sf_precoeff1 = sf_precoeff2 = sf_precoeff3 =
sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
density_A_brick = density_B_brick = NULL;
density_A_fft = density_B_fft = NULL;
gf_b = NULL;
rho1d = rho_coeff = drho1d = drho_coeff = NULL;
fft1 = fft2 = NULL;
remap = NULL;
cg = NULL;
cg_peratom = NULL;
nmax = 0;
part2grid = NULL;
peratom_allocate_flag = 0;
group_allocate_flag = 0;
// define acons coefficients for estimation of kspace errors
// see JCP 109, pg 7698 for derivation of coefficients
// higher order coefficients may be computed if needed
memory->create(acons,8,7,"pppm:acons");
acons[1][0] = 2.0 / 3.0;
acons[2][0] = 1.0 / 50.0;
acons[2][1] = 5.0 / 294.0;
acons[3][0] = 1.0 / 588.0;
acons[3][1] = 7.0 / 1440.0;
acons[3][2] = 21.0 / 3872.0;
acons[4][0] = 1.0 / 4320.0;
acons[4][1] = 3.0 / 1936.0;
acons[4][2] = 7601.0 / 2271360.0;
acons[4][3] = 143.0 / 28800.0;
acons[5][0] = 1.0 / 23232.0;
acons[5][1] = 7601.0 / 13628160.0;
acons[5][2] = 143.0 / 69120.0;
acons[5][3] = 517231.0 / 106536960.0;
acons[5][4] = 106640677.0 / 11737571328.0;
acons[6][0] = 691.0 / 68140800.0;
acons[6][1] = 13.0 / 57600.0;
acons[6][2] = 47021.0 / 35512320.0;
acons[6][3] = 9694607.0 / 2095994880.0;
acons[6][4] = 733191589.0 / 59609088000.0;
acons[6][5] = 326190917.0 / 11700633600.0;
acons[7][0] = 1.0 / 345600.0;
acons[7][1] = 3617.0 / 35512320.0;
acons[7][2] = 745739.0 / 838397952.0;
acons[7][3] = 56399353.0 / 12773376000.0;
acons[7][4] = 25091609.0 / 1560084480.0;
acons[7][5] = 1755948832039.0 / 36229939200000.0;
acons[7][6] = 4887769399.0 / 37838389248.0;
}
/* ----------------------------------------------------------------------
free all memory
------------------------------------------------------------------------- */
PPPM::~PPPM()
{
delete [] factors;
deallocate();
if (peratom_allocate_flag) deallocate_peratom();
if (group_allocate_flag) deallocate_groups();
memory->destroy(part2grid);
memory->destroy(acons);
}
/* ----------------------------------------------------------------------
called once before run
------------------------------------------------------------------------- */
void PPPM::init()
{
if (me == 0) {
if (screen) fprintf(screen,"PPPM initialization ...\n");
if (logfile) fprintf(logfile,"PPPM initialization ...\n");
}
// error check
triclinic_check();
if (domain->triclinic && differentiation_flag == 1)
error->all(FLERR,"Cannot (yet) use PPPM with triclinic box "
"and kspace_modify diff ad");
if (domain->triclinic && slabflag)
error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and "
"slab correction");
if (domain->dimension == 2) error->all(FLERR,
"Cannot use PPPM with 2d simulation");
if (comm->style != 0)
error->universe_all(FLERR,"PPPM can only currently be used with "
"comm_style brick");
if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
if (slabflag == 0 && domain->nonperiodic > 0)
error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
if (slabflag) {
if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
error->all(FLERR,"Incorrect boundaries with slab PPPM");
}
if (order < 2 || order > MAXORDER) {
char str[128];
    sprintf(str,"PPPM order cannot be smaller than 2 or larger than %d",MAXORDER);
error->all(FLERR,str);
}
// extract short-range Coulombic cutoff from pair style
triclinic = domain->triclinic;
pair_check();
int itmp = 0;
double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
if (p_cutoff == NULL)
error->all(FLERR,"KSpace style is incompatible with Pair style");
cutoff = *p_cutoff;
// if kspace is TIP4P, extract TIP4P params from pair style
  // bond/angle are not yet init(), so ensure equilibrium request is valid
qdist = 0.0;
if (tip4pflag) {
if (me == 0) {
if (screen) fprintf(screen," extracting TIP4P info from pair style\n");
if (logfile) fprintf(logfile," extracting TIP4P info from pair style\n");
}
double *p_qdist = (double *) force->pair->extract("qdist",itmp);
int *p_typeO = (int *) force->pair->extract("typeO",itmp);
int *p_typeH = (int *) force->pair->extract("typeH",itmp);
int *p_typeA = (int *) force->pair->extract("typeA",itmp);
int *p_typeB = (int *) force->pair->extract("typeB",itmp);
if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
error->all(FLERR,"Pair style is incompatible with TIP4P KSpace style");
qdist = *p_qdist;
typeO = *p_typeO;
typeH = *p_typeH;
int typeA = *p_typeA;
int typeB = *p_typeB;
if (force->angle == NULL || force->bond == NULL ||
force->angle->setflag == NULL || force->bond->setflag == NULL)
error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
if (typeA < 1 || typeA > atom->nangletypes ||
force->angle->setflag[typeA] == 0)
error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P");
if (typeB < 1 || typeB > atom->nbondtypes ||
force->bond->setflag[typeB] == 0)
error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P");
double theta = force->angle->equilibrium_angle(typeA);
double blen = force->bond->equilibrium_distance(typeB);
alpha = qdist / (cos(0.5*theta) * blen);
if (domain->triclinic)
error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P");
}
// compute qsum & qsqsum and warn if not charge-neutral
scale = 1.0;
qqrd2e = force->qqrd2e;
qsum_qsq();
natoms_original = atom->natoms;
// set accuracy (force units) from accuracy_relative or accuracy_absolute
if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
else accuracy = accuracy_relative * two_charge_force;
// free all arrays previously allocated
deallocate();
if (peratom_allocate_flag) deallocate_peratom();
if (group_allocate_flag) deallocate_groups();
// setup FFT grid resolution and g_ewald
  // normally one iteration through the while loop is all that is required
// if grid stencil does not extend beyond neighbor proc
// or overlap is allowed, then done
// else reduce order and try again
int (*procneigh)[2] = comm->procneigh;
GridComm *cgtmp = NULL;
int iteration = 0;
while (order >= minorder) {
if (iteration && me == 0)
error->warning(FLERR,"Reducing PPPM order b/c stencil extends "
"beyond nearest neighbor processor");
if (stagger_flag && !differentiation_flag) compute_gf_denom();
set_grid_global();
set_grid_local();
if (overlap_allowed) break;
cgtmp = new GridComm(lmp,world,1,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
cgtmp->ghost_notify();
if (!cgtmp->ghost_overlap()) break;
delete cgtmp;
order--;
iteration++;
}
if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order");
if (!overlap_allowed && cgtmp->ghost_overlap())
error->all(FLERR,"PPPM grid stencil extends "
"beyond nearest neighbor processor");
if (cgtmp) delete cgtmp;
// adjust g_ewald
if (!gewaldflag) adjust_gewald();
// calculate the final accuracy
double estimated_accuracy = final_accuracy();
// print stats
int ngrid_max,nfft_both_max;
MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
if (me == 0) {
#ifdef FFT_SINGLE
const char fft_prec[] = "single";
#else
const char fft_prec[] = "double";
#endif
if (screen) {
fprintf(screen," G vector (1/distance) = %g\n",g_ewald);
fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
fprintf(screen," stencil order = %d\n",order);
fprintf(screen," estimated absolute RMS force accuracy = %g\n",
estimated_accuracy);
fprintf(screen," estimated relative force accuracy = %g\n",
estimated_accuracy/two_charge_force);
fprintf(screen," using %s precision FFTs\n",fft_prec);
fprintf(screen," 3d grid and FFT values/proc = %d %d\n",
ngrid_max,nfft_both_max);
}
if (logfile) {
fprintf(logfile," G vector (1/distance) = %g\n",g_ewald);
fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
fprintf(logfile," stencil order = %d\n",order);
fprintf(logfile," estimated absolute RMS force accuracy = %g\n",
estimated_accuracy);
fprintf(logfile," estimated relative force accuracy = %g\n",
estimated_accuracy/two_charge_force);
fprintf(logfile," using %s precision FFTs\n",fft_prec);
fprintf(logfile," 3d grid and FFT values/proc = %d %d\n",
ngrid_max,nfft_both_max);
}
}
// allocate K-space dependent memory
  // don't invoke allocate_peratom() or allocate_groups(); they will be allocated when needed
allocate();
cg->ghost_notify();
cg->setup();
  // pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients
compute_gf_denom();
if (differentiation_flag == 1) compute_sf_precoeff();
compute_rho_coeff();
}
/* ----------------------------------------------------------------------
adjust PPPM coeffs, called initially and whenever volume has changed
------------------------------------------------------------------------- */
void PPPM::setup()
{
if (triclinic) {
setup_triclinic();
return;
}
// perform some checks to avoid illegal boundaries with read_data
if (slabflag == 0 && domain->nonperiodic > 0)
error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
if (slabflag) {
if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
error->all(FLERR,"Incorrect boundaries with slab PPPM");
}
int i,j,k,n;
double *prd;
// volume-dependent factors
// adjust z dimension for 2d slab PPPM
// z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
volume = xprd * yprd * zprd_slab;
delxinv = nx_pppm/xprd;
delyinv = ny_pppm/yprd;
delzinv = nz_pppm/zprd_slab;
delvolinv = delxinv*delyinv*delzinv;
double unitkx = (MY_2PI/xprd);
double unitky = (MY_2PI/yprd);
double unitkz = (MY_2PI/zprd_slab);
// fkx,fky,fkz for my FFT grid pts
double per;
for (i = nxlo_fft; i <= nxhi_fft; i++) {
per = i - nx_pppm*(2*i/nx_pppm);
fkx[i] = unitkx*per;
}
for (i = nylo_fft; i <= nyhi_fft; i++) {
per = i - ny_pppm*(2*i/ny_pppm);
fky[i] = unitky*per;
}
for (i = nzlo_fft; i <= nzhi_fft; i++) {
per = i - nz_pppm*(2*i/nz_pppm);
fkz[i] = unitkz*per;
}
// virial coefficients
double sqk,vterm;
n = 0;
for (k = nzlo_fft; k <= nzhi_fft; k++) {
for (j = nylo_fft; j <= nyhi_fft; j++) {
for (i = nxlo_fft; i <= nxhi_fft; i++) {
sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
if (sqk == 0.0) {
vg[n][0] = 0.0;
vg[n][1] = 0.0;
vg[n][2] = 0.0;
vg[n][3] = 0.0;
vg[n][4] = 0.0;
vg[n][5] = 0.0;
} else {
vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
vg[n][3] = vterm*fkx[i]*fky[j];
vg[n][4] = vterm*fkx[i]*fkz[k];
vg[n][5] = vterm*fky[j]*fkz[k];
}
n++;
}
}
}
if (differentiation_flag == 1) compute_gf_ad();
else compute_gf_ik();
}
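// NOTE: the integer expression per = i - nx_pppm*(2*i/nx_pppm) above folds a
// grid index into its signed FFT frequency: integer division makes
// 2*i/nx_pppm evaluate to 0 for i < nx_pppm/2 and to 1 otherwise, so with
// nx_pppm = 8 the indices 0..7 map to 0,1,2,3,-4,-3,-2,-1, the usual
// wraparound frequency ordering of the transform.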
/* ----------------------------------------------------------------------
adjust PPPM coeffs, called initially and whenever volume has changed
for a triclinic system
------------------------------------------------------------------------- */
void PPPM::setup_triclinic()
{
int i,j,k,n;
double *prd;
// volume-dependent factors
// adjust z dimension for 2d slab PPPM
// z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
prd = domain->prd;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
volume = xprd * yprd * zprd_slab;
// use lamda (0-1) coordinates
delxinv = nx_pppm;
delyinv = ny_pppm;
delzinv = nz_pppm;
delvolinv = delxinv*delyinv*delzinv/volume;
// fkx,fky,fkz for my FFT grid pts
double per_i,per_j,per_k;
n = 0;
for (k = nzlo_fft; k <= nzhi_fft; k++) {
per_k = k - nz_pppm*(2*k/nz_pppm);
for (j = nylo_fft; j <= nyhi_fft; j++) {
per_j = j - ny_pppm*(2*j/ny_pppm);
for (i = nxlo_fft; i <= nxhi_fft; i++) {
per_i = i - nx_pppm*(2*i/nx_pppm);
double unitk_lamda[3];
unitk_lamda[0] = 2.0*MY_PI*per_i;
unitk_lamda[1] = 2.0*MY_PI*per_j;
unitk_lamda[2] = 2.0*MY_PI*per_k;
x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
fkx[n] = unitk_lamda[0];
fky[n] = unitk_lamda[1];
fkz[n] = unitk_lamda[2];
n++;
}
}
}
// virial coefficients
double sqk,vterm;
for (n = 0; n < nfft; n++) {
sqk = fkx[n]*fkx[n] + fky[n]*fky[n] + fkz[n]*fkz[n];
if (sqk == 0.0) {
vg[n][0] = 0.0;
vg[n][1] = 0.0;
vg[n][2] = 0.0;
vg[n][3] = 0.0;
vg[n][4] = 0.0;
vg[n][5] = 0.0;
} else {
vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
vg[n][0] = 1.0 + vterm*fkx[n]*fkx[n];
vg[n][1] = 1.0 + vterm*fky[n]*fky[n];
vg[n][2] = 1.0 + vterm*fkz[n]*fkz[n];
vg[n][3] = vterm*fkx[n]*fky[n];
vg[n][4] = vterm*fkx[n]*fkz[n];
vg[n][5] = vterm*fky[n]*fkz[n];
}
}
compute_gf_ik_triclinic();
}
/* ----------------------------------------------------------------------
reset local grid arrays and communication stencils
called by fix balance because it changed the sizes of processor sub-domains
------------------------------------------------------------------------- */
void PPPM::setup_grid()
{
// free all arrays previously allocated
deallocate();
if (peratom_allocate_flag) deallocate_peratom();
if (group_allocate_flag) deallocate_groups();
// reset portion of global grid that each proc owns
set_grid_local();
// reallocate K-space dependent memory
// check if grid communication is now overlapping if not allowed
// don't invoke allocate_peratom() or allocate_groups(); they are allocated when needed
allocate();
cg->ghost_notify();
if (overlap_allowed == 0 && cg->ghost_overlap())
error->all(FLERR,"PPPM grid stencil extends "
"beyond nearest neighbor processor");
cg->setup();
// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients
compute_gf_denom();
if (differentiation_flag == 1) compute_sf_precoeff();
compute_rho_coeff();
// pre-compute volume-dependent coeffs
setup();
}
/* ----------------------------------------------------------------------
compute the PPPM long-range force, energy, virial
------------------------------------------------------------------------- */
void PPPM::compute(int eflag, int vflag)
{
int i,j;
// set energy/virial flags
// invoke allocate_peratom() if needed for first time
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = 0;
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
// if atom count has changed, update qsum and qsqsum
if (atom->natoms != natoms_original) {
qsum_qsq();
natoms_original = atom->natoms;
}
// return if there are no charges
if (qsqsum == 0.0) return;
// convert atoms from box to lamda coords
if (triclinic == 0) boxlo = domain->boxlo;
else {
boxlo = domain->boxlo_lamda;
domain->x2lamda(atom->nlocal);
}
// extend size of per-atom arrays if necessary
if (atom->nlocal > nmax) {
memory->destroy(part2grid);
nmax = atom->nmax;
memory->create(part2grid,nmax,3,"pppm:part2grid");
}
// find grid points for all my particles
// map my particle charge onto my local 3d density grid
particle_map();
make_rho();
// all procs communicate density values from their ghost cells
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
cg->reverse_comm(this,REVERSE_RHO);
brick2fft();
// compute potential gradient on my FFT grid and
// portion of e_long on this proc's FFT grid
// return gradients (electric fields) in 3d brick decomposition
// also performs per-atom calculations via poisson_peratom()
poisson();
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
// extra per-atom energy/virial communication
if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
}
// calculate the force on my particles
fieldforce();
// extra per-atom energy/virial communication
if (evflag_atom) fieldforce_peratom();
// sum global energy across procs and add in volume-dependent term
const double qscale = qqrd2e * scale;
if (eflag_global) {
double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all;
energy *= 0.5*volume;
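// the two subtracted terms below are the standard Ewald corrections:
// g_ewald*sum(q_i^2)/sqrt(pi) removes the Gaussian self-energy and
// (pi/2)*(sum q_i)^2/(g_ewald^2*volume) removes the neutralizing-background
// contribution for non-neutral systems (MY_PIS = sqrt(pi), MY_PI2 = pi/2)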
energy -= g_ewald*qsqsum/MY_PIS +
MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
energy *= qscale;
}
// sum global virial across procs
if (vflag_global) {
double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
}
// per-atom energy/virial
// energy includes self-energy correction
// ntotal accounts for TIP4P tallying eatom/vatom for ghost atoms
if (evflag_atom) {
double *q = atom->q;
int nlocal = atom->nlocal;
int ntotal = nlocal;
if (tip4pflag) ntotal += atom->nghost;
if (eflag_atom) {
for (i = 0; i < nlocal; i++) {
eatom[i] *= 0.5;
eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
(g_ewald*g_ewald*volume);
eatom[i] *= qscale;
}
for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale;
}
if (vflag_atom) {
for (i = 0; i < ntotal; i++)
for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
}
}
// 2d slab correction
if (slabflag == 1) slabcorr();
// convert atoms back from lamda to box coords
if (triclinic) domain->lamda2x(atom->nlocal);
}
/* ----------------------------------------------------------------------
allocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPM::allocate()
{
memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:density_brick");
memory->create(density_fft,nfft_both,"pppm:density_fft");
memory->create(greensfn,nfft_both,"pppm:greensfn");
memory->create(work1,2*nfft_both,"pppm:work1");
memory->create(work2,2*nfft_both,"pppm:work2");
memory->create(vg,nfft_both,6,"pppm:vg");
if (triclinic == 0) {
memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx");
memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
} else {
memory->create(fkx,nfft_both,"pppm:fkx");
memory->create(fky,nfft_both,"pppm:fky");
memory->create(fkz,nfft_both,"pppm:fkz");
}
if (differentiation_flag == 1) {
memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:u_brick");
memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1");
memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2");
memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3");
memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4");
memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5");
memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6");
} else {
memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:vdx_brick");
memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:vdy_brick");
memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:vdz_brick");
}
// summation coeffs
order_allocated = order;
if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b");
memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d");
memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,
"pppm:drho_coeff");
// create 2 FFTs and a Remap
// 1st FFT keeps data in FFT decomposition
// 2nd FFT returns data in 3d brick decomposition
// remap takes data from 3d brick to FFT decomposition
int tmp;
fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
0,0,&tmp,collective_flag);
fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
0,0,&tmp,collective_flag);
remap = new Remap(lmp,world,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
1,0,0,FFT_PRECISION,collective_flag);
// create ghost grid object for rho and electric field communication
int (*procneigh)[2] = comm->procneigh;
if (differentiation_flag == 1)
cg = new GridComm(lmp,world,1,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg = new GridComm(lmp,world,3,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
}
/* ----------------------------------------------------------------------
deallocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPM::deallocate()
{
memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
if (differentiation_flag == 1) {
memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy(sf_precoeff1);
memory->destroy(sf_precoeff2);
memory->destroy(sf_precoeff3);
memory->destroy(sf_precoeff4);
memory->destroy(sf_precoeff5);
memory->destroy(sf_precoeff6);
} else {
memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
}
memory->destroy(density_fft);
memory->destroy(greensfn);
memory->destroy(work1);
memory->destroy(work2);
memory->destroy(vg);
if (triclinic == 0) {
memory->destroy1d_offset(fkx,nxlo_fft);
memory->destroy1d_offset(fky,nylo_fft);
memory->destroy1d_offset(fkz,nzlo_fft);
} else {
memory->destroy(fkx);
memory->destroy(fky);
memory->destroy(fkz);
}
memory->destroy(gf_b);
if (stagger_flag) gf_b = NULL;
memory->destroy2d_offset(rho1d,-order_allocated/2);
memory->destroy2d_offset(drho1d,-order_allocated/2);
memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2);
memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2);
delete fft1;
delete fft2;
delete remap;
delete cg;
}
/* ----------------------------------------------------------------------
allocate per-atom memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPM::allocate_peratom()
{
peratom_allocate_flag = 1;
if (differentiation_flag != 1)
memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:u_brick");
memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v0_brick");
memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v1_brick");
memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v2_brick");
memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v3_brick");
memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v4_brick");
memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v5_brick");
// create ghost grid object for rho and electric field communication
int (*procneigh)[2] = comm->procneigh;
if (differentiation_flag == 1)
cg_peratom =
new GridComm(lmp,world,6,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg_peratom =
new GridComm(lmp,world,7,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
}
/* ----------------------------------------------------------------------
deallocate per-atom memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPM::deallocate_peratom()
{
peratom_allocate_flag = 0;
memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out);
if (differentiation_flag != 1)
memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
delete cg_peratom;
}
/* ----------------------------------------------------------------------
set global size of PPPM grid = nx,ny,nz_pppm
used for charge accumulation, FFTs, and electric field interpolation
------------------------------------------------------------------------- */
void PPPM::set_grid_global()
{
// use xprd,yprd,zprd (even if triclinic, and then scale later)
// adjust z dimension for 2d slab PPPM
// 3d PPPM just uses zprd since slab_volfactor = 1.0
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
double zprd_slab = zprd*slab_volfactor;
// make initial g_ewald estimate
// based on desired accuracy and real space cutoff
// fluid-occupied volume used to estimate real-space error
// zprd used rather than zprd_slab
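// this closed-form estimate inverts the real-space RMS error expression
// (see newton_raphson_f): accuracy = 2*q2*exp(-(g_ewald*cutoff)^2) /
// sqrt(natoms*cutoff*xprd*yprd*zprd) is solved for g_ewald, with a
// fallback value when the argument of the log would be out of range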
double h;
bigint natoms = atom->natoms;
if (!gewaldflag) {
if (accuracy <= 0.0)
error->all(FLERR,"KSpace accuracy must be > 0");
g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
else g_ewald = sqrt(-log(g_ewald)) / cutoff;
}
// set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
// nz_pppm uses extended zprd_slab instead of zprd
// reduce the grid spacing h until the accuracy target is met
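// in the ad/stagger branch below, the spacing starts at h = 4/g_ewald and
// shrinks by 5% per iteration until compute_df_kspace() meets the target,
// giving up after 500 iterations; in the ik branch the per-dimension grid
// counts are instead grown until estimate_ik_error() meets the target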
if (!gridflag) {
if (differentiation_flag == 1 || stagger_flag) {
h = h_x = h_y = h_z = 4.0/g_ewald;
int count = 0;
while (1) {
// set grid dimension
nx_pppm = static_cast<int> (xprd/h_x);
ny_pppm = static_cast<int> (yprd/h_y);
nz_pppm = static_cast<int> (zprd_slab/h_z);
if (nx_pppm <= 1) nx_pppm = 2;
if (ny_pppm <= 1) ny_pppm = 2;
if (nz_pppm <= 1) nz_pppm = 2;
// set local grid dimension
int npey_fft,npez_fft;
if (nz_pppm >= nprocs) {
npey_fft = 1;
npez_fft = nprocs;
} else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
int me_y = me % npey_fft;
int me_z = me / npey_fft;
nxlo_fft = 0;
nxhi_fft = nx_pppm - 1;
nylo_fft = me_y*ny_pppm/npey_fft;
nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
nzlo_fft = me_z*nz_pppm/npez_fft;
nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
double df_kspace = compute_df_kspace();
count++;
// break loop if the accuracy has been reached or
// too many loops have been performed
if (df_kspace <= accuracy) break;
if (count > 500) error->all(FLERR, "Could not compute grid size");
h *= 0.95;
h_x = h_y = h_z = h;
}
} else {
double err;
h_x = h_y = h_z = 1.0/g_ewald;
nx_pppm = static_cast<int> (xprd/h_x) + 1;
ny_pppm = static_cast<int> (yprd/h_y) + 1;
nz_pppm = static_cast<int> (zprd_slab/h_z) + 1;
err = estimate_ik_error(h_x,xprd,natoms);
while (err > accuracy) {
err = estimate_ik_error(h_x,xprd,natoms);
nx_pppm++;
h_x = xprd/nx_pppm;
}
err = estimate_ik_error(h_y,yprd,natoms);
while (err > accuracy) {
err = estimate_ik_error(h_y,yprd,natoms);
ny_pppm++;
h_y = yprd/ny_pppm;
}
err = estimate_ik_error(h_z,zprd_slab,natoms);
while (err > accuracy) {
err = estimate_ik_error(h_z,zprd_slab,natoms);
nz_pppm++;
h_z = zprd_slab/nz_pppm;
}
}
// scale grid for triclinic skew
if (triclinic) {
double tmp[3];
tmp[0] = nx_pppm/xprd;
tmp[1] = ny_pppm/yprd;
tmp[2] = nz_pppm/zprd;
lamda2xT(&tmp[0],&tmp[0]);
nx_pppm = static_cast<int>(tmp[0]) + 1;
ny_pppm = static_cast<int>(tmp[1]) + 1;
nz_pppm = static_cast<int>(tmp[2]) + 1;
}
}
// boost grid size until it is factorable
while (!factorable(nx_pppm)) nx_pppm++;
while (!factorable(ny_pppm)) ny_pppm++;
while (!factorable(nz_pppm)) nz_pppm++;
if (triclinic == 0) {
h_x = xprd/nx_pppm;
h_y = yprd/ny_pppm;
h_z = zprd_slab/nz_pppm;
} else {
double tmp[3];
tmp[0] = nx_pppm;
tmp[1] = ny_pppm;
tmp[2] = nz_pppm;
x2lamdaT(&tmp[0],&tmp[0]);
h_x = 1.0/tmp[0];
h_y = 1.0/tmp[1];
h_z = 1.0/tmp[2];
}
if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
error->all(FLERR,"PPPM grid is too large");
}
/* ----------------------------------------------------------------------
check if all factors of n are in list of factors
return 1 if yes, 0 if no
------------------------------------------------------------------------- */
int PPPM::factorable(int n)
{
int i;
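// repeatedly divide out allowed factors; if a pass finds no divisor the
// remainder is not factorable, e.g. if factors[] held {2,3,5} (an
// illustrative list), n = 90 = 2*3*3*5 would return 1 and n = 22 would
// return 0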
while (n > 1) {
for (i = 0; i < nfactors; i++) {
if (n % factors[i] == 0) {
n /= factors[i];
break;
}
}
if (i == nfactors) return 0;
}
return 1;
}
/* ----------------------------------------------------------------------
compute estimated kspace force error
------------------------------------------------------------------------- */
double PPPM::compute_df_kspace()
{
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
double zprd_slab = zprd*slab_volfactor;
bigint natoms = atom->natoms;
double df_kspace = 0.0;
if (differentiation_flag == 1 || stagger_flag) {
double qopt = compute_qopt();
df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
} else {
double lprx = estimate_ik_error(h_x,xprd,natoms);
double lpry = estimate_ik_error(h_y,yprd,natoms);
double lprz = estimate_ik_error(h_z,zprd_slab,natoms);
df_kspace = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
}
return df_kspace;
}
/* ----------------------------------------------------------------------
compute qopt, used to estimate the k-space force error for the ad and stagger schemes
------------------------------------------------------------------------- */
double PPPM::compute_qopt()
{
double qopt = 0.0;
double *prd = domain->prd;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
volume = xprd * yprd * zprd_slab;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double u1, u2, sqk;
double sum1,sum2,sum3,sum4,dot2;
int k,l,m,nx,ny,nz;
const int twoorder = 2*order;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
const int mper = m - nz_pppm*(2*m/nz_pppm);
for (l = nylo_fft; l <= nyhi_fft; l++) {
const int lper = l - ny_pppm*(2*l/ny_pppm);
for (k = nxlo_fft; k <= nxhi_fft; k++) {
const int kper = k - nx_pppm*(2*k/nx_pppm);
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk != 0.0) {
sum1 = 0.0;
sum2 = 0.0;
sum3 = 0.0;
sum4 = 0.0;
for (nx = -2; nx <= 2; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
qx *= qx;
for (ny = -2; ny <= 2; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
qy *= qy;
for (nz = -2; nz <= 2; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
qz *= qz;
dot2 = qx+qy+qz;
u1 = sx*sy*sz;
u2 = wx*wy*wz;
sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
sum2 += u1 * u2 * MY_4PI;
sum3 += u2;
sum4 += dot2*u2;
}
}
}
sum2 *= sum2;
qopt += sum1 - sum2/(sum3*sum4);
}
}
}
}
double qopt_all;
MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
return qopt_all;
}
/* ----------------------------------------------------------------------
estimate kspace force error for ik method
------------------------------------------------------------------------- */
double PPPM::estimate_ik_error(double h, double prd, bigint natoms)
{
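// in formula form, the estimate computed below is
// sum = Sigma_{m<order} acons[order][m] * (h*g_ewald)^(2m) and
// error = q2 * (h*g_ewald)^order * sqrt(g_ewald*prd*sqrt(2*pi)*sum/natoms) / prd^2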
double sum = 0.0;
for (int m = 0; m < order; m++)
sum += acons[order][m] * pow(h*g_ewald,2.0*m);
double value = q2 * pow(h*g_ewald,(double)order) *
sqrt(g_ewald*prd*sqrt(MY_2PI)*sum/natoms) / (prd*prd);
return value;
}
/* ----------------------------------------------------------------------
adjust the g_ewald parameter to near its optimal value
using a Newton-Raphson solver
------------------------------------------------------------------------- */
void PPPM::adjust_gewald()
{
double dx;
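// standard Newton iteration: g_ewald <- g_ewald - f(g_ewald)/f'(g_ewald),
// where f = df_rspace - df_kspace (see newton_raphson_f); converged once
// |f| < SMALL, aborting after LARGE iterations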
for (int i = 0; i < LARGE; i++) {
dx = newton_raphson_f() / derivf();
g_ewald -= dx;
if (fabs(newton_raphson_f()) < SMALL) return;
}
error->all(FLERR,"Could not compute g_ewald");
}
/* ----------------------------------------------------------------------
calculate f(x) using Newton-Raphson solver
------------------------------------------------------------------------- */
double PPPM::newton_raphson_f()
{
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
bigint natoms = atom->natoms;
double df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) /
sqrt(natoms*cutoff*xprd*yprd*zprd);
double df_kspace = compute_df_kspace();
return df_rspace - df_kspace;
}
/* ----------------------------------------------------------------------
calculate numerical derivative f'(x) using forward difference
[f(x + h) - f(x)] / h
------------------------------------------------------------------------- */
double PPPM::derivf()
{
double h = 0.000001; // derivative step size
double df,f1,f2,g_ewald_old;
f1 = newton_raphson_f();
g_ewald_old = g_ewald;
g_ewald += h;
f2 = newton_raphson_f();
g_ewald = g_ewald_old;
df = (f2 - f1)/h;
return df;
}
/* ----------------------------------------------------------------------
calculate the final estimate of the accuracy
------------------------------------------------------------------------- */
double PPPM::final_accuracy()
{
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
bigint natoms = atom->natoms;
double df_kspace = compute_df_kspace();
double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd);
double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace);
double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace +
df_table*df_table);
return estimated_accuracy;
}
/* ----------------------------------------------------------------------
set local subset of PPPM/FFT grid that I own
n xyz lo/hi in = 3d brick that I own (inclusive)
n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive)
n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz)
------------------------------------------------------------------------- */
void PPPM::set_grid_local()
{
// global indices of PPPM grid range from 0 to N-1
// nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
// global PPPM grid that I own without ghost cells
// for slab PPPM, assign z grid as if it were not extended
nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
nzlo_in = static_cast<int>
(comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
nzhi_in = static_cast<int>
(comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
// nlower,nupper = stencil size for mapping particles to PPPM grid
nlower = -(order-1)/2;
nupper = order/2;
// shift values for particle <-> grid mapping
// add/subtract OFFSET to avoid int(-0.75) = 0 when we want it to be -1
if (order % 2) shift = OFFSET + 0.5;
else shift = OFFSET;
if (order % 2) shiftone = 0.0;
else shiftone = 0.5;
// nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
// global PPPM grid that my particles can contribute charge to
// effectively nlo_in,nhi_in + ghost cells
// nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
// position a particle in my box can be at
// dist[3] = particle position bound = subbox + skin/2.0 + qdist
// qdist = offset due to TIP4P fictitious charge
// convert to triclinic if necessary
// nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
// for slab PPPM, assign z grid as if it were not extended
double *prd,*sublo,*subhi;
if (triclinic == 0) {
prd = domain->prd;
boxlo = domain->boxlo;
sublo = domain->sublo;
subhi = domain->subhi;
} else {
prd = domain->prd_lamda;
boxlo = domain->boxlo_lamda;
sublo = domain->sublo_lamda;
subhi = domain->subhi_lamda;
}
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double dist[3];
double cuthalf = 0.5*neighbor->skin + qdist;
if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
else kspacebbox(cuthalf,&dist[0]);
int nlo,nhi;
nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
nx_pppm/xprd + shift) - OFFSET;
nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
nx_pppm/xprd + shift) - OFFSET;
nxlo_out = nlo + nlower;
nxhi_out = nhi + nupper;
nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
ny_pppm/yprd + shift) - OFFSET;
nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
ny_pppm/yprd + shift) - OFFSET;
nylo_out = nlo + nlower;
nyhi_out = nhi + nupper;
nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
nz_pppm/zprd_slab + shift) - OFFSET;
nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
nz_pppm/zprd_slab + shift) - OFFSET;
nzlo_out = nlo + nlower;
nzhi_out = nhi + nupper;
if (stagger_flag) {
nxhi_out++;
nyhi_out++;
nzhi_out++;
}
// for slab PPPM, change the grid boundary for processors at +z end
// to include the empty volume between periodically repeating slabs
// for slab PPPM, want charge data communicated from -z proc to +z proc,
// but not vice versa, also want field data communicated from +z proc to
// -z proc, but not vice versa
// this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
// also ensure no other procs use ghost cells beyond +z limit
if (slabflag == 1) {
if (comm->myloc[2] == comm->procgrid[2]-1)
nzhi_in = nzhi_out = nz_pppm - 1;
nzhi_out = MIN(nzhi_out,nz_pppm-1);
}
// decomposition of FFT mesh
// global indices range from 0 to N-1
// proc owns entire x-dimension, clumps of columns in y,z dimensions
// npey_fft,npez_fft = # of procs in y,z dims
// if nprocs is small enough, proc can own 1 or more entire xy planes,
// else proc owns 2d sub-blocks of yz plane
// me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
// nlo_fft,nhi_fft = lower/upper limit of the section
// of the global FFT mesh that I own
int npey_fft,npez_fft;
if (nz_pppm >= nprocs) {
npey_fft = 1;
npez_fft = nprocs;
} else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
int me_y = me % npey_fft;
int me_z = me / npey_fft;
nxlo_fft = 0;
nxhi_fft = nx_pppm - 1;
nylo_fft = me_y*ny_pppm/npey_fft;
nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
nzlo_fft = me_z*nz_pppm/npez_fft;
nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
// PPPM grid pts owned by this proc, including ghosts
ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
// FFT grids owned by this proc, without ghosts
// nfft = FFT points in FFT decomposition on this proc
// nfft_brick = FFT points in 3d brick-decomposition on this proc
// nfft_both = greater of 2 values
nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
(nzhi_fft-nzlo_fft+1);
int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
(nzhi_in-nzlo_in+1);
nfft_both = MAX(nfft,nfft_brick);
}
/* ----------------------------------------------------------------------
pre-compute Green's function denominator expansion coeffs, Gamma(2n)
------------------------------------------------------------------------- */
void PPPM::compute_gf_denom()
{
int k,l,m;
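// gf_b[] is filled by the recurrence below and then normalized by
// gaminv = 1/(2*order-1)!  (ifact accumulates (2*order-1)!)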
for (l = 1; l < order; l++) gf_b[l] = 0.0;
gf_b[0] = 1.0;
for (m = 1; m < order; m++) {
for (l = m; l > 0; l--)
gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1));
gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5));
}
bigint ifact = 1;
for (k = 1; k < 2*order; k++) ifact *= k;
double gaminv = 1.0/ifact;
for (l = 0; l < order; l++) gf_b[l] *= gaminv;
}
/* ----------------------------------------------------------------------
pre-compute modified (Hockney-Eastwood) Coulomb Green's function
------------------------------------------------------------------------- */
void PPPM::compute_gf_ik()
{
const double * const prd = domain->prd;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
double snx,sny,snz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,dot1,dot2;
double numerator,denominator;
double sqk;
int k,l,m,n,nx,ny,nz,kper,lper,mper;
const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
pow(-log(EPS_HOC),0.25));
const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
pow(-log(EPS_HOC),0.25));
const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
pow(-log(EPS_HOC),0.25));
const int twoorder = 2*order;
n = 0;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk != 0.0) {
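// numerator is 4*pi/k^2 (12.5663706 = 4*pi)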
numerator = 12.5663706/sqk;
denominator = gf_denom(snx,sny,snz);
sum1 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
dot2 = qx*qx+qy*qy+qz*qz;
sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
}
}
}
greensfn[n++] = numerator*sum1/denominator;
} else greensfn[n++] = 0.0;
}
}
}
}
/* ----------------------------------------------------------------------
pre-compute modified (Hockney-Eastwood) Coulomb Green's function
for a triclinic system
------------------------------------------------------------------------- */
void PPPM::compute_gf_ik_triclinic()
{
double snx,sny,snz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,dot1,dot2;
double numerator,denominator;
double sqk;
int k,l,m,n,nx,ny,nz,kper,lper,mper;
double tmp[3];
tmp[0] = (g_ewald/(MY_PI*nx_pppm)) * pow(-log(EPS_HOC),0.25);
tmp[1] = (g_ewald/(MY_PI*ny_pppm)) * pow(-log(EPS_HOC),0.25);
tmp[2] = (g_ewald/(MY_PI*nz_pppm)) * pow(-log(EPS_HOC),0.25);
lamda2xT(&tmp[0],&tmp[0]);
const int nbx = static_cast<int> (tmp[0]);
const int nby = static_cast<int> (tmp[1]);
const int nbz = static_cast<int> (tmp[2]);
const int twoorder = 2*order;
n = 0;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
snz = square(sin(MY_PI*mper/nz_pppm));
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
sny = square(sin(MY_PI*lper/ny_pppm));
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
snx = square(sin(MY_PI*kper/nx_pppm));
double unitk_lamda[3];
unitk_lamda[0] = 2.0*MY_PI*kper;
unitk_lamda[1] = 2.0*MY_PI*lper;
unitk_lamda[2] = 2.0*MY_PI*mper;
x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]);
if (sqk != 0.0) {
numerator = 12.5663706/sqk;
denominator = gf_denom(snx,sny,snz);
sum1 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
argx = MY_PI*kper/nx_pppm + MY_PI*nx;
wx = powsinxx(argx,twoorder);
for (ny = -nby; ny <= nby; ny++) {
argy = MY_PI*lper/ny_pppm + MY_PI*ny;
wy = powsinxx(argy,twoorder);
for (nz = -nbz; nz <= nbz; nz++) {
argz = MY_PI*mper/nz_pppm + MY_PI*nz;
wz = powsinxx(argz,twoorder);
double b[3];
b[0] = 2.0*MY_PI*nx_pppm*nx;
b[1] = 2.0*MY_PI*ny_pppm*ny;
b[2] = 2.0*MY_PI*nz_pppm*nz;
x2lamdaT(&b[0],&b[0]);
qx = unitk_lamda[0]+b[0];
sx = exp(-0.25*square(qx/g_ewald));
qy = unitk_lamda[1]+b[1];
sy = exp(-0.25*square(qy/g_ewald));
qz = unitk_lamda[2]+b[2];
sz = exp(-0.25*square(qz/g_ewald));
dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz;
dot2 = qx*qx+qy*qy+qz*qz;
sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
}
}
}
greensfn[n++] = numerator*sum1/denominator;
} else greensfn[n++] = 0.0;
}
}
}
}
/* ----------------------------------------------------------------------
compute optimized Green's function for energy calculation
------------------------------------------------------------------------- */
void PPPM::compute_gf_ad()
{
const double * const prd = domain->prd;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
double snx,sny,snz,sqk;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double numerator,denominator;
int k,l,m,n,kper,lper,mper;
const int twoorder = 2*order;
for (int i = 0; i < 6; i++) sf_coeff[i] = 0.0;
n = 0;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
qz = unitkz*mper;
snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
qy = unitky*lper;
sny = square(sin(0.5*qy*yprd/ny_pppm));
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
qx = unitkx*kper;
snx = square(sin(0.5*qx*xprd/nx_pppm));
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
sqk = qx*qx + qy*qy + qz*qz;
if (sqk != 0.0) {
numerator = MY_4PI/sqk;
denominator = gf_denom(snx,sny,snz);
greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
n++;
} else {
greensfn[n] = 0.0;
sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
n++;
}
}
}
}
// compute the coefficients for the self-force correction
double prex, prey, prez;
prex = prey = prez = MY_PI/volume;
prex *= nx_pppm/xprd;
prey *= ny_pppm/yprd;
prez *= nz_pppm/zprd_slab;
sf_coeff[0] *= prex;
sf_coeff[1] *= prex*2;
sf_coeff[2] *= prey;
sf_coeff[3] *= prey*2;
sf_coeff[4] *= prez;
sf_coeff[5] *= prez*2;
// communicate values with other procs
double tmp[6];
MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world);
for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n];
}
/* ----------------------------------------------------------------------
compute self force coefficients for ad-differentiation scheme
------------------------------------------------------------------------- */
void PPPM::compute_sf_precoeff()
{
int i,k,l,m,n;
int nx,ny,nz,kper,lper,mper;
double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2;
double u0,u1,u2,u3,u4,u5,u6;
double sum1,sum2,sum3,sum4,sum5,sum6;
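// for every k-vector, the sums below pair the central stencil weight u0
// with weights shifted by one and two grid images in x, y, or z; the six
// results become the per-point prefactors sf_precoeff1..6 of the
// self-force sine series assembled in compute_gf_ad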
n = 0;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
for (i = 0; i < 5; i++) {
qx0 = MY_2PI*(kper+nx_pppm*(i-2));
qx1 = MY_2PI*(kper+nx_pppm*(i-1));
qx2 = MY_2PI*(kper+nx_pppm*(i ));
wx0[i] = powsinxx(0.5*qx0/nx_pppm,order);
wx1[i] = powsinxx(0.5*qx1/nx_pppm,order);
wx2[i] = powsinxx(0.5*qx2/nx_pppm,order);
qy0 = MY_2PI*(lper+ny_pppm*(i-2));
qy1 = MY_2PI*(lper+ny_pppm*(i-1));
qy2 = MY_2PI*(lper+ny_pppm*(i ));
wy0[i] = powsinxx(0.5*qy0/ny_pppm,order);
wy1[i] = powsinxx(0.5*qy1/ny_pppm,order);
wy2[i] = powsinxx(0.5*qy2/ny_pppm,order);
qz0 = MY_2PI*(mper+nz_pppm*(i-2));
qz1 = MY_2PI*(mper+nz_pppm*(i-1));
qz2 = MY_2PI*(mper+nz_pppm*(i ));
wz0[i] = powsinxx(0.5*qz0/nz_pppm,order);
wz1[i] = powsinxx(0.5*qz1/nz_pppm,order);
wz2[i] = powsinxx(0.5*qz2/nz_pppm,order);
}
for (nx = 0; nx < 5; nx++) {
for (ny = 0; ny < 5; ny++) {
for (nz = 0; nz < 5; nz++) {
u0 = wx0[nx]*wy0[ny]*wz0[nz];
u1 = wx1[nx]*wy0[ny]*wz0[nz];
u2 = wx2[nx]*wy0[ny]*wz0[nz];
u3 = wx0[nx]*wy1[ny]*wz0[nz];
u4 = wx0[nx]*wy2[ny]*wz0[nz];
u5 = wx0[nx]*wy0[ny]*wz1[nz];
u6 = wx0[nx]*wy0[ny]*wz2[nz];
sum1 += u0*u1;
sum2 += u0*u2;
sum3 += u0*u3;
sum4 += u0*u4;
sum5 += u0*u5;
sum6 += u0*u6;
}
}
}
// store values
sf_precoeff1[n] = sum1;
sf_precoeff2[n] = sum2;
sf_precoeff3[n] = sum3;
sf_precoeff4[n] = sum4;
sf_precoeff5[n] = sum5;
sf_precoeff6[n++] = sum6;
}
}
}
}
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void PPPM::particle_map()
{
int nx,ny,nz;
double **x = atom->x;
int nlocal = atom->nlocal;
int flag = 0;
- if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
+ if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
for (int i = 0; i < nlocal; i++) {
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
// add/subtract OFFSET to avoid int(-0.75) = 0 when we want it to be -1
nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
part2grid[i][0] = nx;
part2grid[i][1] = ny;
part2grid[i][2] = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
nz+nlower < nzlo_out || nz+nupper > nzhi_out)
flag = 1;
}
if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void PPPM::make_rho()
{
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
// clear 3d density array
memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
ngrid*sizeof(FFT_SCALAR));
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
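// each charge deposits q[i]*delvolinv*rho1d[0][l]*rho1d[1][m]*rho1d[2][n]
// at stencil point (mx,my,mz); z0, y0, x0 below carry the partial
// products across the loop nesting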
double *q = atom->q;
double **x = atom->x;
int nlocal = atom->nlocal;
for (int i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
z0 = delvolinv * q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
density_brick[mz][my][mx] += x0*rho1d[0][l];
}
}
}
}
}
/* ----------------------------------------------------------------------
remap density from 3d brick decomposition to FFT decomposition
------------------------------------------------------------------------- */
void PPPM::brick2fft()
{
int n,ix,iy,iz;
// copy grabs inner portion of density from 3d brick
// remap could be done as pre-stage of FFT,
// but this works optimally on only double values, not complex values
n = 0;
for (iz = nzlo_in; iz <= nzhi_in; iz++)
for (iy = nylo_in; iy <= nyhi_in; iy++)
for (ix = nxlo_in; ix <= nxhi_in; ix++)
density_fft[n++] = density_brick[iz][iy][ix];
remap->perform(density_fft,density_fft,work1);
}
/* ----------------------------------------------------------------------
FFT-based Poisson solver
------------------------------------------------------------------------- */
void PPPM::poisson()
{
if (differentiation_flag == 1) poisson_ad();
else poisson_ik();
}
/* ----------------------------------------------------------------------
FFT-based Poisson solver for ik
------------------------------------------------------------------------- */
void PPPM::poisson_ik()
{
int i,j,k,n;
double eng;
// transform charge density (r -> k)
n = 0;
for (i = 0; i < nfft; i++) {
work1[n++] = density_fft[i];
work1[n++] = ZEROF;
}
fft1->compute(work1,work1,1);
// global energy and virial contribution
double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
double s2 = scaleinv*scaleinv;
if (eflag_global || vflag_global) {
if (vflag_global) {
n = 0;
for (i = 0; i < nfft; i++) {
eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
if (eflag_global) energy += eng;
n += 2;
}
} else {
n = 0;
for (i = 0; i < nfft; i++) {
energy +=
s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
n += 2;
}
}
}
// scale by 1/total-grid-pts to get rho(k)
// multiply by Green's function to get V(k)
n = 0;
for (i = 0; i < nfft; i++) {
work1[n++] *= scaleinv * greensfn[i];
work1[n++] *= scaleinv * greensfn[i];
}
// extra FFTs for per-atom energy/virial
if (evflag_atom) poisson_peratom();
// triclinic system
if (triclinic) {
poisson_ik_triclinic();
return;
}
// compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
// FFT leaves data in 3d brick decomposition
// copy it into inner portion of vdx,vdy,vdz arrays
// x direction gradient
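// multiply V(k) by -i*k_x, i.e. (re,im) -> (k_x*im, -k_x*re), then
// inverse-FFT to obtain E_x = -dV/dx on the brick; the y and z gradients
// below follow the same pattern with fky and fkz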
n = 0;
for (k = nzlo_fft; k <= nzhi_fft; k++)
for (j = nylo_fft; j <= nyhi_fft; j++)
for (i = nxlo_fft; i <= nxhi_fft; i++) {
work2[n] = fkx[i]*work1[n+1];
work2[n+1] = -fkx[i]*work1[n];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
vdx_brick[k][j][i] = work2[n];
n += 2;
}
// y direction gradient
n = 0;
for (k = nzlo_fft; k <= nzhi_fft; k++)
for (j = nylo_fft; j <= nyhi_fft; j++)
for (i = nxlo_fft; i <= nxhi_fft; i++) {
work2[n] = fky[j]*work1[n+1];
work2[n+1] = -fky[j]*work1[n];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
vdy_brick[k][j][i] = work2[n];
n += 2;
}
// z direction gradient
n = 0;
for (k = nzlo_fft; k <= nzhi_fft; k++)
for (j = nylo_fft; j <= nyhi_fft; j++)
for (i = nxlo_fft; i <= nxhi_fft; i++) {
work2[n] = fkz[k]*work1[n+1];
work2[n+1] = -fkz[k]*work1[n];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
vdz_brick[k][j][i] = work2[n];
n += 2;
}
}
/* ----------------------------------------------------------------------
FFT-based Poisson solver for ik for a triclinic system
------------------------------------------------------------------------- */
void PPPM::poisson_ik_triclinic()
{
int i,j,k,n;
// compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
// FFT leaves data in 3d brick decomposition
// copy it into inner portion of vdx,vdy,vdz arrays
// x direction gradient
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = fkx[i]*work1[n+1];
work2[n+1] = -fkx[i]*work1[n];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
vdx_brick[k][j][i] = work2[n];
n += 2;
}
// y direction gradient
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = fky[i]*work1[n+1];
work2[n+1] = -fky[i]*work1[n];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
vdy_brick[k][j][i] = work2[n];
n += 2;
}
// z direction gradient
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = fkz[i]*work1[n+1];
work2[n+1] = -fkz[i]*work1[n];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
vdz_brick[k][j][i] = work2[n];
n += 2;
}
}
/* ----------------------------------------------------------------------
FFT-based Poisson solver for ad
------------------------------------------------------------------------- */
void PPPM::poisson_ad()
{
int i,j,k,n;
double eng;
// transform charge density (r -> k)
n = 0;
for (i = 0; i < nfft; i++) {
work1[n++] = density_fft[i];
work1[n++] = ZEROF;
}
fft1->compute(work1,work1,1);
// global energy and virial contribution
double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
double s2 = scaleinv*scaleinv;
if (eflag_global || vflag_global) {
if (vflag_global) {
n = 0;
for (i = 0; i < nfft; i++) {
eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
if (eflag_global) energy += eng;
n += 2;
}
} else {
n = 0;
for (i = 0; i < nfft; i++) {
energy +=
s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
n += 2;
}
}
}
// scale by 1/total-grid-pts to get rho(k)
// multiply by Green's function to get V(k)
n = 0;
for (i = 0; i < nfft; i++) {
work1[n++] *= scaleinv * greensfn[i];
work1[n++] *= scaleinv * greensfn[i];
}
// extra FFTs for per-atom energy/virial
if (vflag_atom) poisson_peratom();
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n];
work2[n+1] = work1[n+1];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
u_brick[k][j][i] = work2[n];
n += 2;
}
}
/* ----------------------------------------------------------------------
FFT-based Poisson solver for per-atom energy/virial
------------------------------------------------------------------------- */
void PPPM::poisson_peratom()
{
int i,j,k,n;
// energy
if (eflag_atom && differentiation_flag != 1) {
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n];
work2[n+1] = work1[n+1];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
u_brick[k][j][i] = work2[n];
n += 2;
}
}
// 6 components of virial in v0 thru v5
if (!vflag_atom) return;
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n]*vg[i][0];
work2[n+1] = work1[n+1]*vg[i][0];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
v0_brick[k][j][i] = work2[n];
n += 2;
}
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n]*vg[i][1];
work2[n+1] = work1[n+1]*vg[i][1];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
v1_brick[k][j][i] = work2[n];
n += 2;
}
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n]*vg[i][2];
work2[n+1] = work1[n+1]*vg[i][2];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
v2_brick[k][j][i] = work2[n];
n += 2;
}
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n]*vg[i][3];
work2[n+1] = work1[n+1]*vg[i][3];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
v3_brick[k][j][i] = work2[n];
n += 2;
}
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n]*vg[i][4];
work2[n+1] = work1[n+1]*vg[i][4];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
v4_brick[k][j][i] = work2[n];
n += 2;
}
n = 0;
for (i = 0; i < nfft; i++) {
work2[n] = work1[n]*vg[i][5];
work2[n+1] = work1[n+1]*vg[i][5];
n += 2;
}
fft2->compute(work2,work2,-1);
n = 0;
for (k = nzlo_in; k <= nzhi_in; k++)
for (j = nylo_in; j <= nyhi_in; j++)
for (i = nxlo_in; i <= nxhi_in; i++) {
v5_brick[k][j][i] = work2[n];
n += 2;
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles
------------------------------------------------------------------------- */
void PPPM::fieldforce()
{
if (differentiation_flag == 1) fieldforce_ad();
else fieldforce_ik();
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ik
------------------------------------------------------------------------- */
void PPPM::fieldforce_ik()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = qqrd2e * scale * q[i];
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
if (slabflag != 2) f[i][2] += qfactor*ekz;
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ad
------------------------------------------------------------------------- */
void PPPM::fieldforce_ad()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz;
FFT_SCALAR ekx,eky,ekz;
double s1,s2,s3;
double sf = 0.0;
double *prd;
prd = domain->prd;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double hx_inv = nx_pppm/xprd;
double hy_inv = ny_pppm/yprd;
double hz_inv = nz_pppm/zprd;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
compute_drho1d(dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
for (m = nlower; m <= nupper; m++) {
my = m+ny;
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
// convert E-field to force and subtract self forces
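// the ad scheme produces a spurious self force on each charge; it is
// removed with the precomputed sine series sf_coeff[0..5] (see
// compute_gf_ad), scaled by 2*q[i]^2 per direction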
const double qfactor = qqrd2e * scale;
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf = sf_coeff[0]*sin(2*MY_PI*s1);
sf += sf_coeff[1]*sin(4*MY_PI*s1);
sf *= 2*q[i]*q[i];
f[i][0] += qfactor*(ekx*q[i] - sf);
sf = sf_coeff[2]*sin(2*MY_PI*s2);
sf += sf_coeff[3]*sin(4*MY_PI*s2);
sf *= 2*q[i]*q[i];
f[i][1] += qfactor*(eky*q[i] - sf);
sf = sf_coeff[4]*sin(2*MY_PI*s3);
sf += sf_coeff[5]*sin(4*MY_PI*s3);
sf *= 2*q[i]*q[i];
if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial
------------------------------------------------------------------------- */
void PPPM::fieldforce_peratom()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
double *q = atom->q;
double **x = atom->x;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
if (eflag_atom) u += x0*u_brick[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick[mz][my][mx];
v1 += x0*v1_brick[mz][my][mx];
v2 += x0*v2_brick[mz][my][mx];
v3 += x0*v3_brick[mz][my][mx];
v4 += x0*v4_brick[mz][my][mx];
v5 += x0*v5_brick[mz][my][mx];
}
}
}
}
if (eflag_atom) eatom[i] += q[i]*u;
if (vflag_atom) {
vatom[i][0] += q[i]*v0;
vatom[i][1] += q[i]*v1;
vatom[i][2] += q[i]*v2;
vatom[i][3] += q[i]*v3;
vatom[i][4] += q[i]*v4;
vatom[i][5] += q[i]*v5;
}
}
}
/* ----------------------------------------------------------------------
pack own values to buf to send to another proc
------------------------------------------------------------------------- */
void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
int n = 0;
if (flag == FORWARD_IK) {
FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++) {
buf[n++] = xsrc[list[i]];
buf[n++] = ysrc[list[i]];
buf[n++] = zsrc[list[i]];
}
} else if (flag == FORWARD_AD) {
FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
} else if (flag == FORWARD_IK_PERATOM) {
FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++) {
if (eflag_atom) buf[n++] = esrc[list[i]];
if (vflag_atom) {
buf[n++] = v0src[list[i]];
buf[n++] = v1src[list[i]];
buf[n++] = v2src[list[i]];
buf[n++] = v3src[list[i]];
buf[n++] = v4src[list[i]];
buf[n++] = v5src[list[i]];
}
}
} else if (flag == FORWARD_AD_PERATOM) {
FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++) {
buf[n++] = v0src[list[i]];
buf[n++] = v1src[list[i]];
buf[n++] = v2src[list[i]];
buf[n++] = v3src[list[i]];
buf[n++] = v4src[list[i]];
buf[n++] = v5src[list[i]];
}
}
}
/* ----------------------------------------------------------------------
unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */
void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
int n = 0;
if (flag == FORWARD_IK) {
FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++) {
xdest[list[i]] = buf[n++];
ydest[list[i]] = buf[n++];
zdest[list[i]] = buf[n++];
}
} else if (flag == FORWARD_AD) {
FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
dest[list[i]] = buf[i];
  } else if (flag == FORWARD_IK_PERATOM) {
    FFT_SCALAR *edest = &u_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v0dest = &v0_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v1dest = &v1_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v2dest = &v2_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v3dest = &v3_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v4dest = &v4_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v5dest = &v5_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++) {
      if (eflag_atom) edest[list[i]] = buf[n++];
      if (vflag_atom) {
        v0dest[list[i]] = buf[n++];
        v1dest[list[i]] = buf[n++];
        v2dest[list[i]] = buf[n++];
        v3dest[list[i]] = buf[n++];
        v4dest[list[i]] = buf[n++];
        v5dest[list[i]] = buf[n++];
      }
    }
  } else if (flag == FORWARD_AD_PERATOM) {
    FFT_SCALAR *v0dest = &v0_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v1dest = &v1_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v2dest = &v2_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v3dest = &v3_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v4dest = &v4_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v5dest = &v5_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++) {
      v0dest[list[i]] = buf[n++];
      v1dest[list[i]] = buf[n++];
      v2dest[list[i]] = buf[n++];
      v3dest[list[i]] = buf[n++];
      v4dest[list[i]] = buf[n++];
      v5dest[list[i]] = buf[n++];
    }
  }
}
/* ----------------------------------------------------------------------
pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */
void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
if (flag == REVERSE_RHO) {
FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
}
}
/* ----------------------------------------------------------------------
unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */
void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
if (flag == REVERSE_RHO) {
FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
dest[list[i]] += buf[i];
}
}
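/* ----------------------------------------------------------------------
   note on the pack/unpack routines above: each 3d brick is allocated as
   one contiguous block, so taking the address of its lowest ghost corner
   turns it into a flat array and list[] can hold precomputed linear
   offsets valid for every brick with the same extents.  A minimal sketch
   of the equivalence (illustrative only, hypothetical sizes NX,NY,NZ;
   x varies fastest as in the real bricks):

     // FFT_SCALAR brick[NZ][NY][NX];          // contiguous storage
     // FFT_SCALAR *flat = &brick[0][0][0];
     // flat[(z*NY + y)*NX + x] == brick[z][y][x]
------------------------------------------------------------------------- */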
/* ----------------------------------------------------------------------
map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
------------------------------------------------------------------------- */
void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
{
// loop thru all possible factorizations of nprocs
// surf = surface area of largest proc sub-domain
// innermost if test minimizes surface area and surface/volume ratio
int bestsurf = 2 * (nx + ny);
int bestboxx = 0;
int bestboxy = 0;
int boxx,boxy,surf,ipx,ipy;
ipx = 1;
while (ipx <= nprocs) {
if (nprocs % ipx == 0) {
ipy = nprocs/ipx;
boxx = nx/ipx;
if (nx % ipx) boxx++;
boxy = ny/ipy;
if (ny % ipy) boxy++;
surf = boxx + boxy;
if (surf < bestsurf ||
(surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
bestsurf = surf;
bestboxx = boxx;
bestboxy = boxy;
*px = ipx;
*py = ipy;
}
}
ipx++;
}
}
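/* ----------------------------------------------------------------------
   worked example for procs2grid2d() (illustrative only): nprocs = 6 on
   an nx = 10 by ny = 4 grid tests the factorizations 1x6, 2x3, 3x2, 6x1:
     1x6: boxx = 10, boxy = 1  ->  surf = 11
     2x3: boxx =  5, boxy = 2  ->  surf =  7
     3x2: boxx =  4, boxy = 2  ->  surf =  6   (new minimum)
     6x1: boxx =  2, boxy = 4  ->  surf =  6   (tie, boxx*boxy not larger)
   so the routine returns px = 3, py = 2.
------------------------------------------------------------------------- */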
/* ----------------------------------------------------------------------
charge assignment into rho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
const FFT_SCALAR &dz)
{
int k,l;
FFT_SCALAR r1,r2,r3;
for (k = (1-order)/2; k <= order/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = order-1; l >= 0; l--) {
r1 = rho_coeff[l][k] + r1*dx;
r2 = rho_coeff[l][k] + r2*dy;
r3 = rho_coeff[l][k] + r3*dz;
}
rho1d[0][k] = r1;
rho1d[1][k] = r2;
rho1d[2][k] = r3;
}
}
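/* ----------------------------------------------------------------------
   note on compute_rho1d(): the inner loop is Horner's rule, i.e. for
   each stencil offset k it evaluates the polynomial weight

     rho1d[0][k] = Sum_{l=0}^{order-1} rho_coeff[l][k] * dx^l

   with one multiply-add per coefficient, and likewise for dy and dz.
------------------------------------------------------------------------- */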
/* ----------------------------------------------------------------------
charge assignment into drho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
const FFT_SCALAR &dz)
{
int k,l;
FFT_SCALAR r1,r2,r3;
for (k = (1-order)/2; k <= order/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = order-2; l >= 0; l--) {
r1 = drho_coeff[l][k] + r1*dx;
r2 = drho_coeff[l][k] + r2*dy;
r3 = drho_coeff[l][k] + r3*dz;
}
drho1d[0][k] = r1;
drho1d[1][k] = r2;
drho1d[2][k] = r3;
}
}
/* ----------------------------------------------------------------------
   generate coefficients for the weight function of order n

              (n-1)
     Wn(x) =   Sum    wn(k,x) , Sum is over every other integer
            k=-(n-1)

   For k = -(n-1), -(n-1)+2, ....., (n-1)-2, n-1
     k is odd integers if n is even and even integers if n is odd

                ---
               |  n-1
               |  Sum  a(l,k)*(x-k/2)**l   if abs(x-k/2) < 1/2
     wn(k,x) = <  l=0
               |
               |  0                        otherwise
                ---

   a coefficients are packed into the array rho_coeff to eliminate zeros
     rho_coeff(l,(k+mod(n+1,2))/2) = a(l,k)
------------------------------------------------------------------------- */
void PPPM::compute_rho_coeff()
{
int j,k,l,m;
FFT_SCALAR s;
FFT_SCALAR **a;
memory->create2d_offset(a,order,-order,order,"pppm:a");
for (k = -order; k <= order; k++)
for (l = 0; l < order; l++)
a[l][k] = 0.0;
a[0][0] = 1.0;
for (j = 1; j < order; j++) {
for (k = -j; k <= j; k += 2) {
s = 0.0;
for (l = 0; l < j; l++) {
a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
#ifdef FFT_SINGLE
s += powf(0.5,(float) l+1) *
(a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
#else
s += pow(0.5,(double) l+1) *
(a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
#endif
}
a[0][k] = s;
}
}
m = (1-order)/2;
for (k = -(order-1); k < order; k += 2) {
for (l = 0; l < order; l++)
rho_coeff[l][m] = a[l][k];
for (l = 1; l < order; l++)
drho_coeff[l-1][m] = l*a[l][k];
m++;
}
memory->destroy2d_offset(a,-order);
}
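/* ----------------------------------------------------------------------
   worked example for compute_rho_coeff() (illustrative only): for
   order = 2 the recursion gives a(0,-1) = a(0,1) = 1/2, a(1,-1) = 1,
   a(1,1) = -1, i.e. the two stencil weights

     w(-1,x) = 1/2 + x     w(+1,x) = 1/2 - x     for x in [-1/2,1/2]

   which are the classic linear (cloud-in-cell) assignment weights:
   non-negative and summing to 1, as required.
------------------------------------------------------------------------- */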
/* ----------------------------------------------------------------------
Slab-geometry correction term to dampen inter-slab interactions between
periodically repeating slabs. Yields good approximation to 2D Ewald if
adequate empty space is left between repeating slabs (J. Chem. Phys.
111, 3155). Slabs defined here to be parallel to the xy plane. Also
extended to non-neutral systems (J. Chem. Phys. 131, 094107).
------------------------------------------------------------------------- */
void PPPM::slabcorr()
{
// compute local contribution to global dipole moment
double *q = atom->q;
double **x = atom->x;
double zprd = domain->zprd;
int nlocal = atom->nlocal;
double dipole = 0.0;
for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
// sum local contributions to get global dipole moment
double dipole_all;
MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
// need to make non-neutral systems and/or
// per-atom energy translationally invariant
double dipole_r2 = 0.0;
if (eflag_atom || fabs(qsum) > SMALL) {
for (int i = 0; i < nlocal; i++)
dipole_r2 += q[i]*x[i][2]*x[i][2];
// sum local contributions
double tmp;
MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
dipole_r2 = tmp;
}
// compute corrections
const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
const double qscale = qqrd2e * scale;
if (eflag_global) energy += qscale * e_slabcorr;
// per-atom energy
if (eflag_atom) {
double efact = qscale * MY_2PI/volume;
for (int i = 0; i < nlocal; i++)
eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
}
// add on force corrections
double ffact = qscale * (-4.0*MY_PI/volume);
double **f = atom->f;
for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
}
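/* ----------------------------------------------------------------------
   note on slabcorr(): written out, the energy added above is

     E_slab = (2*pi/V) * ( M_z^2 - q_tot * Sum_i q_i*z_i^2
                                 - q_tot^2 * L_z^2 / 12 )

   with M_z = dipole_all, q_tot = qsum, and L_z = zprd; the force
   correction applied to each atom is -dE_slab/dz_i =
   -(4*pi/V) * q_i * (M_z - q_tot*z_i), matching ffact above.
------------------------------------------------------------------------- */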
/* ----------------------------------------------------------------------
perform and time the 1d FFTs required for N timesteps
------------------------------------------------------------------------- */
int PPPM::timing_1d(int n, double &time1d)
{
double time1,time2;
for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
MPI_Barrier(world);
time1 = MPI_Wtime();
for (int i = 0; i < n; i++) {
fft1->timing1d(work1,nfft_both,1);
fft2->timing1d(work1,nfft_both,-1);
if (differentiation_flag != 1) {
fft2->timing1d(work1,nfft_both,-1);
fft2->timing1d(work1,nfft_both,-1);
}
}
MPI_Barrier(world);
time2 = MPI_Wtime();
time1d = time2 - time1;
if (differentiation_flag) return 2;
return 4;
}
/* ----------------------------------------------------------------------
perform and time the 3d FFTs required for N timesteps
------------------------------------------------------------------------- */
int PPPM::timing_3d(int n, double &time3d)
{
double time1,time2;
for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
MPI_Barrier(world);
time1 = MPI_Wtime();
for (int i = 0; i < n; i++) {
fft1->compute(work1,work1,1);
fft2->compute(work1,work1,-1);
if (differentiation_flag != 1) {
fft2->compute(work1,work1,-1);
fft2->compute(work1,work1,-1);
}
}
MPI_Barrier(world);
time2 = MPI_Wtime();
time3d = time2 - time1;
if (differentiation_flag) return 2;
return 4;
}
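/* ----------------------------------------------------------------------
   note on the return values of timing_1d()/timing_3d(): per timestep
   the ad (analytic differentiation) path costs 1 forward + 1 inverse
   FFT, while the ik path costs 1 forward + 3 inverse FFTs (one per
   E-field component), so the loops above execute 2 or 4 transforms and
   return 2 or 4 accordingly.
------------------------------------------------------------------------- */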
/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */
double PPPM::memory_usage()
{
double bytes = nmax*3 * sizeof(double);
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
if (differentiation_flag == 1) {
bytes += 2 * nbrick * sizeof(FFT_SCALAR);
} else {
bytes += 4 * nbrick * sizeof(FFT_SCALAR);
}
if (triclinic) bytes += 3 * nfft_both * sizeof(double);
bytes += 6 * nfft_both * sizeof(double);
bytes += nfft_both * sizeof(double);
bytes += nfft_both*5 * sizeof(FFT_SCALAR);
if (peratom_allocate_flag)
bytes += 6 * nbrick * sizeof(FFT_SCALAR);
if (group_allocate_flag) {
bytes += 2 * nbrick * sizeof(FFT_SCALAR);
    bytes += 2 * nfft_both * sizeof(FFT_SCALAR);
}
bytes += cg->memory_usage();
return bytes;
}
/* ----------------------------------------------------------------------
group-group interactions
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
compute the PPPM total long-range force and energy for groups A and B
------------------------------------------------------------------------- */
void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
{
if (slabflag && triclinic)
error->all(FLERR,"Cannot (yet) use K-space slab "
"correction with compute group/group for triclinic systems");
if (differentiation_flag)
error->all(FLERR,"Cannot (yet) use kspace_modify "
"diff ad with compute group/group");
if (!group_allocate_flag) allocate_groups();
// convert atoms from box to lamda coords
if (triclinic == 0) boxlo = domain->boxlo;
else {
boxlo = domain->boxlo_lamda;
domain->x2lamda(atom->nlocal);
}
e2group = 0.0; //energy
f2group[0] = 0.0; //force in x-direction
f2group[1] = 0.0; //force in y-direction
f2group[2] = 0.0; //force in z-direction
// map my particle charge onto my local 3d density grid
make_rho_groups(groupbit_A,groupbit_B,AA_flag);
// all procs communicate density values from their ghost cells
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
// temporarily store and switch pointers so we can
// use brick2fft() for groups A and B (without
// writing an additional function)
FFT_SCALAR ***density_brick_real = density_brick;
FFT_SCALAR *density_fft_real = density_fft;
// group A
density_brick = density_A_brick;
density_fft = density_A_fft;
cg->reverse_comm(this,REVERSE_RHO);
brick2fft();
// group B
density_brick = density_B_brick;
density_fft = density_B_fft;
cg->reverse_comm(this,REVERSE_RHO);
brick2fft();
// switch back pointers
density_brick = density_brick_real;
density_fft = density_fft_real;
// compute potential gradient on my FFT grid and
// portion of group-group energy/force on this proc's FFT grid
poisson_groups(AA_flag);
const double qscale = qqrd2e * scale;
// total group A <--> group B energy
// self and boundary correction terms are in compute_group_group.cpp
double e2group_all;
MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world);
e2group = e2group_all;
e2group *= qscale*0.5*volume;
// total group A <--> group B force
double f2group_all[3];
MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world);
f2group[0] = qscale*volume*f2group_all[0];
f2group[1] = qscale*volume*f2group_all[1];
if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2];
// convert atoms back from lamda to box coords
if (triclinic) domain->lamda2x(atom->nlocal);
if (slabflag == 1)
slabcorr_groups(groupbit_A, groupbit_B, AA_flag);
}
/* ----------------------------------------------------------------------
allocate group-group memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPM::allocate_groups()
{
group_allocate_flag = 1;
memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:density_A_brick");
memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:density_B_brick");
memory->create(density_A_fft,nfft_both,"pppm:density_A_fft");
memory->create(density_B_fft,nfft_both,"pppm:density_B_fft");
}
/* ----------------------------------------------------------------------
deallocate group-group memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPM::deallocate_groups()
{
group_allocate_flag = 0;
memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy(density_A_fft);
memory->destroy(density_B_fft);
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid for group-group interactions
------------------------------------------------------------------------- */
void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int AA_flag)
{
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
// clear 3d density arrays
memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
ngrid*sizeof(FFT_SCALAR));
memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
ngrid*sizeof(FFT_SCALAR));
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
double *q = atom->q;
double **x = atom->x;
int nlocal = atom->nlocal;
int *mask = atom->mask;
for (int i = 0; i < nlocal; i++) {
if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
if (AA_flag) continue;
if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
z0 = delvolinv * q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
// group A
if (mask[i] & groupbit_A)
density_A_brick[mz][my][mx] += x0*rho1d[0][l];
// group B
if (mask[i] & groupbit_B)
density_B_brick[mz][my][mx] += x0*rho1d[0][l];
}
}
}
}
}
}
/* ----------------------------------------------------------------------
FFT-based Poisson solver for group-group interactions
------------------------------------------------------------------------- */
void PPPM::poisson_groups(int AA_flag)
{
int i,j,k,n;
// reuse memory (already declared)
FFT_SCALAR *work_A = work1;
FFT_SCALAR *work_B = work2;
// transform charge density (r -> k)
// group A
n = 0;
for (i = 0; i < nfft; i++) {
work_A[n++] = density_A_fft[i];
work_A[n++] = ZEROF;
}
fft1->compute(work_A,work_A,1);
// group B
n = 0;
for (i = 0; i < nfft; i++) {
work_B[n++] = density_B_fft[i];
work_B[n++] = ZEROF;
}
fft1->compute(work_B,work_B,1);
// group-group energy and force contribution,
// keep everything in reciprocal space so
// no inverse FFTs needed
double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
double s2 = scaleinv*scaleinv;
// energy
n = 0;
for (i = 0; i < nfft; i++) {
e2group += s2 * greensfn[i] *
(work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]);
n += 2;
}
if (AA_flag) return;
// multiply by Green's function and s2
// (only for work_A so it is not squared below)
n = 0;
for (i = 0; i < nfft; i++) {
work_A[n++] *= s2 * greensfn[i];
work_A[n++] *= s2 * greensfn[i];
}
// triclinic system
if (triclinic) {
poisson_groups_triclinic();
return;
}
double partial_group;
// force, x direction
n = 0;
for (k = nzlo_fft; k <= nzhi_fft; k++)
for (j = nylo_fft; j <= nyhi_fft; j++)
for (i = nxlo_fft; i <= nxhi_fft; i++) {
partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
f2group[0] += fkx[i] * partial_group;
n += 2;
}
// force, y direction
n = 0;
for (k = nzlo_fft; k <= nzhi_fft; k++)
for (j = nylo_fft; j <= nyhi_fft; j++)
for (i = nxlo_fft; i <= nxhi_fft; i++) {
partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
f2group[1] += fky[j] * partial_group;
n += 2;
}
// force, z direction
n = 0;
for (k = nzlo_fft; k <= nzhi_fft; k++)
for (j = nylo_fft; j <= nyhi_fft; j++)
for (i = nxlo_fft; i <= nxhi_fft; i++) {
partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
f2group[2] += fkz[k] * partial_group;
n += 2;
}
}
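/* ----------------------------------------------------------------------
   note on the force loops above: with work_A = (Re A, Im A) and
   work_B = (Re B, Im B) at each k-point,

     partial_group = Im(A)*Re(B) - Re(A)*Im(B) = Im( A * conj(B) )

   so each force component accumulates Sum_k fk_d * Im(A B*), where the
   factor s2 * greensfn has already been folded into work_A.
------------------------------------------------------------------------- */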
/* ----------------------------------------------------------------------
FFT-based Poisson solver for group-group interactions
for a triclinic system
------------------------------------------------------------------------- */
void PPPM::poisson_groups_triclinic()
{
int i,n;
// reuse memory (already declared)
FFT_SCALAR *work_A = work1;
FFT_SCALAR *work_B = work2;
double partial_group;
// force, x direction
n = 0;
for (i = 0; i < nfft; i++) {
partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
f2group[0] += fkx[i] * partial_group;
n += 2;
}
// force, y direction
n = 0;
for (i = 0; i < nfft; i++) {
partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
f2group[1] += fky[i] * partial_group;
n += 2;
}
// force, z direction
n = 0;
for (i = 0; i < nfft; i++) {
partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
f2group[2] += fkz[i] * partial_group;
n += 2;
}
}
/* ----------------------------------------------------------------------
Slab-geometry correction term to dampen inter-slab interactions between
periodically repeating slabs. Yields good approximation to 2D Ewald if
adequate empty space is left between repeating slabs (J. Chem. Phys.
111, 3155). Slabs defined here to be parallel to the xy plane. Also
extended to non-neutral systems (J. Chem. Phys. 131, 094107).
------------------------------------------------------------------------- */
void PPPM::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag)
{
// compute local contribution to global dipole moment
double *q = atom->q;
double **x = atom->x;
double zprd = domain->zprd;
int *mask = atom->mask;
int nlocal = atom->nlocal;
double qsum_A = 0.0;
double qsum_B = 0.0;
double dipole_A = 0.0;
double dipole_B = 0.0;
double dipole_r2_A = 0.0;
double dipole_r2_B = 0.0;
for (int i = 0; i < nlocal; i++) {
if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
if (AA_flag) continue;
if (mask[i] & groupbit_A) {
qsum_A += q[i];
dipole_A += q[i]*x[i][2];
dipole_r2_A += q[i]*x[i][2]*x[i][2];
}
if (mask[i] & groupbit_B) {
qsum_B += q[i];
dipole_B += q[i]*x[i][2];
dipole_r2_B += q[i]*x[i][2]*x[i][2];
}
}
// sum local contributions to get total charge and global dipole moment
// for each group
double tmp;
MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
qsum_A = tmp;
MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
qsum_B = tmp;
MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
dipole_A = tmp;
MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
dipole_B = tmp;
MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
dipole_r2_A = tmp;
MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
dipole_r2_B = tmp;
// compute corrections
const double qscale = qqrd2e * scale;
const double efact = qscale * MY_2PI/volume;
e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B +
qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0);
// add on force corrections
const double ffact = qscale * (-4.0*MY_PI/volume);
f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A);
}
diff --git a/src/KSPACE/pppm_cg.cpp b/src/KSPACE/pppm_cg.cpp
index f07f38b4b..6fcdb438a 100644
--- a/src/KSPACE/pppm_cg.cpp
+++ b/src/KSPACE/pppm_cg.cpp
@@ -1,737 +1,737 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include <mpi.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "atom.h"
#include "gridcomm.h"
#include "domain.h"
#include "error.h"
#include "force.h"
#include "neighbor.h"
#include "memory.h"
#include "pppm_cg.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define OFFSET 16384
#define SMALLQ 0.00001
enum{REVERSE_RHO};
enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#else
#define ZEROF 0.0
#endif
/* ---------------------------------------------------------------------- */
PPPMCG::PPPMCG(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg)
{
if ((narg < 1) || (narg > 2))
error->all(FLERR,"Illegal kspace_style pppm/cg command");
triclinic_support = 0;
if (narg == 2) smallq = fabs(force->numeric(FLERR,arg[1]));
else smallq = SMALLQ;
num_charged = -1;
is_charged = NULL;
group_group_enable = 1;
}
/* ----------------------------------------------------------------------
free all memory
------------------------------------------------------------------------- */
PPPMCG::~PPPMCG()
{
memory->destroy(is_charged);
}
/* ----------------------------------------------------------------------
compute the PPPM long-range force, energy, virial
------------------------------------------------------------------------- */
void PPPMCG::compute(int eflag, int vflag)
{
// set energy/virial flags
// invoke allocate_peratom() if needed for first time
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = 0;
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
// convert atoms from box to lamda coords
if (triclinic == 0) boxlo = domain->boxlo;
else {
boxlo = domain->boxlo_lamda;
domain->x2lamda(atom->nlocal);
}
// extend size of per-atom arrays if necessary
if (atom->nlocal > nmax) {
memory->destroy(part2grid);
memory->destroy(is_charged);
nmax = atom->nmax;
memory->create(part2grid,nmax,3,"pppm:part2grid");
memory->create(is_charged,nmax,"pppm/cg:is_charged");
}
// one time setup message
if (num_charged < 0) {
bigint charged_all, charged_num;
double charged_frac, charged_fmax, charged_fmin;
num_charged=0;
for (int i=0; i < atom->nlocal; ++i)
if (fabs(atom->q[i]) > smallq)
++num_charged;
// get fraction of charged particles per domain
if (atom->nlocal > 0)
charged_frac = static_cast<double>(num_charged) * 100.0
/ static_cast<double>(atom->nlocal);
else
charged_frac = 0.0;
MPI_Reduce(&charged_frac,&charged_fmax,1,MPI_DOUBLE,MPI_MAX,0,world);
MPI_Reduce(&charged_frac,&charged_fmin,1,MPI_DOUBLE,MPI_MIN,0,world);
// get fraction of charged particles overall
charged_num = num_charged;
MPI_Reduce(&charged_num,&charged_all,1,MPI_LMP_BIGINT,MPI_SUM,0,world);
charged_frac = static_cast<double>(charged_all) * 100.0
/ static_cast<double>(atom->natoms);
if (me == 0) {
if (screen)
fprintf(screen,
" PPPM/cg optimization cutoff: %g\n"
" Total charged atoms: %.1f%%\n"
" Min/max charged atoms/proc: %.1f%% %.1f%%\n",
smallq,charged_frac,charged_fmin,charged_fmax);
if (logfile)
fprintf(logfile,
" PPPM/cg optimization cutoff: %g\n"
" Total charged atoms: %.1f%%\n"
" Min/max charged atoms/proc: %.1f%% %.1f%%\n",
smallq,charged_frac,charged_fmin,charged_fmax);
}
}
// only need to rebuild this list after a neighbor list update
if (neighbor->ago == 0) {
num_charged = 0;
for (int i = 0; i < atom->nlocal; ++i) {
if (fabs(atom->q[i]) > smallq) {
is_charged[num_charged] = i;
++num_charged;
}
}
}
// find grid points for all my particles
// map my particle charge onto my local 3d density grid
particle_map();
make_rho();
// all procs communicate density values from their ghost cells
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
cg->reverse_comm(this,REVERSE_RHO);
brick2fft();
// compute potential gradient on my FFT grid and
// portion of e_long on this proc's FFT grid
// return gradients (electric fields) in 3d brick decomposition
// also performs per-atom calculations via poisson_peratom()
poisson();
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
// extra per-atom energy/virial communication
if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
}
// calculate the force on my particles
fieldforce();
// extra per-atom energy/virial communication
if (evflag_atom) fieldforce_peratom();
// update qsum and qsqsum, if atom count has changed and energy needed
if ((eflag_global || eflag_atom) && atom->natoms != natoms_original) {
qsum_qsq();
natoms_original = atom->natoms;
}
// sum global energy across procs and add in volume-dependent term
const double qscale = qqrd2e * scale;
if (eflag_global) {
double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all;
energy *= 0.5*volume;
energy -= g_ewald*qsqsum/MY_PIS +
MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
energy *= qscale;
}
// sum global virial across procs
if (vflag_global) {
double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (int i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
}
// per-atom energy/virial
// energy includes self-energy correction
if (evflag_atom) {
const double * const q = atom->q;
if (eflag_atom) {
for (int j = 0; j < num_charged; j++) {
const int i = is_charged[j];
eatom[i] *= 0.5;
eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
(g_ewald*g_ewald*volume);
eatom[i] *= qscale;
}
}
if (vflag_atom) {
for (int n = 0; n < num_charged; n++) {
const int i = is_charged[n];
for (int j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
}
}
}
// 2d slab correction
if (slabflag == 1) slabcorr();
// convert atoms back from lamda to box coords
if (triclinic) domain->lamda2x(atom->nlocal);
}
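/* ----------------------------------------------------------------------
   note on the is_charged list used throughout this class (illustrative
   example): it compresses the local indices of atoms with |q| > smallq
   into a dense array, e.g.

     q = { 0.0, -1.0, 0.0, 0.5 }  ->  num_charged = 2, is_charged = {1,3}

   so all grid loops skip neutral atoms; the list stays valid until the
   next reneighboring, when local atom indices are reassigned.
------------------------------------------------------------------------- */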
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void PPPMCG::particle_map()
{
int nx,ny,nz;
double **x = atom->x;
- if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
+ if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
int flag = 0;
for (int j = 0; j < num_charged; j++) {
int i = is_charged[j];
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
    // add/subtract OFFSET to avoid int(-0.75) = 0 when we want it to be -1
nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
part2grid[i][0] = nx;
part2grid[i][1] = ny;
part2grid[i][2] = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
nz+nlower < nzlo_out || nz+nupper > nzhi_out)
flag = 1;
}
if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
}
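/* ----------------------------------------------------------------------
   note on the OFFSET shift above (illustrative sketch, not part of the
   build): C++ integer conversion truncates toward zero, so a bare cast
   bins a coordinate just left of a grid plane incorrectly:

     double u = -0.75;
     int bad  = static_cast<int> (u);                    // 0, want -1
     int good = static_cast<int> (u + 16384.0) - 16384;  // -1 == floor(u)

   adding OFFSET before the cast makes truncation act like floor() for
   any coordinate within OFFSET grid cells of the box.
------------------------------------------------------------------------- */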
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void PPPMCG::make_rho()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
// clear 3d density array
memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
ngrid*sizeof(FFT_SCALAR));
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
double *q = atom->q;
double **x = atom->x;
for (int j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
z0 = delvolinv * q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
density_brick[mz][my][mx] += x0*rho1d[0][l];
}
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ik
------------------------------------------------------------------------- */
void PPPMCG::fieldforce_ik()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
for (int j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = qqrd2e * scale * q[i];
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
if (slabflag != 2) f[i][2] += qfactor*ekz;
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ad
------------------------------------------------------------------------- */
void PPPMCG::fieldforce_ad()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz;
FFT_SCALAR ekx,eky,ekz;
double s1,s2,s3;
double sf = 0.0;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double hx_inv = nx_pppm/xprd;
double hy_inv = ny_pppm/yprd;
double hz_inv = nz_pppm/zprd;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
for (int j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
compute_drho1d(dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
for (m = nlower; m <= nupper; m++) {
my = m+ny;
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
    // convert E-field to force and subtract self forces
const double qfactor = qqrd2e * scale;
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf = sf_coeff[0]*sin(2*MY_PI*s1);
sf += sf_coeff[1]*sin(4*MY_PI*s1);
sf *= 2*q[i]*q[i];
f[i][0] += qfactor*(ekx*q[i] - sf);
sf = sf_coeff[2]*sin(2*MY_PI*s2);
sf += sf_coeff[3]*sin(4*MY_PI*s2);
sf *= 2*q[i]*q[i];
f[i][1] += qfactor*(eky*q[i] - sf);
sf = sf_coeff[4]*sin(2*MY_PI*s3);
sf += sf_coeff[5]*sin(4*MY_PI*s3);
sf *= 2*q[i]*q[i];
if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial
------------------------------------------------------------------------- */
void PPPMCG::fieldforce_peratom()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
double *q = atom->q;
double **x = atom->x;
for (int j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
if (eflag_atom) u += x0*u_brick[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick[mz][my][mx];
v1 += x0*v1_brick[mz][my][mx];
v2 += x0*v2_brick[mz][my][mx];
v3 += x0*v3_brick[mz][my][mx];
v4 += x0*v4_brick[mz][my][mx];
v5 += x0*v5_brick[mz][my][mx];
}
}
}
}
if (eflag_atom) eatom[i] += q[i]*u;
if (vflag_atom) {
vatom[i][0] += q[i]*v0;
vatom[i][1] += q[i]*v1;
vatom[i][2] += q[i]*v2;
vatom[i][3] += q[i]*v3;
vatom[i][4] += q[i]*v4;
vatom[i][5] += q[i]*v5;
}
}
}
/* ----------------------------------------------------------------------
Slab-geometry correction term to dampen inter-slab interactions between
periodically repeating slabs. Yields good approximation to 2D Ewald if
adequate empty space is left between repeating slabs (J. Chem. Phys.
111, 3155). Slabs defined here to be parallel to the xy plane. Also
extended to non-neutral systems (J. Chem. Phys. 131, 094107).
------------------------------------------------------------------------- */
void PPPMCG::slabcorr()
{
int i,j;
// compute local contribution to global dipole moment
const double * const q = atom->q;
const double * const * const x = atom->x;
const double zprd = domain->zprd;
double dipole = 0.0;
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
dipole += q[i]*x[i][2];
}
// sum local contributions to get global dipole moment
double dipole_all;
MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
// need to make non-neutral systems and/or
// per-atom energy translationally invariant
double dipole_r2 = 0.0;
if (eflag_atom || fabs(qsum) > SMALLQ) {
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
dipole_r2 += q[i]*x[i][2]*x[i][2];
}
// sum local contributions
double tmp;
MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
dipole_r2 = tmp;
}
// compute corrections
const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
const double qscale = qqrd2e * scale;
if (eflag_global) energy += qscale * e_slabcorr;
// per-atom energy
if (eflag_atom) {
const double efact = qscale * MY_2PI/volume;
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
}
}
// add on force corrections
const double ffact = qscale * (-MY_4PI/volume);
double * const * const f = atom->f;
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
}
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid for group-group interactions
------------------------------------------------------------------------- */
void PPPMCG::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag)
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
// clear 3d density arrays
memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
ngrid*sizeof(FFT_SCALAR));
memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
ngrid*sizeof(FFT_SCALAR));
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
const double * const q = atom->q;
const double * const * const x = atom->x;
const int * const mask = atom->mask;
for (int j = 0; j < num_charged; j++) {
i = is_charged[j];
if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B))
if (BA_flag) continue;
if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
z0 = delvolinv * q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
// group A
if (mask[i] & groupbit_A)
density_A_brick[mz][my][mx] += x0*rho1d[0][l];
// group B
if (mask[i] & groupbit_B)
density_B_brick[mz][my][mx] += x0*rho1d[0][l];
}
}
}
}
}
}
/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */
double PPPMCG::memory_usage()
{
double bytes = PPPM::memory_usage();
bytes += nmax * sizeof(int);
return bytes;
}
diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp
index 538f9de28..2c065e926 100755
--- a/src/KSPACE/pppm_disp.cpp
+++ b/src/KSPACE/pppm_disp.cpp
@@ -1,8214 +1,8214 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Rolf Isele-Holder (Aachen University)
Paul Crozier (SNL)
------------------------------------------------------------------------- */
#include <mpi.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "pppm_disp.h"
#include "math_const.h"
#include "atom.h"
#include "comm.h"
#include "gridcomm.h"
#include "neighbor.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "domain.h"
#include "fft3d_wrap.h"
#include "remap_wrap.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define MAXORDER 7
#define OFFSET 16384
#define SMALL 0.00001
#define LARGE 10000.0
#define EPS_HOC 1.0e-7
enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};
enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE};
enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM,
FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G,
FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A,
FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE};
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#define ONEF 1.0f
#else
#define ZEROF 0.0
#define ONEF 1.0
#endif
/* ---------------------------------------------------------------------- */
PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
{
if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command");
triclinic_support = 0;
pppmflag = dispersionflag = 1;
accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
nfactors = 3;
factors = new int[nfactors];
factors[0] = 2;
factors[1] = 3;
factors[2] = 5;
MPI_Comm_rank(world,&me);
MPI_Comm_size(world,&nprocs);
csumflag = 0;
B = NULL;
cii = NULL;
csumi = NULL;
peratom_allocate_flag = 0;
density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
density_fft = NULL;
u_brick = v0_brick = v1_brick = v2_brick = v3_brick =
v4_brick = v5_brick = NULL;
density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
density_fft_g = NULL;
u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g =
v4_brick_g = v5_brick_g = NULL;
density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
density_fft_a0 = NULL;
u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 =
v4_brick_a0 = v5_brick_a0 = NULL;
density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
density_fft_a1 = NULL;
u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 =
v4_brick_a1 = v5_brick_a1 = NULL;
density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
density_fft_a2 = NULL;
u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 =
v4_brick_a2 = v5_brick_a2 = NULL;
density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
density_fft_a3 = NULL;
u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 =
v4_brick_a3 = v5_brick_a3 = NULL;
density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
density_fft_a4 = NULL;
u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 =
v4_brick_a4 = v5_brick_a4 = NULL;
density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
density_fft_a5 = NULL;
u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 =
v4_brick_a5 = v5_brick_a5 = NULL;
density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
density_fft_a6 = NULL;
u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 =
v4_brick_a6 = v5_brick_a6 = NULL;
density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
density_fft_none = NULL;
u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none =
v4_brick_none = v5_brick_none = NULL;
greensfn = NULL;
greensfn_6 = NULL;
work1 = work2 = NULL;
work1_6 = work2_6 = NULL;
vg = NULL;
vg2 = NULL;
vg_6 = NULL;
vg2_6 = NULL;
fkx = fky = fkz = NULL;
fkx2 = fky2 = fkz2 = NULL;
fkx_6 = fky_6 = fkz_6 = NULL;
fkx2_6 = fky2_6 = fkz2_6 = NULL;
sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 =
sf_precoeff5 = sf_precoeff6 = NULL;
sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 =
sf_precoeff5_6 = sf_precoeff6_6 = NULL;
gf_b = NULL;
gf_b_6 = NULL;
rho1d = rho_coeff = NULL;
drho1d = drho_coeff = NULL;
rho1d_6 = rho_coeff_6 = NULL;
drho1d_6 = drho_coeff_6 = NULL;
fft1 = fft2 = NULL;
fft1_6 = fft2_6 = NULL;
remap = NULL;
remap_6 = NULL;
nmax = 0;
part2grid = NULL;
part2grid_6 = NULL;
cg = NULL;
cg_peratom = NULL;
cg_6 = NULL;
cg_peratom_6 = NULL;
memset(function, 0, EWALD_FUNCS*sizeof(int));
}
/* ----------------------------------------------------------------------
free all memory
------------------------------------------------------------------------- */
PPPMDisp::~PPPMDisp()
{
delete [] factors;
delete [] B;
B = NULL;
delete [] cii;
cii = NULL;
delete [] csumi;
csumi = NULL;
deallocate();
deallocate_peratom();
memory->destroy(part2grid);
memory->destroy(part2grid_6);
part2grid = part2grid_6 = NULL;
}
/* ----------------------------------------------------------------------
called once before run
------------------------------------------------------------------------- */
void PPPMDisp::init()
{
if (me == 0) {
if (screen) fprintf(screen,"PPPMDisp initialization ...\n");
if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n");
}
triclinic_check();
if (domain->dimension == 2)
error->all(FLERR,"Cannot use PPPMDisp with 2d simulation");
if (comm->style != 0)
error->universe_all(FLERR,"PPPMDisp can only currently be used with "
"comm_style brick");
if (slabflag == 0 && domain->nonperiodic > 0)
error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");
if (slabflag == 1) {
if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
error->all(FLERR,"Incorrect boundaries with slab PPPMDisp");
}
if (order > MAXORDER || order_6 > MAXORDER) {
char str[128];
sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER);
error->all(FLERR,str);
}
// free all arrays previously allocated
deallocate();
deallocate_peratom();
// check whether cutoff and pair style are set
triclinic = domain->triclinic;
pair_check();
int tmp;
Pair *pair = force->pair;
int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL;
if (!(ptr||p_cutoff||p_cutoff_lj))
error->all(FLERR,"KSpace style is incompatible with Pair style");
cutoff = *p_cutoff;
cutoff_lj = *p_cutoff_lj;
double tmp2;
MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world);
// check out which types of potentials will have to be calculated
int ewald_order = ptr ? *((int *) ptr) : 1<<1;
int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
memset(function, 0, EWALD_FUNCS*sizeof(int));
for (int i=0; i<=EWALD_MAXORDER; ++i) // transcribe order
if (ewald_order&(1<<i)) { // from pair_style
int k=0;
char str[128];
switch (i) {
case 1:
k = 0; break;
case 6:
if ((ewald_mix==GEOMETRIC || ewald_mix==SIXTHPOWER ||
mixflag == 1) && mixflag!= 2) { k = 1; break; }
else if (ewald_mix==ARITHMETIC && mixflag!=2) { k = 2; break; }
else if (mixflag == 2) { k = 3; break; }
default:
sprintf(str, "Unsupported order in kspace_style "
"pppm/disp, pair_style %s", force->pair_style);
error->all(FLERR,str);
}
function[k] = 1;
}
  // warn if function[0] is not set but the charge attribute is set
if (!function[0] && atom->q_flag && me == 0) {
char str[128];
sprintf(str, "Charges are set, but coulombic solver is not used");
error->warning(FLERR, str);
}
// show error message if pppm/disp is not used correctly
if (function[1] || function[2] || function[3]) {
if (!gridflag_6 && !gewaldflag_6 && accuracy_real_6 < 0
&& accuracy_kspace_6 < 0 && !auto_disp_flag) {
error->all(FLERR, "PPPMDisp used but no parameters set, "
"for further information please see the pppm/disp "
"documentation");
}
}
// compute qsum & qsqsum, if function[0] is set, warn if not charge-neutral
scale = 1.0;
qqrd2e = force->qqrd2e;
natoms_original = atom->natoms;
if (function[0]) qsum_qsq();
// if kspace is TIP4P, extract TIP4P params from pair style
  // bond/angle are not yet init(), so ensure equilibrium request is valid
qdist = 0.0;
if (tip4pflag) {
int itmp;
double *p_qdist = (double *) force->pair->extract("qdist",itmp);
int *p_typeO = (int *) force->pair->extract("typeO",itmp);
int *p_typeH = (int *) force->pair->extract("typeH",itmp);
int *p_typeA = (int *) force->pair->extract("typeA",itmp);
int *p_typeB = (int *) force->pair->extract("typeB",itmp);
if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
error->all(FLERR,"KSpace style is incompatible with Pair style");
qdist = *p_qdist;
typeO = *p_typeO;
typeH = *p_typeH;
int typeA = *p_typeA;
int typeB = *p_typeB;
if (force->angle == NULL || force->bond == NULL)
error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
if (typeA < 1 || typeA > atom->nangletypes ||
force->angle->setflag[typeA] == 0)
error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P");
if (typeB < 1 || typeB > atom->nbondtypes ||
force->bond->setflag[typeB] == 0)
error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P");
double theta = force->angle->equilibrium_angle(typeA);
double blen = force->bond->equilibrium_distance(typeB);
alpha = qdist / (cos(0.5*theta) * blen);
}
// initialize the pair style to get the coefficients
neighrequest_flag = 0;
pair->init();
neighrequest_flag = 1;
init_coeffs();
  // if g_ewald and g_ewald_6 have not been specified, set an initial value
  // to avoid problems when calculating the energies
if (!gewaldflag) g_ewald = 1;
if (!gewaldflag_6) g_ewald_6 = 1;
// set accuracy (force units) from accuracy_relative or accuracy_absolute
if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
else accuracy = accuracy_relative * two_charge_force;
int (*procneigh)[2] = comm->procneigh;
int iteration = 0;
if (function[0]) {
GridComm *cgtmp = NULL;
while (order >= minorder) {
if (iteration && me == 0)
error->warning(FLERR,"Reducing PPPMDisp Coulomb order "
"b/c stencil extends beyond neighbor processor");
iteration++;
// set grid for dispersion interaction and coulomb interactions
set_grid();
if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
error->all(FLERR,"PPPMDisp Coulomb grid is too large");
set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
nxlo_fft, nylo_fft, nzlo_fft,
nxhi_fft, nyhi_fft, nzhi_fft,
nxlo_in, nylo_in, nzlo_in,
nxhi_in, nyhi_in, nzhi_in,
nxlo_out, nylo_out, nzlo_out,
nxhi_out, nyhi_out, nzhi_out,
nlower, nupper,
ngrid, nfft, nfft_both,
shift, shiftone, order);
if (overlap_allowed) break;
cgtmp = new GridComm(lmp, world,1,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,
nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
cgtmp->ghost_notify();
if (!cgtmp->ghost_overlap()) break;
delete cgtmp;
order--;
}
if (order < minorder)
error->all(FLERR,
"Coulomb PPPMDisp order has been reduced below minorder");
if (cgtmp) delete cgtmp;
// adjust g_ewald
if (!gewaldflag) adjust_gewald();
// calculate the final accuracy
double acc = final_accuracy();
// print stats
int ngrid_max,nfft_both_max;
MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
if (me == 0) {
#ifdef FFT_SINGLE
const char fft_prec[] = "single";
#else
const char fft_prec[] = "double";
#endif
if (screen) {
fprintf(screen," Coulomb G vector (1/distance)= %g\n",g_ewald);
fprintf(screen," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
fprintf(screen," Coulomb stencil order = %d\n",order);
fprintf(screen," Coulomb estimated absolute RMS force accuracy = %g\n",
acc);
fprintf(screen," Coulomb estimated relative force accuracy = %g\n",
acc/two_charge_force);
fprintf(screen," using %s precision FFTs\n",fft_prec);
fprintf(screen," 3d grid and FFT values/proc = %d %d\n",
ngrid_max, nfft_both_max);
}
if (logfile) {
fprintf(logfile," Coulomb G vector (1/distance) = %g\n",g_ewald);
fprintf(logfile," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
fprintf(logfile," Coulomb stencil order = %d\n",order);
fprintf(logfile,
" Coulomb estimated absolute RMS force accuracy = %g\n",
acc);
fprintf(logfile," Coulomb estimated relative force accuracy = %g\n",
acc/two_charge_force);
fprintf(logfile," using %s precision FFTs\n",fft_prec);
fprintf(logfile," 3d grid and FFT values/proc = %d %d\n",
ngrid_max, nfft_both_max);
}
}
}
iteration = 0;
if (function[1] + function[2] + function[3]) {
GridComm *cgtmp = NULL;
while (order_6 >= minorder) {
if (iteration && me == 0)
error->warning(FLERR,"Reducing PPPMDisp dispersion order "
"b/c stencil extends beyond neighbor processor");
iteration++;
set_grid_6();
if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET)
error->all(FLERR,"PPPMDisp Dispersion grid is too large");
set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
nxlo_in_6, nylo_in_6, nzlo_in_6,
nxhi_in_6, nyhi_in_6, nzhi_in_6,
nxlo_out_6, nylo_out_6, nzlo_out_6,
nxhi_out_6, nyhi_out_6, nzhi_out_6,
nlower_6, nupper_6,
ngrid_6, nfft_6, nfft_both_6,
shift_6, shiftone_6, order_6);
if (overlap_allowed) break;
cgtmp = new GridComm(lmp,world,1,1,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,
nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,
nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
cgtmp->ghost_notify();
if (!cgtmp->ghost_overlap()) break;
delete cgtmp;
order_6--;
}
if (order_6 < minorder)
error->all(FLERR,"Dispersion PPPMDisp order has been "
"reduced below minorder");
if (cgtmp) delete cgtmp;
// adjust g_ewald_6
if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6)
adjust_gewald_6();
// calculate the final accuracy
double acc, acc_real, acc_kspace;
final_accuracy_6(acc, acc_real, acc_kspace);
// print stats
int ngrid_max,nfft_both_max;
MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world);
MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
if (me == 0) {
#ifdef FFT_SINGLE
const char fft_prec[] = "single";
#else
const char fft_prec[] = "double";
#endif
if (screen) {
fprintf(screen," Dispersion G vector (1/distance)= %g\n",g_ewald_6);
fprintf(screen," Dispersion grid = %d %d %d\n",
nx_pppm_6,ny_pppm_6,nz_pppm_6);
fprintf(screen," Dispersion stencil order = %d\n",order_6);
fprintf(screen," Dispersion estimated absolute "
"RMS force accuracy = %g\n",acc);
fprintf(screen," Dispersion estimated absolute "
"real space RMS force accuracy = %g\n",acc_real);
fprintf(screen," Dispersion estimated absolute "
"kspace RMS force accuracy = %g\n",acc_kspace);
fprintf(screen," Dispersion estimated relative force accuracy = %g\n",
acc/two_charge_force);
fprintf(screen," using %s precision FFTs\n",fft_prec);
fprintf(screen," 3d grid and FFT values/proc dispersion = %d %d\n",
ngrid_max,nfft_both_max);
}
if (logfile) {
fprintf(logfile," Dispersion G vector (1/distance) = %g\n",g_ewald_6);
fprintf(logfile," Dispersion grid = %d %d %d\n",
nx_pppm_6,ny_pppm_6,nz_pppm_6);
fprintf(logfile," Dispersion stencil order = %d\n",order_6);
fprintf(logfile," Dispersion estimated absolute "
"RMS force accuracy = %g\n",acc);
fprintf(logfile," Dispersion estimated absolute "
"real space RMS force accuracy = %g\n",acc_real);
fprintf(logfile," Dispersion estimated absolute "
"kspace RMS force accuracy = %g\n",acc_kspace);
fprintf(logfile," Disperion estimated relative force accuracy = %g\n",
acc/two_charge_force);
fprintf(logfile," using %s precision FFTs\n",fft_prec);
fprintf(logfile," 3d grid and FFT values/proc dispersion = %d %d\n",
ngrid_max,nfft_both_max);
}
}
}
// allocate K-space dependent memory
allocate();
// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients
if (function[0]) {
compute_gf_denom(gf_b, order);
compute_rho_coeff(rho_coeff, drho_coeff, order);
cg->ghost_notify();
cg->setup();
if (differentiation_flag == 1)
compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
nxlo_fft, nylo_fft, nzlo_fft,
nxhi_fft, nyhi_fft, nzhi_fft,
sf_precoeff1, sf_precoeff2, sf_precoeff3,
sf_precoeff4, sf_precoeff5, sf_precoeff6);
}
if (function[1] + function[2] + function[3]) {
compute_gf_denom(gf_b_6, order_6);
compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
cg_6->ghost_notify();
cg_6->setup();
if (differentiation_flag == 1)
compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
}
}
/* ----------------------------------------------------------------------
adjust PPPM coeffs, called initially and whenever volume has changed
------------------------------------------------------------------------- */
void PPPMDisp::setup()
{
if (slabflag == 0 && domain->nonperiodic > 0)
error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");
if (slabflag == 1) {
if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
error->all(FLERR,"Incorrect boundaries with slab PPPMDisp");
}
double *prd;
// volume-dependent factors
// adjust z dimension for 2d slab PPPM
// z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
volume = xprd * yprd * zprd_slab;
// compute fkx,fky,fkz for my FFT grid pts
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
// compute the virial coefficients and Green's functions
if (function[0]){
delxinv = nx_pppm/xprd;
delyinv = ny_pppm/yprd;
delzinv = nz_pppm/zprd_slab;
delvolinv = delxinv*delyinv*delzinv;
double per;
int i, j, k, n;
for (i = nxlo_fft; i <= nxhi_fft; i++) {
per = i - nx_pppm*(2*i/nx_pppm);
fkx[i] = unitkx*per;
j = (nx_pppm - i) % nx_pppm;
per = j - nx_pppm*(2*j/nx_pppm);
fkx2[i] = unitkx*per;
}
for (i = nylo_fft; i <= nyhi_fft; i++) {
per = i - ny_pppm*(2*i/ny_pppm);
fky[i] = unitky*per;
j = (ny_pppm - i) % ny_pppm;
per = j - ny_pppm*(2*j/ny_pppm);
fky2[i] = unitky*per;
}
for (i = nzlo_fft; i <= nzhi_fft; i++) {
per = i - nz_pppm*(2*i/nz_pppm);
fkz[i] = unitkz*per;
j = (nz_pppm - i) % nz_pppm;
per = j - nz_pppm*(2*j/nz_pppm);
fkz2[i] = unitkz*per;
}
double sqk,vterm;
double gew2inv = 1/(g_ewald*g_ewald);
n = 0;
for (k = nzlo_fft; k <= nzhi_fft; k++) {
for (j = nylo_fft; j <= nyhi_fft; j++) {
for (i = nxlo_fft; i <= nxhi_fft; i++) {
sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
if (sqk == 0.0) {
vg[n][0] = 0.0;
vg[n][1] = 0.0;
vg[n][2] = 0.0;
vg[n][3] = 0.0;
vg[n][4] = 0.0;
vg[n][5] = 0.0;
} else {
vterm = -2.0 * (1.0/sqk + 0.25*gew2inv);
vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
vg[n][3] = vterm*fkx[i]*fky[j];
vg[n][4] = vterm*fkx[i]*fkz[k];
vg[n][5] = vterm*fky[j]*fkz[k];
vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]);
vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]);
vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]);
}
n++;
}
}
}
compute_gf();
if (differentiation_flag == 1) compute_sf_coeff();
}
if (function[1] + function[2] + function[3]) {
delxinv_6 = nx_pppm_6/xprd;
delyinv_6 = ny_pppm_6/yprd;
delzinv_6 = nz_pppm_6/zprd_slab;
delvolinv_6 = delxinv_6*delyinv_6*delzinv_6;
double per;
int i, j, k, n;
for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
per = i - nx_pppm_6*(2*i/nx_pppm_6);
fkx_6[i] = unitkx*per;
j = (nx_pppm_6 - i) % nx_pppm_6;
per = j - nx_pppm_6*(2*j/nx_pppm_6);
fkx2_6[i] = unitkx*per;
}
for (i = nylo_fft_6; i <= nyhi_fft_6; i++) {
per = i - ny_pppm_6*(2*i/ny_pppm_6);
fky_6[i] = unitky*per;
j = (ny_pppm_6 - i) % ny_pppm_6;
per = j - ny_pppm_6*(2*j/ny_pppm_6);
fky2_6[i] = unitky*per;
}
for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) {
per = i - nz_pppm_6*(2*i/nz_pppm_6);
fkz_6[i] = unitkz*per;
j = (nz_pppm_6 - i) % nz_pppm_6;
per = j - nz_pppm_6*(2*j/nz_pppm_6);
fkz2_6[i] = unitkz*per;
}
double sqk,vterm;
long double erft, expt, nom, denom;
long double b, bs, bt;
double rtpi = sqrt(MY_PI);
double gewinv = 1/g_ewald_6;
n = 0;
for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) {
for (j = nylo_fft_6; j <= nyhi_fft_6; j++) {
for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k];
if (sqk == 0.0) {
vg_6[n][0] = 0.0;
vg_6[n][1] = 0.0;
vg_6[n][2] = 0.0;
vg_6[n][3] = 0.0;
vg_6[n][4] = 0.0;
vg_6[n][5] = 0.0;
} else {
b = 0.5*sqrt(sqk)*gewinv;
bs = b*b;
bt = bs*b;
erft = 2*bt*rtpi*erfc((double) b);
expt = exp(-bs);
nom = erft - 2*bs*expt;
denom = nom + expt;
if (denom == 0) vterm = 3.0/sqk;
else vterm = 3.0*nom/(sqk*denom);
vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i];
vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j];
vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k];
vg_6[n][3] = vterm*fkx_6[i]*fky_6[j];
vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k];
vg_6[n][5] = vterm*fky_6[j]*fkz_6[k];
vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]);
vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]);
vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]);
}
n++;
}
}
}
compute_gf_6();
if (differentiation_flag == 1) compute_sf_coeff_6();
}
}
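/* ----------------------------------------------------------------------
   illustrative sketch, not part of PPPMDisp: the index folding
   per = i - n*(2*i/n) used in setup() maps an FFT grid index i in [0,n)
   to its signed wavenumber, so fk = unitk*per covers [-n/2,n/2)*unitk.
   standalone program below, guarded with #if 0 so it does not affect
   this file; the name demo_fold_wavenumber is hypothetical
------------------------------------------------------------------------- */
#if 0
#include <cstdio>
static int demo_fold_wavenumber(int i, int n)
{
  // integer division: 2*i/n is 0 for the lower half of the grid and 1
  // for the upper half, so per = i or per = i - n respectively
  return i - n*(2*i/n);
}
int main()
{
  const int n = 8;
  for (int i = 0; i < n; i++)      // prints: 0 1 2 3 -4 -3 -2 -1
    printf("%d ", demo_fold_wavenumber(i,n));
  printf("\n");
  return 0;
}
#endif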
/* ----------------------------------------------------------------------
reset local grid arrays and communication stencils
called by fix balance b/c it changed sizes of processor sub-domains
------------------------------------------------------------------------- */
void PPPMDisp::setup_grid()
{
// free all arrays previously allocated
deallocate();
deallocate_peratom();
// reset portion of global grid that each proc owns
if (function[0])
set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
nxlo_fft, nylo_fft, nzlo_fft,
nxhi_fft, nyhi_fft, nzhi_fft,
nxlo_in, nylo_in, nzlo_in,
nxhi_in, nyhi_in, nzhi_in,
nxlo_out, nylo_out, nzlo_out,
nxhi_out, nyhi_out, nzhi_out,
nlower, nupper,
ngrid, nfft, nfft_both,
shift, shiftone, order);
if (function[1] + function[2] + function[3])
set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
nxlo_in_6, nylo_in_6, nzlo_in_6,
nxhi_in_6, nyhi_in_6, nzhi_in_6,
nxlo_out_6, nylo_out_6, nzlo_out_6,
nxhi_out_6, nyhi_out_6, nzhi_out_6,
nlower_6, nupper_6,
ngrid_6, nfft_6, nfft_both_6,
shift_6, shiftone_6, order_6);
// reallocate K-space dependent memory
// check whether grid communication now overlaps, if overlap is not allowed
// don't invoke allocate_peratom(), compute() will allocate when needed
allocate();
if (function[0]) {
cg->ghost_notify();
if (overlap_allowed == 0 && cg->ghost_overlap())
error->all(FLERR,"PPPM grid stencil extends "
"beyond nearest neighbor processor");
cg->setup();
}
if (function[1] + function[2] + function[3]) {
cg_6->ghost_notify();
if (overlap_allowed == 0 && cg_6->ghost_overlap())
error->all(FLERR,"PPPM grid stencil extends "
"beyond nearest neighbor processor");
cg_6->setup();
}
// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients
if (function[0]) {
compute_gf_denom(gf_b, order);
compute_rho_coeff(rho_coeff, drho_coeff, order);
if (differentiation_flag == 1)
compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
nxlo_fft, nylo_fft, nzlo_fft,
nxhi_fft, nyhi_fft, nzhi_fft,
sf_precoeff1, sf_precoeff2, sf_precoeff3,
sf_precoeff4, sf_precoeff5, sf_precoeff6);
}
if (function[1] + function[2] + function[3]) {
compute_gf_denom(gf_b_6, order_6);
compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
if (differentiation_flag == 1)
compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
}
// pre-compute volume-dependent coeffs
setup();
}
/* ----------------------------------------------------------------------
compute the PPPM long-range force, energy, virial
------------------------------------------------------------------------- */
void PPPMDisp::compute(int eflag, int vflag)
{
int i;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = 0;
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
if (function[0]) {
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (function[1] + function[2] + function[3]) {
cg_peratom_6->ghost_notify();
cg_peratom_6->setup();
}
peratom_allocate_flag = 1;
}
// convert atoms from box to lamda coords
if (triclinic == 0) boxlo = domain->boxlo;
else {
boxlo = domain->boxlo_lamda;
domain->x2lamda(atom->nlocal);
}
// extend size of per-atom arrays if necessary
if (atom->nlocal > nmax) {
if (function[0]) memory->destroy(part2grid);
if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6);
nmax = atom->nmax;
if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid");
if (function[1] + function[2] + function[3])
memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6");
}
energy = 0.0;
energy_1 = 0.0;
energy_6 = 0.0;
if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0;
// find grid points for all my particles
// distribute particles' charges/dispersion coefficients on the grid
// communication between processors and remapping to FFT decomposition
// solution of Poisson's equation in k-space and back-transformation
// communication between processors
// calculation of forces
if (function[0]) {
// perform calculations for Coulomb interactions only
particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower,
nxlo_out, nylo_out, nzlo_out, nxhi_out, nyhi_out, nzhi_out);
make_rho_c();
cg->reverse_comm(this,REVERSE_RHO);
brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
density_brick, density_fft, work1,remap);
if (differentiation_flag == 1) {
poisson_ad(work1, work2, density_fft, fft1, fft2,
nx_pppm, ny_pppm, nz_pppm, nfft,
nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
energy_1, greensfn,
virial_1, vg,vg2,
u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
cg->forward_comm(this,FORWARD_AD);
fieldforce_c_ad();
if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM);
} else {
poisson_ik(work1, work2, density_fft, fft1, fft2,
nx_pppm, ny_pppm, nz_pppm, nfft,
nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
energy_1, greensfn,
fkx, fky, fkz,fkx2, fky2, fkz2,
vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2,
u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
cg->forward_comm(this, FORWARD_IK);
fieldforce_c_ik();
if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM);
}
if (evflag_atom) fieldforce_c_peratom();
}
if (function[1]) {
// perform calculations for geometric mixing
particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
make_rho_g();
cg_6->reverse_comm(this, REVERSE_RHO_G);
brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
density_brick_g, density_fft_g, work1_6,remap_6);
if (differentiation_flag == 1) {
poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
energy_6, greensfn_6,
virial_6, vg_6, vg2_6,
u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
cg_6->forward_comm(this,FORWARD_AD_G);
fieldforce_g_ad();
if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G);
} else {
poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
energy_6, greensfn_6,
fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6,
u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
cg_6->forward_comm(this,FORWARD_IK_G);
fieldforce_g_ik();
if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G);
}
if (evflag_atom) fieldforce_g_peratom();
}
if (function[2]) {
// perform calculations for arithmetic mixing
particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
make_rho_a();
cg_6->reverse_comm(this, REVERSE_RHO_A);
brick2fft_a();
if ( differentiation_flag == 1) {
poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
energy_6, greensfn_6,
virial_6, vg_6, vg2_6,
u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
poisson_2s_ad(density_fft_a0, density_fft_a6,
u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
poisson_2s_ad(density_fft_a1, density_fft_a5,
u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
poisson_2s_ad(density_fft_a2, density_fft_a4,
u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
cg_6->forward_comm(this, FORWARD_AD_A);
fieldforce_a_ad();
if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A);
} else {
poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
energy_6, greensfn_6,
fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6,
u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
poisson_2s_ik(density_fft_a0, density_fft_a6,
vdx_brick_a0, vdy_brick_a0, vdz_brick_a0,
vdx_brick_a6, vdy_brick_a6, vdz_brick_a6,
u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
poisson_2s_ik(density_fft_a1, density_fft_a5,
vdx_brick_a1, vdy_brick_a1, vdz_brick_a1,
vdx_brick_a5, vdy_brick_a5, vdz_brick_a5,
u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
poisson_2s_ik(density_fft_a2, density_fft_a4,
vdx_brick_a2, vdy_brick_a2, vdz_brick_a2,
vdx_brick_a4, vdy_brick_a4, vdz_brick_a4,
u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
cg_6->forward_comm(this, FORWARD_IK_A);
fieldforce_a_ik();
if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A);
}
if (evflag_atom) fieldforce_a_peratom();
}
if (function[3]) {
// perform calculations if no mixing rule applies
particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
make_rho_none();
cg_6->reverse_comm(this, REVERSE_RHO_NONE);
brick2fft_none();
if (differentiation_flag == 1) {
int n = 0;
for (int k = 0; k<nsplit_alloc/2; k++) {
poisson_none_ad(n,n+1,density_fft_none[n],density_fft_none[n+1],
u_brick_none[n],u_brick_none[n+1],
v0_brick_none, v1_brick_none, v2_brick_none,
v3_brick_none, v4_brick_none, v5_brick_none);
n += 2;
}
cg_6->forward_comm(this,FORWARD_AD_NONE);
fieldforce_none_ad();
if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE);
} else {
int n = 0;
for (int k = 0; k<nsplit_alloc/2; k++) {
poisson_none_ik(n,n+1,density_fft_none[n], density_fft_none[n+1],
vdx_brick_none[n], vdy_brick_none[n], vdz_brick_none[n],
vdx_brick_none[n+1], vdy_brick_none[n+1], vdz_brick_none[n+1],
u_brick_none, v0_brick_none, v1_brick_none, v2_brick_none,
v3_brick_none, v4_brick_none, v5_brick_none);
n += 2;
}
cg_6->forward_comm(this,FORWARD_IK_NONE);
fieldforce_none_ik();
if (evflag_atom)
cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE);
}
if (evflag_atom) fieldforce_none_peratom();
}
// update qsum and qsqsum, if atom count has changed and energy needed
if ((eflag_global || eflag_atom) && atom->natoms != natoms_original) {
qsum_qsq();
natoms_original = atom->natoms;
}
// sum energy across procs and add in volume-dependent term
const double qscale = force->qqrd2e * scale;
if (eflag_global) {
double energy_all;
MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy_1 = energy_all;
MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy_6 = energy_all;
energy_1 *= 0.5*volume;
energy_6 *= 0.5*volume;
energy_1 -= g_ewald*qsqsum/MY_PIS +
MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij +
1.0/12.0*pow(g_ewald_6,6)*csum;
energy_1 *= qscale;
}
// sum virial across procs
if (vflag_global) {
double virial_all[6];
MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i];
if (function[1]+function[2]+function[3]){
double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij;
virial[0] -= a;
virial[1] -= a;
virial[2] -= a;
}
}
if (eflag_atom) {
if (function[0]) {
double *q = atom->q;
for (i = 0; i < atom->nlocal; i++) {
eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + // Coulomb self-energy correction
qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume);
}
}
if (function[1] + function[2] + function[3]) {
int tmp;
for (i = 0; i < atom->nlocal; i++) {
tmp = atom->type[i];
eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] +
1.0/12.0*pow(g_ewald_6,6)*cii[tmp];
}
}
}
if (vflag_atom) {
if (function[1] + function[2] + function[3]) {
int tmp;
for (i = 0; i < atom->nlocal; i++) {
tmp = atom->type[i];
for (int n = 0; n < 3; n++) // dispersion self-virial correction
vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp];
}
}
}
// 2d slab correction
if (slabflag) slabcorr(eflag);
if (function[0]) energy += energy_1;
if (function[1] + function[2] + function[3]) energy += energy_6;
// convert atoms back from lamda to box coords
if (triclinic) domain->lamda2x(atom->nlocal);
}
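/* ----------------------------------------------------------------------
   illustrative sketch, not part of PPPMDisp: the global correction
   applied in compute() above is the standard Ewald self-energy plus the
   neutralizing-background term,
   E_corr = g_ewald*sum(q_i^2)/sqrt(pi) + (pi/2)*(sum q_i)^2/(g_ewald^2*V),
   which is subtracted from the k-space energy. standalone check below,
   guarded with #if 0; the function name and parameter values are made up
------------------------------------------------------------------------- */
#if 0
#include <cmath>
#include <cstdio>
static double demo_ewald_self_energy(double qsqsum, double qsum,
                                     double g_ewald, double volume)
{
  const double pi = 3.14159265358979323846;
  return g_ewald*qsqsum/sqrt(pi) +
    0.5*pi*qsum*qsum/(g_ewald*g_ewald*volume);
}
int main()
{
  // two opposite unit charges: qsum = 0, qsqsum = 2
  printf("%g\n", demo_ewald_self_energy(2.0, 0.0, 0.3, 1000.0));
  return 0;
}
#endif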
/* ----------------------------------------------------------------------
initialize coefficients needed for the dispersion density on the grids
------------------------------------------------------------------------- */
void PPPMDisp::init_coeffs() // local pair coeffs
{
int tmp;
int n = atom->ntypes;
int converged;
delete [] B;
B = NULL;
if (function[3] + function[2]) { // no mixing rule or arithmetic
if (function[2] && me == 0) {
if (screen) fprintf(screen," Optimizing splitting of Dispersion coefficients\n");
if (logfile) fprintf(logfile," Optimizing splitting of Dispersion coefficients\n");
}
// allocate data for eigenvalue decomposition
double **A=NULL;
double **Q=NULL;
if ( n > 1 ) {
// get dispersion coefficients
double **b = (double **) force->pair->extract("B",tmp);
memory->create(A,n,n,"pppm/disp:A");
memory->create(Q,n,n,"pppm/disp:Q");
// fill coefficients into matrix A
for (int i = 1; i <= n; i++)
for (int j = 1; j <= n; j++)
A[i-1][j-1] = b[i][j];
// initialize Q as the identity matrix
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
Q[i][j] = 0.0;
for (int i = 0; i < n; i++)
Q[i][i] = 1.0;
// perform eigenvalue decomposition with QR algorithm
converged = qr_alg(A,Q,n);
if (function[3] && !converged) {
error->all(FLERR,"Matrix factorization to split dispersion coefficients failed");
}
// determine number of used eigenvalues
// based on maximum allowed number or cutoff criterion
// sort eigenvalues according to their size with bubble sort
double t;
for (int i = 0; i < n; i++) {
for (int j = 0; j < n-1-i; j++) {
if (fabs(A[j][j]) < fabs(A[j+1][j+1])) {
t = A[j][j];
A[j][j] = A[j+1][j+1];
A[j+1][j+1] = t;
for (int k = 0; k < n; k++) {
t = Q[k][j];
Q[k][j] = Q[k][j+1];
Q[k][j+1] = t;
}
}
}
}
// find the first eigenvalue that falls below the specified tolerance;
// all larger eigenvalues are kept, up to the maximum allowed by the user
double amax = fabs(A[0][0]);
double acrit = amax*splittol;
double bmax = 0;
double err = 0;
nsplit = 0;
for (int i = 0; i < n; i++) {
if (fabs(A[i][i]) > acrit) nsplit++;
else {
bmax = fabs(A[i][i]);
break;
}
}
err = bmax/amax;
if (err > 1.0e-4) {
char str[128];
sprintf(str,"Estimated error in splitting of dispersion coeffs is %g",err);
error->warning(FLERR, str);
}
// set B
B = new double[nsplit*n+nsplit];
for (int i = 0; i< nsplit; i++) {
B[i] = A[i][i];
for (int j = 0; j < n; j++) {
B[nsplit*(j+1) + i] = Q[j][i];
}
}
nsplit_alloc = nsplit;
if (nsplit%2 == 1) nsplit_alloc = nsplit + 1;
} else
nsplit = 1; // use geometric mixing
// check whether function[1], [2], or [3] should preferably be used
if (nsplit == 1) {
if ( B ) delete [] B;
function[3] = 0;
function[2] = 0;
function[1] = 1;
if (me == 0) {
if (screen) fprintf(screen," Using geometric mixing for reciprocal space\n");
if (logfile) fprintf(logfile," Using geometric mixing for reciprocal space\n");
}
}
if (function[2] && nsplit <= 6) {
if (me == 0) {
if (screen) fprintf(screen," Using %d instead of 7 structure factors\n",nsplit);
if (logfile) fprintf(logfile," Using %d instead of 7 structure factors\n",nsplit);
}
function[3] = 1;
function[2] = 0;
}
if (function[2] && (nsplit > 6)) {
if (me == 0) {
if (screen) fprintf(screen," Using 7 structure factors\n");
if (logfile) fprintf(logfile," Using 7 structure factors\n");
}
if ( B ) delete [] B;
}
if (function[3]) {
if (me == 0) {
if (screen) fprintf(screen," Using %d structure factors\n",nsplit);
if (logfile) fprintf(logfile," Using %d structure factors\n",nsplit);
}
if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors");
}
memory->destroy(A);
memory->destroy(Q);
}
if (function[1]) { // geometric 1/r^6
double **b = (double **) force->pair->extract("B",tmp);
B = new double[n+1];
for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
}
if (function[2]) { // arithmetic 1/r^6
// cannot use epsilon, because this has not been set yet
double **epsilon = (double **) force->pair->extract("epsilon",tmp);
// cannot use sigma, because this has not been set yet
double **sigma = (double **) force->pair->extract("sigma",tmp);
if (!(epsilon&&sigma))
error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp");
double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7];
double c[7] = {
1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
for (int i=0; i<=n; ++i) {
eps_i = sqrt(epsilon[i][i]);
sigma_i = sigma[i][i];
sigma_n = 1.0;
for (int j=0; j<7; ++j) {
*(bi++) = sigma_n*eps_i*c[j]*0.25;
sigma_n *= sigma_i;
}
}
}
}
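/* ----------------------------------------------------------------------
   illustrative sketch, not part of PPPMDisp: the seven terms stored per
   type for arithmetic mixing come from the binomial expansion
   (s_i+s_j)^6 = sum_k C(6,k) s_i^k s_j^(6-k). since c[k]*c[6-k] = C(6,k)
   and 0.25*0.25 = 4/2^6, the mixed coefficient
   C6_ij = 4*sqrt(eps_i*eps_j)*((s_i+s_j)/2)^6 factorizes exactly into
   sum_k b_i[k]*b_j[6-k] with b_t[k] = 0.25*sqrt(eps_t)*c[k]*s_t^k as in
   init_coeffs() above. standalone check below, guarded with #if 0; all
   parameter values are made up
------------------------------------------------------------------------- */
#if 0
#include <cmath>
#include <cstdio>
int main()
{
  double eps_i = 0.2, sigma_i = 3.4;    // hypothetical type i
  double eps_j = 0.5, sigma_j = 2.9;    // hypothetical type j

  // direct arithmetic-mixing dispersion coefficient
  double sij = 0.5*(sigma_i + sigma_j);
  double direct = 4.0*sqrt(eps_i*eps_j)*pow(sij,6.0);

  // factorized form used on the grid
  double c[7] = {1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0),
                 sqrt(15.0), sqrt(6.0), 1.0};
  double bi[7], bj[7], split = 0.0;
  for (int k = 0; k < 7; k++) {
    bi[k] = 0.25*sqrt(eps_i)*c[k]*pow(sigma_i,(double) k);
    bj[k] = 0.25*sqrt(eps_j)*c[k]*pow(sigma_j,(double) k);
  }
  for (int k = 0; k < 7; k++) split += bi[k]*bj[6-k];

  printf("direct = %g  split = %g\n", direct, split);  // identical values
  return 0;
}
#endif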
/* ----------------------------------------------------------------------
Eigenvalue decomposition of a real, symmetric matrix with the QR
method (includes transformation to tridiagonal form + Wilkinson
shift)
------------------------------------------------------------------------- */
int PPPMDisp::qr_alg(double **A, double **Q, int n)
{
int converged = 0;
double an1, an, bn1, d, mue;
// allocate some memory for the required operations
double **A0,**Qi,**C,**D,**E;
// make a copy of A for convergence check
memory->create(A0,n,n,"pppm/disp:A0");
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
A0[i][j] = A[i][j];
// allocate an auxiliary matrix Qi
memory->create(Qi,n,n,"pppm/disp:Qi");
// allocate auxiliary matrices for the matrix multiplication
memory->create(C,n,n,"pppm/disp:C");
memory->create(D,n,n,"pppm/disp:D");
memory->create(E,n,n,"pppm/disp:E");
// transform matrix A to tridiagonal form
hessenberg(A,Q,n);
// start loop for the matrix factorization
int count = 0;
int countmax = 100000;
while (1) {
// make a Wilkinson shift
an1 = A[n-2][n-2];
an = A[n-1][n-1];
bn1 = A[n-2][n-1];
d = (an1-an)/2;
mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1);
for (int i = 0; i < n; i++)
A[i][i] -= mue;
// perform a QR factorization for a tridiagonal matrix A
qr_tri(Qi,A,n);
// update the matrices
mmult(A,Qi,C,n);
mmult(Q,Qi,C,n);
// backward Wilkinson shift
for (int i = 0; i < n; i++)
A[i][i] += mue;
// check the convergence
converged = check_convergence(A,Q,A0,C,D,E,n);
if (converged) break;
count = count + 1;
if (count == countmax) break;
}
// free allocated memory
memory->destroy(Qi);
memory->destroy(A0);
memory->destroy(C);
memory->destroy(D);
memory->destroy(E);
return converged;
}
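/* ----------------------------------------------------------------------
   illustrative sketch, not part of PPPMDisp: the Wilkinson shift used in
   qr_alg() is the eigenvalue of the trailing 2x2 block
   [[an1,bn1],[bn1,an]] that lies closer to the corner entry an, i.e.
   mue = an + d - sign(d)*sqrt(d^2 + bn1^2) with d = (an1-an)/2.
   standalone check below, guarded with #if 0; values are made up
------------------------------------------------------------------------- */
#if 0
#include <cmath>
#include <cstdio>
int main()
{
  double an1 = 4.0, an = 1.0, bn1 = 0.5;  // trailing 2x2 block

  double d   = (an1 - an)/2;
  double mue = an + d - copysign(1.0,d)*sqrt(d*d + bn1*bn1);

  // both eigenvalues of the 2x2 block, for comparison
  double mid = (an1 + an)/2, rad = sqrt(d*d + bn1*bn1);
  printf("shift = %g, eigenvalues = %g %g\n", mue, mid - rad, mid + rad);
  // the shift matches the eigenvalue closer to an
  return 0;
}
#endif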
/* ----------------------------------------------------------------------
Transform a matrix to Hessenberg form (for symmetric matrices, the
result will be a tridiagonal matrix)
------------------------------------------------------------------------- */
void PPPMDisp::hessenberg(double **A, double **Q, int n)
{
double r,a,b,c,s,x1,x2;
for (int i = 0; i < n-1; i++) {
for (int j = i+2; j < n; j++) {
// compute coeffs for the rotation matrix
a = A[i+1][i];
b = A[j][i];
r = sqrt(a*a + b*b);
c = a/r;
s = b/r;
// update the entries of A with multiplication from the left
for (int k = 0; k < n; k++) {
x1 = A[i+1][k];
x2 = A[j][k];
A[i+1][k] = c*x1 + s*x2;
A[j][k] = -s*x1 + c*x2;
}
// update the entries of A and Q with a multiplication from the right
for (int k = 0; k < n; k++) {
x1 = A[k][i+1];
x2 = A[k][j];
A[k][i+1] = c*x1 + s*x2;
A[k][j] = -s*x1 + c*x2;
x1 = Q[k][i+1];
x2 = Q[k][j];
Q[k][i+1] = c*x1 + s*x2;
Q[k][j] = -s*x1 + c*x2;
}
}
}
}
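/* ----------------------------------------------------------------------
   illustrative sketch, not part of PPPMDisp: hessenberg() and qr_tri()
   both rely on Givens rotations; with r = sqrt(a^2+b^2), c = a/r,
   s = b/r, the rotation maps the pair (a,b) to (r,0), zeroing one entry
   while preserving the 2-norm. standalone check below, guarded with
   #if 0; values are made up
------------------------------------------------------------------------- */
#if 0
#include <cmath>
#include <cstdio>
int main()
{
  double a = 3.0, b = 4.0;          // entry b is to be zeroed
  double r = sqrt(a*a + b*b);
  double c = a/r, s = b/r;

  double x1 =  c*a + s*b;           // = r = 5
  double x2 = -s*a + c*b;           // = 0
  printf("r = %g, rotated = (%g, %g)\n", r, x1, x2);
  return 0;
}
#endif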
/* ----------------------------------------------------------------------
QR factorization for a tridiagonal matrix; the result of the
factorization is stored in A and Qi
------------------------------------------------------------------------- */
void PPPMDisp::qr_tri(double** Qi,double** A,int n)
{
double r,a,b,c,s,x1,x2;
int j,k,k0,kmax;
// make Qi an identity matrix
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
Qi[i][j] = 0.0;
for (int i = 0; i < n; i++)
Qi[i][i] = 1.0;
// loop over the main diagonal and the first off-diagonal of A
for (int i = 0; i < n-1; i++) {
j = i+1;
// coefficients of the rotation matrix
a = A[i][i];
b = A[j][i];
r = sqrt(a*a + b*b);
c = a/r;
s = b/r;
// update the entries of A and Q
k0 = (i-1>0)?i-1:0; //max(i-1,0);
kmax = (i+3<n)?i+3:n; //min(i+3,n);
for (k = k0; k < kmax; k++) {
x1 = A[i][k];
x2 = A[j][k];
A[i][k] = c*x1 + s*x2;
A[j][k] = -s*x1 + c*x2;
}
for (k = 0; k < n; k++) {
x1 = Qi[k][i];
x2 = Qi[k][j];
Qi[k][i] = c*x1 + s*x2;
Qi[k][j] = -s*x1 + c*x2;
}
}
}
/* ----------------------------------------------------------------------
Multiply two matrices A and B, store the result in A; C provides
some memory to store intermediate results
------------------------------------------------------------------------- */
void PPPMDisp::mmult(double** A, double** B, double** C, int n)
{
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
C[i][j] = 0.0;
// perform matrix multiplication
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
for (int k = 0; k < n; k++)
C[i][j] += A[i][k] * B[k][j];
// copy the result back to matrix A
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
A[i][j] = C[i][j];
}
/* ----------------------------------------------------------------------
Check if the factorization has converged by comparing all elements of the
original matrix and the new matrix
------------------------------------------------------------------------- */
int PPPMDisp::check_convergence(double** A,double** Q,double** A0,
double** C,double** D,double** E,int n)
{
double eps = 1.0e-8;
int converged = 1;
double epsmax = -1;
double Bmax = 0.0;
double diff;
// get the largest element of the original matrix
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
Bmax = (Bmax>A0[i][j])?Bmax:A0[i][j]; //max(Bmax,A0[i][j]);
double epsabs = eps*Bmax;
// reconstruct the original matrix
// store the diagonal elements in D
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
D[i][j] = 0.0;
for (int i = 0; i < n; i++)
D[i][i] = A[i][i];
// store matrix Q in E
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
E[i][j] = Q[i][j];
// E = Q*D
mmult(E,D,C,n);
// store transpose of Q in D
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
D[i][j] = Q[j][i];
// E = Q*D*Q^T
mmult(E,D,C,n);
// compare the original matrix and the final matrix
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
diff = A0[i][j] - E[i][j];
epsmax = (epsmax>fabs(diff))?epsmax:fabs(diff);//max(epsmax,fabs(diff));
}
}
if (epsmax > epsabs) converged = 0;
return converged;
}
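/* ----------------------------------------------------------------------
   illustrative sketch, not part of PPPMDisp: check_convergence() tests
   whether Q*diag(A)*Q^T reproduces the original matrix A0 to within
   eps*max(A0). the standalone check below does the same reconstruction
   for a 2x2 example with known eigenpairs; guarded with #if 0, values
   are made up
------------------------------------------------------------------------- */
#if 0
#include <cmath>
#include <cstdio>
int main()
{
  double A0[2][2] = {{2,1},{1,2}};  // eigenvalues 3 and 1
  double s = 1.0/sqrt(2.0);
  double Q[2][2] = {{s, s},{s,-s}}; // columns are the eigenvectors
  double D[2]    = {3.0, 1.0};

  // reconstruct E = Q*diag(D)*Q^T and measure the largest deviation
  double epsmax = 0.0;
  for (int i = 0; i < 2; i++)
    for (int j = 0; j < 2; j++) {
      double e = 0.0;
      for (int k = 0; k < 2; k++) e += Q[i][k]*D[k]*Q[j][k];
      double diff = fabs(A0[i][j] - e);
      if (diff > epsmax) epsmax = diff;
    }
  printf("max reconstruction error = %g\n", epsmax);  // ~1e-16
  return 0;
}
#endif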
/* ----------------------------------------------------------------------
allocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMDisp::allocate()
{
int (*procneigh)[2] = comm->procneigh;
if (function[0]) {
memory->create(work1,2*nfft_both,"pppm/disp:work1");
memory->create(work2,2*nfft_both,"pppm/disp:work2");
memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx");
memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky");
memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz");
memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2");
memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2");
memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2");
memory->create(gf_b,order,"pppm/disp:gf_b");
memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d");
memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff");
memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:drho1d");
memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff");
memory->create(greensfn,nfft_both,"pppm/disp:greensfn");
memory->create(vg,nfft_both,6,"pppm/disp:vg");
memory->create(vg2,nfft_both,3,"pppm/disp:vg2");
memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm/disp:density_brick");
if ( differentiation_flag == 1) {
memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm/disp:u_brick");
memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1");
memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2");
memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3");
memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4");
memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5");
memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6");
} else {
memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm/disp:vdx_brick");
memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm/disp:vdy_brick");
memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm/disp:vdz_brick");
}
memory->create(density_fft,nfft_both,"pppm/disp:density_fft");
int tmp;
fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
0,0,&tmp,collective_flag);
fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
0,0,&tmp,collective_flag);
remap = new Remap(lmp,world,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
1,0,0,FFT_PRECISION,collective_flag);
// create ghost grid object for rho and electric field communication
if (differentiation_flag == 1)
cg = new GridComm(lmp,world,1,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg = new GridComm(lmp,world,3,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
}
if (function[1]) {
memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g");
if ( differentiation_flag == 1) {
memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
} else {
memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g");
memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g");
memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g");
}
memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g");
int tmp;
fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
0,0,&tmp,collective_flag);
fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
0,0,&tmp,collective_flag);
remap_6 = new Remap(lmp,world,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
1,0,0,FFT_PRECISION,collective_flag);
// create ghost grid object for rho and electric field communication
if (differentiation_flag == 1)
cg_6 = new GridComm(lmp,world,1,1,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg_6 = new GridComm(lmp,world,3,1,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
}
if (function[2]) {
memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0");
memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1");
memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2");
memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3");
memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4");
memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5");
memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6");
memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0");
memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1");
memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2");
memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3");
memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4");
memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5");
memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6");
if ( differentiation_flag == 1 ) {
memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
} else {
memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0");
memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0");
memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0");
memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1");
memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1");
memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1");
memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2");
memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2");
memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2");
memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3");
memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3");
memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3");
memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4");
memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4");
memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4");
memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5");
memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5");
memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5");
memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6");
memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6");
memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6");
}
int tmp;
fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
0,0,&tmp,collective_flag);
fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
0,0,&tmp,collective_flag);
remap_6 = new Remap(lmp,world,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
1,0,0,FFT_PRECISION,collective_flag);
// create ghost grid object for rho and electric field communication
if (differentiation_flag == 1)
cg_6 = new GridComm(lmp,world,7,7,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg_6 = new GridComm(lmp,world,21,7,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
}
if (function[3]) {
memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none");
if ( differentiation_flag == 1) {
memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
} else {
memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none");
memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none");
memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none");
}
memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none");
int tmp;
fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
0,0,&tmp,collective_flag);
fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
0,0,&tmp,collective_flag);
remap_6 = new Remap(lmp,world,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
1,0,0,FFT_PRECISION,collective_flag);
// create ghost grid object for rho and electric field communication
if (differentiation_flag == 1)
cg_6 = new GridComm(lmp,world,nsplit_alloc,nsplit_alloc,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg_6 = new GridComm(lmp,world,3*nsplit_alloc,nsplit_alloc,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
}
}
/* ----------------------------------------------------------------------
allocate memory that depends on # of K-vectors and order
for per atom calculations
------------------------------------------------------------------------- */
void PPPMDisp::allocate_peratom()
{
int (*procneigh)[2] = comm->procneigh;
if (function[0]) {
if (differentiation_flag != 1)
memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm/disp:u_brick");
memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm/disp:v0_brick");
memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm/disp:v1_brick");
memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm/disp:v2_brick");
memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm/disp:v3_brick");
memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm/disp:v4_brick");
memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm/disp:v5_brick");
// create ghost grid object for rho and electric field communication
if (differentiation_flag == 1)
cg_peratom =
new GridComm(lmp,world,6,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg_peratom =
new GridComm(lmp,world,7,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
}
if (function[1]) {
if ( differentiation_flag != 1 )
memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g");
memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g");
memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g");
memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g");
memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g");
memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g");
// create ghost grid object for rho and electric field communication
if (differentiation_flag == 1)
cg_peratom_6 =
new GridComm(lmp,world,6,1,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg_peratom_6 =
new GridComm(lmp,world,7,1,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
}
if (function[2]) {
if ( differentiation_flag != 1 ) {
memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
}
memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0");
memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0");
memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0");
memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0");
memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0");
memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0");
memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1");
memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1");
memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1");
memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1");
memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1");
memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1");
memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2");
memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2");
memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2");
memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2");
memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2");
memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2");
memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3");
memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3");
memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3");
memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3");
memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3");
memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3");
memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4");
memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4");
memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4");
memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4");
memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4");
memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4");
memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5");
memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5");
memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5");
memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5");
memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5");
memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5");
memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6");
memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6");
memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6");
memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6");
memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6");
memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6");
// create ghost grid object for rho and electric field communication
if (differentiation_flag == 1)
cg_peratom_6 =
new GridComm(lmp,world,42,1,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg_peratom_6 =
new GridComm(lmp,world,49,1,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
}
if (function[3]) {
if ( differentiation_flag != 1 )
memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none");
memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none");
memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none");
memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none");
memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none");
memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none");
// create ghost grid object for rho and electric field communication
if (differentiation_flag == 1)
cg_peratom_6 =
new GridComm(lmp,world,6*nsplit_alloc,1,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg_peratom_6 =
new GridComm(lmp,world,7*nsplit_alloc,1,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
}
}
/* ----------------------------------------------------------------------
deallocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMDisp::deallocate()
{
memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy(density_fft);
density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
density_fft = NULL;
memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy(density_fft_g);
density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
density_fft_g = NULL;
memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy(density_fft_a0);
density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
density_fft_a0 = NULL;
memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy(density_fft_a1);
density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
density_fft_a1 = NULL;
memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy(density_fft_a2);
density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
density_fft_a2 = NULL;
memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy(density_fft_a3);
density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
density_fft_a3 = NULL;
memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy(density_fft_a4);
density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
density_fft_a4 = NULL;
memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy(density_fft_a5);
density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
density_fft_a5 = NULL;
memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy(density_fft_a6);
density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
density_fft_a6 = NULL;
memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
memory->destroy(density_fft_none);
density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
density_fft_none = NULL;
memory->destroy(sf_precoeff1);
memory->destroy(sf_precoeff2);
memory->destroy(sf_precoeff3);
memory->destroy(sf_precoeff4);
memory->destroy(sf_precoeff5);
memory->destroy(sf_precoeff6);
sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
memory->destroy(sf_precoeff1_6);
memory->destroy(sf_precoeff2_6);
memory->destroy(sf_precoeff3_6);
memory->destroy(sf_precoeff4_6);
memory->destroy(sf_precoeff5_6);
memory->destroy(sf_precoeff6_6);
sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL;
memory->destroy(greensfn);
memory->destroy(greensfn_6);
memory->destroy(work1);
memory->destroy(work2);
memory->destroy(work1_6);
memory->destroy(work2_6);
memory->destroy(vg);
memory->destroy(vg2);
memory->destroy(vg_6);
memory->destroy(vg2_6);
greensfn = greensfn_6 = NULL;
work1 = work2 = work1_6 = work2_6 = NULL;
vg = vg2 = vg_6 = vg2_6 = NULL;
memory->destroy1d_offset(fkx,nxlo_fft);
memory->destroy1d_offset(fky,nylo_fft);
memory->destroy1d_offset(fkz,nzlo_fft);
fkx = fky = fkz = NULL;
memory->destroy1d_offset(fkx2,nxlo_fft);
memory->destroy1d_offset(fky2,nylo_fft);
memory->destroy1d_offset(fkz2,nzlo_fft);
fkx2 = fky2 = fkz2 = NULL;
memory->destroy1d_offset(fkx_6,nxlo_fft_6);
memory->destroy1d_offset(fky_6,nylo_fft_6);
memory->destroy1d_offset(fkz_6,nzlo_fft_6);
fkx_6 = fky_6 = fkz_6 = NULL;
memory->destroy1d_offset(fkx2_6,nxlo_fft_6);
memory->destroy1d_offset(fky2_6,nylo_fft_6);
memory->destroy1d_offset(fkz2_6,nzlo_fft_6);
fkx2_6 = fky2_6 = fkz2_6 = NULL;
memory->destroy(gf_b);
memory->destroy2d_offset(rho1d,-order/2);
memory->destroy2d_offset(rho_coeff,(1-order)/2);
memory->destroy2d_offset(drho1d,-order/2);
memory->destroy2d_offset(drho_coeff, (1-order)/2);
gf_b = NULL;
rho1d = rho_coeff = drho1d = drho_coeff = NULL;
memory->destroy(gf_b_6);
memory->destroy2d_offset(rho1d_6,-order_6/2);
memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2);
memory->destroy2d_offset(drho1d_6,-order_6/2);
memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2);
gf_b_6 = NULL;
rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL;
delete fft1;
delete fft2;
delete remap;
delete cg;
fft1 = fft2 = NULL;
remap = NULL;
cg = NULL;
delete fft1_6;
delete fft2_6;
delete remap_6;
delete cg_6;
fft1_6 = fft2_6 = NULL;
remap_6 = NULL;
cg_6 = NULL;
}
/* ----------------------------------------------------------------------
deallocate memory that depends on # of K-vectors and order
for per atom calculations
------------------------------------------------------------------------- */
void PPPMDisp::deallocate_peratom()
{
peratom_allocate_flag = 0;
memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out);
memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out);
u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL;
memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v3_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL;
memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL;
memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL;
memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL;
memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL;
memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL;
memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL;
memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL;
delete cg_peratom;
delete cg_peratom_6;
cg_peratom = cg_peratom_6 = NULL;
}
/* ----------------------------------------------------------------------
set size of FFT grid (nx,ny,nz_pppm) and g_ewald
for Coulomb interactions
------------------------------------------------------------------------- */
void PPPMDisp::set_grid()
{
double q2 = qsqsum * force->qqrd2e;
// use xprd,yprd,zprd even if triclinic so grid size is the same
// adjust z dimension for 2d slab PPPM
// 3d PPPM just uses zprd since slab_volfactor = 1.0
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
double zprd_slab = zprd*slab_volfactor;
// make initial g_ewald estimate
// based on desired accuracy and real space cutoff
// fluid-occupied volume used to estimate real-space error
// zprd used rather than zprd_slab
double h, h_x,h_y,h_z;
bigint natoms = atom->natoms;
if (!gewaldflag) {
g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
if (g_ewald >= 1.0)
error->all(FLERR,"KSpace accuracy too large to estimate G vector");
g_ewald = sqrt(-log(g_ewald)) / cutoff;
}
// set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
// nz_pppm uses extended zprd_slab instead of zprd
// reduce it until accuracy target is met
if (!gridflag) {
h = h_x = h_y = h_z = 4.0/g_ewald;
int count = 0;
while (1) {
// set grid dimension
nx_pppm = static_cast<int> (xprd/h_x);
ny_pppm = static_cast<int> (yprd/h_y);
nz_pppm = static_cast<int> (zprd_slab/h_z);
if (nx_pppm <= 1) nx_pppm = 2;
if (ny_pppm <= 1) ny_pppm = 2;
if (nz_pppm <= 1) nz_pppm = 2;
//set local grid dimension
int npey_fft,npez_fft;
if (nz_pppm >= nprocs) {
npey_fft = 1;
npez_fft = nprocs;
} else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
int me_y = me % npey_fft;
int me_z = me / npey_fft;
nxlo_fft = 0;
nxhi_fft = nx_pppm - 1;
nylo_fft = me_y*ny_pppm/npey_fft;
nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
nzlo_fft = me_z*nz_pppm/npez_fft;
nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
double qopt = compute_qopt();
double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
count++;
// break loop if the accuracy has been reached or too many loops have been performed
if (dfkspace <= accuracy) break;
if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction");
h *= 0.95;
h_x = h_y = h_z = h;
}
}
// boost grid size until it is factorable
while (!factorable(nx_pppm)) nx_pppm++;
while (!factorable(ny_pppm)) ny_pppm++;
while (!factorable(nz_pppm)) nz_pppm++;
}
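/* ----------------------------------------------------------------------
   illustrative sketch of the initial g_ewald estimate used in set_grid()
   estimate_g_ewald() is a hypothetical standalone helper, not LAMMPS API;
   it inverts the real-space error formula used in f(),
     err ~ 2*q2*exp(-(g*rc)^2) / sqrt(natoms*rc*V),
   and assumes <cmath> is available via the headers included above
------------------------------------------------------------------------- */
static double estimate_g_ewald(double accuracy, double q2, double natoms,
                               double cutoff, double xprd, double yprd,
                               double zprd)
{
  // invert err(g) = accuracy: exp(-(g*rc)^2) = acc*sqrt(N*rc*V)/(2*q2)
  double arg = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
  if (arg >= 1.0) return -1.0;   // accuracy too large; caller must error out
  return sqrt(-log(arg)) / cutoff;
}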
/* ----------------------------------------------------------------------
set the FFT parameters
------------------------------------------------------------------------- */
void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p,
int& nxlo_f,int& nylo_f,int& nzlo_f,
int& nxhi_f,int& nyhi_f,int& nzhi_f,
int& nxlo_i,int& nylo_i,int& nzlo_i,
int& nxhi_i,int& nyhi_i,int& nzhi_i,
int& nxlo_o,int& nylo_o,int& nzlo_o,
int& nxhi_o,int& nyhi_o,int& nzhi_o,
int& nlow, int& nupp,
int& ng, int& nf, int& nfb,
double& sft,double& sftone, int& ord)
{
// global indices of PPPM grid range from 0 to N-1
// nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
// global PPPM grid that I own without ghost cells
// for slab PPPM, assign z grid as if it were not extended
nxlo_i = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_p);
nxhi_i = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1;
nylo_i = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_p);
nyhi_i = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1;
nzlo_i = static_cast<int>
(comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor);
nzhi_i = static_cast<int>
(comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1;
// nlow,nupp = stencil size for mapping particles to PPPM grid
nlow = -(ord-1)/2;
nupp = ord/2;
// sft values for particle <-> grid mapping
// add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
if (ord % 2) sft = OFFSET + 0.5;
else sft = OFFSET;
if (ord % 2) sftone = 0.0;
else sftone = 0.5;
// nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
// global PPPM grid that my particles can contribute charge to
// effectively nlo_in,nhi_in + ghost cells
// nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
// position a particle in my box can be at
// dist[3] = particle position bound = subbox + skin/2.0 + qdist
// qdist = offset due to TIP4P fictitious charge
// convert to triclinic if necessary
// nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
// for slab PPPM, assign z grid as if it were not extended
double *prd,*sublo,*subhi;
if (triclinic == 0) {
prd = domain->prd;
boxlo = domain->boxlo;
sublo = domain->sublo;
subhi = domain->subhi;
} else {
prd = domain->prd_lamda;
boxlo = domain->boxlo_lamda;
sublo = domain->sublo_lamda;
subhi = domain->subhi_lamda;
}
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double dist[3];
double cuthalf = 0.5*neighbor->skin + qdist;
if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
else {
dist[0] = cuthalf/domain->prd[0];
dist[1] = cuthalf/domain->prd[1];
dist[2] = cuthalf/domain->prd[2];
}
int nlo,nhi;
nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
nx_p/xprd + sft) - OFFSET;
nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
nx_p/xprd + sft) - OFFSET;
nxlo_o = nlo + nlow;
nxhi_o = nhi + nupp;
nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
ny_p/yprd + sft) - OFFSET;
nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
ny_p/yprd + sft) - OFFSET;
nylo_o = nlo + nlow;
nyhi_o = nhi + nupp;
nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
nz_p/zprd_slab + sft) - OFFSET;
nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
nz_p/zprd_slab + sft) - OFFSET;
nzlo_o = nlo + nlow;
nzhi_o = nhi + nupp;
// for slab PPPM, change the grid boundary for processors at +z end
// to include the empty volume between periodically repeating slabs
// for slab PPPM, want charge data communicated from -z proc to +z proc,
// but not vice versa, also want field data communicated from +z proc to
// -z proc, but not vice versa
// this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells)
if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) {
nzhi_i = nz_p - 1;
nzhi_o = nz_p - 1;
}
// decomposition of FFT mesh
// global indices range from 0 to N-1
// proc owns entire x-dimension, clump of columns in y,z dimensions
// npey_fft,npez_fft = # of procs in y,z dims
// if nprocs is small enough, proc can own 1 or more entire xy planes,
// else proc owns 2d sub-blocks of yz plane
// me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
// nlo_fft,nhi_fft = lower/upper limit of the section
// of the global FFT mesh that I own
int npey_fft,npez_fft;
if (nz_p >= nprocs) {
npey_fft = 1;
npez_fft = nprocs;
} else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft);
int me_y = me % npey_fft;
int me_z = me / npey_fft;
nxlo_f = 0;
nxhi_f = nx_p - 1;
nylo_f = me_y*ny_p/npey_fft;
nyhi_f = (me_y+1)*ny_p/npey_fft - 1;
nzlo_f = me_z*nz_p/npez_fft;
nzhi_f = (me_z+1)*nz_p/npez_fft - 1;
// PPPM grid for this proc, including ghosts
ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) *
(nzhi_o-nzlo_o+1);
// FFT arrays on this proc, without ghosts
// nfft = FFT points in FFT decomposition on this proc
// nfft_brick = FFT points in 3d brick-decomposition on this proc
// nfft_both = greater of 2 values
nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) *
(nzhi_f-nzlo_f+1);
int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) *
(nzhi_i-nzlo_i+1);
nfb = MAX(nf,nfft_brick);
}
/* ----------------------------------------------------------------------
check if all factors of n are in list of factors
return 1 if yes, 0 if no
------------------------------------------------------------------------- */
int PPPMDisp::factorable(int n)
{
int i;
while (n > 1) {
for (i = 0; i < nfactors; i++) {
if (n % factors[i] == 0) {
n /= factors[i];
break;
}
}
if (i == nfactors) return 0;
}
return 1;
}
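// worked example for factorable(), assuming the usual FFT factor list
// factors[] = {2,3,5} (an assumption; the actual list lives in the header):
//   factorable(90) -> 1   since 90 = 2 * 3^2 * 5
//   factorable(14) -> 0   since the prime factor 7 is not in the list
// this is why set_grid() bumps each grid dimension upward until an
// FFT-friendly size is reached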
/* ----------------------------------------------------------------------
   adjust g_ewald to the new grid size using a Newton solver
------------------------------------------------------------------------- */
void PPPMDisp::adjust_gewald()
{
// Use Newton solver to find g_ewald
double dx;
// Begin algorithm
for (int i = 0; i < LARGE; i++) {
dx = f() / derivf();
g_ewald -= dx; //Update g_ewald
if (fabs(f()) < SMALL) return;
}
  // Failed to converge
  error->all(FLERR, "Could not compute g_ewald");
}
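/* ----------------------------------------------------------------------
   generic sketch of the Newton iteration performed by adjust_gewald()
   newton_root() is a hypothetical helper for illustration only, not part
   of the LAMMPS API; in adjust_gewald() the role of func is played by
   f(), the difference of the real-space and k-space error estimates
------------------------------------------------------------------------- */
static double newton_root(double x, double (*func)(double),
                          double (*dfunc)(double), int maxit, double tol)
{
  for (int i = 0; i < maxit; i++) {
    x -= func(x) / dfunc(x);            // Newton update: x -= f(x)/f'(x)
    if (fabs(func(x)) < tol) return x;  // converged
  }
  return x;                             // caller should verify convergence
}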
/* ----------------------------------------------------------------------
Calculate f(x)
------------------------------------------------------------------------- */
double PPPMDisp::f()
{
double df_rspace, df_kspace;
double q2 = qsqsum * force->qqrd2e;
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
double zprd_slab = zprd*slab_volfactor;
bigint natoms = atom->natoms;
df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) /
sqrt(natoms*cutoff*xprd*yprd*zprd);
double qopt = compute_qopt();
df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
return df_rspace - df_kspace;
}
/* ----------------------------------------------------------------------
Calculate numerical derivative f'(x) using forward difference
[f(x + h) - f(x)] / h
------------------------------------------------------------------------- */
double PPPMDisp::derivf()
{
double h = 0.000001; //Derivative step-size
double df,f1,f2,g_ewald_old;
f1 = f();
g_ewald_old = g_ewald;
g_ewald += h;
f2 = f();
g_ewald = g_ewald_old;
df = (f2 - f1)/h;
return df;
}
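// note: the forward difference above is first-order accurate, O(h) in the
// step size; a central difference, [f(x+h) - f(x-h)]/(2*h), would be
// O(h^2) at the cost of one extra evaluation of f() -- a possible
// refinement, not what derivf() does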
/* ----------------------------------------------------------------------
Calculate the final estimator for the accuracy
------------------------------------------------------------------------- */
double PPPMDisp::final_accuracy()
{
double df_rspace, df_kspace;
double q2 = qsqsum * force->qqrd2e;
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
double zprd_slab = zprd*slab_volfactor;
bigint natoms = atom->natoms;
df_rspace = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
sqrt(natoms*cutoff*xprd*yprd*zprd);
double qopt = compute_qopt();
df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace);
return acc;
}
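// note: the two error estimates are treated as independent and combined
// in quadrature, acc = sqrt(df_rspace^2 + df_kspace^2)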
/* ----------------------------------------------------------------------
Calculate the final estimator for the Dispersion accuracy
------------------------------------------------------------------------- */
void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace)
{
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
double zprd_slab = zprd*slab_volfactor;
bigint natoms = atom->natoms;
acc_real = lj_rspace_error();
double qopt = compute_qopt_6();
acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace);
}
/* ----------------------------------------------------------------------
Compute qopt for Coulomb interactions
------------------------------------------------------------------------- */
double PPPMDisp::compute_qopt()
{
double qopt;
if (differentiation_flag == 1) {
qopt = compute_qopt_ad();
} else {
qopt = compute_qopt_ik();
}
double qopt_all;
MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
return qopt_all;
}
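// note: compute_qopt_ik()/compute_qopt_ad() below return only the partial
// sum over the FFT-mesh slice owned by this rank; the MPI_Allreduce above
// makes the global qopt visible on every rank.  Minimal shape of the
// pattern (sketch; partial_sum() is a hypothetical placeholder):
//   double local = partial_sum();
//   double global;
//   MPI_Allreduce(&local,&global,1,MPI_DOUBLE,MPI_SUM,world);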
/* ----------------------------------------------------------------------
Compute qopt for Dispersion interactions
------------------------------------------------------------------------- */
double PPPMDisp::compute_qopt_6()
{
double qopt;
if (differentiation_flag == 1) {
qopt = compute_qopt_6_ad();
} else {
qopt = compute_qopt_6_ik();
}
double qopt_all;
MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
return qopt_all;
}
/* ----------------------------------------------------------------------
Compute qopt for the ik differentiation scheme and Coulomb interaction
------------------------------------------------------------------------- */
double PPPMDisp::compute_qopt_ik()
{
double qopt = 0.0;
int k,l,m;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
int nx,ny,nz,kper,lper,mper;
double sqk, u2;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,sum2, sum3,dot1,dot2;
int nbx = 2;
int nby = 2;
int nbz = 2;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
pow(unitkz*mper,2.0);
if (sqk != 0.0) {
sum1 = 0.0;
sum2 = 0.0;
sum3 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*pow(qx/g_ewald,2.0));
wx = 1.0;
argx = 0.5*qx*xprd/nx_pppm;
if (argx != 0.0) wx = pow(sin(argx)/argx,order);
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*pow(qy/g_ewald,2.0));
wy = 1.0;
argy = 0.5*qy*yprd/ny_pppm;
if (argy != 0.0) wy = pow(sin(argy)/argy,order);
for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*pow(qz/g_ewald,2.0));
wz = 1.0;
argz = 0.5*qz*zprd_slab/nz_pppm;
if (argz != 0.0) wz = pow(sin(argz)/argz,order);
dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
dot2 = qx*qx+qy*qy+qz*qz;
u2 = pow(wx*wy*wz,2.0);
sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1;
sum3 += u2;
}
}
}
sum2 *= sum2;
sum3 *= sum3*sqk;
qopt += sum1 -sum2/sum3;
}
}
}
}
return qopt;
}
/* ----------------------------------------------------------------------
Compute qopt for the ad differentiation scheme and Coulomb interaction
------------------------------------------------------------------------- */
double PPPMDisp::compute_qopt_ad()
{
double qopt = 0.0;
int k,l,m;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
int nx,ny,nz,kper,lper,mper;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double u2, sqk;
double sum1,sum2,sum3,sum4,dot2;
int nbx = 2;
int nby = 2;
int nbz = 2;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
pow(unitkz*mper,2.0);
if (sqk != 0.0) {
sum1 = 0.0;
sum2 = 0.0;
sum3 = 0.0;
sum4 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*pow(qx/g_ewald,2.0));
wx = 1.0;
argx = 0.5*qx*xprd/nx_pppm;
if (argx != 0.0) wx = pow(sin(argx)/argx,order);
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*pow(qy/g_ewald,2.0));
wy = 1.0;
argy = 0.5*qy*yprd/ny_pppm;
if (argy != 0.0) wy = pow(sin(argy)/argy,order);
for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*pow(qz/g_ewald,2.0));
wz = 1.0;
argz = 0.5*qz*zprd_slab/nz_pppm;
if (argz != 0.0) wz = pow(sin(argz)/argz,order);
dot2 = qx*qx+qy*qy+qz*qz;
u2 = pow(wx*wy*wz,2.0);
sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
sum2 += sx*sy*sz * u2*4.0*MY_PI;
sum3 += u2;
sum4 += dot2*u2;
}
}
}
sum2 *= sum2;
qopt += sum1 - sum2/(sum3*sum4);
}
}
}
}
return qopt;
}
/* ----------------------------------------------------------------------
Compute qopt for the ik differentiation scheme and Dispersion interaction
------------------------------------------------------------------------- */
double PPPMDisp::compute_qopt_6_ik()
{
double qopt = 0.0;
int k,l,m;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
int nx,ny,nz,kper,lper,mper;
double sqk, u2;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,sum2, sum3;
double dot1,dot2, rtdot2, term;
  double inv2ew = 1.0/(2.0*g_ewald_6);
double rtpi = sqrt(MY_PI);
int nbx = 2;
int nby = 2;
int nbz = 2;
for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
mper = m - nz_pppm_6*(2*m/nz_pppm_6);
for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
lper = l - ny_pppm_6*(2*l/ny_pppm_6);
for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
kper = k - nx_pppm_6*(2*k/nx_pppm_6);
sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
pow(unitkz*mper,2.0);
if (sqk != 0.0) {
sum1 = 0.0;
sum2 = 0.0;
sum3 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm_6*nx);
sx = exp(-qx*qx*inv2ew*inv2ew);
wx = 1.0;
argx = 0.5*qx*xprd/nx_pppm_6;
if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm_6*ny);
sy = exp(-qy*qy*inv2ew*inv2ew);
wy = 1.0;
argy = 0.5*qy*yprd/ny_pppm_6;
if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm_6*nz);
sz = exp(-qz*qz*inv2ew*inv2ew);
wz = 1.0;
argz = 0.5*qz*zprd_slab/nz_pppm_6;
if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
dot2 = qx*qx+qy*qy+qz*qz;
rtdot2 = sqrt(dot2);
term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
term *= g_ewald_6*g_ewald_6*g_ewald_6;
u2 = pow(wx*wy*wz,2.0);
sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
sum2 += -u2*term*MY_PI*rtpi/3.0*dot1;
sum3 += u2;
}
}
}
sum2 *= sum2;
sum3 *= sum3*sqk;
qopt += sum1 -sum2/sum3;
}
}
}
}
return qopt;
}
/* ----------------------------------------------------------------------
Compute qopt for the ad differentiation scheme and Dispersion interaction
------------------------------------------------------------------------- */
double PPPMDisp::compute_qopt_6_ad()
{
double qopt = 0.0;
int k,l,m;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
int nx,ny,nz,kper,lper,mper;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double u2, sqk;
double sum1,sum2,sum3,sum4;
double dot2, rtdot2, term;
  double inv2ew = 1.0/(2.0*g_ewald_6);
double rtpi = sqrt(MY_PI);
int nbx = 2;
int nby = 2;
int nbz = 2;
for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
mper = m - nz_pppm_6*(2*m/nz_pppm_6);
for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
lper = l - ny_pppm_6*(2*l/ny_pppm_6);
for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
kper = k - nx_pppm_6*(2*k/nx_pppm_6);
sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
pow(unitkz*mper,2.0);
if (sqk != 0.0) {
sum1 = 0.0;
sum2 = 0.0;
sum3 = 0.0;
sum4 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm_6*nx);
sx = exp(-qx*qx*inv2ew*inv2ew);
wx = 1.0;
argx = 0.5*qx*xprd/nx_pppm_6;
if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm_6*ny);
sy = exp(-qy*qy*inv2ew*inv2ew);
wy = 1.0;
argy = 0.5*qy*yprd/ny_pppm_6;
if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm_6*nz);
sz = exp(-qz*qz*inv2ew*inv2ew);
wz = 1.0;
argz = 0.5*qz*zprd_slab/nz_pppm_6;
if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
dot2 = qx*qx+qy*qy+qz*qz;
rtdot2 = sqrt(dot2);
term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
term *= g_ewald_6*g_ewald_6*g_ewald_6;
u2 = pow(wx*wy*wz,2.0);
sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2;
sum3 += u2;
sum4 += dot2*u2;
}
}
}
sum2 *= sum2;
qopt += sum1 - sum2/(sum3*sum4);
}
}
}
}
return qopt;
}
/* ----------------------------------------------------------------------
set size of FFT grid and g_ewald_6
for Dispersion interactions
------------------------------------------------------------------------- */
void PPPMDisp::set_grid_6()
{
// Calculate csum
if (!csumflag) calc_csum();
if (!gewaldflag_6) set_init_g6();
if (!gridflag_6) set_n_pppm_6();
while (!factorable(nx_pppm_6)) nx_pppm_6++;
while (!factorable(ny_pppm_6)) ny_pppm_6++;
while (!factorable(nz_pppm_6)) nz_pppm_6++;
}
/* ----------------------------------------------------------------------
Calculate the sum of the squared dispersion coefficients and other
related quantities required for the calculations
------------------------------------------------------------------------- */
void PPPMDisp::calc_csum()
{
csumij = 0.0;
csum = 0.0;
int ntypes = atom->ntypes;
int i,j,k;
delete [] cii;
cii = new double[ntypes +1];
for (i = 0; i<=ntypes; i++) cii[i] = 0.0;
delete [] csumi;
csumi = new double[ntypes +1];
for (i = 0; i<=ntypes; i++) csumi[i] = 0.0;
int *neach = new int[ntypes+1];
for (i = 0; i<=ntypes; i++) neach[i] = 0;
  // the following variables are needed to distinguish between arithmetic
  // and geometric mixing
if (function[1]) {
for (i = 1; i <= ntypes; i++)
cii[i] = B[i]*B[i];
int tmp;
for (i = 0; i < atom->nlocal; i++) {
tmp = atom->type[i];
neach[tmp]++;
csum += B[tmp]*B[tmp];
}
}
if (function[2]) {
for (i = 1; i <= ntypes; i++)
cii[i] = 64.0/20.0*B[7*i+3]*B[7*i+3];
int tmp;
for (i = 0; i < atom->nlocal; i++) {
tmp = atom->type[i];
neach[tmp]++;
csum += 64.0/20.0*B[7*tmp+3]*B[7*tmp+3];
}
}
if (function[3]) {
for (i = 1; i <= ntypes; i++)
for (j = 0; j < nsplit; j++)
cii[i] += B[j]*B[nsplit*i + j]*B[nsplit*i + j];
int tmp;
for (i = 0; i < atom->nlocal; i++) {
tmp = atom->type[i];
neach[tmp]++;
for (j = 0; j < nsplit; j++)
csum += B[j]*B[nsplit*tmp + j]*B[nsplit*tmp + j];
}
}
double tmp2;
MPI_Allreduce(&csum,&tmp2,1,MPI_DOUBLE,MPI_SUM,world);
csum = tmp2;
csumflag = 1;
int *neach_all = new int[ntypes+1];
MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world);
  // compute csumij and csumi
double d1, d2;
if (function[1]){
for (i=1; i<=ntypes; i++) {
for (j=1; j<=ntypes; j++) {
csumi[i] += neach_all[j]*B[i]*B[j];
d1 = neach_all[i]*B[i];
d2 = neach_all[j]*B[j];
csumij += d1*d2;
//csumij += neach_all[i]*neach_all[j]*B[i]*B[j];
}
}
}
if (function[2]) {
for (i=1; i<=ntypes; i++) {
for (j=1; j<=ntypes; j++) {
for (k=0; k<=6; k++) {
csumi[i] += neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1];
d1 = neach_all[i]*B[7*i + k];
d2 = neach_all[j]*B[7*(j+1)-k-1];
csumij += d1*d2;
//csumij += neach_all[i]*neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1];
}
}
}
}
if (function[3]) {
for (i=1; i<=ntypes; i++) {
for (j=1; j<=ntypes; j++) {
for (k=0; k<nsplit; k++) {
csumi[i] += neach_all[j]*B[k]*B[nsplit*i+k]*B[nsplit*j+k];
d1 = neach_all[i]*B[nsplit*i+k];
d2 = neach_all[j]*B[nsplit*j+k];
csumij += B[k]*d1*d2;
}
}
}
}
delete [] neach;
delete [] neach_all;
}
/* ----------------------------------------------------------------------
adjust g_ewald_6 to the new grid size
------------------------------------------------------------------------- */
void PPPMDisp::adjust_gewald_6()
{
// Use Newton solver to find g_ewald_6
double dx;
// Start loop
for (int i = 0; i < LARGE; i++) {
dx = f_6() / derivf_6();
g_ewald_6 -= dx; //update g_ewald_6
if (fabs(f_6()) < SMALL) return;
}
  // Failed to converge
  error->all(FLERR, "Could not adjust g_ewald_6");
}
/* ----------------------------------------------------------------------
Calculate f(x) for Dispersion interaction
------------------------------------------------------------------------- */
double PPPMDisp::f_6()
{
double df_rspace, df_kspace;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
bigint natoms = atom->natoms;
df_rspace = lj_rspace_error();
double qopt = compute_qopt_6();
df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
return df_rspace - df_kspace;
}
/* ----------------------------------------------------------------------
Calculate numerical derivative f'(x) using forward difference
[f(x + h) - f(x)] / h
------------------------------------------------------------------------- */
double PPPMDisp::derivf_6()
{
double h = 0.000001; //Derivative step-size
double df,f1,f2,g_ewald_old;
f1 = f_6();
g_ewald_old = g_ewald_6;
g_ewald_6 += h;
f2 = f_6();
g_ewald_6 = g_ewald_old;
df = (f2 - f1)/h;
return df;
}
/* ----------------------------------------------------------------------
calculate an initial value for g_ewald_6
---------------------------------------------------------------------- */
void PPPMDisp::set_init_g6()
{
// use xprd,yprd,zprd even if triclinic so grid size is the same
// adjust z dimension for 2d slab PPPM
// 3d PPPM just uses zprd since slab_volfactor = 1.0
// make initial g_ewald estimate
// based on desired error and real space cutoff
  // compute initial value for df_real with g_ewald_6 = 1/cutoff_lj
  // if df_real > 0, repeatedly multiply g_ewald_6 by 2 until df_real < 0
  // else, repeatedly divide g_ewald_6 by 2 until df_real > 0
  // then perform bisection between the last two values of g_ewald_6
double df_real;
double g_ewald_old;
double gmin, gmax;
// check if there is a user defined accuracy
double acc_rspace = accuracy;
if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6;
g_ewald_old = g_ewald_6 = 1.0/cutoff_lj;
df_real = lj_rspace_error() - acc_rspace;
int counter = 0;
if (df_real > 0) {
while (df_real > 0 && counter < LARGE) {
counter++;
g_ewald_old = g_ewald_6;
g_ewald_6 *= 2;
df_real = lj_rspace_error() - acc_rspace;
}
}
if (df_real < 0) {
while (df_real < 0 && counter < LARGE) {
counter++;
g_ewald_old = g_ewald_6;
g_ewald_6 *= 0.5;
df_real = lj_rspace_error() - acc_rspace;
}
}
if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
gmin = MIN(g_ewald_6, g_ewald_old);
gmax = MAX(g_ewald_6, g_ewald_old);
g_ewald_6 = gmin + 0.5*(gmax-gmin);
counter = 0;
while (gmax-gmin > SMALL && counter < LARGE) {
counter++;
df_real = lj_rspace_error() -acc_rspace;
if (df_real < 0) gmax = g_ewald_6;
else gmin = g_ewald_6;
g_ewald_6 = gmin + 0.5*(gmax-gmin);
}
if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
}
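/* ----------------------------------------------------------------------
   self-contained sketch of the bisection step used by set_init_g6()
   bisect_root() is a hypothetical helper for illustration, not LAMMPS
   API; it assumes func changes sign on [lo,hi] with func < 0 at hi,
   matching the df_real sign convention above
------------------------------------------------------------------------- */
static double bisect_root(double lo, double hi, double (*func)(double),
                          int maxit, double tol)
{
  double mid = lo + 0.5*(hi - lo);
  for (int i = 0; i < maxit && hi - lo > tol; i++) {
    if (func(mid) < 0.0) hi = mid;      // root lies in [lo,mid]
    else lo = mid;                      // root lies in [mid,hi]
    mid = lo + 0.5*(hi - lo);
  }
  return mid;
}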
/* ----------------------------------------------------------------------
calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction
---------------------------------------------------------------------- */
void PPPMDisp::set_n_pppm_6()
{
bigint natoms = atom->natoms;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double h, h_x,h_y,h_z;
double acc_kspace = accuracy;
if (accuracy_kspace_6 > 0.0) acc_kspace = accuracy_kspace_6;
// initial value for the grid spacing
h = h_x = h_y = h_z = 4.0/g_ewald_6;
  // decrease grid spacing until the required precision is obtained
int count = 0;
while(1) {
// set grid dimension
nx_pppm_6 = static_cast<int> (xprd/h_x);
ny_pppm_6 = static_cast<int> (yprd/h_y);
nz_pppm_6 = static_cast<int> (zprd_slab/h_z);
if (nx_pppm_6 <= 1) nx_pppm_6 = 2;
if (ny_pppm_6 <= 1) ny_pppm_6 = 2;
if (nz_pppm_6 <= 1) nz_pppm_6 = 2;
//set local grid dimension
int npey_fft,npez_fft;
if (nz_pppm_6 >= nprocs) {
npey_fft = 1;
npez_fft = nprocs;
} else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft);
int me_y = me % npey_fft;
int me_z = me / npey_fft;
nxlo_fft_6 = 0;
nxhi_fft_6 = nx_pppm_6 - 1;
nylo_fft_6 = me_y*ny_pppm_6/npey_fft;
nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1;
nzlo_fft_6 = me_z*nz_pppm_6/npez_fft;
nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1;
double qopt = compute_qopt_6();
double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
count++;
// break loop if the accuracy has been reached or too many loops have been performed
if (df_kspace <= acc_kspace) break;
if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion");
h *= 0.95;
h_x = h_y = h_z = h;
}
}
/* ----------------------------------------------------------------------
calculate the real space error for dispersion interactions
---------------------------------------------------------------------- */
double PPPMDisp::lj_rspace_error()
{
bigint natoms = atom->natoms;
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
double zprd_slab = zprd*slab_volfactor;
double deltaf;
double rgs = (cutoff_lj*g_ewald_6);
rgs *= rgs;
double rgs_inv = 1.0/rgs;
deltaf = csum/sqrt(natoms*xprd*yprd*zprd_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)*
exp(-rgs)*(1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6)));
return deltaf;
}
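// note: rgs = (cutoff_lj*g_ewald_6)^2, so the real-space dispersion error
// decays as exp(-(rc*g)^2) times a cubic polynomial in 1/rgs -- a
// restatement of the expression above, useful when tuning g_ewald_6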
/* ----------------------------------------------------------------------
   Compute the modified (Hockney-Eastwood) Coulomb Green's function
---------------------------------------------------------------------- */
void PPPMDisp::compute_gf()
{
int k,l,m,n;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
volume = xprd * yprd * zprd_slab;
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
int kper,lper,mper;
double snx,sny,snz,snx2,sny2,snz2;
double sqk;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double numerator,denominator;
n = 0;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
qz = unitkz*mper;
snz = sin(0.5*qz*zprd_slab/nz_pppm);
snz2 = snz*snz;
sz = exp(-0.25*pow(qz/g_ewald,2.0));
wz = 1.0;
argz = 0.5*qz*zprd_slab/nz_pppm;
if (argz != 0.0) wz = pow(sin(argz)/argz,order);
wz *= wz;
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
qy = unitky*lper;
sny = sin(0.5*qy*yprd/ny_pppm);
sny2 = sny*sny;
sy = exp(-0.25*pow(qy/g_ewald,2.0));
wy = 1.0;
argy = 0.5*qy*yprd/ny_pppm;
if (argy != 0.0) wy = pow(sin(argy)/argy,order);
wy *= wy;
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
qx = unitkx*kper;
snx = sin(0.5*qx*xprd/nx_pppm);
snx2 = snx*snx;
sx = exp(-0.25*pow(qx/g_ewald,2.0));
wx = 1.0;
argx = 0.5*qx*xprd/nx_pppm;
if (argx != 0.0) wx = pow(sin(argx)/argx,order);
wx *= wx;
sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
if (sqk != 0.0) {
numerator = 4.0*MY_PI/sqk;
denominator = gf_denom(snx2,sny2,snz2, gf_b, order);
greensfn[n++] = numerator*sx*sy*sz*wx*wy*wz/denominator;
} else greensfn[n++] = 0.0;
}
}
}
}
/* ----------------------------------------------------------------------
   compute self force coefficients for the ad-differentiation scheme
   (generic version, used for both the Coulomb and dispersion grids)
------------------------------------------------------------------------- */
void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord,
int nxlo_ft, int nylo_ft, int nzlo_ft,
int nxhi_ft, int nyhi_ft, int nzhi_ft,
double *sf_pre1, double *sf_pre2, double *sf_pre3,
double *sf_pre4, double *sf_pre5, double *sf_pre6)
{
int i,k,l,m,n;
double *prd;
// volume-dependent factors
// adjust z dimension for 2d slab PPPM
// z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
int nx,ny,nz,kper,lper,mper;
double argx,argy,argz;
double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2;
double u0,u1,u2,u3,u4,u5,u6;
double sum1,sum2,sum3,sum4,sum5,sum6;
int nb = 2;
n = 0;
for (m = nzlo_ft; m <= nzhi_ft; m++) {
mper = m - nzp*(2*m/nzp);
for (l = nylo_ft; l <= nyhi_ft; l++) {
lper = l - nyp*(2*l/nyp);
for (k = nxlo_ft; k <= nxhi_ft; k++) {
kper = k - nxp*(2*k/nxp);
sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
for (i = -nb; i <= nb; i++) {
qx0 = unitkx*(kper+nxp*i);
qx1 = unitkx*(kper+nxp*(i+1));
qx2 = unitkx*(kper+nxp*(i+2));
wx0[i+2] = 1.0;
wx1[i+2] = 1.0;
wx2[i+2] = 1.0;
argx = 0.5*qx0*xprd/nxp;
if (argx != 0.0) wx0[i+2] = pow(sin(argx)/argx,ord);
argx = 0.5*qx1*xprd/nxp;
if (argx != 0.0) wx1[i+2] = pow(sin(argx)/argx,ord);
argx = 0.5*qx2*xprd/nxp;
if (argx != 0.0) wx2[i+2] = pow(sin(argx)/argx,ord);
qy0 = unitky*(lper+nyp*i);
qy1 = unitky*(lper+nyp*(i+1));
qy2 = unitky*(lper+nyp*(i+2));
wy0[i+2] = 1.0;
wy1[i+2] = 1.0;
wy2[i+2] = 1.0;
argy = 0.5*qy0*yprd/nyp;
if (argy != 0.0) wy0[i+2] = pow(sin(argy)/argy,ord);
argy = 0.5*qy1*yprd/nyp;
if (argy != 0.0) wy1[i+2] = pow(sin(argy)/argy,ord);
argy = 0.5*qy2*yprd/nyp;
if (argy != 0.0) wy2[i+2] = pow(sin(argy)/argy,ord);
qz0 = unitkz*(mper+nzp*i);
qz1 = unitkz*(mper+nzp*(i+1));
qz2 = unitkz*(mper+nzp*(i+2));
wz0[i+2] = 1.0;
wz1[i+2] = 1.0;
wz2[i+2] = 1.0;
argz = 0.5*qz0*zprd_slab/nzp;
if (argz != 0.0) wz0[i+2] = pow(sin(argz)/argz,ord);
argz = 0.5*qz1*zprd_slab/nzp;
if (argz != 0.0) wz1[i+2] = pow(sin(argz)/argz,ord);
argz = 0.5*qz2*zprd_slab/nzp;
if (argz != 0.0) wz2[i+2] = pow(sin(argz)/argz,ord);
}
for (nx = 0; nx <= 4; nx++) {
for (ny = 0; ny <= 4; ny++) {
for (nz = 0; nz <= 4; nz++) {
u0 = wx0[nx]*wy0[ny]*wz0[nz];
u1 = wx1[nx]*wy0[ny]*wz0[nz];
u2 = wx2[nx]*wy0[ny]*wz0[nz];
u3 = wx0[nx]*wy1[ny]*wz0[nz];
u4 = wx0[nx]*wy2[ny]*wz0[nz];
u5 = wx0[nx]*wy0[ny]*wz1[nz];
u6 = wx0[nx]*wy0[ny]*wz2[nz];
sum1 += u0*u1;
sum2 += u0*u2;
sum3 += u0*u3;
sum4 += u0*u4;
sum5 += u0*u5;
sum6 += u0*u6;
}
}
}
// store values
sf_pre1[n] = sum1;
sf_pre2[n] = sum2;
sf_pre3[n] = sum3;
sf_pre4[n] = sum4;
sf_pre5[n] = sum5;
sf_pre6[n++] = sum6;
}
}
}
}
/* ----------------------------------------------------------------------
   Compute the modified (Hockney-Eastwood) dispersion Green's function
---------------------------------------------------------------------- */
void PPPMDisp::compute_gf_6()
{
double *prd;
int k,l,m,n;
// volume-dependent factors
// adjust z dimension for 2d slab PPPM
// z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
int kper,lper,mper;
double sqk;
double snx,sny,snz,snx2,sny2,snz2;
double argx,argy,argz,wx,wy,wz,sx,sy,sz;
double qx,qy,qz;
double rtsqk, term;
double numerator,denominator;
  double inv2ew = 1.0/(2.0*g_ewald_6);
double rtpi = sqrt(MY_PI);
numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0);
n = 0;
for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
mper = m - nz_pppm_6*(2*m/nz_pppm_6);
qz = unitkz*mper;
snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6);
snz2 = snz*snz;
sz = exp(-qz*qz*inv2ew*inv2ew);
wz = 1.0;
argz = 0.5*qz*zprd_slab/nz_pppm_6;
if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
wz *= wz;
for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
lper = l - ny_pppm_6*(2*l/ny_pppm_6);
qy = unitky*lper;
sny = sin(0.5*unitky*lper*yprd/ny_pppm_6);
sny2 = sny*sny;
sy = exp(-qy*qy*inv2ew*inv2ew);
wy = 1.0;
argy = 0.5*qy*yprd/ny_pppm_6;
if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
wy *= wy;
for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
kper = k - nx_pppm_6*(2*k/nx_pppm_6);
qx = unitkx*kper;
snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6);
snx2 = snx*snx;
sx = exp(-qx*qx*inv2ew*inv2ew);
wx = 1.0;
argx = 0.5*qx*xprd/nx_pppm_6;
if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
wx *= wx;
sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
if (sqk != 0.0) {
denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6);
rtsqk = sqrt(sqk);
term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
greensfn_6[n++] = numerator*term*wx*wy*wz/denominator;
} else greensfn_6[n++] = 0.0;
}
}
}
}
/* ----------------------------------------------------------------------
compute self force coefficients for ad-differentiation scheme
and Coulomb interaction
------------------------------------------------------------------------- */
void PPPMDisp::compute_sf_coeff()
{
int i,k,l,m,n;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
volume = xprd * yprd * zprd_slab;
for (i = 0; i <= 5; i++) sf_coeff[i] = 0.0;
n = 0;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
for (l = nylo_fft; l <= nyhi_fft; l++) {
for (k = nxlo_fft; k <= nxhi_fft; k++) {
sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
++n;
}
}
}
// Compute the coefficients for the self-force correction
double prex, prey, prez;
prex = prey = prez = MY_PI/volume;
prex *= nx_pppm/xprd;
prey *= ny_pppm/yprd;
prez *= nz_pppm/zprd_slab;
sf_coeff[0] *= prex;
sf_coeff[1] *= prex*2;
sf_coeff[2] *= prey;
sf_coeff[3] *= prey*2;
sf_coeff[4] *= prez;
sf_coeff[5] *= prez*2;
// communicate values with other procs
double tmp[6];
MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world);
for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n];
}
/* ----------------------------------------------------------------------
compute self force coefficients for ad-differentiation scheme
and Dispersion interaction
------------------------------------------------------------------------- */
void PPPMDisp::compute_sf_coeff_6()
{
int i,k,l,m,n;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
volume = xprd * yprd * zprd_slab;
for (i = 0; i <= 5; i++) sf_coeff_6[i] = 0.0;
n = 0;
for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
sf_coeff_6[0] += sf_precoeff1_6[n]*greensfn_6[n];
sf_coeff_6[1] += sf_precoeff2_6[n]*greensfn_6[n];
sf_coeff_6[2] += sf_precoeff3_6[n]*greensfn_6[n];
sf_coeff_6[3] += sf_precoeff4_6[n]*greensfn_6[n];
sf_coeff_6[4] += sf_precoeff5_6[n]*greensfn_6[n];
sf_coeff_6[5] += sf_precoeff6_6[n]*greensfn_6[n];
++n;
}
}
}
// perform multiplication with prefactors
double prex, prey, prez;
prex = prey = prez = MY_PI/volume;
prex *= nx_pppm_6/xprd;
prey *= ny_pppm_6/yprd;
prez *= nz_pppm_6/zprd_slab;
sf_coeff_6[0] *= prex;
sf_coeff_6[1] *= prex*2;
sf_coeff_6[2] *= prey;
sf_coeff_6[3] *= prey*2;
sf_coeff_6[4] *= prez;
sf_coeff_6[5] *= prez*2;
// communicate values with other procs
double tmp[6];
MPI_Allreduce(sf_coeff_6,tmp,6,MPI_DOUBLE,MPI_SUM,world);
for (n = 0; n < 6; n++) sf_coeff_6[n] = tmp[n];
}
/* ----------------------------------------------------------------------
   denominator for Hockney-Eastwood Green's function
     of x,y,z = sin(kx*deltax/2), etc

            inf                 n-1
   S(n,k) = Sum  W(k+pi*j)**2 = Sum b(l)*(z*z)**l
           j=-inf              l=0

          = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x)  at z = sin(x)

   gf_b = denominator expansion coeffs
------------------------------------------------------------------------- */
double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord)
{
double sx,sy,sz;
sz = sy = sx = 0.0;
for (int l = ord-1; l >= 0; l--) {
sx = g_b[l] + sx*x;
sy = g_b[l] + sy*y;
sz = g_b[l] + sz*z;
}
double s = sx*sy*sz;
return s*s;
}
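// the loop in gf_denom() is Horner's rule run on all three directions at
// once: it evaluates S(z) = g_b[0] + g_b[1]*z + ... + g_b[ord-1]*z**(ord-1)
// at z = sin(k*delta/2)**2 (the x,y,z arguments); e.g. for ord = 3 the
// three iterations give
//   sx = g_b[2];  sx = g_b[1] + sx*x;  sx = g_b[0] + sx*x;
// and the returned value (Sx*Sy*Sz)**2 is the squared product used as the
// Green's function denominator in the compute_gf* routines above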
/* ----------------------------------------------------------------------
pre-compute Green's function denominator expansion coeffs, Gamma(2n)
------------------------------------------------------------------------- */
void PPPMDisp::compute_gf_denom(double* gf, int ord)
{
int k,l,m;
for (l = 1; l < ord; l++) gf[l] = 0.0;
gf[0] = 1.0;
for (m = 1; m < ord; m++) {
for (l = m; l > 0; l--)
gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1));
gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5));
}
bigint ifact = 1;
for (k = 1; k < 2*ord; k++) ifact *= k;
double gaminv = 1.0/ifact;
for (l = 0; l < ord; l++) gf[l] *= gaminv;
}
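// two non-obvious points in the recurrence above, as we read the code:
// the inner loop stops at l == 1, so the gf[0] update runs with l == 0,
// i.e. with the factor (0-m)*(0-m-0.5); and ifact accumulates (2*ord-1)!
// exactly in a 64-bit bigint, which is safe for the interpolation orders
// PPPM permits (MAXORDER is 7 in the stock code, so at most 13!)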
/* ----------------------------------------------------------------------
ghost-swap to accumulate full density in brick decomposition
remap density from 3d brick decomposition to FFT decomposition
for coulomb interaction or dispersion interaction with geometric
mixing
------------------------------------------------------------------------- */
void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i,
int nxhi_i, int nyhi_i, int nzhi_i,
FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work,
LAMMPS_NS::Remap* rmp)
{
int n,ix,iy,iz;
// copy grabs inner portion of density from 3d brick
// remap could be done as pre-stage of FFT,
// but this works optimally on only double values, not complex values
n = 0;
for (iz = nzlo_i; iz <= nzhi_i; iz++)
for (iy = nylo_i; iy <= nyhi_i; iy++)
for (ix = nxlo_i; ix <= nxhi_i; ix++)
dfft[n++] = dbrick[iz][iy][ix];
rmp->perform(dfft,dfft,work);
}
/* ----------------------------------------------------------------------
ghost-swap to accumulate full density in brick decomposition
remap density from 3d brick decomposition to FFT decomposition
for dispersion with arithmetic mixing rule
------------------------------------------------------------------------- */
void PPPMDisp::brick2fft_a()
{
int n,ix,iy,iz;
// copy grabs inner portion of density from 3d brick
// remap could be done as pre-stage of FFT,
// but this works optimally on only double values, not complex values
n = 0;
for (iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
for (iy = nylo_in_6; iy <= nyhi_in_6; iy++)
for (ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
density_fft_a0[n] = density_brick_a0[iz][iy][ix];
density_fft_a1[n] = density_brick_a1[iz][iy][ix];
density_fft_a2[n] = density_brick_a2[iz][iy][ix];
density_fft_a3[n] = density_brick_a3[iz][iy][ix];
density_fft_a4[n] = density_brick_a4[iz][iy][ix];
density_fft_a5[n] = density_brick_a5[iz][iy][ix];
density_fft_a6[n++] = density_brick_a6[iz][iy][ix];
}
remap_6->perform(density_fft_a0,density_fft_a0,work1_6);
remap_6->perform(density_fft_a1,density_fft_a1,work1_6);
remap_6->perform(density_fft_a2,density_fft_a2,work1_6);
remap_6->perform(density_fft_a3,density_fft_a3,work1_6);
remap_6->perform(density_fft_a4,density_fft_a4,work1_6);
remap_6->perform(density_fft_a5,density_fft_a5,work1_6);
remap_6->perform(density_fft_a6,density_fft_a6,work1_6);
}
/* ----------------------------------------------------------------------
ghost-swap to accumulate full density in brick decomposition
remap density from 3d brick decomposition to FFT decomposition
for dispersion with special case
------------------------------------------------------------------------- */
void PPPMDisp::brick2fft_none()
{
int k,n,ix,iy,iz;
// copy grabs inner portion of density from 3d brick
// remap could be done as pre-stage of FFT,
// but this works optimally on only double values, not complex values
for (k = 0; k<nsplit_alloc; k++) {
n = 0;
for (iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
for (iy = nylo_in_6; iy <= nyhi_in_6; iy++)
for (ix = nxlo_in_6; ix <= nxhi_in_6; ix++)
density_fft_none[k][n++] = density_brick_none[k][iz][iy][ix];
}
for (k=0; k<nsplit_alloc; k++)
remap_6->perform(density_fft_none[k],density_fft_none[k],work1_6);
}
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void PPPMDisp::particle_map(double delx, double dely, double delz,
double sft, int** p2g, int nup, int nlow,
int nxlo, int nylo, int nzlo,
int nxhi, int nyhi, int nzhi)
{
int nx,ny,nz;
double **x = atom->x;
int nlocal = atom->nlocal;
if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
int flag = 0;
for (int i = 0; i < nlocal; i++) {
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
// add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
nx = static_cast<int> ((x[i][0]-boxlo[0])*delx+sft) - OFFSET;
ny = static_cast<int> ((x[i][1]-boxlo[1])*dely+sft) - OFFSET;
nz = static_cast<int> ((x[i][2]-boxlo[2])*delz+sft) - OFFSET;
p2g[i][0] = nx;
p2g[i][1] = ny;
p2g[i][2] = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlow < nxlo || nx+nup > nxhi ||
ny+nlow < nylo || ny+nup > nyhi ||
nz+nlow < nzlo || nz+nup > nzhi)
flag = 1;
}
if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp");
}
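// why the OFFSET trick in particle_map() works: static_cast<int> truncates
// toward zero, so int(-0.75) would give 0 where the stencil logic needs -1.
// Adding a large positive OFFSET before truncating and subtracting it
// afterwards moves the argument into the range where truncation agrees
// with floor().  With OFFSET = 16384 (the value the PPPM styles define):
//   static_cast<int>(-0.75 + 16384) - 16384  ==  16383 - 16384  ==  -1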
void PPPMDisp::particle_map_c(double delx, double dely, double delz,
double sft, int** p2g, int nup, int nlow,
int nxlo, int nylo, int nzlo,
int nxhi, int nyhi, int nzhi)
{
particle_map(delx, dely, delz, sft, p2g, nup, nlow,
nxlo, nylo, nzlo, nxhi, nyhi, nzhi);
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void PPPMDisp::make_rho_c()
{
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
// clear 3d density array
memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
ngrid*sizeof(FFT_SCALAR));
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
double *q = atom->q;
double **x = atom->x;
int nlocal = atom->nlocal;
for (int i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
z0 = delvolinv * q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
density_brick[mz][my][mx] += x0*rho1d[0][l];
}
}
}
}
}
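// the assignment weights in make_rho_c() are separable: the full
// contribution of charge q[i] to grid point (mx,my,mz) is
//   delvolinv * q[i] * rho1d[2][n] * rho1d[1][m] * rho1d[0][l]
// which is why the partial products z0 and y0 are hoisted out of the inner
// loops, leaving one multiply-add per stencil point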
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = dispersion "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid --- geometric mixing
------------------------------------------------------------------------- */
void PPPMDisp::make_rho_g()
{
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
// clear 3d density array
memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
ngrid_6*sizeof(FFT_SCALAR));
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
int type;
double **x = atom->x;
int nlocal = atom->nlocal;
for (int i = 0; i < nlocal; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
type = atom->type[i];
z0 = delvolinv_6 * B[type];
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
y0 = z0*rho1d_6[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
x0 = y0*rho1d_6[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
density_brick_g[mz][my][mx] += x0*rho1d_6[0][l];
}
}
}
}
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = dispersion "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid --- arithmetic mixing
------------------------------------------------------------------------- */
void PPPMDisp::make_rho_a()
{
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0,w;
// clear 3d density array
memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
ngrid_6*sizeof(FFT_SCALAR));
memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
ngrid_6*sizeof(FFT_SCALAR));
memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
ngrid_6*sizeof(FFT_SCALAR));
memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
ngrid_6*sizeof(FFT_SCALAR));
memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
ngrid_6*sizeof(FFT_SCALAR));
memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
ngrid_6*sizeof(FFT_SCALAR));
memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
ngrid_6*sizeof(FFT_SCALAR));
// loop over my particles, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
int type;
double **x = atom->x;
int nlocal = atom->nlocal;
for (int i = 0; i < nlocal; i++) {
// do the following for all 7 grids
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
type = atom->type[i];
z0 = delvolinv_6;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
y0 = z0*rho1d_6[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
x0 = y0*rho1d_6[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
w = x0*rho1d_6[0][l];
density_brick_a0[mz][my][mx] += w*B[7*type];
density_brick_a1[mz][my][mx] += w*B[7*type+1];
density_brick_a2[mz][my][mx] += w*B[7*type+2];
density_brick_a3[mz][my][mx] += w*B[7*type+3];
density_brick_a4[mz][my][mx] += w*B[7*type+4];
density_brick_a5[mz][my][mx] += w*B[7*type+5];
density_brick_a6[mz][my][mx] += w*B[7*type+6];
}
}
}
}
}
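// why seven densities: with arithmetic mixing the dispersion prefactor of
// a pair expands into seven cross terms (the binomial expansion of the
// mixed sigma**6, as we read it), so each atom spreads the seven weights
// B[7*type+0..6] onto grids a0..a6; the force routine fieldforce_a_ik()
// below then pairs grid k with the partner coefficient B[7*type+6-k] to
// reassemble every cross term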
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = dispersion "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid --- case when mixing rules don't apply
------------------------------------------------------------------------- */
void PPPMDisp::make_rho_none()
{
int k,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0,w;
// clear 3d density array
for (k = 0; k < nsplit_alloc; k++)
memset(&(density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
ngrid_6*sizeof(FFT_SCALAR));
// loop over my particles, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
int type;
double **x = atom->x;
int nlocal = atom->nlocal;
for (int i = 0; i < nlocal; i++) {
// do the following for all nsplit grids
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
type = atom->type[i];
z0 = delvolinv_6;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
y0 = z0*rho1d_6[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
x0 = y0*rho1d_6[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
w = x0*rho1d_6[0][l];
for (k = 0; k < nsplit; k++)
density_brick_none[k][mz][my][mx] += w*B[nsplit*type + k];
}
}
}
}
}
/* ----------------------------------------------------------------------
FFT-based Poisson solver for ik differentiation
------------------------------------------------------------------------- */
void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2,
int nx_p, int ny_p, int nz_p, int nft,
int nxlo_ft, int nylo_ft, int nzlo_ft,
int nxhi_ft, int nyhi_ft, int nzhi_ft,
int nxlo_i, int nylo_i, int nzlo_i,
int nxhi_i, int nyhi_i, int nzhi_i,
double& egy, double* gfn,
double* kx, double* ky, double* kz,
double* kx2, double* ky2, double* kz2,
FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick,
double* vir, double** vcoeff, double** vcoeff2,
FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
{
int i,j,k,n;
double eng;
// transform charge/dispersion density (r -> k)
n = 0;
for (i = 0; i < nft; i++) {
wk1[n++] = dfft[i];
wk1[n++] = ZEROF;
}
ft1->compute(wk1,wk1,1);
// if requested, compute energy and virial contribution
double scaleinv = 1.0/(nx_p*ny_p*nz_p);
double s2 = scaleinv*scaleinv;
if (eflag_global || vflag_global) {
if (vflag_global) {
n = 0;
for (i = 0; i < nft; i++) {
eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
if (eflag_global) egy += eng;
n += 2;
}
} else {
n = 0;
for (i = 0; i < nft; i++) {
egy +=
s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
n += 2;
}
}
}
// scale by 1/total-grid-pts to get rho(k)
// multiply by Green's function to get V(k)
n = 0;
for (i = 0; i < nft; i++) {
wk1[n++] *= scaleinv * gfn[i];
wk1[n++] *= scaleinv * gfn[i];
}
// compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
// FFT leaves data in 3d brick decomposition
// copy it into inner portion of vdx,vdy,vdz arrays
// x & y direction gradient
n = 0;
for (k = nzlo_ft; k <= nzhi_ft; k++)
for (j = nylo_ft; j <= nyhi_ft; j++)
for (i = nxlo_ft; i <= nxhi_ft; i++) {
wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n];
wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1];
n += 2;
}
ft2->compute(wk2,wk2,-1);
n = 0;
for (k = nzlo_i; k <= nzhi_i; k++)
for (j = nylo_i; j <= nyhi_i; j++)
for (i = nxlo_i; i <= nxhi_i; i++) {
vx_brick[k][j][i] = wk2[n++];
vy_brick[k][j][i] = wk2[n++];
}
if (!eflag_atom) {
// z direction gradient only
n = 0;
for (k = nzlo_ft; k <= nzhi_ft; k++)
for (j = nylo_ft; j <= nyhi_ft; j++)
for (i = nxlo_ft; i <= nxhi_ft; i++) {
wk2[n] = kz[k]*wk1[n+1];
wk2[n+1] = -kz[k]*wk1[n];
n += 2;
}
ft2->compute(wk2,wk2,-1);
n = 0;
for (k = nzlo_i; k <= nzhi_i; k++)
for (j = nylo_i; j <= nyhi_i; j++)
for (i = nxlo_i; i <= nxhi_i; i++) {
vz_brick[k][j][i] = wk2[n];
n += 2;
}
}
else {
// z direction gradient & per-atom energy
n = 0;
for (k = nzlo_ft; k <= nzhi_ft; k++)
for (j = nylo_ft; j <= nyhi_ft; j++)
for (i = nxlo_ft; i <= nxhi_ft; i++) {
wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1];
wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n];
n += 2;
}
ft2->compute(wk2,wk2,-1);
n = 0;
for (k = nzlo_i; k <= nzhi_i; k++)
for (j = nylo_i; j <= nyhi_i; j++)
for (i = nxlo_i; i <= nxhi_i; i++) {
vz_brick[k][j][i] = wk2[n++];
u_pa[k][j][i] = wk2[n++];
}
}
if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
}
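// two details of the ik solve above worth spelling out:
// 1) differentiation in k-space: a gradient is a multiply by i*k, so the
//    wk2 loops that swap real and imaginary parts with a sign change are
//    complex multiplies by (-i) times the 0.5*(k[i]-k2[i]) combination of
//    the two wavevector arrays;
// 2) packing: V(k) has the Hermitian symmetry of a real field, so the x
//    and y gradients survive one shared inverse FFT as the real and
//    imaginary parts of a single complex array -- that is why vx_brick
//    and vy_brick are both unpacked from the same transform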
/* ----------------------------------------------------------------------
FFT-based Poisson solver for ad differentiation
------------------------------------------------------------------------- */
void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2,
int nx_p, int ny_p, int nz_p, int nft,
int nxlo_ft, int nylo_ft, int nzlo_ft,
int nxhi_ft, int nyhi_ft, int nzhi_ft,
int nxlo_i, int nylo_i, int nzlo_i,
int nxhi_i, int nyhi_i, int nzhi_i,
double& egy, double* gfn,
double* vir, double** vcoeff, double** vcoeff2,
FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
{
int i,j,k,n;
double eng;
// transform charge/dispersion density (r -> k)
n = 0;
for (i = 0; i < nft; i++) {
wk1[n++] = dfft[i];
wk1[n++] = ZEROF;
}
ft1->compute(wk1,wk1,1);
// if requested, compute energy and virial contribution
double scaleinv = 1.0/(nx_p*ny_p*nz_p);
double s2 = scaleinv*scaleinv;
if (eflag_global || vflag_global) {
if (vflag_global) {
n = 0;
for (i = 0; i < nft; i++) {
eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
if (eflag_global) egy += eng;
n += 2;
}
} else {
n = 0;
for (i = 0; i < nft; i++) {
egy +=
s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
n += 2;
}
}
}
// scale by 1/total-grid-pts to get rho(k)
// multiply by Green's function to get V(k)
n = 0;
for (i = 0; i < nft; i++) {
wk1[n++] *= scaleinv * gfn[i];
wk1[n++] *= scaleinv * gfn[i];
}
n = 0;
for (k = nzlo_ft; k <= nzhi_ft; k++)
for (j = nylo_ft; j <= nyhi_ft; j++)
for (i = nxlo_ft; i <= nxhi_ft; i++) {
wk2[n] = wk1[n];
wk2[n+1] = wk1[n+1];
n += 2;
}
ft2->compute(wk2,wk2,-1);
n = 0;
for (k = nzlo_i; k <= nzhi_i; k++)
for (j = nylo_i; j <= nyhi_i; j++)
for (i = nxlo_i; i <= nxhi_i; i++) {
u_pa[k][j][i] = wk2[n++];
n++;
}
if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
}
/* ----------------------------------------------------------------------
Fourier Transform for per atom virial calculations
------------------------------------------------------------------------- */
void PPPMDisp::poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2,
double** vcoeff, double** vcoeff2, int nft,
int nxlo_i, int nylo_i, int nzlo_i,
int nxhi_i, int nyhi_i, int nzhi_i,
FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
{
//v0 & v1 term
int n, i, j, k;
n = 0;
for (i = 0; i < nft; i++) {
wk2[n] = wk1[n]*vcoeff[i][0] - wk1[n+1]*vcoeff[i][1];
wk2[n+1] = wk1[n+1]*vcoeff[i][0] + wk1[n]*vcoeff[i][1];
n += 2;
}
ft2->compute(wk2,wk2,-1);
n = 0;
for (k = nzlo_i; k <= nzhi_i; k++)
for (j = nylo_i; j <= nyhi_i; j++)
for (i = nxlo_i; i <= nxhi_i; i++) {
v0_pa[k][j][i] = wk2[n++];
v1_pa[k][j][i] = wk2[n++];
}
//v2 & v3 term
n = 0;
for (i = 0; i < nft; i++) {
wk2[n] = wk1[n]*vcoeff[i][2] - wk1[n+1]*vcoeff2[i][0];
wk2[n+1] = wk1[n+1]*vcoeff[i][2] + wk1[n]*vcoeff2[i][0];
n += 2;
}
ft2->compute(wk2,wk2,-1);
n = 0;
for (k = nzlo_i; k <= nzhi_i; k++)
for (j = nylo_i; j <= nyhi_i; j++)
for (i = nxlo_i; i <= nxhi_i; i++) {
v2_pa[k][j][i] = wk2[n++];
v3_pa[k][j][i] = wk2[n++];
}
//v4 & v5 term
n = 0;
for (i = 0; i < nft; i++) {
wk2[n] = wk1[n]*vcoeff2[i][1] - wk1[n+1]*vcoeff2[i][2];
wk2[n+1] = wk1[n+1]*vcoeff2[i][1] + wk1[n]*vcoeff2[i][2];
n += 2;
}
ft2->compute(wk2,wk2,-1);
n = 0;
for (k = nzlo_i; k <= nzhi_i; k++)
for (j = nylo_i; j <= nyhi_i; j++)
for (i = nxlo_i; i <= nxhi_i; i++) {
v4_pa[k][j][i] = wk2[n++];
v5_pa[k][j][i] = wk2[n++];
}
}
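// the same packing trick as in the field solve: each pass multiplies V(k)
// by a complex pair of virial weights (e.g. vcoeff[i][0] + i*vcoeff[i][1])
// so that one inverse FFT delivers two virial components at once -- real
// part to v0/v2/v4, imaginary part to v1/v3/v5 -- i.e. six per-atom virial
// components in three FFTs instead of six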
/* ----------------------------------------------------------------------
Poisson solver for one mesh with 2 different dispersion densities
for ik scheme
------------------------------------------------------------------------- */
void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
{
int i,j,k,n;
double eng;
double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
// transform charge/dispersion density (r -> k)
// only one transform required when energies and pressures do not
// need to be calculated
if (eflag_global + vflag_global == 0) {
n = 0;
for (i = 0; i < nfft_6; i++) {
work1_6[n++] = dfft_1[i];
work1_6[n++] = dfft_2[i];
}
fft1_6->compute(work1_6,work1_6,1);
}
// two transforms are required when energies and pressures are
// calculated
else {
n = 0;
for (i = 0; i < nfft_6; i++) {
work1_6[n] = dfft_1[i];
work2_6[n++] = ZEROF;
work1_6[n] = ZEROF;
work2_6[n++] = dfft_2[i];
}
fft1_6->compute(work1_6,work1_6,1);
fft1_6->compute(work2_6,work2_6,1);
double s2 = scaleinv*scaleinv;
if (vflag_global) {
n = 0;
for (i = 0; i < nfft_6; i++) {
eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
if (eflag_global) energy_6 += eng;
n += 2;
}
} else {
n = 0;
for (i = 0; i < nfft_6; i++) {
energy_6 +=
2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
n += 2;
}
}
// unify the two transformed vectors for efficient calculations later
for (i = 0; i < 2*nfft_6; i++) {
work1_6[i] += work2_6[i];
}
}
n = 0;
for (i = 0; i < nfft_6; i++) {
work1_6[n++] *= scaleinv * greensfn_6[i];
work1_6[n++] *= scaleinv * greensfn_6[i];
}
// compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
// FFT leaves data in 3d brick decomposition
// copy it into inner portion of vdx,vdy,vdz arrays
// x direction gradient
n = 0;
for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
vxbrick_1[k][j][i] = work2_6[n++];
vxbrick_2[k][j][i] = work2_6[n++];
}
// y direction gradient
n = 0;
for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
vybrick_1[k][j][i] = work2_6[n++];
vybrick_2[k][j][i] = work2_6[n++];
}
// z direction gradient
n = 0;
for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
vzbrick_1[k][j][i] = work2_6[n++];
vzbrick_2[k][j][i] = work2_6[n++];
}
//Per-atom energy
if (eflag_atom) {
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n];
work2_6[n+1] = work1_6[n+1];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
u_pa_1[k][j][i] = work2_6[n++];
u_pa_2[k][j][i] = work2_6[n++];
}
}
if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
}
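// the two-for-one FFT trick used above and in the other *_2s_* / *_none_*
// solvers: when no global energy/virial is requested, the two real
// densities ride through a single forward FFT as the real and imaginary
// parts of one complex array.  When energies are requested the transforms
// are done separately (the energy expression mixes the two spectra), and
// the work arrays are then summed, which by linearity equals the packed
// transform FFT(d1 + i*d2), restoring the layout for the rest of the solve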
/* ----------------------------------------------------------------------
   Poisson solver for one mesh with 2 different dispersion densities
   for the ik scheme --- case when mixing rules don't apply
------------------------------------------------------------------------- */
void PPPMDisp::poisson_none_ik(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
{
int i,j,k,n;
double eng;
double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
// transform charge/dispersion density (r -> k)
// only one transform required when energies and pressures do not
// need to be calculated
if (eflag_global + vflag_global == 0) {
n = 0;
for (i = 0; i < nfft_6; i++) {
work1_6[n++] = dfft_1[i];
work1_6[n++] = dfft_2[i];
}
fft1_6->compute(work1_6,work1_6,1);
}
// two transforms are required when energies and pressures are
// calculated
else {
n = 0;
for (i = 0; i < nfft_6; i++) {
work1_6[n] = dfft_1[i];
work2_6[n++] = ZEROF;
work1_6[n] = ZEROF;
work2_6[n++] = dfft_2[i];
}
fft1_6->compute(work1_6,work1_6,1);
fft1_6->compute(work2_6,work2_6,1);
double s2 = scaleinv*scaleinv;
if (vflag_global) {
n = 0;
for (i = 0; i < nfft_6; i++) {
eng = s2 * greensfn_6[i] *
  (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) +
   B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
if (eflag_global) energy_6 += eng;
n += 2;
}
} else {
n = 0;
for (i = 0; i < nfft_6; i++) {
energy_6 += s2 * greensfn_6[i] *
  (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) +
   B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
n += 2;
}
}
// unify the two transformed vectors for efficient calculations later
for (i = 0; i < 2*nfft_6; i++) {
work1_6[i] += work2_6[i];
}
}
n = 0;
for (i = 0; i < nfft_6; i++) {
work1_6[n++] *= scaleinv * greensfn_6[i];
work1_6[n++] *= scaleinv * greensfn_6[i];
}
// compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
// FFT leaves data in 3d brick decomposition
// copy it into inner portion of vdx,vdy,vdz arrays
// x direction gradient
n = 0;
for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
vxbrick_1[k][j][i] = B[n1]*work2_6[n++];
vxbrick_2[k][j][i] = B[n2]*work2_6[n++];
}
// y direction gradient
n = 0;
for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
vybrick_1[k][j][i] = B[n1]*work2_6[n++];
vybrick_2[k][j][i] = B[n2]*work2_6[n++];
}
// z direction gradient
n = 0;
for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
vzbrick_1[k][j][i] = B[n1]*work2_6[n++];
vzbrick_2[k][j][i] = B[n2]*work2_6[n++];
}
//Per-atom energy
if (eflag_atom) {
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n];
work2_6[n+1] = work1_6[n+1];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
u_pa[n1][k][j][i] = B[n1]*work2_6[n++];
u_pa[n2][k][j][i] = B[n2]*work2_6[n++];
}
}
if (vflag_atom) poisson_none_peratom(n1,n2,
v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
}
/* ----------------------------------------------------------------------
Poisson solver for one mesh with 2 different dispersion densities
for ad scheme
------------------------------------------------------------------------- */
void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
{
int i,j,k,n;
double eng;
double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
// transform charge/dispersion density (r -> k)
// only one transform required when energies and pressures do not
// need to be calculated
if (eflag_global + vflag_global == 0) {
n = 0;
for (i = 0; i < nfft_6; i++) {
work1_6[n++] = dfft_1[i];
work1_6[n++] = dfft_2[i];
}
fft1_6->compute(work1_6,work1_6,1);
}
// two transforms are required when energies and pressures are
// calculated
else {
n = 0;
for (i = 0; i < nfft_6; i++) {
work1_6[n] = dfft_1[i];
work2_6[n++] = ZEROF;
work1_6[n] = ZEROF;
work2_6[n++] = dfft_2[i];
}
fft1_6->compute(work1_6,work1_6,1);
fft1_6->compute(work2_6,work2_6,1);
double s2 = scaleinv*scaleinv;
if (vflag_global) {
n = 0;
for (i = 0; i < nfft_6; i++) {
eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
if (eflag_global) energy_6 += eng;
n += 2;
}
} else {
n = 0;
for (i = 0; i < nfft_6; i++) {
energy_6 +=
2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
n += 2;
}
}
// unify the two transformed vectors for efficient calculations later
for (i = 0; i < 2*nfft_6; i++) {
work1_6[i] += work2_6[i];
}
}
n = 0;
for (i = 0; i < nfft_6; i++) {
work1_6[n++] *= scaleinv * greensfn_6[i];
work1_6[n++] *= scaleinv * greensfn_6[i];
}
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n];
work2_6[n+1] = work1_6[n+1];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
u_pa_1[k][j][i] = work2_6[n++];
u_pa_2[k][j][i] = work2_6[n++];
}
if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
}
/* ----------------------------------------------------------------------
   Poisson solver for one mesh with 2 different dispersion densities
   for the ad scheme --- case when mixing rules don't apply
------------------------------------------------------------------------- */
void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2,
FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
{
int i,j,k,n;
double eng;
double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
// transform charge/dispersion density (r -> k)
// only one transform required when energies and pressures do not
// need to be calculated
if (eflag_global + vflag_global == 0) {
n = 0;
for (i = 0; i < nfft_6; i++) {
work1_6[n++] = dfft_1[i];
work1_6[n++] = dfft_2[i];
}
fft1_6->compute(work1_6,work1_6,1);
}
// two transforms are required when energies and pressures are
// calculated
else {
n = 0;
for (i = 0; i < nfft_6; i++) {
work1_6[n] = dfft_1[i];
work2_6[n++] = ZEROF;
work1_6[n] = ZEROF;
work2_6[n++] = dfft_2[i];
}
fft1_6->compute(work1_6,work1_6,1);
fft1_6->compute(work2_6,work2_6,1);
double s2 = scaleinv*scaleinv;
if (vflag_global) {
n = 0;
for (i = 0; i < nfft_6; i++) {
eng = s2 * greensfn_6[i] *
  (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) +
   B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
if (eflag_global) energy_6 += eng;
n += 2;
}
} else {
n = 0;
for (i = 0; i < nfft_6; i++) {
energy_6 += s2 * greensfn_6[i] *
  (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) +
   B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
n += 2;
}
}
// unify the two transformed vectors for efficient calculations later
for (i = 0; i < 2*nfft_6; i++) {
work1_6[i] += work2_6[i];
}
}
n = 0;
for (i = 0; i < nfft_6; i++) {
work1_6[n++] *= scaleinv * greensfn_6[i];
work1_6[n++] *= scaleinv * greensfn_6[i];
}
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n];
work2_6[n+1] = work1_6[n+1];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
u_pa_1[k][j][i] = B[n1]*work2_6[n++];
u_pa_2[k][j][i] = B[n2]*work2_6[n++];
}
if (vflag_atom) poisson_none_peratom(n1,n2,
v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
}
/* ----------------------------------------------------------------------
Fourier Transform for per atom virial calculations
------------------------------------------------------------------------- */
void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
{
//Compute first virial term v0
int n, i, j, k;
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n]*vg_6[i][0];
work2_6[n+1] = work1_6[n+1]*vg_6[i][0];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
v0_pa_1[k][j][i] = work2_6[n++];
v0_pa_2[k][j][i] = work2_6[n++];
}
//Compute second virial term v1
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n]*vg_6[i][1];
work2_6[n+1] = work1_6[n+1]*vg_6[i][1];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
v1_pa_1[k][j][i] = work2_6[n++];
v1_pa_2[k][j][i] = work2_6[n++];
}
//Compute third virial term v2
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n]*vg_6[i][2];
work2_6[n+1] = work1_6[n+1]*vg_6[i][2];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
v2_pa_1[k][j][i] = work2_6[n++];
v2_pa_2[k][j][i] = work2_6[n++];
}
//Compute fourth virial term v3
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n]*vg2_6[i][0];
work2_6[n+1] = work1_6[n+1]*vg2_6[i][0];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
v3_pa_1[k][j][i] = work2_6[n++];
v3_pa_2[k][j][i] = work2_6[n++];
}
//Compute fifth virial term v4
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n]*vg2_6[i][1];
work2_6[n+1] = work1_6[n+1]*vg2_6[i][1];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
v4_pa_1[k][j][i] = work2_6[n++];
v4_pa_2[k][j][i] = work2_6[n++];
}
//Compute last virial term v5
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n]*vg2_6[i][2];
work2_6[n+1] = work1_6[n+1]*vg2_6[i][2];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
v5_pa_1[k][j][i] = work2_6[n++];
v5_pa_2[k][j][i] = work2_6[n++];
}
}
/* ----------------------------------------------------------------------
Fourier Transform for per atom virial calculations
------------------------------------------------------------------------- */
void PPPMDisp::poisson_none_peratom(int n1, int n2,
FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
{
//Compute first virial term v0
int n, i, j, k;
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n]*vg_6[i][0];
work2_6[n+1] = work1_6[n+1]*vg_6[i][0];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
v0_pa_1[k][j][i] = B[n1]*work2_6[n++];
v0_pa_2[k][j][i] = B[n2]*work2_6[n++];
}
//Compute second virial term v1
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n]*vg_6[i][1];
work2_6[n+1] = work1_6[n+1]*vg_6[i][1];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
v1_pa_1[k][j][i] = B[n1]*work2_6[n++];
v1_pa_2[k][j][i] = B[n2]*work2_6[n++];
}
//Compute third virial term v2
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n]*vg_6[i][2];
work2_6[n+1] = work1_6[n+1]*vg_6[i][2];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
v2_pa_1[k][j][i] = B[n1]*work2_6[n++];
v2_pa_2[k][j][i] = B[n2]*work2_6[n++];
}
//Compute fourth virial term v3
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n]*vg2_6[i][0];
work2_6[n+1] = work1_6[n+1]*vg2_6[i][0];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
v3_pa_1[k][j][i] = B[n1]*work2_6[n++];
v3_pa_2[k][j][i] = B[n2]*work2_6[n++];
}
//Compute fifth virial term v4
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n]*vg2_6[i][1];
work2_6[n+1] = work1_6[n+1]*vg2_6[i][1];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
v4_pa_1[k][j][i] = B[n1]*work2_6[n++];
v4_pa_2[k][j][i] = B[n2]*work2_6[n++];
}
//Compute last virial term v5
n = 0;
for (i = 0; i < nfft_6; i++) {
work2_6[n] = work1_6[n]*vg2_6[i][2];
work2_6[n+1] = work1_6[n+1]*vg2_6[i][2];
n += 2;
}
fft2_6->compute(work2_6,work2_6,-1);
n = 0;
for (k = nzlo_in_6; k <= nzhi_in_6; k++)
for (j = nylo_in_6; j <= nyhi_in_6; j++)
for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
v5_pa_1[k][j][i] = B[n1]*work2_6[n++];
v5_pa_2[k][j][i] = B[n2]*work2_6[n++];
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles
for ik scheme
------------------------------------------------------------------------- */
void PPPMDisp::fieldforce_c_ik()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = force->qqrd2e * scale * q[i];
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
if (slabflag != 2) f[i][2] += qfactor*ekz;
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles
for ad scheme
------------------------------------------------------------------------- */
void PPPMDisp::fieldforce_c_ad()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz;
FFT_SCALAR ekx,eky,ekz;
double s1,s2,s3;
double sf = 0.0;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double hx_inv = nx_pppm/xprd;
double hy_inv = ny_pppm/yprd;
double hz_inv = nz_pppm/zprd_slab;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
for (m = nlower; m <= nupper; m++) {
my = m+ny;
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
// convert E-field to force and subtract self forces
const double qfactor = force->qqrd2e * scale;
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf = sf_coeff[0]*sin(2*MY_PI*s1);
sf += sf_coeff[1]*sin(4*MY_PI*s1);
sf *= 2*q[i]*q[i];
f[i][0] += qfactor*(ekx*q[i] - sf);
sf = sf_coeff[2]*sin(2*MY_PI*s2);
sf += sf_coeff[3]*sin(4*MY_PI*s2);
sf *= 2*q[i]*q[i];
f[i][1] += qfactor*(eky*q[i] - sf);
sf = sf_coeff[4]*sin(2*MY_PI*s3);
sf += sf_coeff[5]*sin(4*MY_PI*s3);
sf *= 2*q[i]*q[i];
if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
}
}
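// about the sf terms above: the ad scheme produces a spurious self force
// on a charge from its own gridded image, periodic in the fractional cell
// position s = x*h_inv.  The correction keeps the first two Fourier
// components, sin(2*pi*s) and sin(4*pi*s), with amplitudes sf_coeff[]
// precomputed in compute_sf_coeff(); the factor 2*q[i]*q[i] makes the
// correction quadratic in the charge, as a self interaction must be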
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles
------------------------------------------------------------------------- */
void PPPMDisp::fieldforce_c_peratom()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
if (eflag_atom) u_pa += x0*u_brick[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick[mz][my][mx];
v1 += x0*v1_brick[mz][my][mx];
v2 += x0*v2_brick[mz][my][mx];
v3 += x0*v3_brick[mz][my][mx];
v4 += x0*v4_brick[mz][my][mx];
v5 += x0*v5_brick[mz][my][mx];
}
}
}
}
// scale per-atom energy and virial contributions by the charge prefactor
const double qfactor = 0.5*force->qqrd2e * scale * q[i];
if (eflag_atom) eatom[i] += u_pa*qfactor;
if (vflag_atom) {
vatom[i][0] += v0*qfactor;
vatom[i][1] += v1*qfactor;
vatom[i][2] += v2*qfactor;
vatom[i][3] += v3*qfactor;
vatom[i][4] += v4*qfactor;
vatom[i][5] += v5*qfactor;
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDisp::fieldforce_g_ik()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of dispersion field on particle
double **x = atom->x;
double **f = atom->f;
int type;
double lj;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
ekx = eky = ekz = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = rho1d_6[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*rho1d_6[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*rho1d_6[0][l];
ekx -= x0*vdx_brick_g[mz][my][mx];
eky -= x0*vdy_brick_g[mz][my][mx];
ekz -= x0*vdz_brick_g[mz][my][mx];
}
}
}
// convert dispersion field to force
type = atom->type[i];
lj = B[type];
f[i][0] += lj*ekx;
f[i][1] += lj*eky;
if (slabflag != 2) f[i][2] += lj*ekz;
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for geometric mixing rule for ad scheme
------------------------------------------------------------------------- */
void PPPMDisp::fieldforce_g_ad()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz;
FFT_SCALAR ekx,eky,ekz;
double s1,s2,s3;
double sf = 0.0;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double hx_inv = nx_pppm_6/xprd;
double hy_inv = ny_pppm_6/yprd;
double hz_inv = nz_pppm_6/zprd_slab;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of dispersion field on particle
double **x = atom->x;
double **f = atom->f;
int type;
double lj;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
ekx = eky = ekz = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
// convert dispersion field to force
type = atom->type[i];
lj = B[type];
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
sf *= 2*lj*lj;
f[i][0] += ekx*lj - sf;
sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
sf *= 2*lj*lj;
f[i][1] += eky*lj - sf;
sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
sf *= 2*lj*lj;
if (slabflag != 2) f[i][2] += ekz*lj - sf;
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for geometric mixing rule for per atom quantities
------------------------------------------------------------------------- */
void PPPMDisp::fieldforce_g_peratom()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of dispersion field on particle
double **x = atom->x;
int type;
double lj;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = rho1d_6[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*rho1d_6[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*rho1d_6[0][l];
if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick_g[mz][my][mx];
v1 += x0*v1_brick_g[mz][my][mx];
v2 += x0*v2_brick_g[mz][my][mx];
v3 += x0*v3_brick_g[mz][my][mx];
v4 += x0*v4_brick_g[mz][my][mx];
v5 += x0*v5_brick_g[mz][my][mx];
}
}
}
}
// scale per-atom energy and virial contributions by the dispersion prefactor
type = atom->type[i];
lj = B[type]*0.5;
if (eflag_atom) eatom[i] += u_pa*lj;
if (vflag_atom) {
vatom[i][0] += v0*lj;
vatom[i][1] += v1*lj;
vatom[i][2] += v2*lj;
vatom[i][3] += v3*lj;
vatom[i][4] += v4*lj;
vatom[i][5] += v5*lj;
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for arithmetic mixing rule and ik scheme
------------------------------------------------------------------------- */
void PPPMDisp::fieldforce_a_ik()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
FFT_SCALAR ekx6, eky6, ekz6;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of dispersion field on particle
double **x = atom->x;
double **f = atom->f;
int type;
double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
ekx0 = eky0 = ekz0 = ZEROF;
ekx1 = eky1 = ekz1 = ZEROF;
ekx2 = eky2 = ekz2 = ZEROF;
ekx3 = eky3 = ekz3 = ZEROF;
ekx4 = eky4 = ekz4 = ZEROF;
ekx5 = eky5 = ekz5 = ZEROF;
ekx6 = eky6 = ekz6 = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = rho1d_6[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*rho1d_6[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*rho1d_6[0][l];
ekx0 -= x0*vdx_brick_a0[mz][my][mx];
eky0 -= x0*vdy_brick_a0[mz][my][mx];
ekz0 -= x0*vdz_brick_a0[mz][my][mx];
ekx1 -= x0*vdx_brick_a1[mz][my][mx];
eky1 -= x0*vdy_brick_a1[mz][my][mx];
ekz1 -= x0*vdz_brick_a1[mz][my][mx];
ekx2 -= x0*vdx_brick_a2[mz][my][mx];
eky2 -= x0*vdy_brick_a2[mz][my][mx];
ekz2 -= x0*vdz_brick_a2[mz][my][mx];
ekx3 -= x0*vdx_brick_a3[mz][my][mx];
eky3 -= x0*vdy_brick_a3[mz][my][mx];
ekz3 -= x0*vdz_brick_a3[mz][my][mx];
ekx4 -= x0*vdx_brick_a4[mz][my][mx];
eky4 -= x0*vdy_brick_a4[mz][my][mx];
ekz4 -= x0*vdz_brick_a4[mz][my][mx];
ekx5 -= x0*vdx_brick_a5[mz][my][mx];
eky5 -= x0*vdy_brick_a5[mz][my][mx];
ekz5 -= x0*vdz_brick_a5[mz][my][mx];
ekx6 -= x0*vdx_brick_a6[mz][my][mx];
eky6 -= x0*vdy_brick_a6[mz][my][mx];
ekz6 -= x0*vdz_brick_a6[mz][my][mx];
}
}
}
// convert D-field to force
type = atom->type[i];
lj0 = B[7*type+6];
lj1 = B[7*type+5];
lj2 = B[7*type+4];
lj3 = B[7*type+3];
lj4 = B[7*type+2];
lj5 = B[7*type+1];
lj6 = B[7*type];
f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6;
f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6;
if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
}
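// note the reversed indexing when converting field to force: the field
// from grid a0 is weighted with lj0 = B[7*type+6], grid a1 with
// B[7*type+5], and so on.  The force from grid k must carry the partner
// coefficient 6-k so that the sum over all seven grids reproduces the
// cross terms that make_rho_a() split the mixed coefficient into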
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for arithmetic mixing rule for the ad scheme
------------------------------------------------------------------------- */
void PPPMDisp::fieldforce_a_ad()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
FFT_SCALAR ekx6, eky6, ekz6;
double s1,s2,s3;
double sf = 0.0;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double hx_inv = nx_pppm_6/xprd;
double hy_inv = ny_pppm_6/yprd;
double hz_inv = nz_pppm_6/zprd_slab;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of dispersion field on particle
double **x = atom->x;
double **f = atom->f;
int type;
double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
ekx0 = eky0 = ekz0 = ZEROF;
ekx1 = eky1 = ekz1 = ZEROF;
ekx2 = eky2 = ekz2 = ZEROF;
ekx3 = eky3 = ekz3 = ZEROF;
ekx4 = eky4 = ekz4 = ZEROF;
ekx5 = eky5 = ekz5 = ZEROF;
ekx6 = eky6 = ekz6 = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];
ekx0 += x0*u_brick_a0[mz][my][mx];
eky0 += y0*u_brick_a0[mz][my][mx];
ekz0 += z0*u_brick_a0[mz][my][mx];
ekx1 += x0*u_brick_a1[mz][my][mx];
eky1 += y0*u_brick_a1[mz][my][mx];
ekz1 += z0*u_brick_a1[mz][my][mx];
ekx2 += x0*u_brick_a2[mz][my][mx];
eky2 += y0*u_brick_a2[mz][my][mx];
ekz2 += z0*u_brick_a2[mz][my][mx];
ekx3 += x0*u_brick_a3[mz][my][mx];
eky3 += y0*u_brick_a3[mz][my][mx];
ekz3 += z0*u_brick_a3[mz][my][mx];
ekx4 += x0*u_brick_a4[mz][my][mx];
eky4 += y0*u_brick_a4[mz][my][mx];
ekz4 += z0*u_brick_a4[mz][my][mx];
ekx5 += x0*u_brick_a5[mz][my][mx];
eky5 += y0*u_brick_a5[mz][my][mx];
ekz5 += z0*u_brick_a5[mz][my][mx];
ekx6 += x0*u_brick_a6[mz][my][mx];
eky6 += y0*u_brick_a6[mz][my][mx];
ekz6 += z0*u_brick_a6[mz][my][mx];
}
}
}
ekx0 *= hx_inv;
eky0 *= hy_inv;
ekz0 *= hz_inv;
ekx1 *= hx_inv;
eky1 *= hy_inv;
ekz1 *= hz_inv;
ekx2 *= hx_inv;
eky2 *= hy_inv;
ekz2 *= hz_inv;
ekx3 *= hx_inv;
eky3 *= hy_inv;
ekz3 *= hz_inv;
ekx4 *= hx_inv;
eky4 *= hy_inv;
ekz4 *= hz_inv;
ekx5 *= hx_inv;
eky5 *= hy_inv;
ekz5 *= hz_inv;
ekx6 *= hx_inv;
eky6 *= hy_inv;
ekz6 *= hz_inv;
// convert D-field to force
type = atom->type[i];
lj0 = B[7*type+6];
lj1 = B[7*type+5];
lj2 = B[7*type+4];
lj3 = B[7*type+3];
lj4 = B[7*type+2];
lj5 = B[7*type+1];
lj6 = B[7*type];
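// subtract the discretization self-force of the ad scheme: the sin()
// terms built from sf_coeff_6 are scaled by 2*sum_k lj_k*lj_(6-k),
// which (interpretation, not from the source comments) plays the role
// of the 2*q^2 factor in the Coulomb ad solver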
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf;
sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf;
sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for arithmetic mixing rule for per atom quantities
------------------------------------------------------------------------- */
void PPPMDisp::fieldforce_a_peratom()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50;
FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51;
FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52;
FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53;
FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54;
FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55;
FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of dispersion field on particle
double **x = atom->x;
int type;
double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF;
u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF;
u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF;
u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF;
u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF;
u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF;
u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = rho1d_6[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*rho1d_6[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*rho1d_6[0][l];
if (eflag_atom) {
u_pa0 += x0*u_brick_a0[mz][my][mx];
u_pa1 += x0*u_brick_a1[mz][my][mx];
u_pa2 += x0*u_brick_a2[mz][my][mx];
u_pa3 += x0*u_brick_a3[mz][my][mx];
u_pa4 += x0*u_brick_a4[mz][my][mx];
u_pa5 += x0*u_brick_a5[mz][my][mx];
u_pa6 += x0*u_brick_a6[mz][my][mx];
}
if (vflag_atom) {
v00 += x0*v0_brick_a0[mz][my][mx];
v10 += x0*v1_brick_a0[mz][my][mx];
v20 += x0*v2_brick_a0[mz][my][mx];
v30 += x0*v3_brick_a0[mz][my][mx];
v40 += x0*v4_brick_a0[mz][my][mx];
v50 += x0*v5_brick_a0[mz][my][mx];
v01 += x0*v0_brick_a1[mz][my][mx];
v11 += x0*v1_brick_a1[mz][my][mx];
v21 += x0*v2_brick_a1[mz][my][mx];
v31 += x0*v3_brick_a1[mz][my][mx];
v41 += x0*v4_brick_a1[mz][my][mx];
v51 += x0*v5_brick_a1[mz][my][mx];
v02 += x0*v0_brick_a2[mz][my][mx];
v12 += x0*v1_brick_a2[mz][my][mx];
v22 += x0*v2_brick_a2[mz][my][mx];
v32 += x0*v3_brick_a2[mz][my][mx];
v42 += x0*v4_brick_a2[mz][my][mx];
v52 += x0*v5_brick_a2[mz][my][mx];
v03 += x0*v0_brick_a3[mz][my][mx];
v13 += x0*v1_brick_a3[mz][my][mx];
v23 += x0*v2_brick_a3[mz][my][mx];
v33 += x0*v3_brick_a3[mz][my][mx];
v43 += x0*v4_brick_a3[mz][my][mx];
v53 += x0*v5_brick_a3[mz][my][mx];
v04 += x0*v0_brick_a4[mz][my][mx];
v14 += x0*v1_brick_a4[mz][my][mx];
v24 += x0*v2_brick_a4[mz][my][mx];
v34 += x0*v3_brick_a4[mz][my][mx];
v44 += x0*v4_brick_a4[mz][my][mx];
v54 += x0*v5_brick_a4[mz][my][mx];
v05 += x0*v0_brick_a5[mz][my][mx];
v15 += x0*v1_brick_a5[mz][my][mx];
v25 += x0*v2_brick_a5[mz][my][mx];
v35 += x0*v3_brick_a5[mz][my][mx];
v45 += x0*v4_brick_a5[mz][my][mx];
v55 += x0*v5_brick_a5[mz][my][mx];
v06 += x0*v0_brick_a6[mz][my][mx];
v16 += x0*v1_brick_a6[mz][my][mx];
v26 += x0*v2_brick_a6[mz][my][mx];
v36 += x0*v3_brick_a6[mz][my][mx];
v46 += x0*v4_brick_a6[mz][my][mx];
v56 += x0*v5_brick_a6[mz][my][mx];
}
}
}
}
// convert D-field to force
type = atom->type[i];
lj0 = B[7*type+6]*0.5;
lj1 = B[7*type+5]*0.5;
lj2 = B[7*type+4]*0.5;
lj3 = B[7*type+3]*0.5;
lj4 = B[7*type+2]*0.5;
lj5 = B[7*type+1]*0.5;
lj6 = B[7*type]*0.5;
if (eflag_atom)
eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 +
u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6;
if (vflag_atom) {
vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 +
v04*lj4 + v05*lj5 + v06*lj6;
vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 +
v14*lj4 + v15*lj5 + v16*lj6;
vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 +
v24*lj4 + v25*lj5 + v26*lj6;
vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 +
v34*lj4 + v35*lj5 + v36*lj6;
vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 +
v44*lj4 + v45*lj5 + v46*lj6;
vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 +
v54*lj4 + v55*lj5 + v56*lj6;
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for no mixing rule and ik scheme
------------------------------------------------------------------------- */
void PPPMDisp::fieldforce_none_ik()
{
int i,k,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR *ekx, *eky, *ekz;
ekx = new FFT_SCALAR[nsplit];
eky = new FFT_SCALAR[nsplit];
ekz = new FFT_SCALAR[nsplit];
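// one (x,y,z) field accumulator per splitting term; the arrays are
// heap-allocated because nsplit is only known at run time, and are
// freed at the end of this routine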
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of dispersion field on particle
double **x = atom->x;
double **f = atom->f;
int type;
double lj;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
for (k = 0; k < nsplit; k++)
ekx[k] = eky[k] = ekz[k] = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = rho1d_6[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*rho1d_6[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*rho1d_6[0][l];
for (k = 0; k < nsplit; k++) {
ekx[k] -= x0*vdx_brick_none[k][mz][my][mx];
eky[k] -= x0*vdy_brick_none[k][mz][my][mx];
ekz[k] -= x0*vdz_brick_none[k][mz][my][mx];
}
}
}
}
// convert D-field to force
type = atom->type[i];
for (k = 0; k < nsplit; k++) {
lj = B[nsplit*type + k];
f[i][0] += lj*ekx[k];
f[i][1] += lj*eky[k];
if (slabflag != 2) f[i][2] += lj*ekz[k];
}
}
delete [] ekx;
delete [] eky;
delete [] ekz;
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for no mixing rule for the ad scheme
------------------------------------------------------------------------- */
void PPPMDisp::fieldforce_none_ad()
{
int i,k,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR *ekx, *eky, *ekz;
ekx = new FFT_SCALAR[nsplit];
eky = new FFT_SCALAR[nsplit];
ekz = new FFT_SCALAR[nsplit];
double s1,s2,s3;
double sf1,sf2,sf3;
double sf = 0.0;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double hx_inv = nx_pppm_6/xprd;
double hy_inv = ny_pppm_6/yprd;
double hz_inv = nz_pppm_6/zprd_slab;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of dispersion field on particle
double **x = atom->x;
double **f = atom->f;
int type;
double lj;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
for (k = 0; k < nsplit; k++)
ekx[k] = eky[k] = ekz[k] = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];
for (k = 0; k < nsplit; k++) {
ekx[k] += x0*u_brick_none[k][mz][my][mx];
eky[k] += y0*u_brick_none[k][mz][my][mx];
ekz[k] += z0*u_brick_none[k][mz][my][mx];
}
}
}
}
for (k = 0; k < nsplit; k++) {
ekx[k] *= hx_inv;
eky[k] *= hy_inv;
ekz[k] *= hz_inv;
}
// convert D-field to force
type = atom->type[i];
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf1 = sf_coeff_6[0]*sin(2*MY_PI*s1);
sf1 += sf_coeff_6[1]*sin(4*MY_PI*s1);
sf2 = sf_coeff_6[2]*sin(2*MY_PI*s2);
sf2 += sf_coeff_6[3]*sin(4*MY_PI*s2);
sf3 = sf_coeff_6[4]*sin(2*MY_PI*s3);
sf3 += sf_coeff_6[5]*sin(4*MY_PI*s3);
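// per-split self-force correction of the ad scheme; the prefactor
// 2*B[k]*lj*lj below is presumably the dispersion analogue of the
// 2*q^2 self-force term in the Coulomb ad solver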
for (k = 0; k < nsplit; k++) {
lj = B[nsplit*type + k];
sf = sf1*B[k]*2*lj*lj;
f[i][0] += lj*ekx[k] - sf;
sf = sf2*B[k]*2*lj*lj;
f[i][1] += lj*eky[k] - sf;
sf = sf3*B[k]*2*lj*lj;
if (slabflag != 2) f[i][2] += lj*ekz[k] - sf;
}
}
delete [] ekx;
delete [] eky;
delete [] ekz;
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for no mixing rule for per atom quantities
------------------------------------------------------------------------- */
void PPPMDisp::fieldforce_none_peratom()
{
int i,k,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR *u_pa,*v0,*v1,*v2,*v3,*v4,*v5;
u_pa = new FFT_SCALAR[nsplit];
v0 = new FFT_SCALAR[nsplit];
v1 = new FFT_SCALAR[nsplit];
v2 = new FFT_SCALAR[nsplit];
v3 = new FFT_SCALAR[nsplit];
v4 = new FFT_SCALAR[nsplit];
v5 = new FFT_SCALAR[nsplit];
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of dispersion field on particle
double **x = atom->x;
int type;
double lj;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
for (k = 0; k < nsplit; k++)
u_pa[k] = v0[k] = v1[k] = v2[k] = v3[k] = v4[k] = v5[k] = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = rho1d_6[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*rho1d_6[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*rho1d_6[0][l];
if (eflag_atom) {
for (k = 0; k < nsplit; k++)
u_pa[k] += x0*u_brick_none[k][mz][my][mx];
}
if (vflag_atom) {
for (k = 0; k < nsplit; k++) {
v0[k] += x0*v0_brick_none[k][mz][my][mx];
v1[k] += x0*v1_brick_none[k][mz][my][mx];
v2[k] += x0*v2_brick_none[k][mz][my][mx];
v3[k] += x0*v3_brick_none[k][mz][my][mx];
v4[k] += x0*v4_brick_none[k][mz][my][mx];
v5[k] += x0*v5_brick_none[k][mz][my][mx];
}
}
}
}
}
// convert D-field to force
type = atom->type[i];
for (k = 0; k < nsplit; k++) {
lj = B[nsplit*type + k]*0.5;
if (eflag_atom) {
eatom[i] += u_pa[k]*lj;
}
if (vflag_atom) {
vatom[i][0] += v0[k]*lj;
vatom[i][1] += v1[k]*lj;
vatom[i][2] += v2[k]*lj;
vatom[i][3] += v3[k]*lj;
vatom[i][4] += v4[k]*lj;
vatom[i][5] += v5[k]*lj;
}
}
}
delete [] u_pa;
delete [] v0;
delete [] v1;
delete [] v2;
delete [] v3;
delete [] v4;
delete [] v5;
}
/* ----------------------------------------------------------------------
pack own values to buf to send to another proc
------------------------------------------------------------------------- */
void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
int n = 0;
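// flag selects which grid bricks to serialize; values are appended to
// buf in a fixed order that unpack_forward() must mirror exactly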
switch (flag) {
// Coulomb interactions
case FORWARD_IK: {
FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++) {
buf[n++] = xsrc[list[i]];
buf[n++] = ysrc[list[i]];
buf[n++] = zsrc[list[i]];
}
break;
}
case FORWARD_AD: {
FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
break;
}
case FORWARD_IK_PERATOM: {
FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++) {
if (eflag_atom) buf[n++] = esrc[list[i]];
if (vflag_atom) {
buf[n++] = v0src[list[i]];
buf[n++] = v1src[list[i]];
buf[n++] = v2src[list[i]];
buf[n++] = v3src[list[i]];
buf[n++] = v4src[list[i]];
buf[n++] = v5src[list[i]];
}
}
break;
}
case FORWARD_AD_PERATOM: {
FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++) {
buf[n++] = v0src[list[i]];
buf[n++] = v1src[list[i]];
buf[n++] = v2src[list[i]];
buf[n++] = v3src[list[i]];
buf[n++] = v4src[list[i]];
buf[n++] = v5src[list[i]];
}
break;
}
// Dispersion interactions, geometric mixing
case FORWARD_IK_G: {
FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
buf[n++] = xsrc[list[i]];
buf[n++] = ysrc[list[i]];
buf[n++] = zsrc[list[i]];
}
break;
}
case FORWARD_AD_G: {
FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
break;
}
case FORWARD_IK_PERATOM_G: {
FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
if (eflag_atom) buf[n++] = esrc[list[i]];
if (vflag_atom) {
buf[n++] = v0src[list[i]];
buf[n++] = v1src[list[i]];
buf[n++] = v2src[list[i]];
buf[n++] = v3src[list[i]];
buf[n++] = v4src[list[i]];
buf[n++] = v5src[list[i]];
}
}
break;
}
case FORWARD_AD_PERATOM_G: {
FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
buf[n++] = v0src[list[i]];
buf[n++] = v1src[list[i]];
buf[n++] = v2src[list[i]];
buf[n++] = v3src[list[i]];
buf[n++] = v4src[list[i]];
buf[n++] = v5src[list[i]];
}
break;
}
// Dispersion interactions, arithmetic mixing
case FORWARD_IK_A: {
FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
buf[n++] = xsrc0[list[i]];
buf[n++] = ysrc0[list[i]];
buf[n++] = zsrc0[list[i]];
buf[n++] = xsrc1[list[i]];
buf[n++] = ysrc1[list[i]];
buf[n++] = zsrc1[list[i]];
buf[n++] = xsrc2[list[i]];
buf[n++] = ysrc2[list[i]];
buf[n++] = zsrc2[list[i]];
buf[n++] = xsrc3[list[i]];
buf[n++] = ysrc3[list[i]];
buf[n++] = zsrc3[list[i]];
buf[n++] = xsrc4[list[i]];
buf[n++] = ysrc4[list[i]];
buf[n++] = zsrc4[list[i]];
buf[n++] = xsrc5[list[i]];
buf[n++] = ysrc5[list[i]];
buf[n++] = zsrc5[list[i]];
buf[n++] = xsrc6[list[i]];
buf[n++] = ysrc6[list[i]];
buf[n++] = zsrc6[list[i]];
}
break;
}
case FORWARD_AD_A: {
FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
buf[n++] = src0[list[i]];
buf[n++] = src1[list[i]];
buf[n++] = src2[list[i]];
buf[n++] = src3[list[i]];
buf[n++] = src4[list[i]];
buf[n++] = src5[list[i]];
buf[n++] = src6[list[i]];
}
break;
}
case FORWARD_IK_PERATOM_A: {
FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
if (eflag_atom) {
buf[n++] = esrc0[list[i]];
buf[n++] = esrc1[list[i]];
buf[n++] = esrc2[list[i]];
buf[n++] = esrc3[list[i]];
buf[n++] = esrc4[list[i]];
buf[n++] = esrc5[list[i]];
buf[n++] = esrc6[list[i]];
}
if (vflag_atom) {
buf[n++] = v0src0[list[i]];
buf[n++] = v1src0[list[i]];
buf[n++] = v2src0[list[i]];
buf[n++] = v3src0[list[i]];
buf[n++] = v4src0[list[i]];
buf[n++] = v5src0[list[i]];
buf[n++] = v0src1[list[i]];
buf[n++] = v1src1[list[i]];
buf[n++] = v2src1[list[i]];
buf[n++] = v3src1[list[i]];
buf[n++] = v4src1[list[i]];
buf[n++] = v5src1[list[i]];
buf[n++] = v0src2[list[i]];
buf[n++] = v1src2[list[i]];
buf[n++] = v2src2[list[i]];
buf[n++] = v3src2[list[i]];
buf[n++] = v4src2[list[i]];
buf[n++] = v5src2[list[i]];
buf[n++] = v0src3[list[i]];
buf[n++] = v1src3[list[i]];
buf[n++] = v2src3[list[i]];
buf[n++] = v3src3[list[i]];
buf[n++] = v4src3[list[i]];
buf[n++] = v5src3[list[i]];
buf[n++] = v0src4[list[i]];
buf[n++] = v1src4[list[i]];
buf[n++] = v2src4[list[i]];
buf[n++] = v3src4[list[i]];
buf[n++] = v4src4[list[i]];
buf[n++] = v5src4[list[i]];
buf[n++] = v0src5[list[i]];
buf[n++] = v1src5[list[i]];
buf[n++] = v2src5[list[i]];
buf[n++] = v3src5[list[i]];
buf[n++] = v4src5[list[i]];
buf[n++] = v5src5[list[i]];
buf[n++] = v0src6[list[i]];
buf[n++] = v1src6[list[i]];
buf[n++] = v2src6[list[i]];
buf[n++] = v3src6[list[i]];
buf[n++] = v4src6[list[i]];
buf[n++] = v5src6[list[i]];
}
}
break;
}
case FORWARD_AD_PERATOM_A: {
FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
buf[n++] = v0src0[list[i]];
buf[n++] = v1src0[list[i]];
buf[n++] = v2src0[list[i]];
buf[n++] = v3src0[list[i]];
buf[n++] = v4src0[list[i]];
buf[n++] = v5src0[list[i]];
buf[n++] = v0src1[list[i]];
buf[n++] = v1src1[list[i]];
buf[n++] = v2src1[list[i]];
buf[n++] = v3src1[list[i]];
buf[n++] = v4src1[list[i]];
buf[n++] = v5src1[list[i]];
buf[n++] = v0src2[list[i]];
buf[n++] = v1src2[list[i]];
buf[n++] = v2src2[list[i]];
buf[n++] = v3src2[list[i]];
buf[n++] = v4src2[list[i]];
buf[n++] = v5src2[list[i]];
buf[n++] = v0src3[list[i]];
buf[n++] = v1src3[list[i]];
buf[n++] = v2src3[list[i]];
buf[n++] = v3src3[list[i]];
buf[n++] = v4src3[list[i]];
buf[n++] = v5src3[list[i]];
buf[n++] = v0src4[list[i]];
buf[n++] = v1src4[list[i]];
buf[n++] = v2src4[list[i]];
buf[n++] = v3src4[list[i]];
buf[n++] = v4src4[list[i]];
buf[n++] = v5src4[list[i]];
buf[n++] = v0src5[list[i]];
buf[n++] = v1src5[list[i]];
buf[n++] = v2src5[list[i]];
buf[n++] = v3src5[list[i]];
buf[n++] = v4src5[list[i]];
buf[n++] = v5src5[list[i]];
buf[n++] = v0src6[list[i]];
buf[n++] = v1src6[list[i]];
buf[n++] = v2src6[list[i]];
buf[n++] = v3src6[list[i]];
buf[n++] = v4src6[list[i]];
buf[n++] = v5src6[list[i]];
}
break;
}
// Dispersion interactions, no mixing
case FORWARD_IK_NONE: {
for (int k = 0; k < nsplit_alloc; k++) {
FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
buf[n++] = xsrc[list[i]];
buf[n++] = ysrc[list[i]];
buf[n++] = zsrc[list[i]];
}
}
break;
}
case FORWARD_AD_NONE: {
for (int k = 0; k < nsplit_alloc; k++) {
FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++)
buf[n++] = src[list[i]];
}
break;
}
case FORWARD_IK_PERATOM_NONE: {
for (int k = 0; k < nsplit_alloc; k++) {
FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
if (eflag_atom) buf[n++] = esrc[list[i]];
if (vflag_atom) {
buf[n++] = v0src[list[i]];
buf[n++] = v1src[list[i]];
buf[n++] = v2src[list[i]];
buf[n++] = v3src[list[i]];
buf[n++] = v4src[list[i]];
buf[n++] = v5src[list[i]];
}
}
}
break;
}
case FORWARD_AD_PERATOM_NONE: {
for (int k = 0; k < nsplit_alloc; k++) {
FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
buf[n++] = v0src[list[i]];
buf[n++] = v1src[list[i]];
buf[n++] = v2src[list[i]];
buf[n++] = v3src[list[i]];
buf[n++] = v4src[list[i]];
buf[n++] = v5src[list[i]];
}
}
break;
}
}
}
/* ----------------------------------------------------------------------
unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */
void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
int n = 0;
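// ordering must mirror pack_forward() exactly; forward communication
// overwrites ghost values (assignment, not accumulation)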
switch (flag) {
// Coulomb interactions
case FORWARD_IK: {
FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++) {
xdest[list[i]] = buf[n++];
ydest[list[i]] = buf[n++];
zdest[list[i]] = buf[n++];
}
break;
}
case FORWARD_AD: {
FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
dest[list[i]] = buf[n++];
break;
}
case FORWARD_IK_PERATOM: {
FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++) {
if (eflag_atom) esrc[list[i]] = buf[n++];
if (vflag_atom) {
v0src[list[i]] = buf[n++];
v1src[list[i]] = buf[n++];
v2src[list[i]] = buf[n++];
v3src[list[i]] = buf[n++];
v4src[list[i]] = buf[n++];
v5src[list[i]] = buf[n++];
}
}
break;
}
case FORWARD_AD_PERATOM: {
FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++) {
v0src[list[i]] = buf[n++];
v1src[list[i]] = buf[n++];
v2src[list[i]] = buf[n++];
v3src[list[i]] = buf[n++];
v4src[list[i]] = buf[n++];
v5src[list[i]] = buf[n++];
}
break;
}
// Dispersion interactions, geometric mixing
case FORWARD_IK_G: {
FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ydest = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
xdest[list[i]] = buf[n++];
ydest[list[i]] = buf[n++];
zdest[list[i]] = buf[n++];
}
break;
}
case FORWARD_AD_G: {
FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++)
dest[list[i]] = buf[n++];
break;
}
case FORWARD_IK_PERATOM_G: {
FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
if (eflag_atom) esrc[list[i]] = buf[n++];
if (vflag_atom) {
v0src[list[i]] = buf[n++];
v1src[list[i]] = buf[n++];
v2src[list[i]] = buf[n++];
v3src[list[i]] = buf[n++];
v4src[list[i]] = buf[n++];
v5src[list[i]] = buf[n++];
}
}
break;
}
case FORWARD_AD_PERATOM_G: {
FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
v0src[list[i]] = buf[n++];
v1src[list[i]] = buf[n++];
v2src[list[i]] = buf[n++];
v3src[list[i]] = buf[n++];
v4src[list[i]] = buf[n++];
v5src[list[i]] = buf[n++];
}
break;
}
// Dispersion interactions, arithmetic mixing
case FORWARD_IK_A: {
FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
xdest0[list[i]] = buf[n++];
ydest0[list[i]] = buf[n++];
zdest0[list[i]] = buf[n++];
xdest1[list[i]] = buf[n++];
ydest1[list[i]] = buf[n++];
zdest1[list[i]] = buf[n++];
xdest2[list[i]] = buf[n++];
ydest2[list[i]] = buf[n++];
zdest2[list[i]] = buf[n++];
xdest3[list[i]] = buf[n++];
ydest3[list[i]] = buf[n++];
zdest3[list[i]] = buf[n++];
xdest4[list[i]] = buf[n++];
ydest4[list[i]] = buf[n++];
zdest4[list[i]] = buf[n++];
xdest5[list[i]] = buf[n++];
ydest5[list[i]] = buf[n++];
zdest5[list[i]] = buf[n++];
xdest6[list[i]] = buf[n++];
ydest6[list[i]] = buf[n++];
zdest6[list[i]] = buf[n++];
}
break;
}
case FORWARD_AD_A: {
FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
dest0[list[i]] = buf[n++];
dest1[list[i]] = buf[n++];
dest2[list[i]] = buf[n++];
dest3[list[i]] = buf[n++];
dest4[list[i]] = buf[n++];
dest5[list[i]] = buf[n++];
dest6[list[i]] = buf[n++];
}
break;
}
case FORWARD_IK_PERATOM_A: {
FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
if (eflag_atom) {
esrc0[list[i]] = buf[n++];
esrc1[list[i]] = buf[n++];
esrc2[list[i]] = buf[n++];
esrc3[list[i]] = buf[n++];
esrc4[list[i]] = buf[n++];
esrc5[list[i]] = buf[n++];
esrc6[list[i]] = buf[n++];
}
if (vflag_atom) {
v0src0[list[i]] = buf[n++];
v1src0[list[i]] = buf[n++];
v2src0[list[i]] = buf[n++];
v3src0[list[i]] = buf[n++];
v4src0[list[i]] = buf[n++];
v5src0[list[i]] = buf[n++];
v0src1[list[i]] = buf[n++];
v1src1[list[i]] = buf[n++];
v2src1[list[i]] = buf[n++];
v3src1[list[i]] = buf[n++];
v4src1[list[i]] = buf[n++];
v5src1[list[i]] = buf[n++];
v0src2[list[i]] = buf[n++];
v1src2[list[i]] = buf[n++];
v2src2[list[i]] = buf[n++];
v3src2[list[i]] = buf[n++];
v4src2[list[i]] = buf[n++];
v5src2[list[i]] = buf[n++];
v0src3[list[i]] = buf[n++];
v1src3[list[i]] = buf[n++];
v2src3[list[i]] = buf[n++];
v3src3[list[i]] = buf[n++];
v4src3[list[i]] = buf[n++];
v5src3[list[i]] = buf[n++];
v0src4[list[i]] = buf[n++];
v1src4[list[i]] = buf[n++];
v2src4[list[i]] = buf[n++];
v3src4[list[i]] = buf[n++];
v4src4[list[i]] = buf[n++];
v5src4[list[i]] = buf[n++];
v0src5[list[i]] = buf[n++];
v1src5[list[i]] = buf[n++];
v2src5[list[i]] = buf[n++];
v3src5[list[i]] = buf[n++];
v4src5[list[i]] = buf[n++];
v5src5[list[i]] = buf[n++];
v0src6[list[i]] = buf[n++];
v1src6[list[i]] = buf[n++];
v2src6[list[i]] = buf[n++];
v3src6[list[i]] = buf[n++];
v4src6[list[i]] = buf[n++];
v5src6[list[i]] = buf[n++];
}
}
break;
}
case FORWARD_AD_PERATOM_A: {
FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
v0src0[list[i]] = buf[n++];
v1src0[list[i]] = buf[n++];
v2src0[list[i]] = buf[n++];
v3src0[list[i]] = buf[n++];
v4src0[list[i]] = buf[n++];
v5src0[list[i]] = buf[n++];
v0src1[list[i]] = buf[n++];
v1src1[list[i]] = buf[n++];
v2src1[list[i]] = buf[n++];
v3src1[list[i]] = buf[n++];
v4src1[list[i]] = buf[n++];
v5src1[list[i]] = buf[n++];
v0src2[list[i]] = buf[n++];
v1src2[list[i]] = buf[n++];
v2src2[list[i]] = buf[n++];
v3src2[list[i]] = buf[n++];
v4src2[list[i]] = buf[n++];
v5src2[list[i]] = buf[n++];
v0src3[list[i]] = buf[n++];
v1src3[list[i]] = buf[n++];
v2src3[list[i]] = buf[n++];
v3src3[list[i]] = buf[n++];
v4src3[list[i]] = buf[n++];
v5src3[list[i]] = buf[n++];
v0src4[list[i]] = buf[n++];
v1src4[list[i]] = buf[n++];
v2src4[list[i]] = buf[n++];
v3src4[list[i]] = buf[n++];
v4src4[list[i]] = buf[n++];
v5src4[list[i]] = buf[n++];
v0src5[list[i]] = buf[n++];
v1src5[list[i]] = buf[n++];
v2src5[list[i]] = buf[n++];
v3src5[list[i]] = buf[n++];
v4src5[list[i]] = buf[n++];
v5src5[list[i]] = buf[n++];
v0src6[list[i]] = buf[n++];
v1src6[list[i]] = buf[n++];
v2src6[list[i]] = buf[n++];
v3src6[list[i]] = buf[n++];
v4src6[list[i]] = buf[n++];
v5src6[list[i]] = buf[n++];
}
break;
}
// Dispersion interactions, no mixing
case FORWARD_IK_NONE: {
for (int k = 0; k < nsplit_alloc; k++) {
FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
xdest[list[i]] = buf[n++];
ydest[list[i]] = buf[n++];
zdest[list[i]] = buf[n++];
}
}
break;
}
case FORWARD_AD_NONE: {
for (int k = 0; k < nsplit_alloc; k++) {
FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++)
dest[list[i]] = buf[n++];
}
break;
}
case FORWARD_IK_PERATOM_NONE: {
for (int k = 0; k < nsplit_alloc; k++) {
FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
if (eflag_atom) esrc[list[i]] = buf[n++];
if (vflag_atom) {
v0src[list[i]] = buf[n++];
v1src[list[i]] = buf[n++];
v2src[list[i]] = buf[n++];
v3src[list[i]] = buf[n++];
v4src[list[i]] = buf[n++];
v5src[list[i]] = buf[n++];
}
}
}
break;
}
case FORWARD_AD_PERATOM_NONE: {
for (int k = 0; k < nsplit_alloc; k++) {
FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
v0src[list[i]] = buf[n++];
v1src[list[i]] = buf[n++];
v2src[list[i]] = buf[n++];
v3src[list[i]] = buf[n++];
v4src[list[i]] = buf[n++];
v5src[list[i]] = buf[n++];
}
}
break;
}
}
}
/* ----------------------------------------------------------------------
pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */
void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
int n = 0;
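// reverse communication ships ghost-zone density contributions back to
// their owners; unpack_reverse() accumulates them with +=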
//Coulomb interactions
if (flag == REVERSE_RHO) {
FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
//Dispersion interactions, geometric mixing
} else if (flag == REVERSE_RHO_G) {
FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
//Dispersion interactions, arithmetic mixing
} else if (flag == REVERSE_RHO_A) {
FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
buf[n++] = src0[list[i]];
buf[n++] = src1[list[i]];
buf[n++] = src2[list[i]];
buf[n++] = src3[list[i]];
buf[n++] = src4[list[i]];
buf[n++] = src5[list[i]];
buf[n++] = src6[list[i]];
}
//Dispersion interactions, no mixing
} else if (flag == REVERSE_RHO_NONE) {
for (int k = 0; k < nsplit_alloc; k++) {
FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
buf[n++] = src[list[i]];
}
}
}
}
/* ----------------------------------------------------------------------
unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */
void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
int n = 0;
//Coulomb interactions
if (flag == REVERSE_RHO) {
FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
dest[list[i]] += buf[i];
//Dispersion interactions, geometric mixing
} else if (flag == REVERSE_RHO_G) {
FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++)
dest[list[i]] += buf[i];
//Dispersion interactions, arithmetic mixing
} else if (flag == REVERSE_RHO_A) {
FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *dest1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++) {
dest0[list[i]] += buf[n++];
dest1[list[i]] += buf[n++];
dest2[list[i]] += buf[n++];
dest3[list[i]] += buf[n++];
dest4[list[i]] += buf[n++];
dest5[list[i]] += buf[n++];
dest6[list[i]] += buf[n++];
}
//Dispersion interactions, no mixing
} else if (flag == REVERSE_RHO_NONE) {
for (int k = 0; k < nsplit_alloc; k++) {
FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
for (int i = 0; i < nlist; i++)
dest[list[i]] += buf[n++];
}
}
}
/* ----------------------------------------------------------------------
map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
------------------------------------------------------------------------- */
void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
{
// loop thru all possible factorizations of nprocs
// surf = surface area of largest proc sub-domain
// innermost if test minimizes surface area and surface/volume ratio
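// e.g. nprocs=4 on a 10x10 grid: candidates (px,py) = (1,4),(2,2),(4,1)
// give surf = 13, 10, 13, so (2,2) with 5x5 sub-domains is chosen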
int bestsurf = 2 * (nx + ny);
int bestboxx = 0;
int bestboxy = 0;
int boxx,boxy,surf,ipx,ipy;
ipx = 1;
while (ipx <= nprocs) {
if (nprocs % ipx == 0) {
ipy = nprocs/ipx;
boxx = nx/ipx;
if (nx % ipx) boxx++;
boxy = ny/ipy;
if (ny % ipy) boxy++;
surf = boxx + boxy;
if (surf < bestsurf ||
(surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
bestsurf = surf;
bestboxx = boxx;
bestboxy = boxy;
*px = ipx;
*py = ipy;
}
}
ipx++;
}
}
/* ----------------------------------------------------------------------
charge assignment into rho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
const FFT_SCALAR &dz, int ord,
FFT_SCALAR **rho_c, FFT_SCALAR **r1d)
{
int k,l;
FFT_SCALAR r1,r2,r3;
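// Horner evaluation of r1d[d][k] = sum_l rho_c[l][k] * delta_d^l
// for each of the ord stencil points k and each direction d = x,y,z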
for (k = (1-ord)/2; k <= ord/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = ord-1; l >= 0; l--) {
r1 = rho_c[l][k] + r1*dx;
r2 = rho_c[l][k] + r2*dy;
r3 = rho_c[l][k] + r3*dz;
}
r1d[0][k] = r1;
r1d[1][k] = r2;
r1d[2][k] = r3;
}
}
/* ----------------------------------------------------------------------
charge assignment into drho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
const FFT_SCALAR &dz, int ord,
FFT_SCALAR **drho_c, FFT_SCALAR **dr1d)
{
int k,l;
FFT_SCALAR r1,r2,r3;
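// same Horner scheme with the derivative coefficients; the inner loop
// starts at ord-2 because differentiation lowers the degree by one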
for (k = (1-ord)/2; k <= ord/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = ord-2; l >= 0; l--) {
r1 = drho_c[l][k] + r1*dx;
r2 = drho_c[l][k] + r2*dy;
r3 = drho_c[l][k] + r3*dz;
}
dr1d[0][k] = r1;
dr1d[1][k] = r2;
dr1d[2][k] = r3;
}
}
/* ----------------------------------------------------------------------
   generate coefficients for the weight function of order n

              (n-1)
  Wn(x) =     Sum    wn(k,x) , Sum is over every other integer
           k=-(n-1)
  For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
      k is odd integers if n is even and even integers if n is odd
              ---
             |  n-1
             | Sum a(l,k)*(x-k/2)**l   if abs(x-k/2) < 1/2
  wn(k,x) = <   l=0
             |
             |  0                       otherwise
              ---
  a coefficients are packed into the array rho_coeff to eliminate zeros
  rho_coeff(l,(k+mod(n+1,2))/2) = a(l,k)
------------------------------------------------------------------------- */
void PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff,
int ord)
{
int j,k,l,m;
FFT_SCALAR s;
FFT_SCALAR **a;
memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a");
for (k = -ord; k <= ord; k++)
for (l = 0; l < ord; l++)
a[l][k] = 0.0;
a[0][0] = 1.0;
for (j = 1; j < ord; j++) {
for (k = -j; k <= j; k += 2) {
s = 0.0;
for (l = 0; l < j; l++) {
a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
#ifdef FFT_SINGLE
s += powf(0.5,(float) l+1) *
(a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
#else
s += pow(0.5,(double) l+1) *
(a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
#endif
}
a[0][k] = s;
}
}
m = (1-ord)/2;
for (k = -(ord-1); k < ord; k += 2) {
for (l = 0; l < ord; l++)
coeff[l][m] = a[l][k];
for (l = 1; l < ord; l++)
dcoeff[l-1][m] = l*a[l][k];
m++;
}
memory->destroy2d_offset(a,-ord);
}
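/* ----------------------------------------------------------------------
   restated in LaTeX (matching the loop indices above), the triangular
   recurrence that fills a is
------------------------------------------------------------------------- */
\[
  a_{l+1,k} = \frac{a_{l,k+1} - a_{l,k-1}}{l+1}, \qquad
  a_{0,k} = \sum_{l=0}^{j-1} \Big(\tfrac{1}{2}\Big)^{l+1}
            \frac{a_{l,k-1} + (-1)^{l}\, a_{l,k+1}}{l+1},
\]
after which the packing loop stores a_{l,k} into coeff and the derivative
weights l*a_{l,k} into dcoeff.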
/* ----------------------------------------------------------------------
Slab-geometry correction term to dampen inter-slab interactions between
periodically repeating slabs. Yields good approximation to 2D Ewald if
adequate empty space is left between repeating slabs (J. Chem. Phys.
111, 3155). Slabs defined here to be parallel to the xy plane. Also
extended to non-neutral systems (J. Chem. Phys. 131, 094107).
------------------------------------------------------------------------- */
void PPPMDisp::slabcorr(int eflag)
{
// compute local contribution to global dipole moment
double *q = atom->q;
double **x = atom->x;
double zprd = domain->zprd;
int nlocal = atom->nlocal;
double dipole = 0.0;
for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
// sum local contributions to get global dipole moment
double dipole_all;
MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
// need to make non-neutral systems and/or
// per-atom energy translationally invariant
double dipole_r2 = 0.0;
if (eflag_atom || fabs(qsum) > SMALL) {
for (int i = 0; i < nlocal; i++)
dipole_r2 += q[i]*x[i][2]*x[i][2];
// sum local contributions
double tmp;
MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
dipole_r2 = tmp;
}
// compute corrections
const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
const double qscale = force->qqrd2e * scale;
if (eflag_global) energy_1 += qscale * e_slabcorr;
// per-atom energy
if (eflag_atom) {
double efact = qscale * MY_2PI/volume;
for (int i = 0; i < nlocal; i++)
eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
}
// add on force corrections
double ffact = qscale * (-4.0*MY_PI/volume);
double **f = atom->f;
for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
}
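/* ----------------------------------------------------------------------
   in LaTeX, with M_z = dipole_all, q_tot = qsum, L_z = zprd, V = volume,
   the correction applied above is
------------------------------------------------------------------------- */
\[
  E_{\mathrm{slab}} = \frac{2\pi}{V}\Big[\, M_z^{2}
    - q_{\mathrm{tot}} \textstyle\sum_i q_i z_i^{2}
    - \frac{q_{\mathrm{tot}}^{2} L_z^{2}}{12} \Big],
  \qquad
  F_{i,z} = -\frac{4\pi}{V}\, q_i \big( M_z - q_{\mathrm{tot}}\, z_i \big),
\]
which reduces to the familiar Yeh-Berkowitz dipole term for a neutral
system (q_tot = 0).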
/* ----------------------------------------------------------------------
perform and time the 1d FFTs required for N timesteps
------------------------------------------------------------------------- */
int PPPMDisp::timing_1d(int n, double &time1d)
{
double time1,time2;
int mixing = 1;
if (function[2]) mixing = 4;
if (function[3]) mixing = nsplit_alloc/2;
if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
if (function[1] + function[2] + function[3])
for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
MPI_Barrier(world);
time1 = MPI_Wtime();
if (function[0]) {
for (int i = 0; i < n; i++) {
fft1->timing1d(work1,nfft_both,1);
fft2->timing1d(work1,nfft_both,-1);
if (differentiation_flag != 1){
fft2->timing1d(work1,nfft_both,-1);
fft2->timing1d(work1,nfft_both,-1);
}
}
}
MPI_Barrier(world);
time2 = MPI_Wtime();
time1d = time2 - time1;
MPI_Barrier(world);
time1 = MPI_Wtime();
if (function[1] + function[2] + function[3]) {
for (int i = 0; i < n; i++) {
fft1_6->timing1d(work1_6,nfft_both_6,1);
fft2_6->timing1d(work1_6,nfft_both_6,-1);
if (differentiation_flag != 1){
fft2_6->timing1d(work1_6,nfft_both_6,-1);
fft2_6->timing1d(work1_6,nfft_both_6,-1);
}
}
}
MPI_Barrier(world);
time2 = MPI_Wtime();
time1d += (time2 - time1)*mixing;
if (differentiation_flag) return 2;
return 4;
}
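/* ----------------------------------------------------------------------
   the return value is the assumed FFT count per timestep, presumably
   combined by the caller with the measured time: ad differentiation
   needs 1 forward + 1 backward FFT, ik needs 1 forward + 3 backward
   (one per field component), matching the calls above. As arithmetic:
------------------------------------------------------------------------- */
\[
  t^{\mathrm{FFT}}_{\mathrm{step}} \approx N_{\mathrm{FFT}} \cdot t_{\mathrm{1\,FFT}},
  \qquad
  N_{\mathrm{FFT}} =
  \begin{cases}
    2 & \text{ad differentiation} \\
    4 & \text{ik differentiation}
  \end{cases}
\]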
/* ----------------------------------------------------------------------
perform and time the 3d FFTs required for N timesteps
------------------------------------------------------------------------- */
int PPPMDisp::timing_3d(int n, double &time3d)
{
double time1,time2;
int mixing = 1;
if (function[2]) mixing = 4;
if (function[3]) mixing = nsplit_alloc/2;
if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
if (function[1] + function[2] + function[3])
for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
MPI_Barrier(world);
time1 = MPI_Wtime();
if (function[0]) {
for (int i = 0; i < n; i++) {
fft1->compute(work1,work1,1);
fft2->compute(work1,work1,-1);
if (differentiation_flag != 1) {
fft2->compute(work1,work1,-1);
fft2->compute(work1,work1,-1);
}
}
}
MPI_Barrier(world);
time2 = MPI_Wtime();
time3d = time2 - time1;
MPI_Barrier(world);
time1 = MPI_Wtime();
if (function[1] + function[2] + function[3]) {
for (int i = 0; i < n; i++) {
fft1_6->compute(work1_6,work1_6,1);
fft2_6->compute(work1_6,work1_6,-1);
if (differentiation_flag != 1) {
fft2_6->compute(work1_6,work1_6,-1);
fft2_6->compute(work1_6,work1_6,-1);
}
}
}
MPI_Barrier(world);
time2 = MPI_Wtime();
time3d += (time2 - time1) * mixing;
if (differentiation_flag) return 2;
return 4;
}
/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */
double PPPMDisp::memory_usage()
{
double bytes = nmax*3 * sizeof(double);
int mixing = 1;
int diff = 3; //depends on differentiation
int per = 7; //depends on per atom calculations
if (differentiation_flag) {
diff = 1;
per = 6;
}
if (!evflag_atom) per = 0;
if (function[2]) mixing = 7;
if (function[3]) mixing = nsplit_alloc;
if (function[0]) {
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
bytes += (1 + diff + per) * nbrick * sizeof(FFT_SCALAR); //brick memory
bytes += 6 * nfft_both * sizeof(double); // vg
bytes += nfft_both * sizeof(double); // greensfn
bytes += nfft_both * 3 * sizeof(FFT_SCALAR); // density_FFT, work1, work2
bytes += cg->memory_usage();
}
if (function[1] + function[2] + function[3]) {
int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) *
(nzhi_out_6-nzlo_out_6+1);
bytes += (1 + diff + per ) * nbrick * sizeof(FFT_SCALAR) * mixing; // density_brick + vd_brick + per atom bricks
bytes += 6 * nfft_both_6 * sizeof(double); // vg
bytes += nfft_both_6 * sizeof(double); // greensfn
bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR); // density_FFT, work1, work2
bytes += cg_6->memory_usage();
}
return bytes;
}
diff --git a/src/KSPACE/pppm_disp_tip4p.cpp b/src/KSPACE/pppm_disp_tip4p.cpp
index a44e524e9..d5a85abb4 100755
--- a/src/KSPACE/pppm_disp_tip4p.cpp
+++ b/src/KSPACE/pppm_disp_tip4p.cpp
@@ -1,520 +1,520 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Amalie Frischknecht and Ahmed Ismail (SNL)
Rolf Isele-Holder (Aachen University)
------------------------------------------------------------------------- */
#include <math.h>
#include "pppm_disp_tip4p.h"
#include "pppm_disp.h"
#include "atom.h"
#include "domain.h"
#include "force.h"
#include "memory.h"
#include "error.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define OFFSET 16384
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#define ONEF 1.0f
#else
#define ZEROF 0.0
#define ONEF 1.0
#endif
/* ---------------------------------------------------------------------- */
PPPMDispTIP4P::PPPMDispTIP4P(LAMMPS *lmp, int narg, char **arg) :
PPPMDisp(lmp, narg, arg)
{
triclinic_support = 0;
tip4pflag = 1;
}
/* ---------------------------------------------------------------------- */
void PPPMDispTIP4P::init()
{
// TIP4P PPPM requires newton on, b/c it computes forces on ghost atoms
if (force->newton == 0)
error->all(FLERR,"Kspace style pppm/disp/tip4p requires newton on");
PPPMDisp::init();
}
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void PPPMDispTIP4P::particle_map_c(double delx, double dely, double delz,
double sft, int** p2g, int nup, int nlow,
int nxlo, int nylo, int nzlo,
int nxhi, int nyhi, int nzhi)
{
int nx,ny,nz,iH1,iH2;
double *xi,xM[3];
int *type = atom->type;
double **x = atom->x;
int nlocal = atom->nlocal;
- if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
+ if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
int flag = 0;
for (int i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
find_M(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
// add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
nx = static_cast<int> ((xi[0]-boxlo[0])*delx+sft) - OFFSET;
ny = static_cast<int> ((xi[1]-boxlo[1])*dely+sft) - OFFSET;
nz = static_cast<int> ((xi[2]-boxlo[2])*delz+sft) - OFFSET;
p2g[i][0] = nx;
p2g[i][1] = ny;
p2g[i][2] = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlow < nxlo || nx+nup > nxhi ||
ny+nlow < nylo || ny+nup > nyhi ||
nz+nlow < nzlo || nz+nup > nzhi)
flag = 1;
}
if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
}
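/* ----------------------------------------------------------------------
   a sketch of the OFFSET trick used in the mapping above (hypothetical
   standalone code, not LAMMPS): a plain double->int cast truncates
   toward zero, so int(-0.75) = 0 instead of the wanted -1; biasing by a
   large positive OFFSET first makes truncation equal floor
------------------------------------------------------------------------- */
#include <cstdio>
static const int OFFSET = 16384;
static int grid_floor(double u)
{
  // valid as long as |u| < OFFSET, which holds for in-box grid coords
  return static_cast<int>(u + OFFSET) - OFFSET;
}
int main()
{
  std::printf("%d %d %d\n",
              grid_floor(-0.75),   // -1 (a bare cast would give 0)
              grid_floor(0.75),    //  0
              grid_floor(3.25));   //  3
  return 0;
}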
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void PPPMDispTIP4P::make_rho_c()
{
int i,l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
double *xi,xM[3];
// clear 3d density array
FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
int *type = atom->type;
double *q = atom->q;
double **x = atom->x;
int nlocal = atom->nlocal;
for (int i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
find_M(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv;
dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv;
dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
z0 = delvolinv * q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
density_brick[mz][my][mx] += x0*rho1d[0][l];
}
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles
for ik differentiation
------------------------------------------------------------------------- */
void PPPMDispTIP4P::fieldforce_c_ik()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
double *xi;
int iH1,iH2;
double xM[3];
double fx,fy,fz;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int *type = atom->type;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
find_M(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv;
dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv;
dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = force->qqrd2e * scale * q[i];
if (type[i] != typeO) {
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
if (slabflag != 2) f[i][2] += qfactor*ekz;
} else {
fx = qfactor * ekx;
fy = qfactor * eky;
fz = qfactor * ekz;
find_M(i,iH1,iH2,xM);
f[i][0] += fx*(1 - alpha);
f[i][1] += fy*(1 - alpha);
if (slabflag != 2) f[i][2] += fz*(1 - alpha);
f[iH1][0] += 0.5*alpha*fx;
f[iH1][1] += 0.5*alpha*fy;
if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz;
f[iH2][0] += 0.5*alpha*fx;
f[iH2][1] += 0.5*alpha*fy;
if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz;
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles
for ad scheme
------------------------------------------------------------------------- */
void PPPMDispTIP4P::fieldforce_c_ad()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz;
FFT_SCALAR ekx,eky,ekz;
double *xi;
int iH1,iH2;
double xM[3];
double s1,s2,s3;
double *prd;
double fx,fy,fz;
double sf;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double hx_inv = nx_pppm/xprd;
double hy_inv = ny_pppm/yprd;
double hz_inv = nz_pppm/zprd_slab;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int *type = atom->type;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
find_M(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv;
dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv;
dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
for (m = nlower; m <= nupper; m++) {
my = m+ny;
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
  // convert E-field to force and subtract self forces
const double qfactor = force->qqrd2e * scale;
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf = sf_coeff[0]*sin(2*MY_PI*s1);
sf += sf_coeff[1]*sin(4*MY_PI*s1);
sf *= 2*q[i]*q[i];
fx = qfactor*(ekx*q[i] - sf);
sf = sf_coeff[2]*sin(2*MY_PI*s2);
sf += sf_coeff[3]*sin(4*MY_PI*s2);
sf *= 2*q[i]*q[i];
fy = qfactor*(eky*q[i] - sf);
sf = sf_coeff[4]*sin(2*MY_PI*s3);
sf += sf_coeff[5]*sin(4*MY_PI*s3);
sf *= 2*q[i]*q[i];
fz = qfactor*(ekz*q[i] - sf);
if (type[i] != typeO) {
f[i][0] += fx;
f[i][1] += fy;
if (slabflag != 2) f[i][2] += fz;
} else {
find_M(i,iH1,iH2,xM);
f[i][0] += fx*(1 - alpha);
f[i][1] += fy*(1 - alpha);
if (slabflag != 2) f[i][2] += fz*(1 - alpha);
f[iH1][0] += 0.5*alpha*fx;
f[iH1][1] += 0.5*alpha*fy;
if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz;
f[iH2][0] += 0.5*alpha*fx;
f[iH2][1] += 0.5*alpha*fy;
if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz;
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles
------------------------------------------------------------------------- */
void PPPMDispTIP4P::fieldforce_c_peratom()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
double *xi;
int iH1,iH2;
double xM[3];
FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
int *type = atom->type;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
find_M(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv;
dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv;
dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
if (eflag_atom) u_pa += x0*u_brick[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick[mz][my][mx];
v1 += x0*v1_brick[mz][my][mx];
v2 += x0*v2_brick[mz][my][mx];
v3 += x0*v3_brick[mz][my][mx];
v4 += x0*v4_brick[mz][my][mx];
v5 += x0*v5_brick[mz][my][mx];
}
}
}
}
const double qfactor = 0.5*force->qqrd2e * scale * q[i];
if (eflag_atom) {
if (type[i] != typeO) {
eatom[i] += qfactor*u_pa;
} else {
eatom[i] += qfactor*u_pa*(1-alpha);
eatom[iH1] += qfactor*u_pa*alpha*0.5;
eatom[iH2] += qfactor*u_pa*alpha*0.5;
}
}
if (vflag_atom) {
if (type[i] != typeO) {
vatom[i][0] += v0*qfactor;
vatom[i][1] += v1*qfactor;
vatom[i][2] += v2*qfactor;
vatom[i][3] += v3*qfactor;
vatom[i][4] += v4*qfactor;
vatom[i][5] += v5*qfactor;
} else {
vatom[i][0] += v0*(1-alpha)*qfactor;
vatom[i][1] += v1*(1-alpha)*qfactor;
vatom[i][2] += v2*(1-alpha)*qfactor;
vatom[i][3] += v3*(1-alpha)*qfactor;
vatom[i][4] += v4*(1-alpha)*qfactor;
vatom[i][5] += v5*(1-alpha)*qfactor;
vatom[iH1][0] += v0*alpha*0.5*qfactor;
vatom[iH1][1] += v1*alpha*0.5*qfactor;
vatom[iH1][2] += v2*alpha*0.5*qfactor;
vatom[iH1][3] += v3*alpha*0.5*qfactor;
vatom[iH1][4] += v4*alpha*0.5*qfactor;
vatom[iH1][5] += v5*alpha*0.5*qfactor;
vatom[iH2][0] += v0*alpha*0.5*qfactor;
vatom[iH2][1] += v1*alpha*0.5*qfactor;
vatom[iH2][2] += v2*alpha*0.5*qfactor;
vatom[iH2][3] += v3*alpha*0.5*qfactor;
vatom[iH2][4] += v4*alpha*0.5*qfactor;
vatom[iH2][5] += v5*alpha*0.5*qfactor;
}
}
}
}
/* ----------------------------------------------------------------------
find 2 H atoms bonded to O atom i
compute position xM of fictitious charge site for O atom
also return local indices iH1,iH2 of H atoms
------------------------------------------------------------------------- */
void PPPMDispTIP4P::find_M(int i, int &iH1, int &iH2, double *xM)
{
iH1 = atom->map(atom->tag[i] + 1);
iH2 = atom->map(atom->tag[i] + 2);
if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing");
if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
double **x = atom->x;
double delx1 = x[iH1][0] - x[i][0];
double dely1 = x[iH1][1] - x[i][1];
double delz1 = x[iH1][2] - x[i][2];
domain->minimum_image(delx1,dely1,delz1);
double delx2 = x[iH2][0] - x[i][0];
double dely2 = x[iH2][1] - x[i][1];
double delz2 = x[iH2][2] - x[i][2];
domain->minimum_image(delx2,dely2,delz2);
xM[0] = x[i][0] + alpha * 0.5 * (delx1 + delx2);
xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2);
xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2);
}
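/* ----------------------------------------------------------------------
   in the code's notation, with minimum-image displacements
   Delta_1 = x_H1 - x_O and Delta_2 = x_H2 - x_O, the fictitious charge
   site computed above is
------------------------------------------------------------------------- */
\[
  x_M = x_O + \frac{\alpha}{2}\,\big(\Delta_1 + \Delta_2\big),
\]
which is why the force interpolated at the M site is redistributed as
(1 - alpha) to O and alpha/2 to each H in the fieldforce routines above.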
diff --git a/src/KSPACE/pppm_stagger.cpp b/src/KSPACE/pppm_stagger.cpp
index 8cb71f3e6..7cd719ec7 100755
--- a/src/KSPACE/pppm_stagger.cpp
+++ b/src/KSPACE/pppm_stagger.cpp
@@ -1,997 +1,997 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (Sandia)
------------------------------------------------------------------------- */
#include <mpi.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "pppm_stagger.h"
#include "atom.h"
#include "gridcomm.h"
#include "force.h"
#include "domain.h"
#include "memory.h"
#include "error.h"
#include "math_const.h"
#include "math_special.h"
using namespace LAMMPS_NS;
using namespace MathConst;
using namespace MathSpecial;
#define OFFSET 16384
#define EPS_HOC 1.0e-7
enum{REVERSE_RHO};
enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#define ONEF 1.0f
#else
#define ZEROF 0.0
#define ONEF 1.0
#endif
/* ---------------------------------------------------------------------- */
PPPMStagger::PPPMStagger(LAMMPS *lmp, int narg, char **arg) :
PPPM(lmp, narg, arg)
{
if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/stagger command");
stagger_flag = 1;
group_group_enable = 0;
memory->create(gf_b2,8,7,"pppm_stagger:gf_b2");
gf_b2[1][0] = 1.0;
gf_b2[2][0] = 5.0 / 6.0;
gf_b2[2][1] = 1.0 / 6.0;
gf_b2[3][0] = 61.0 / 120.0;
gf_b2[3][1] = 29.0 / 60.0;
gf_b2[3][2] = 1.0 / 120.0;
gf_b2[4][0] = 277.0 / 1008.0;
gf_b2[4][1] = 1037.0 / 1680.0;
gf_b2[4][2] = 181.0 / 1680.00;
gf_b2[4][3] = 1.0 / 5040.0;
gf_b2[5][0] = 50521.0 / 362880.0;
gf_b2[5][1] = 7367.0 / 12960.0;
gf_b2[5][2] = 16861.0 / 60480.0;
gf_b2[5][3] = 1229.0 / 90720.0;
gf_b2[5][4] = 1.0 / 362880.0;
gf_b2[6][0] = 540553.0 / 7983360.0;
gf_b2[6][1] = 17460701.0 / 39916800.0;
gf_b2[6][2] = 8444893.0 / 19958400.0;
gf_b2[6][3] = 1409633.0 / 19958400.0;
gf_b2[6][4] = 44281.0 / 39916800.0;
gf_b2[6][5] = 1.0 / 39916800.0;
gf_b2[7][0] = 199360981.0 / 6227020800.0;
gf_b2[7][1] = 103867703.0 / 345945600.0;
gf_b2[7][2] = 66714163.0 / 138378240.0;
gf_b2[7][3] = 54085121.0 / 311351040.0;
gf_b2[7][4] = 1640063.0 / 138378240.0;
gf_b2[7][5] = 671.0 / 10483200.0;
gf_b2[7][6] = 1.0 / 6227020800.0;
}
/* ----------------------------------------------------------------------
free all memory
------------------------------------------------------------------------- */
PPPMStagger::~PPPMStagger()
{
memory->destroy(gf_b2);
}
/* ----------------------------------------------------------------------
called once before run
------------------------------------------------------------------------- */
void PPPMStagger::init()
{
// error check
if (domain->triclinic)
error->all(FLERR,"Cannot (yet) use kspace_style pppm/stagger "
"with triclinic systems");
PPPM::init();
}
/* ----------------------------------------------------------------------
compute the PPPM long-range force, energy, virial
------------------------------------------------------------------------- */
void PPPMStagger::compute(int eflag, int vflag)
{
int i,j;
// set energy/virial flags
// invoke allocate_peratom() if needed for first time
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = 0;
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
// convert atoms from box to lamda coords
if (triclinic == 0) boxlo = domain->boxlo;
else {
boxlo = domain->boxlo_lamda;
domain->x2lamda(atom->nlocal);
}
// extend size of per-atom arrays if necessary
if (atom->nlocal > nmax) {
memory->destroy(part2grid);
nmax = atom->nmax;
memory->create(part2grid,nmax,3,"pppm:part2grid");
}
nstagger = 2;
stagger = 0.0;
for (int n=0; n<nstagger; n++) {
// find grid points for all my particles
// map my particle charge onto my local 3d density grid
particle_map();
make_rho();
// all procs communicate density values from their ghost cells
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
cg->reverse_comm(this,REVERSE_RHO);
brick2fft();
// compute potential gradient on my FFT grid and
// portion of e_long on this proc's FFT grid
// return gradients (electric fields) in 3d brick decomposition
// also performs per-atom calculations via poisson_peratom()
poisson();
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
// extra per-atom energy/virial communication
if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
}
// calculate the force on my particles
fieldforce();
// extra per-atom energy/virial communication
if (evflag_atom) fieldforce_peratom();
stagger += 1.0/float(nstagger);
}
// update qsum and qsqsum, if atom count has changed and energy needed
if ((eflag_global || eflag_atom) && atom->natoms != natoms_original) {
qsum_qsq();
natoms_original = atom->natoms;
}
// sum global energy across procs and add in volume-dependent term
const double qscale = qqrd2e * scale;
if (eflag_global) {
double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all;
energy *= 0.5*volume/float(nstagger);
energy -= g_ewald*qsqsum/MY_PIS +
MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
energy *= qscale;
}
// sum global virial across procs
if (vflag_global) {
double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++)
virial[i] = 0.5*qscale*volume*virial_all[i]/float(nstagger);
}
// per-atom energy/virial
// energy includes self-energy correction
// ntotal accounts for TIP4P tallying eatom/vatom for ghost atoms
if (evflag_atom) {
double *q = atom->q;
int nlocal = atom->nlocal;
int ntotal = nlocal;
if (tip4pflag) ntotal += atom->nghost;
if (eflag_atom) {
for (i = 0; i < nlocal; i++) {
eatom[i] *= 0.5;
eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
(g_ewald*g_ewald*volume);
eatom[i] *= qscale;
}
for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale;
}
if (vflag_atom) {
for (i = 0; i < ntotal; i++)
for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
}
}
// 2d slab correction
if (slabflag == 1) slabcorr();
// convert atoms back from lamda to box coords
if (triclinic) domain->lamda2x(atom->nlocal);
}
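/* ----------------------------------------------------------------------
   the 1/nstagger factors above implement a plain average over the
   interlaced passes, each accumulated on a grid shifted by
   stagger = s/nstagger (here s = 0, 1):
------------------------------------------------------------------------- */
\[
  \langle A \rangle = \frac{1}{n_{\mathrm{stagger}}}
    \sum_{s=0}^{n_{\mathrm{stagger}}-1}
    A\!\left(\mathrm{stagger} = \tfrac{s}{n_{\mathrm{stagger}}}\right),
  \qquad n_{\mathrm{stagger}} = 2,
\]
the staggered (interlaced) mesh trick for reducing aliasing error in the
charge assignment and interpolation.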
/* ----------------------------------------------------------------------
compute qopt
------------------------------------------------------------------------- */
double PPPMStagger::compute_qopt()
{
if (differentiation_flag == 1)
return compute_qopt_ad();
double qopt = 0.0;
const double * const prd = domain->prd;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
double snx,sny,snz;
double cnx,cny,cnz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,sum2,dot1,dot2;
double numerator,denominator;
double u1,u2,u3,sqk;
int k,l,m,nx,ny,nz,kper,lper,mper;
const int nbx = 2;
const int nby = 2;
const int nbz = 2;
const int twoorder = 2*order;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
cnz = cos(0.5*unitkz*mper*zprd_slab/nz_pppm);
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
cny = cos(0.5*unitky*lper*yprd/ny_pppm);
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
cnx = cos(0.5*unitkx*kper*xprd/nx_pppm);
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk != 0.0) {
numerator = MY_4PI/sqk;
denominator = 0.5*(gf_denom(snx,sny,snz) + gf_denom2(cnx,cny,cnz));
sum1 = 0.0;
sum2 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
dot2 = qx*qx + qy*qy + qz*qz;
u1 = sx*sy*sz;
u2 = wx*wy*wz;
u3 = numerator*u1*u2*dot1;
sum1 += u1*u1*MY_4PI*MY_4PI/dot2;
sum2 += u3*u3/dot2;
}
}
}
qopt += sum1 - sum2/denominator;
}
}
}
}
double qopt_all;
MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
return qopt_all;
}
/* ----------------------------------------------------------------------
compute qopt_ad
------------------------------------------------------------------------- */
double PPPMStagger::compute_qopt_ad()
{
double qopt = 0.0;
const double * const prd = domain->prd;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,sum2,sum3,sum4,sum5,sum6,dot2;
double u1,u2,sqk;
int k,l,m,nx,ny,nz,kper,lper,mper;
const int nbx = 2;
const int nby = 2;
const int nbz = 2;
const int twoorder = 2*order;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk != 0.0) {
sum1 = 0.0;
sum2 = 0.0;
sum3 = 0.0;
sum4 = 0.0;
sum5 = 0.0;
sum6 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
dot2 = qx*qx + qy*qy + qz*qz;
u1 = sx*sy*sz;
u2 = wx*wy*wz;
sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
sum2 += u1*u1*u2*u2*MY_4PI*MY_4PI;
sum3 += u2;
sum4 += dot2*u2;
sum5 += u2*powint(-1.0,nx+ny+nz);
sum6 += dot2*u2*powint(-1.0,nx+ny+nz);
}
}
}
qopt += sum1 - sum2/(0.5*(sum3*sum4 + sum5*sum6));
}
}
}
}
double qopt_all;
MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
return qopt_all;
}
/* ----------------------------------------------------------------------
pre-compute Green's function denominator expansion coeffs, Gamma(2n)
------------------------------------------------------------------------- */
void PPPMStagger::compute_gf_denom()
{
if (gf_b) memory->destroy(gf_b);
memory->create(gf_b,order,"pppm:gf_b");
int k,l,m;
for (l = 1; l < order; l++) gf_b[l] = 0.0;
gf_b[0] = 1.0;
for (m = 1; m < order; m++) {
for (l = m; l > 0; l--)
gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1));
gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5));
}
bigint ifact = 1;
for (k = 1; k < 2*order; k++) ifact *= k;
double gaminv = 1.0/ifact;
for (l = 0; l < order; l++) gf_b[l] *= gaminv;
}
/* ----------------------------------------------------------------------
pre-compute modified (Hockney-Eastwood) Coulomb Green's function
------------------------------------------------------------------------- */
void PPPMStagger::compute_gf_ik()
{
const double * const prd = domain->prd;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
double snx,sny,snz;
double cnx,cny,cnz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,dot1,dot2;
double numerator,denominator;
double sqk;
int k,l,m,n,nx,ny,nz,kper,lper,mper;
const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
pow(-log(EPS_HOC),0.25));
const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
pow(-log(EPS_HOC),0.25));
const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
pow(-log(EPS_HOC),0.25));
const int twoorder = 2*order;
n = 0;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
cnz = cos(0.5*unitkz*mper*zprd_slab/nz_pppm);
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
cny = cos(0.5*unitky*lper*yprd/ny_pppm);
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
cnx = cos(0.5*unitkx*kper*xprd/nx_pppm);
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk != 0.0) {
numerator = MY_4PI/sqk;
denominator = 0.5*(gf_denom(snx,sny,snz) + gf_denom2(cnx,cny,cnz));
sum1 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
dot2 = qx*qx+qy*qy+qz*qz;
sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
}
}
}
greensfn[n++] = numerator*sum1/denominator;
} else greensfn[n++] = 0.0;
}
}
}
}
/* ----------------------------------------------------------------------
compute optimized Green's function for energy calculation
------------------------------------------------------------------------- */
void PPPMStagger::compute_gf_ad()
{
const double * const prd = domain->prd;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
double snx,sny,snz,sqk;
double cnx,cny,cnz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double numerator,denominator;
int k,l,m,n,kper,lper,mper;
const int twoorder = 2*order;
for (int i = 0; i < 6; i++) sf_coeff[i] = 0.0;
n = 0;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
qz = unitkz*mper;
snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
cnz = cos(0.5*qz*zprd_slab/nz_pppm);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
qy = unitky*lper;
sny = square(sin(0.5*qy*yprd/ny_pppm));
cny = cos(0.5*qy*yprd/ny_pppm);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
qx = unitkx*kper;
snx = square(sin(0.5*qx*xprd/nx_pppm));
cnx = cos(0.5*qx*xprd/nx_pppm);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
sqk = qx*qx + qy*qy + qz*qz;
if (sqk != 0.0) {
numerator = MY_4PI/sqk;
denominator = 0.5*(gf_denom(snx,sny,snz) + gf_denom2(cnx,cny,cnz));
greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
n++;
} else {
greensfn[n] = 0.0;
sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
n++;
}
}
}
}
// compute the coefficients for the self-force correction
double prex, prey, prez;
prex = prey = prez = MY_PI/volume;
prex *= nx_pppm/xprd;
prey *= ny_pppm/yprd;
prez *= nz_pppm/zprd_slab;
sf_coeff[0] *= prex;
sf_coeff[1] *= prex*2;
sf_coeff[2] *= prey;
sf_coeff[3] *= prey*2;
sf_coeff[4] *= prez;
sf_coeff[5] *= prez*2;
// communicate values with other procs
double tmp[6];
MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world);
for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n];
}
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void PPPMStagger::particle_map()
{
int nx,ny,nz;
double **x = atom->x;
int nlocal = atom->nlocal;
- if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
+ if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
int flag = 0;
for (int i = 0; i < nlocal; i++) {
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
// add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift + stagger) - OFFSET;
ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift + stagger) - OFFSET;
nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift + stagger) - OFFSET;
part2grid[i][0] = nx;
part2grid[i][1] = ny;
part2grid[i][2] = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
nz+nlower < nzlo_out || nz+nupper > nzhi_out)
flag = 1;
}
if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void PPPMStagger::make_rho()
{
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
// clear 3d density array
memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
ngrid*sizeof(FFT_SCALAR));
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
double *q = atom->q;
double **x = atom->x;
int nlocal = atom->nlocal;
for (int i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv - stagger;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv - stagger;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv - stagger;
compute_rho1d(dx,dy,dz);
z0 = delvolinv * q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
density_brick[mz][my][mx] += x0*rho1d[0][l];
}
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ik
------------------------------------------------------------------------- */
void PPPMStagger::fieldforce_ik()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv - stagger;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv - stagger;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv - stagger;
compute_rho1d(dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = qqrd2e * scale * q[i] / float(nstagger);
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
if (slabflag != 2) f[i][2] += qfactor*ekz;
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ad
------------------------------------------------------------------------- */
void PPPMStagger::fieldforce_ad()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz;
FFT_SCALAR ekx,eky,ekz;
double s1,s2,s3;
double sf = 0.0;
double *prd;
prd = domain->prd;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double hx_inv = nx_pppm/xprd;
double hy_inv = ny_pppm/yprd;
double hz_inv = nz_pppm/zprd;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv - stagger;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv - stagger;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv - stagger;
compute_rho1d(dx,dy,dz);
compute_drho1d(dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
for (m = nlower; m <= nupper; m++) {
my = m+ny;
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
  // convert E-field to force and subtract self forces
const double qfactor = qqrd2e * scale / float(nstagger);
s1 = x[i][0]*hx_inv + stagger;
s2 = x[i][1]*hy_inv + stagger;
s3 = x[i][2]*hz_inv + stagger;
sf = sf_coeff[0]*sin(2*MY_PI*s1);
sf += sf_coeff[1]*sin(4*MY_PI*s1);
sf *= 2*q[i]*q[i];
f[i][0] += qfactor*(ekx*q[i] - sf);
sf = sf_coeff[2]*sin(2*MY_PI*s2);
sf += sf_coeff[3]*sin(4*MY_PI*s2);
sf *= 2*q[i]*q[i];
f[i][1] += qfactor*(eky*q[i] - sf);
sf = sf_coeff[4]*sin(2*MY_PI*s3);
sf += sf_coeff[5]*sin(4*MY_PI*s3);
sf *= 2*q[i]*q[i];
if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial
------------------------------------------------------------------------- */
void PPPMStagger::fieldforce_peratom()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
double *q = atom->q;
double **x = atom->x;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv - stagger;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv - stagger;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv - stagger;
compute_rho1d(dx,dy,dz);
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
if (eflag_atom) u += x0*u_brick[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick[mz][my][mx];
v1 += x0*v1_brick[mz][my][mx];
v2 += x0*v2_brick[mz][my][mx];
v3 += x0*v3_brick[mz][my][mx];
v4 += x0*v4_brick[mz][my][mx];
v5 += x0*v5_brick[mz][my][mx];
}
}
}
}
if (eflag_atom) eatom[i] += q[i]*u/float(nstagger);
if (vflag_atom) {
vatom[i][0] += q[i]*v0/float(nstagger);
vatom[i][1] += q[i]*v1/float(nstagger);
vatom[i][2] += q[i]*v2/float(nstagger);
vatom[i][3] += q[i]*v3/float(nstagger);
vatom[i][4] += q[i]*v4/float(nstagger);
vatom[i][5] += q[i]*v5/float(nstagger);
}
}
}
/* ----------------------------------------------------------------------
perform and time the 1d FFTs required for N timesteps
------------------------------------------------------------------------- */
int PPPMStagger::timing_1d(int n, double &time1d)
{
PPPM::timing_1d(n,time1d);
time1d *= 2.0;
if (differentiation_flag) return 2;
return 4;
}
/* ----------------------------------------------------------------------
perform and time the 3d FFTs required for N timesteps
------------------------------------------------------------------------- */
int PPPMStagger::timing_3d(int n, double &time3d)
{
PPPM::timing_3d(n,time3d);
time3d *= 2.0;
if (differentiation_flag) return 2;
return 4;
}
diff --git a/src/KSPACE/pppm_tip4p.cpp b/src/KSPACE/pppm_tip4p.cpp
index 6742a0b14..34d0616fe 100644
--- a/src/KSPACE/pppm_tip4p.cpp
+++ b/src/KSPACE/pppm_tip4p.cpp
@@ -1,511 +1,511 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Amalie Frischknecht and Ahmed Ismail (SNL)
------------------------------------------------------------------------- */
#include <math.h>
#include "pppm_tip4p.h"
#include "atom.h"
#include "domain.h"
#include "force.h"
#include "memory.h"
#include "error.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define OFFSET 16384
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#define ONEF 1.0f
#else
#define ZEROF 0.0
#define ONEF 1.0
#endif
/* ---------------------------------------------------------------------- */
PPPMTIP4P::PPPMTIP4P(LAMMPS *lmp, int narg, char **arg) :
PPPM(lmp, narg, arg)
{
triclinic_support = 0;
tip4pflag = 1;
}
/* ---------------------------------------------------------------------- */
void PPPMTIP4P::init()
{
// TIP4P PPPM requires newton on, b/c it computes forces on ghost atoms
if (force->newton == 0)
error->all(FLERR,"Kspace style pppm/tip4p requires newton on");
PPPM::init();
}
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void PPPMTIP4P::particle_map()
{
int nx,ny,nz,iH1,iH2;
double *xi,xM[3];
int *type = atom->type;
double **x = atom->x;
int nlocal = atom->nlocal;
- if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
+ if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
int flag = 0;
for (int i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
find_M(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
// add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
nx = static_cast<int> ((xi[0]-boxlo[0])*delxinv+shift) - OFFSET;
ny = static_cast<int> ((xi[1]-boxlo[1])*delyinv+shift) - OFFSET;
nz = static_cast<int> ((xi[2]-boxlo[2])*delzinv+shift) - OFFSET;
part2grid[i][0] = nx;
part2grid[i][1] = ny;
part2grid[i][2] = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag++;
}
int flag_all;
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM");
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void PPPMTIP4P::make_rho()
{
int i,l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
double *xi,xM[3];
// clear 3d density array
FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
int *type = atom->type;
double *q = atom->q;
double **x = atom->x;
int nlocal = atom->nlocal;
for (int i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
find_M(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv;
dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv;
dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
z0 = delvolinv * q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
density_brick[mz][my][mx] += x0*rho1d[0][l];
}
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ik
------------------------------------------------------------------------- */
void PPPMTIP4P::fieldforce_ik()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
double *xi;
int iH1,iH2;
double xM[3];
double fx,fy,fz;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int *type = atom->type;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
find_M(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv;
dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv;
dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = qqrd2e * scale * q[i];
if (type[i] != typeO) {
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
if (slabflag != 2) f[i][2] += qfactor*ekz;
} else {
fx = qfactor * ekx;
fy = qfactor * eky;
fz = qfactor * ekz;
find_M(i,iH1,iH2,xM);
f[i][0] += fx*(1 - alpha);
f[i][1] += fy*(1 - alpha);
if (slabflag != 2) f[i][2] += fz*(1 - alpha);
f[iH1][0] += 0.5*alpha*fx;
f[iH1][1] += 0.5*alpha*fy;
if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz;
f[iH2][0] += 0.5*alpha*fx;
f[iH2][1] += 0.5*alpha*fy;
if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz;
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ad
------------------------------------------------------------------------- */
void PPPMTIP4P::fieldforce_ad()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz;
FFT_SCALAR ekx,eky,ekz;
double *xi;
int iH1,iH2;
double xM[3];
double s1,s2,s3;
double sf;
double *prd;
double fx,fy,fz;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double hx_inv = nx_pppm/xprd;
double hy_inv = ny_pppm/yprd;
double hz_inv = nz_pppm/zprd;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int *type = atom->type;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
find_M(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv;
dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv;
dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
compute_drho1d(dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
for (m = nlower; m <= nupper; m++) {
my = m+ny;
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
  // convert E-field to force and subtract self forces
const double qfactor = qqrd2e * scale;
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf = sf_coeff[0]*sin(2*MY_PI*s1);
sf += sf_coeff[1]*sin(4*MY_PI*s1);
sf *= 2.0*q[i]*q[i];
fx = qfactor*(ekx*q[i] - sf);
sf = sf_coeff[2]*sin(2*MY_PI*s2);
sf += sf_coeff[3]*sin(4*MY_PI*s2);
sf *= 2.0*q[i]*q[i];
fy = qfactor*(eky*q[i] - sf);
sf = sf_coeff[4]*sin(2*MY_PI*s3);
sf += sf_coeff[5]*sin(4*MY_PI*s3);
sf *= 2.0*q[i]*q[i];
fz = qfactor*(ekz*q[i] - sf);
if (type[i] != typeO) {
f[i][0] += fx;
f[i][1] += fy;
if (slabflag != 2) f[i][2] += fz;
} else {
find_M(i,iH1,iH2,xM);
f[i][0] += fx*(1 - alpha);
f[i][1] += fy*(1 - alpha);
if (slabflag != 2) f[i][2] += fz*(1 - alpha);
f[iH1][0] += 0.5*alpha*fx;
f[iH1][1] += 0.5*alpha*fy;
if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz;
f[iH2][0] += 0.5*alpha*fx;
f[iH2][1] += 0.5*alpha*fy;
if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz;
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial on my particles
------------------------------------------------------------------------- */
void PPPMTIP4P::fieldforce_peratom()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
double *xi;
int iH1,iH2;
double xM[3];
FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
double *q = atom->q;
double **x = atom->x;
int *type = atom->type;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
if (type[i] == typeO) {
find_M(i,iH1,iH2,xM);
xi = xM;
} else xi = x[i];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv;
dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv;
dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
if (eflag_atom) u_pa += x0*u_brick[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick[mz][my][mx];
v1 += x0*v1_brick[mz][my][mx];
v2 += x0*v2_brick[mz][my][mx];
v3 += x0*v3_brick[mz][my][mx];
v4 += x0*v4_brick[mz][my][mx];
v5 += x0*v5_brick[mz][my][mx];
}
}
}
}
if (eflag_atom) {
if (type[i] != typeO) {
eatom[i] += q[i]*u_pa;
} else {
eatom[i] += q[i]*u_pa*(1-alpha);
eatom[iH1] += q[i]*u_pa*alpha*0.5;
eatom[iH2] += q[i]*u_pa*alpha*0.5;
}
}
if (vflag_atom) {
if (type[i] != typeO) {
vatom[i][0] += v0*q[i];
vatom[i][1] += v1*q[i];
vatom[i][2] += v2*q[i];
vatom[i][3] += v3*q[i];
vatom[i][4] += v4*q[i];
vatom[i][5] += v5*q[i];
} else {
vatom[i][0] += v0*(1-alpha)*q[i];
vatom[i][1] += v1*(1-alpha)*q[i];
vatom[i][2] += v2*(1-alpha)*q[i];
vatom[i][3] += v3*(1-alpha)*q[i];
vatom[i][4] += v4*(1-alpha)*q[i];
vatom[i][5] += v5*(1-alpha)*q[i];
vatom[iH1][0] += v0*alpha*0.5*q[i];
vatom[iH1][1] += v1*alpha*0.5*q[i];
vatom[iH1][2] += v2*alpha*0.5*q[i];
vatom[iH1][3] += v3*alpha*0.5*q[i];
vatom[iH1][4] += v4*alpha*0.5*q[i];
vatom[iH1][5] += v5*alpha*0.5*q[i];
vatom[iH2][0] += v0*alpha*0.5*q[i];
vatom[iH2][1] += v1*alpha*0.5*q[i];
vatom[iH2][2] += v2*alpha*0.5*q[i];
vatom[iH2][3] += v3*alpha*0.5*q[i];
vatom[iH2][4] += v4*alpha*0.5*q[i];
vatom[iH2][5] += v5*alpha*0.5*q[i];
}
}
}
}
/* ----------------------------------------------------------------------
find 2 H atoms bonded to O atom i
compute position xM of fictitious charge site for O atom
also return local indices iH1,iH2 of H atoms
------------------------------------------------------------------------- */
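// geometry note: expanding the minimum-image vectors below,
//   xM = (1-alpha)*xO + (alpha/2)*xH1 + (alpha/2)*xH2
// i.e. the M site lies on the H-O-H bisector, a fraction alpha of the way
// from the oxygen toward the midpoint of the two hydrogens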
void PPPMTIP4P::find_M(int i, int &iH1, int &iH2, double *xM)
{
iH1 = atom->map(atom->tag[i] + 1);
iH2 = atom->map(atom->tag[i] + 2);
if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing");
if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
double **x = atom->x;
double delx1 = x[iH1][0] - x[i][0];
double dely1 = x[iH1][1] - x[i][1];
double delz1 = x[iH1][2] - x[i][2];
domain->minimum_image(delx1,dely1,delz1);
double delx2 = x[iH2][0] - x[i][0];
double dely2 = x[iH2][1] - x[i][1];
double delz2 = x[iH2][2] - x[i][2];
domain->minimum_image(delx2,dely2,delz2);
xM[0] = x[i][0] + alpha * 0.5 * (delx1 + delx2);
xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2);
xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2);
}
diff --git a/src/USER-OMP/msm_cg_omp.cpp b/src/USER-OMP/msm_cg_omp.cpp
index 93379c8d3..f743abae6 100644
--- a/src/USER-OMP/msm_cg_omp.cpp
+++ b/src/USER-OMP/msm_cg_omp.cpp
@@ -1,567 +1,567 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
Original MSM class by: Paul Crozier, Stan Moore, Stephen Bond, (all SNL)
------------------------------------------------------------------------- */
#include <mpi.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "atom.h"
#include "gridcomm.h"
#include "domain.h"
#include "error.h"
#include "force.h"
#include "neighbor.h"
#include "memory.h"
#include "msm_cg_omp.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define OFFSET 16384
#define SMALLQ 0.00001
enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM};
enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM};
/* ---------------------------------------------------------------------- */
MSMCGOMP::MSMCGOMP(LAMMPS *lmp, int narg, char **arg) : MSMOMP(lmp, narg, arg)
{
if ((narg < 1) || (narg > 2))
error->all(FLERR,"Illegal kspace_style msm/cg/omp command");
triclinic_support = 0;
if (narg == 2) smallq = fabs(force->numeric(FLERR,arg[1]));
else smallq = SMALLQ;
num_charged = -1;
is_charged = NULL;
}
/* ----------------------------------------------------------------------
free all memory
------------------------------------------------------------------------- */
MSMCGOMP::~MSMCGOMP()
{
memory->destroy(is_charged);
}
/* ----------------------------------------------------------------------
compute the MSM long-range force, energy, virial
------------------------------------------------------------------------- */
void MSMCGOMP::compute(int eflag, int vflag)
{
if (scalar_pressure_flag)
error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' "
"with kspace_style msm/cg/omp");
const double * const q = atom->q;
const int nlocal = atom->nlocal;
int i,j,n;
// set energy/virial flags
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = eflag_either = vflag_either = 0;
// invoke allocate_peratom() if needed for first time
if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
peratom_allocate_flag = 1;
}
// extend size of per-atom arrays if necessary
if (nlocal > nmax) {
memory->destroy(part2grid);
memory->destroy(is_charged);
nmax = atom->nmax;
memory->create(part2grid,nmax,3,"msm:part2grid");
memory->create(is_charged,nmax,"msm/cg:is_charged");
}
// one-time setup message
if (num_charged < 0) {
bigint charged_all, charged_num;
double charged_frac, charged_fmax, charged_fmin;
num_charged=0;
for (i=0; i < nlocal; ++i)
if (fabs(q[i]) > smallq)
++num_charged;
// get fraction of charged particles per domain
if (nlocal > 0)
charged_frac = static_cast<double>(num_charged) * 100.0
/ static_cast<double>(nlocal);
else
charged_frac = 0.0;
MPI_Reduce(&charged_frac,&charged_fmax,1,MPI_DOUBLE,MPI_MAX,0,world);
MPI_Reduce(&charged_frac,&charged_fmin,1,MPI_DOUBLE,MPI_MIN,0,world);
// get fraction of charged particles overall
charged_num = num_charged;
MPI_Reduce(&charged_num,&charged_all,1,MPI_LMP_BIGINT,MPI_SUM,0,world);
charged_frac = static_cast<double>(charged_all) * 100.0
/ static_cast<double>(atom->natoms);
if (me == 0) {
if (screen)
fprintf(screen,
" MSM/cg optimization cutoff: %g\n"
" Total charged atoms: %.1f%%\n"
" Min/max charged atoms/proc: %.1f%% %.1f%%\n",
smallq,charged_frac,charged_fmin,charged_fmax);
if (logfile)
fprintf(logfile,
" MSM/cg optimization cutoff: %g\n"
" Total charged atoms: %.1f%%\n"
" Min/max charged atoms/proc: %.1f%% %.1f%%\n",
smallq,charged_frac,charged_fmin,charged_fmax);
}
}
// only need to rebuild this list after a neighbor list update
if (neighbor->ago == 0) {
num_charged = 0;
for (i = 0; i < nlocal; ++i) {
if (fabs(q[i]) > smallq) {
is_charged[num_charged] = i;
++num_charged;
}
}
}
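// note: this is the central msm/cg optimization - is_charged is a compressed
// list of the local atom indices with |q| > smallq, so the grid mapping and
// interpolation loops below iterate over num_charged entries instead of all
// nlocal atoms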
// find grid points for all my particles
// map my particle charge onto my local 3d density grid (an interpolation)
particle_map();
make_rho();
// all procs reverse communicate charge density values from their ghost grid points
// to fully sum contributions in their 3d grid
current_level = 0;
cg_all->reverse_comm(this,REVERSE_RHO);
// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid
for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);
direct(n);
restriction(n);
}
// compute direct interaction for top grid level for nonperiodic
// and for second from top grid level for periodic
if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);
direct(levels-1);
grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
}
}
// prolongate energy/virial from coarser grid to finer grid
// reverse communicate from ghost grid points to get full sum
for (int n=levels-2; n>=0; n--) {
if (!active_flag[n]) continue;
prolongation(n);
current_level = n;
cg[n]->reverse_comm(this,REVERSE_AD);
// extra per-atom virial communication
if (vflag_atom)
cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
}
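// taken together, the loops above and below form one MSM grid cycle:
// charge is restricted fine -> coarse with a direct sum at each level,
// then energy/virial are prolongated coarse -> fine and accumulated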
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
current_level = 0;
cg_all->forward_comm(this,FORWARD_AD);
// extra per-atom energy/virial communication
if (vflag_atom)
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
// calculate the force on my particles (interpolation)
fieldforce();
// calculate the per-atom energy/virial for my particles
if (evflag_atom) fieldforce_peratom();
// update qsum and qsqsum, if atom count has changed and energy needed
if ((eflag_global || eflag_atom) && atom->natoms != natoms_original) {
qsum_qsq();
natoms_original = atom->natoms;
}
// sum global energy across procs and subtract the self-energy term
const double qscale = force->qqrd2e * scale;
if (eflag_global) {
double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all;
double e_self = qsqsum*gamma(0.0)/cutoff;
energy -= e_self;
energy *= 0.5*qscale;
}
// total long-range virial
if (vflag_global) {
double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*virial_all[i];
}
// per-atom energy/virial
// energy includes self-energy correction
if (evflag_atom) {
const double qs = 0.5*qscale;
if (eflag_atom) {
const double sf = gamma(0.0)/cutoff;
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
eatom[i] -= q[i]*q[i]*sf;
eatom[i] *= qs;
}
}
if (vflag_atom) {
for (n = 0; n < num_charged; n++) {
i = is_charged[n];
for (j = 0; j < 6; j++)
vatom[i][j] *= qs;
}
}
}
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void MSMCGOMP::particle_map()
{
const double * const * const x = atom->x;
int flag = 0;
int i;
- if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
+ if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
// XXX: O(N). is it worth adding OpenMP here?
for (int j = 0; j < num_charged; j++) {
i = is_charged[j];
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
// add/subtract OFFSET to avoid int(-0.75) = 0 when we want it to be -1
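// worked example with OFFSET = 16384: for a coordinate 0.75 grid spacings
// below boxlo, (int)(-0.75) would truncate toward zero to 0, while
// (int)(-0.75 + 16384) - 16384 = 16383 - 16384 = -1 as intended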
const int nx=static_cast<int>((x[i][0]-boxlo[0])*delxinv[0]+OFFSET)-OFFSET;
const int ny=static_cast<int>((x[i][1]-boxlo[1])*delyinv[0]+OFFSET)-OFFSET;
const int nz=static_cast<int>((x[i][2]-boxlo[2])*delzinv[0]+OFFSET)-OFFSET;
part2grid[i][0] = nx;
part2grid[i][1] = ny;
part2grid[i][2] = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlower < nxlo_out[0] || nx+nupper > nxhi_out[0] ||
ny+nlower < nylo_out[0] || ny+nupper > nyhi_out[0] ||
nz+nlower < nzlo_out[0] || nz+nupper > nzhi_out[0])
flag = 1;
}
if (flag) error->one(FLERR,"Out of range atoms - cannot compute MSM");
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void MSMCGOMP::make_rho()
{
const double * const q = atom->q;
const double * const * const x = atom->x;
// clear 3d density array
double * const * const * const qgridn = qgrid[0];
memset(&(qgridn[nzlo_out[0]][nylo_out[0]][nxlo_out[0]]),0,ngrid[0]*sizeof(double));
double dx,dy,dz,x0,y0,z0;
int i,j,l,m,n,nx,ny,nz,mx,my,mz;
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
compute_phis(dx,dy,dz);
z0 = q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*phi1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*phi1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
qgridn[mz][my][mx] += x0*phi1d[0][l];
}
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get force on my particles
------------------------------------------------------------------------- */
void MSMCGOMP::fieldforce()
{
const double * const * const * const egridn = egrid[0];
const double * const * const x = atom->x;
double * const * const f = atom->f;
const double * const q = atom->q;
int i,j,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz;
double phi_x,phi_y,phi_z;
double dphi_x,dphi_y,dphi_z;
double ekx,eky,ekz;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
compute_phis_and_dphis(dx,dy,dz);
ekx = eky = ekz = 0.0;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
phi_z = phi1d[2][n];
dphi_z = dphi1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
phi_y = phi1d[1][m];
dphi_y = dphi1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
phi_x = phi1d[0][l];
dphi_x = dphi1d[0][l];
ekx += dphi_x*phi_y*phi_z*egridn[mz][my][mx];
eky += phi_x*dphi_y*phi_z*egridn[mz][my][mx];
ekz += phi_x*phi_y*dphi_z*egridn[mz][my][mx];
}
}
}
ekx *= delxinv[0];
eky *= delyinv[0];
ekz *= delzinv[0];
// convert E-field to force
const double qfactor = force->qqrd2e*scale*q[i];
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
f[i][2] += qfactor*ekz;
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial
------------------------------------------------------------------------- */
void MSMCGOMP::fieldforce_peratom()
{
const double * const q = atom->q;
const double * const * const x = atom->x;
double ***egridn = egrid[0];
double ***v0gridn = v0grid[0];
double ***v1gridn = v1grid[0];
double ***v2gridn = v2grid[0];
double ***v3gridn = v3grid[0];
double ***v4gridn = v4grid[0];
double ***v5gridn = v5grid[0];
int i,j,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz,x0,y0,z0;
double u,v0,v1,v2,v3,v4,v5;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
compute_phis_and_dphis(dx,dy,dz);
u = v0 = v1 = v2 = v3 = v4 = v5 = 0.0;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = phi1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*phi1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*phi1d[0][l];
if (eflag_atom) u += x0*egridn[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0gridn[mz][my][mx];
v1 += x0*v1gridn[mz][my][mx];
v2 += x0*v2gridn[mz][my][mx];
v3 += x0*v3gridn[mz][my][mx];
v4 += x0*v4gridn[mz][my][mx];
v5 += x0*v5gridn[mz][my][mx];
}
}
}
}
if (eflag_atom) eatom[i] += q[i]*u;
if (vflag_atom) {
vatom[i][0] += q[i]*v0;
vatom[i][1] += q[i]*v1;
vatom[i][2] += q[i]*v2;
vatom[i][3] += q[i]*v3;
vatom[i][4] += q[i]*v4;
vatom[i][5] += q[i]*v5;
}
}
}
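/* ----------------------------------------------------------------------
   memory usage of local arrays: base class usage plus the is_charged list
------------------------------------------------------------------------- */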
double MSMCGOMP::memory_usage()
{
double bytes = MSM::memory_usage();
bytes += nmax * sizeof(int);
return bytes;
}
diff --git a/src/USER-OMP/pppm_disp_omp.cpp b/src/USER-OMP/pppm_disp_omp.cpp
index f3692b287..cdf4b3bce 100644
--- a/src/USER-OMP/pppm_disp_omp.cpp
+++ b/src/USER-OMP/pppm_disp_omp.cpp
@@ -1,1865 +1,1865 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U), Rolf Isele-Holder (RWTH Aachen University)
------------------------------------------------------------------------- */
#include "pppm_disp_omp.h"
#include "atom.h"
#include "comm.h"
#include "domain.h"
#include "force.h"
#include "memory.h"
#include "math_const.h"
#include <string.h>
#include <math.h>
#include "suffix.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#define ONEF 1.0f
#else
#define ZEROF 0.0
#define ONEF 1.0
#endif
#define OFFSET 16384
/* ---------------------------------------------------------------------- */
PPPMDispOMP::PPPMDispOMP(LAMMPS *lmp, int narg, char **arg) :
PPPMDisp(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
{
triclinic_support = 0;
suffix_flag |= Suffix::OMP;
}
/* ----------------------------------------------------------------------
allocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMDispOMP::allocate()
{
PPPMDisp::allocate();
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
if (function[0]) {
ThrData *thr = fix->get_thr(tid);
thr->init_pppm(order,memory);
}
if (function[1] + function[2]) {
ThrData * thr = fix->get_thr(tid);
thr->init_pppm_disp(order_6,memory);
}
}
}
/* ----------------------------------------------------------------------
free memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMDispOMP::deallocate()
{
PPPMDisp::deallocate();
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
if (function[0]) {
ThrData * thr = fix->get_thr(tid);
thr->init_pppm(-order,memory);
}
if (function[1] + function[2]) {
ThrData * thr = fix->get_thr(tid);
thr->init_pppm_disp(-order_6,memory);
}
}
}
/* ----------------------------------------------------------------------
Compute the modified (Hockney-Eastwood) Coulomb Green's function
------------------------------------------------------------------------- */
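// schematically (g = g_ewald, k = |k|), the loop below evaluates
//   greensfn = (4*pi/k^2) * exp(-k^2/(4*g^2)) * wx*wy*wz / gf_denom(...)
// where each w factor is the squared (sin(a)/a)^order Fourier weight of the
// charge assignment function; a sketch of the formula, not a derivation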
void PPPMDispOMP::compute_gf()
{
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
int tid,nn,nnfrom,nnto,k,l,m;
int kper,lper,mper;
double snx,sny,snz,snx2,sny2,snz2;
double sqk;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double numerator,denominator;
const int nnx = nxhi_fft-nxlo_fft+1;
const int nny = nyhi_fft-nylo_fft+1;
loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads);
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
qz = unitkz*mper;
snz = sin(0.5*qz*zprd_slab/nz_pppm);
snz2 = snz*snz;
sz = exp(-0.25*pow(qz/g_ewald,2.0));
wz = 1.0;
argz = 0.5*qz*zprd_slab/nz_pppm;
if (argz != 0.0) wz = pow(sin(argz)/argz,order);
wz *= wz;
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
qy = unitky*lper;
sny = sin(0.5*qy*yprd/ny_pppm);
sny2 = sny*sny;
sy = exp(-0.25*pow(qy/g_ewald,2.0));
wy = 1.0;
argy = 0.5*qy*yprd/ny_pppm;
if (argy != 0.0) wy = pow(sin(argy)/argy,order);
wy *= wy;
for (k = nxlo_fft; k <= nxhi_fft; k++) {
/* only compute the part designated to this thread */
nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft));
if ((nn < nnfrom) || (nn >= nnto)) continue;
kper = k - nx_pppm*(2*k/nx_pppm);
qx = unitkx*kper;
snx = sin(0.5*qx*xprd/nx_pppm);
snx2 = snx*snx;
sx = exp(-0.25*pow(qx/g_ewald,2.0));
wx = 1.0;
argx = 0.5*qx*xprd/nx_pppm;
if (argx != 0.0) wx = pow(sin(argx)/argx,order);
wx *= wx;
sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
if (sqk != 0.0) {
numerator = 4.0*MY_PI/sqk;
denominator = gf_denom(snx2,sny2,snz2, gf_b, order);
greensfn[nn] = numerator*sx*sy*sz*wx*wy*wz/denominator;
} else greensfn[nn] = 0.0;
}
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
Compute the modified (Hockney-Eastwood) dispersion Green's function
------------------------------------------------------------------------- */
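// schematically (g = g_ewald_6, k = |k|), the loop below evaluates
//   greensfn_6 = -(pi^(3/2)*g^3/3) * [ (1 - k^2/(2*g^2)) * exp(-k^2/(4*g^2))
//                + (k^3/(4*g^3)) * sqrt(pi) * erfc(k/(2*g)) ] * wx*wy*wz
//                / gf_denom(...)
// matching numerator and term as computed below; a sketch, not a derivation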
void PPPMDispOMP::compute_gf_6()
{
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
double *prd;
int k,l,m,nn;
// volume-dependent factors
// adjust z dimension for 2d slab PPPM
// z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
int kper,lper,mper;
double sqk;
double snx,sny,snz,snx2,sny2,snz2;
double argx,argy,argz,wx,wy,wz,sx,sy,sz;
double qx,qy,qz;
double rtsqk, term;
double numerator,denominator;
double inv2ew = 2*g_ewald_6;
inv2ew = 1/inv2ew;
double rtpi = sqrt(MY_PI);
int nnfrom, nnto, tid;
numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0);
const int nnx = nxhi_fft_6-nxlo_fft_6+1;
const int nny = nyhi_fft_6-nylo_fft_6+1;
loop_setup_thr(nnfrom, nnto, tid, nfft_6, comm->nthreads);
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
mper = m - nz_pppm_6*(2*m/nz_pppm_6);
qz = unitkz*mper;
snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6);
snz2 = snz*snz;
sz = exp(-qz*qz*inv2ew*inv2ew);
wz = 1.0;
argz = 0.5*qz*zprd_slab/nz_pppm_6;
if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
wz *= wz;
for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
lper = l - ny_pppm_6*(2*l/ny_pppm_6);
qy = unitky*lper;
sny = sin(0.5*unitky*lper*yprd/ny_pppm_6);
sny2 = sny*sny;
sy = exp(-qy*qy*inv2ew*inv2ew);
wy = 1.0;
argy = 0.5*qy*yprd/ny_pppm_6;
if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
wy *= wy;
for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
/* only compute the part designated to this thread */
nn = k-nxlo_fft_6 + nnx*(l-nylo_fft_6 + nny*(m-nzlo_fft_6));
if ((nn < nnfrom) || (nn >= nnto)) continue;
kper = k - nx_pppm_6*(2*k/nx_pppm_6);
qx = unitkx*kper;
snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6);
snx2 = snx*snx;
sx = exp(-qx*qx*inv2ew*inv2ew);
wx = 1.0;
argx = 0.5*qx*xprd/nx_pppm_6;
if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
wx *= wx;
sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
if (sqk != 0.0) {
denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6);
rtsqk = sqrt(sqk);
term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
greensfn_6[nn] = numerator*term*wx*wy*wz/denominator;
} else greensfn_6[nn] = 0.0;
}
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
run the regular top-level compute method from plain PPPMDisp, which will
have individual methods replaced by our threaded versions, and then call
the obligatory force reduction.
------------------------------------------------------------------------- */
void PPPMDispOMP::compute(int eflag, int vflag)
{
PPPMDisp::compute(eflag,vflag);
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void PPPMDispOMP::particle_map(double dxinv, double dyinv,
double dzinv, double sft,
int ** part2grid, int nup,
int nlw, int nxlo_o,
int nylo_o, int nzlo_o,
int nxhi_o, int nyhi_o,
int nzhi_o)
{
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
const int nlocal = atom->nlocal;
const double delxinv = dxinv;
const double delyinv = dyinv;
const double delzinv = dzinv;
const double shift = sft;
const int nupper = nup;
const int nlower = nlw;
const int nxlo_out = nxlo_o;
const int nylo_out = nylo_o;
const int nzlo_out = nzlo_o;
const int nxhi_out = nxhi_o;
const int nyhi_out = nyhi_o;
const int nzhi_out = nzhi_o;
- if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
+ if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions. Simulation unstable.");
int i, flag = 0;
#if defined(_OPENMP)
#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static)
#endif
for (i = 0; i < nlocal; i++) {
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
// add/subtract OFFSET to avoid int(-0.75) = 0 when we want it to be -1
const int nx = static_cast<int> ((x[i].x-boxlox)*delxinv+shift) - OFFSET;
const int ny = static_cast<int> ((x[i].y-boxloy)*delyinv+shift) - OFFSET;
const int nz = static_cast<int> ((x[i].z-boxloz)*delzinv+shift) - OFFSET;
p2g[i].a = nx;
p2g[i].b = ny;
p2g[i].t = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
nz+nlower < nzlo_out || nz+nupper > nzhi_out)
flag++;
}
int flag_all;
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM");
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void PPPMDispOMP::make_rho_c()
{
// clear 3d density array
FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]);
memset(d,0,ngrid*sizeof(FFT_SCALAR));
// no local atoms => nothing else to do
const int nlocal = atom->nlocal;
if (nlocal == 0) return;
const int ix = nxhi_out - nxlo_out + 1;
const int iy = nyhi_out - nylo_out + 1;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
const double * _noalias const q = atom->q;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
// determine range of grid points handled by this thread
int i,jfrom,jto,tid;
loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// loop over all local atoms for all threads
for (i = 0; i < nlocal; i++) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
// pre-screen whether this atom will ever come within
// reach of the data segment this thread is updating.
if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
|| ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;
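// note: the brick is stored flat as j = ((z-nzlo_out)*iy + (y-nylo_out))*ix
// + (x-nxlo_out), so every grid point this atom touches has a flat index in
// [(nz+nlower-nzlo_out)*ix*iy, (nz+nupper-nzlo_out+1)*ix*iy); the check
// above skips atoms whose whole range misses this thread's [jfrom,jto)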
const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz,order,rho_coeff);
const FFT_SCALAR z0 = delvolinv * q[i];
for (int n = nlower; n <= nupper; ++n) {
const int jn = (nz+n-nzlo_out)*ix*iy;
const FFT_SCALAR y0 = z0*r1d[2][n];
for (int m = nlower; m <= nupper; ++m) {
const int jm = jn+(ny+m-nylo_out)*ix;
const FFT_SCALAR x0 = y0*r1d[1][m];
for (int l = nlower; l <= nupper; ++l) {
const int jl = jm+nx+l-nxlo_out;
// make sure each thread only updates
// "his" elements of the density grid
if (jl >= jto) break;
if (jl < jfrom) continue;
d[jl] += x0*r1d[0][l];
}
}
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
same as above for dispersion interaction with geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDispOMP::make_rho_g()
{
// clear 3d density array
FFT_SCALAR * _noalias const d = &(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]);
memset(d,0,ngrid_6*sizeof(FFT_SCALAR));
// no local atoms => nothing else to do
const int nlocal = atom->nlocal;
if (nlocal == 0) return;
const int ix = nxhi_out_6 - nxlo_out_6 + 1;
const int iy = nyhi_out_6 - nylo_out_6 + 1;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0];
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
// determine range of grid points handled by this thread
int i,jfrom,jto,tid;
loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// loop over all local atoms for all threads
for (i = 0; i < nlocal; i++) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
// pre-screen whether this atom will ever come within
// reach of the data segment this thread is updating.
if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto)
|| ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue;
const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6;
const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6;
const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);
const int type = atom->type[i];
const double lj = B[type];
const FFT_SCALAR z0 = delvolinv_6 * lj;
for (int n = nlower_6; n <= nupper_6; ++n) {
const int jn = (nz+n-nzlo_out_6)*ix*iy;
const FFT_SCALAR y0 = z0*r1d[2][n];
for (int m = nlower_6; m <= nupper_6; ++m) {
const int jm = jn+(ny+m-nylo_out_6)*ix;
const FFT_SCALAR x0 = y0*r1d[1][m];
for (int l = nlower_6; l <= nupper_6; ++l) {
const int jl = jm+nx+l-nxlo_out_6;
// make sure each thread only updates
// "his" elements of the density grid
if (jl >= jto) break;
if (jl < jfrom) continue;
d[jl] += x0*r1d[0][l];
}
}
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
same as above for dispersion interaction with arithmetic mixing rule
------------------------------------------------------------------------- */
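// note: with arithmetic mixing, sigma_ij = (sigma_i+sigma_j)/2 expands the
// r^-6 coefficient binomially into seven separable terms sigma_i^k *
// sigma_j^(6-k), k = 0..6, presumably why this variant carries the seven
// density bricks d0..d6 weighted by B[7*type] .. B[7*type+6]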
void PPPMDispOMP::make_rho_a()
{
// clear 3d density array
FFT_SCALAR * _noalias const d0 = &(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]);
FFT_SCALAR * _noalias const d1 = &(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]);
FFT_SCALAR * _noalias const d2 = &(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]);
FFT_SCALAR * _noalias const d3 = &(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]);
FFT_SCALAR * _noalias const d4 = &(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]);
FFT_SCALAR * _noalias const d5 = &(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]);
FFT_SCALAR * _noalias const d6 = &(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]);
memset(d0,0,ngrid_6*sizeof(FFT_SCALAR));
memset(d1,0,ngrid_6*sizeof(FFT_SCALAR));
memset(d2,0,ngrid_6*sizeof(FFT_SCALAR));
memset(d3,0,ngrid_6*sizeof(FFT_SCALAR));
memset(d4,0,ngrid_6*sizeof(FFT_SCALAR));
memset(d5,0,ngrid_6*sizeof(FFT_SCALAR));
memset(d6,0,ngrid_6*sizeof(FFT_SCALAR));
// no local atoms => nothing else to do
const int nlocal = atom->nlocal;
if (nlocal == 0) return;
const int ix = nxhi_out_6 - nxlo_out_6 + 1;
const int iy = nyhi_out_6 - nylo_out_6 + 1;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0];
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
// determine range of grid points handled by this thread
int i,jfrom,jto,tid;
loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// loop over all local atoms for all threads
for (i = 0; i < nlocal; i++) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
// pre-screen whether this atom will ever come within
// reach of the data segment this thread is updating.
if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto)
|| ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue;
const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6;
const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6;
const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);
const int type = atom->type[i];
const double lj0 = B[7*type];
const double lj1 = B[7*type+1];
const double lj2 = B[7*type+2];
const double lj3 = B[7*type+3];
const double lj4 = B[7*type+4];
const double lj5 = B[7*type+5];
const double lj6 = B[7*type+6];
const FFT_SCALAR z0 = delvolinv_6;
for (int n = nlower_6; n <= nupper_6; ++n) {
const int jn = (nz+n-nzlo_out_6)*ix*iy;
const FFT_SCALAR y0 = z0*r1d[2][n];
for (int m = nlower_6; m <= nupper_6; ++m) {
const int jm = jn+(ny+m-nylo_out_6)*ix;
const FFT_SCALAR x0 = y0*r1d[1][m];
for (int l = nlower_6; l <= nupper_6; ++l) {
const int jl = jm+nx+l-nxlo_out_6;
// make sure each thread only updates
// "his" elements of the density grid
if (jl >= jto) break;
if (jl < jfrom) continue;
const double w = x0*r1d[0][l];
d0[jl] += w*lj0;
d1[jl] += w*lj1;
d2[jl] += w*lj2;
d3[jl] += w*lj3;
d4[jl] += w*lj4;
d5[jl] += w*lj5;
d6[jl] += w*lj6;
}
}
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles
for ik scheme
------------------------------------------------------------------------- */
void PPPMDispOMP::fieldforce_c_ik()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const double * const q = atom->q;
const double * const * const x = atom->x;
const double qqrd2e = force->qqrd2e;
#if defined(_OPENMP)
const int nthreads = comm->nthreads;
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
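// chunking sketch: idelta = 1 + inum/nthreads atoms per thread, e.g.
// inum = 10 and nthreads = 4 gives chunks [0,3), [3,6), [6,9), [9,10);
// trailing threads may get a short or empty range, which is why the
// loops below are guarded by ifrom < nlocal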
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
double * const * const f = thr->get_f();
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = qqrd2e*scale*q[i];
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
f[i][2] += qfactor*ekz;
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles
for ad scheme
------------------------------------------------------------------------- */
void PPPMDispOMP::fieldforce_c_ad()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const double * const q = atom->q;
const double * const * const x = atom->x;
const double qqrd2e = force->qqrd2e;
//const double * const sf_c = sf_coeff;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
const double hx_inv = nx_pppm/xprd;
const double hy_inv = ny_pppm/yprd;
const double hz_inv = nz_pppm/zprd_slab;
#if defined(_OPENMP)
const int nthreads = comm->nthreads;
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
double * const * const f = thr->get_f();
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz;
FFT_SCALAR ekx,eky,ekz;
double sf = 0.0;
double s1,s2,s3;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff);
compute_drho1d_thr(dr1d,dx,dy,dz, order, drho_coeff);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
for (m = nlower; m <= nupper; m++) {
my = m+ny;
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
ekx += dr1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
eky += r1d[0][l]*dr1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
ekz += r1d[0][l]*r1d[1][m]*dr1d[2][n]*u_brick[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
// convert E-field to force
const double qfactor = qqrd2e*scale;
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf = sf_coeff[0]*sin(2*MY_PI*s1);
sf += sf_coeff[1]*sin(4*MY_PI*s1);
sf *= 2*q[i]*q[i];
f[i][0] += qfactor*(ekx*q[i] - sf);
sf = sf_coeff[2]*sin(2*MY_PI*s2);
sf += sf_coeff[3]*sin(4*MY_PI*s2);
sf *= 2*q[i]*q[i];
f[i][1] += qfactor*(eky*q[i] - sf);
sf = sf_coeff[4]*sin(2*MY_PI*s3);
sf += sf_coeff[5]*sin(4*MY_PI*s3);
sf *= 2*q[i]*q[i];
if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial
------------------------------------------------------------------------- */
void PPPMDispOMP::fieldforce_c_peratom()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
const double * const q = atom->q;
const double * const * const x = atom->x;
#if defined(_OPENMP)
const int nthreads = comm->nthreads;
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff);
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
if (eflag_atom) u += x0*u_brick[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick[mz][my][mx];
v1 += x0*v1_brick[mz][my][mx];
v2 += x0*v2_brick[mz][my][mx];
v3 += x0*v3_brick[mz][my][mx];
v4 += x0*v4_brick[mz][my][mx];
v5 += x0*v5_brick[mz][my][mx];
}
}
}
}
const double qfactor = 0.5*force->qqrd2e * scale * q[i];
if (eflag_atom) eatom[i] += u*qfactor;
if (vflag_atom) {
vatom[i][0] += v0*qfactor;
vatom[i][1] += v1*qfactor;
vatom[i][2] += v2*qfactor;
vatom[i][3] += v3*qfactor;
vatom[i][4] += v4*qfactor;
vatom[i][5] += v5*qfactor;
}
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for ik scheme and geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDispOMP::fieldforce_g_ik()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const double * const * const x = atom->x;
#if defined(_OPENMP)
const int nthreads = comm->nthreads;
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
double * const * const f = thr->get_f();
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
int type;
double lj;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
ekx = eky = ekz = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
ekx -= x0*vdx_brick_g[mz][my][mx];
eky -= x0*vdy_brick_g[mz][my][mx];
ekz -= x0*vdz_brick_g[mz][my][mx];
}
}
}
// convert E-field to force
type = atom->type[i];
lj = B[type];
f[i][0] += lj*ekx;
f[i][1] += lj*eky;
f[i][2] += lj*ekz;
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for ad scheme and geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDispOMP::fieldforce_g_ad()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const double * const * const x = atom->x;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
const double hx_inv = nx_pppm_6/xprd;
const double hy_inv = ny_pppm_6/yprd;
const double hz_inv = nz_pppm_6/zprd_slab;
#if defined(_OPENMP)
const int nthreads = comm->nthreads;
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
double * const * const f = thr->get_f();
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz;
FFT_SCALAR ekx,eky,ekz;
int type;
double lj;
double sf = 0.0;
double s1,s2,s3;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
compute_drho1d_thr(dr1d,dx,dy,dz, order_6, drho_coeff_6);
ekx = eky = ekz = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
ekx += dr1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx];
eky += r1d[0][l]*dr1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx];
ekz += r1d[0][l]*r1d[1][m]*dr1d[2][n]*u_brick_g[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
// convert E-field to force
type = atom->type[i];
lj = B[type];
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
sf *= 2*lj*lj;
f[i][0] += ekx*lj - sf;
sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
sf *= 2*lj*lj;
f[i][1] += eky*lj - sf;
sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
sf *= 2*lj*lj;
if (slabflag != 2) f[i][2] += ekz*lj - sf;
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial for dispersion
interaction and geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDispOMP::fieldforce_g_peratom()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
const double * const * const x = atom->x;
#if defined(_OPENMP)
const int nthreads = comm->nthreads;
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
int type;
double lj;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
if (eflag_atom) u += x0*u_brick_g[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick_g[mz][my][mx];
v1 += x0*v1_brick_g[mz][my][mx];
v2 += x0*v2_brick_g[mz][my][mx];
v3 += x0*v3_brick_g[mz][my][mx];
v4 += x0*v4_brick_g[mz][my][mx];
v5 += x0*v5_brick_g[mz][my][mx];
}
}
}
}
type = atom->type[i];
lj = B[type]*0.5;
if (eflag_atom) eatom[i] += u*lj;
if (vflag_atom) {
vatom[i][0] += v0*lj;
vatom[i][1] += v1*lj;
vatom[i][2] += v2*lj;
vatom[i][3] += v3*lj;
vatom[i][4] += v4*lj;
vatom[i][5] += v5*lj;
}
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for ik scheme and arithmetic mixing rule
------------------------------------------------------------------------- */
void PPPMDispOMP::fieldforce_a_ik()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const double * const * const x = atom->x;
#if defined(_OPENMP)
const int nthreads = comm->nthreads;
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
double * const * const f = thr->get_f();
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
FFT_SCALAR ekx6, eky6, ekz6;
int type;
double lj0,lj1,lj2,lj3,lj4,lj5,lj6;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
ekx0 = eky0 = ekz0 = ZEROF;
ekx1 = eky1 = ekz1 = ZEROF;
ekx2 = eky2 = ekz2 = ZEROF;
ekx3 = eky3 = ekz3 = ZEROF;
ekx4 = eky4 = ekz4 = ZEROF;
ekx5 = eky5 = ekz5 = ZEROF;
ekx6 = eky6 = ekz6 = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
ekx0 -= x0*vdx_brick_a0[mz][my][mx];
eky0 -= x0*vdy_brick_a0[mz][my][mx];
ekz0 -= x0*vdz_brick_a0[mz][my][mx];
ekx1 -= x0*vdx_brick_a1[mz][my][mx];
eky1 -= x0*vdy_brick_a1[mz][my][mx];
ekz1 -= x0*vdz_brick_a1[mz][my][mx];
ekx2 -= x0*vdx_brick_a2[mz][my][mx];
eky2 -= x0*vdy_brick_a2[mz][my][mx];
ekz2 -= x0*vdz_brick_a2[mz][my][mx];
ekx3 -= x0*vdx_brick_a3[mz][my][mx];
eky3 -= x0*vdy_brick_a3[mz][my][mx];
ekz3 -= x0*vdz_brick_a3[mz][my][mx];
ekx4 -= x0*vdx_brick_a4[mz][my][mx];
eky4 -= x0*vdy_brick_a4[mz][my][mx];
ekz4 -= x0*vdz_brick_a4[mz][my][mx];
ekx5 -= x0*vdx_brick_a5[mz][my][mx];
eky5 -= x0*vdy_brick_a5[mz][my][mx];
ekz5 -= x0*vdz_brick_a5[mz][my][mx];
ekx6 -= x0*vdx_brick_a6[mz][my][mx];
eky6 -= x0*vdy_brick_a6[mz][my][mx];
ekz6 -= x0*vdz_brick_a6[mz][my][mx];
}
}
}
// convert D-field to force
type = atom->type[i];
lj0 = B[7*type+6];
lj1 = B[7*type+5];
lj2 = B[7*type+4];
lj3 = B[7*type+3];
lj4 = B[7*type+2];
lj5 = B[7*type+1];
lj6 = B[7*type];
f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6;
f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6;
f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
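/* ----------------------------------------------------------------------
   sketch of the coefficient pairing above (helper name mixed_coeff is
   hypothetical, and the convolution reading is an inference from the
   code): brick k is filled with weight B[7*type+k] in make_rho_a(),
   while the force pairs ek_k with lj_k = B[7*type+6-k], so the
   arithmetic mixing rule effectively factors the pair dispersion
   coefficient into a 7-term convolution:
------------------------------------------------------------------------- */
static double mixed_coeff(const double *B, int itype, int jtype)
{
  double c = 0.0;
  for (int k = 0; k < 7; k++)         // pair coefficient from per-type terms
    c += B[7*itype+k]*B[7*jtype+6-k];
  return c;
}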
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for ad scheme and arithmetic mixing rule
------------------------------------------------------------------------- */
void PPPMDispOMP::fieldforce_a_ad()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const double * const * const x = atom->x;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
const double hx_inv = nx_pppm_6/xprd;
const double hy_inv = ny_pppm_6/yprd;
const double hz_inv = nz_pppm_6/zprd_slab;
#if defined(_OPENMP)
const int nthreads = comm->nthreads;
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
double * const * const f = thr->get_f();
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
FFT_SCALAR ekx6, eky6, ekz6;
int type;
double lj0,lj1,lj2,lj3,lj4,lj5,lj6;
double sf = 0.0;
double s1,s2,s3;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
compute_drho1d_thr(dr1d,dx,dy,dz, order_6, drho_coeff_6);
ekx0 = eky0 = ekz0 = ZEROF;
ekx1 = eky1 = ekz1 = ZEROF;
ekx2 = eky2 = ekz2 = ZEROF;
ekx3 = eky3 = ekz3 = ZEROF;
ekx4 = eky4 = ekz4 = ZEROF;
ekx5 = eky5 = ekz5 = ZEROF;
ekx6 = eky6 = ekz6 = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = dr1d[0][l]*r1d[1][m]*r1d[2][n];
y0 = r1d[0][l]*dr1d[1][m]*r1d[2][n];
z0 = r1d[0][l]*r1d[1][m]*dr1d[2][n];
ekx0 += x0*u_brick_a0[mz][my][mx];
eky0 += y0*u_brick_a0[mz][my][mx];
ekz0 += z0*u_brick_a0[mz][my][mx];
ekx1 += x0*u_brick_a1[mz][my][mx];
eky1 += y0*u_brick_a1[mz][my][mx];
ekz1 += z0*u_brick_a1[mz][my][mx];
ekx2 += x0*u_brick_a2[mz][my][mx];
eky2 += y0*u_brick_a2[mz][my][mx];
ekz2 += z0*u_brick_a2[mz][my][mx];
ekx3 += x0*u_brick_a3[mz][my][mx];
eky3 += y0*u_brick_a3[mz][my][mx];
ekz3 += z0*u_brick_a3[mz][my][mx];
ekx4 += x0*u_brick_a4[mz][my][mx];
eky4 += y0*u_brick_a4[mz][my][mx];
ekz4 += z0*u_brick_a4[mz][my][mx];
ekx5 += x0*u_brick_a5[mz][my][mx];
eky5 += y0*u_brick_a5[mz][my][mx];
ekz5 += z0*u_brick_a5[mz][my][mx];
ekx6 += x0*u_brick_a6[mz][my][mx];
eky6 += y0*u_brick_a6[mz][my][mx];
ekz6 += z0*u_brick_a6[mz][my][mx];
}
}
}
ekx0 *= hx_inv;
eky0 *= hy_inv;
ekz0 *= hz_inv;
ekx1 *= hx_inv;
eky1 *= hy_inv;
ekz1 *= hz_inv;
ekx2 *= hx_inv;
eky2 *= hy_inv;
ekz2 *= hz_inv;
ekx3 *= hx_inv;
eky3 *= hy_inv;
ekz3 *= hz_inv;
ekx4 *= hx_inv;
eky4 *= hy_inv;
ekz4 *= hz_inv;
ekx5 *= hx_inv;
eky5 *= hy_inv;
ekz5 *= hz_inv;
ekx6 *= hx_inv;
eky6 *= hy_inv;
ekz6 *= hz_inv;
// convert D-field to force
type = atom->type[i];
lj0 = B[7*type+6];
lj1 = B[7*type+5];
lj2 = B[7*type+4];
lj3 = B[7*type+3];
lj4 = B[7*type+2];
lj5 = B[7*type+1];
lj6 = B[7*type];
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf;
sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf;
sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
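/* ----------------------------------------------------------------------
   worked form of the self-force prefactor above: with
   lj_k = B[7*type+6-k], the factor
   4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3 equals
   2*sum_k lj_k*lj_(6-k), i.e. twice the atom's self dispersion
   coefficient, mirroring the 2*q_i*q_i scaling of the Coulomb ad
   scheme; a scalar sketch (name self_coeff is hypothetical):
------------------------------------------------------------------------- */
static double self_coeff(const double *lj)  // lj[0..6]
{
  double c = 0.0;
  for (int k = 0; k < 7; k++) c += lj[k]*lj[6-k];
  return 2.0*c;  // = 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3
}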
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial for dispersion
interaction and arithmetic mixing rule
------------------------------------------------------------------------- */
void PPPMDispOMP::fieldforce_a_peratom()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
const double * const * const x = atom->x;
#if defined(_OPENMP)
const int nthreads = comm->nthreads;
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u0,v00,v10,v20,v30,v40,v50;
FFT_SCALAR u1,v01,v11,v21,v31,v41,v51;
FFT_SCALAR u2,v02,v12,v22,v32,v42,v52;
FFT_SCALAR u3,v03,v13,v23,v33,v43,v53;
FFT_SCALAR u4,v04,v14,v24,v34,v44,v54;
FFT_SCALAR u5,v05,v15,v25,v35,v45,v55;
FFT_SCALAR u6,v06,v16,v26,v36,v46,v56;
int type;
double lj0,lj1,lj2,lj3,lj4,lj5,lj6;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
u0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF;
u1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF;
u2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF;
u3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF;
u4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF;
u5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF;
u6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
if (eflag_atom) {
u0 += x0*u_brick_a0[mz][my][mx];
u1 += x0*u_brick_a1[mz][my][mx];
u2 += x0*u_brick_a2[mz][my][mx];
u3 += x0*u_brick_a3[mz][my][mx];
u4 += x0*u_brick_a4[mz][my][mx];
u5 += x0*u_brick_a5[mz][my][mx];
u6 += x0*u_brick_a6[mz][my][mx];
}
if (vflag_atom) {
v00 += x0*v0_brick_a0[mz][my][mx];
v10 += x0*v1_brick_a0[mz][my][mx];
v20 += x0*v2_brick_a0[mz][my][mx];
v30 += x0*v3_brick_a0[mz][my][mx];
v40 += x0*v4_brick_a0[mz][my][mx];
v50 += x0*v5_brick_a0[mz][my][mx];
v01 += x0*v0_brick_a1[mz][my][mx];
v11 += x0*v1_brick_a1[mz][my][mx];
v21 += x0*v2_brick_a1[mz][my][mx];
v31 += x0*v3_brick_a1[mz][my][mx];
v41 += x0*v4_brick_a1[mz][my][mx];
v51 += x0*v5_brick_a1[mz][my][mx];
v02 += x0*v0_brick_a2[mz][my][mx];
v12 += x0*v1_brick_a2[mz][my][mx];
v22 += x0*v2_brick_a2[mz][my][mx];
v32 += x0*v3_brick_a2[mz][my][mx];
v42 += x0*v4_brick_a2[mz][my][mx];
v52 += x0*v5_brick_a2[mz][my][mx];
v03 += x0*v0_brick_a3[mz][my][mx];
v13 += x0*v1_brick_a3[mz][my][mx];
v23 += x0*v2_brick_a3[mz][my][mx];
v33 += x0*v3_brick_a3[mz][my][mx];
v43 += x0*v4_brick_a3[mz][my][mx];
v53 += x0*v5_brick_a3[mz][my][mx];
v04 += x0*v0_brick_a4[mz][my][mx];
v14 += x0*v1_brick_a4[mz][my][mx];
v24 += x0*v2_brick_a4[mz][my][mx];
v34 += x0*v3_brick_a4[mz][my][mx];
v44 += x0*v4_brick_a4[mz][my][mx];
v54 += x0*v5_brick_a4[mz][my][mx];
v05 += x0*v0_brick_a5[mz][my][mx];
v15 += x0*v1_brick_a5[mz][my][mx];
v25 += x0*v2_brick_a5[mz][my][mx];
v35 += x0*v3_brick_a5[mz][my][mx];
v45 += x0*v4_brick_a5[mz][my][mx];
v55 += x0*v5_brick_a5[mz][my][mx];
v06 += x0*v0_brick_a6[mz][my][mx];
v16 += x0*v1_brick_a6[mz][my][mx];
v26 += x0*v2_brick_a6[mz][my][mx];
v36 += x0*v3_brick_a6[mz][my][mx];
v46 += x0*v4_brick_a6[mz][my][mx];
v56 += x0*v5_brick_a6[mz][my][mx];
}
}
}
}
// per-atom energy/virial accumulation (0.5 factor folded into lj coefficients)
type = atom->type[i];
lj0 = B[7*type+6]*0.5;
lj1 = B[7*type+5]*0.5;
lj2 = B[7*type+4]*0.5;
lj3 = B[7*type+3]*0.5;
lj4 = B[7*type+2]*0.5;
lj5 = B[7*type+1]*0.5;
lj6 = B[7*type]*0.5;
if (eflag_atom)
eatom[i] += u0*lj0 + u1*lj1 + u2*lj2 +
u3*lj3 + u4*lj4 + u5*lj5 + u6*lj6;
if (vflag_atom) {
vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 +
v04*lj4 + v05*lj5 + v06*lj6;
vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 +
v14*lj4 + v15*lj5 + v16*lj6;
vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 +
v24*lj4 + v25*lj5 + v26*lj6;
vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 +
v34*lj4 + v35*lj5 + v36*lj6;
vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 +
v44*lj4 + v45*lj5 + v46*lj6;
vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 +
v54*lj4 + v55*lj5 + v56*lj6;
}
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
charge assignment into rho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPMDispOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
const FFT_SCALAR &dy, const FFT_SCALAR &dz,
const int ord, FFT_SCALAR * const * const rho_c)
{
int k,l;
FFT_SCALAR r1,r2,r3;
for (k = (1-ord)/2; k <= ord/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = ord-1; l >= 0; l--) {
r1 = rho_c[l][k] + r1*dx;
r2 = rho_c[l][k] + r2*dy;
r3 = rho_c[l][k] + r3*dz;
}
r1d[0][k] = r1;
r1d[1][k] = r2;
r1d[2][k] = r3;
}
}
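/* ----------------------------------------------------------------------
   sketch of the inner loop above (helper name horner_eval is
   hypothetical, and the flat coeff[] stands in for one column
   rho_c[.][k]): the weight for stencil offset k is the polynomial
   sum_l coeff[l]*d^l evaluated by Horner's rule at the fractional
   offsets d = dx,dy,dz
------------------------------------------------------------------------- */
static FFT_SCALAR horner_eval(const FFT_SCALAR *coeff, int nterms,
                              FFT_SCALAR d)
{
  FFT_SCALAR r = ZEROF;
  for (int l = nterms-1; l >= 0; l--)  // highest-order coefficient first
    r = coeff[l] + r*d;
  return r;
}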
/* ----------------------------------------------------------------------
charge assignment into drho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPMDispOMP::compute_drho1d_thr(FFT_SCALAR * const * const dr1d, const FFT_SCALAR &dx,
const FFT_SCALAR &dy, const FFT_SCALAR &dz,
const int ord, FFT_SCALAR * const * const drho_c)
{
int k,l;
FFT_SCALAR r1,r2,r3;
for (k = (1-ord)/2; k <= ord/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = ord-2; l >= 0; l--) {
r1 = drho_c[l][k] + r1*dx;
r2 = drho_c[l][k] + r2*dy;
r3 = drho_c[l][k] + r3*dz;
}
dr1d[0][k] = r1;
dr1d[1][k] = r2;
dr1d[2][k] = r3;
}
}
diff --git a/src/USER-OMP/pppm_disp_tip4p_omp.cpp b/src/USER-OMP/pppm_disp_tip4p_omp.cpp
index 6ee1c9fcf..6606c9602 100644
--- a/src/USER-OMP/pppm_disp_tip4p_omp.cpp
+++ b/src/USER-OMP/pppm_disp_tip4p_omp.cpp
@@ -1,1870 +1,1870 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "pppm_disp_tip4p_omp.h"
#include "atom.h"
#include "comm.h"
#include "domain.h"
#include "error.h"
#include "fix_omp.h"
#include "force.h"
#include "memory.h"
#include "math_const.h"
#include "math_special.h"
#include <string.h>
#include <math.h>
#include "suffix.h"
using namespace LAMMPS_NS;
using namespace MathConst;
using namespace MathSpecial;
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#else
#define ZEROF 0.0
#endif
#define OFFSET 16384
/* ---------------------------------------------------------------------- */
PPPMDispTIP4POMP::PPPMDispTIP4POMP(LAMMPS *lmp, int narg, char **arg) :
PPPMDispTIP4P(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
{
triclinic_support = 0;
tip4pflag = 1;
suffix_flag |= Suffix::OMP;
}
/* ----------------------------------------------------------------------
allocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::allocate()
{
PPPMDispTIP4P::allocate();
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
if (function[0]) {
ThrData *thr = fix->get_thr(tid);
thr->init_pppm(order,memory);
}
if (function[1] + function[2]) {
ThrData * thr = fix->get_thr(tid);
thr->init_pppm_disp(order_6,memory);
}
}
}
/* ----------------------------------------------------------------------
free memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::deallocate()
{
PPPMDispTIP4P::deallocate();
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
if (function[0]) {
ThrData * thr = fix->get_thr(tid);
thr->init_pppm(-order,memory);
}
if (function[1] + function[2]) {
ThrData * thr = fix->get_thr(tid);
thr->init_pppm_disp(-order_6,memory);
}
}
}
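/* ----------------------------------------------------------------------
   note on the paired calls above: allocate() passes +order/+order_6 and
   deallocate() passes the negated values to the same ThrData methods,
   so the sign of the order argument presumably selects between
   allocating and freeing the per-thread buffers (an assumption inferred
   from this pairing, not verified against ThrData itself)
------------------------------------------------------------------------- */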
/* ----------------------------------------------------------------------
Compute the modified (Hockney-Eastwood) Coulomb Green's function
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::compute_gf()
{
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
int tid,nn,nnfrom,nnto,k,l,m;
int kper,lper,mper;
double snx,sny,snz,snx2,sny2,snz2;
double sqk;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double numerator,denominator;
const int nnx = nxhi_fft-nxlo_fft+1;
const int nny = nyhi_fft-nylo_fft+1;
loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads);
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
qz = unitkz*mper;
snz = sin(0.5*qz*zprd_slab/nz_pppm);
snz2 = snz*snz;
sz = exp(-0.25*pow(qz/g_ewald,2.0));
wz = 1.0;
argz = 0.5*qz*zprd_slab/nz_pppm;
if (argz != 0.0) wz = pow(sin(argz)/argz,order);
wz *= wz;
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
qy = unitky*lper;
sny = sin(0.5*qy*yprd/ny_pppm);
sny2 = sny*sny;
sy = exp(-0.25*pow(qy/g_ewald,2.0));
wy = 1.0;
argy = 0.5*qy*yprd/ny_pppm;
if (argy != 0.0) wy = pow(sin(argy)/argy,order);
wy *= wy;
for (k = nxlo_fft; k <= nxhi_fft; k++) {
/* only compute the part designated to this thread */
nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft));
if ((nn < nnfrom) || (nn >= nnto)) continue;
kper = k - nx_pppm*(2*k/nx_pppm);
qx = unitkx*kper;
snx = sin(0.5*qx*xprd/nx_pppm);
snx2 = snx*snx;
sx = exp(-0.25*pow(qx/g_ewald,2.0));
wx = 1.0;
argx = 0.5*qx*xprd/nx_pppm;
if (argx != 0.0) wx = pow(sin(argx)/argx,order);
wx *= wx;
sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
if (sqk != 0.0) {
numerator = 4.0*MY_PI/sqk;
denominator = gf_denom(snx2,sny2,snz2, gf_b, order);
greensfn[nn] = numerator*sx*sy*sz*wx*wy*wz/denominator;
} else greensfn[nn] = 0.0;
}
}
}
}
}
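/* ----------------------------------------------------------------------
   sketch of the index linearization above (helper name flat_index is
   hypothetical): the (k,l,m) FFT grid point maps to a 1-D greensfn
   offset with x fastest, which is what lets each thread claim the
   contiguous slice [nnfrom,nnto)
------------------------------------------------------------------------- */
static int flat_index(int k, int l, int m, int kxlo, int kylo, int kzlo,
                      int nnx, int nny)
{
  return (k-kxlo) + nnx*((l-kylo) + nny*(m-kzlo));  // matches nn above
}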
/* ----------------------------------------------------------------------
Compute the modified (Hockney-Eastwood) dispersion Green's function
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::compute_gf_6()
{
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
double *prd;
int k,l,m,nn;
// volume-dependent factors
// adjust z dimension for 2d slab PPPM
// z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
int kper,lper,mper;
double sqk;
double snx,sny,snz,snx2,sny2,snz2;
double argx,argy,argz,wx,wy,wz,sx,sy,sz;
double qx,qy,qz;
double rtsqk, term;
double numerator,denominator;
const double inv2ew = 1.0/(2.0*g_ewald_6);
double rtpi = sqrt(MY_PI);
int nnfrom, nnto, tid;
numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0);
const int nnx = nxhi_fft_6-nxlo_fft_6+1;
const int nny = nyhi_fft_6-nylo_fft_6+1;
loop_setup_thr(nnfrom, nnto, tid, nfft_6, comm->nthreads);
for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
mper = m - nz_pppm_6*(2*m/nz_pppm_6);
qz = unitkz*mper;
snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6);
snz2 = snz*snz;
sz = exp(-qz*qz*inv2ew*inv2ew);
wz = 1.0;
argz = 0.5*qz*zprd_slab/nz_pppm_6;
if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
wz *= wz;
for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
lper = l - ny_pppm_6*(2*l/ny_pppm_6);
qy = unitky*lper;
sny = sin(0.5*unitky*lper*yprd/ny_pppm_6);
sny2 = sny*sny;
sy = exp(-qy*qy*inv2ew*inv2ew);
wy = 1.0;
argy = 0.5*qy*yprd/ny_pppm_6;
if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
wy *= wy;
for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
/* only compute the part designated to this thread */
nn = k-nxlo_fft_6 + nnx*(l-nylo_fft_6 + nny*(m-nzlo_fft_6));
if ((nn < nnfrom) || (nn >= nnto)) continue;
kper = k - nx_pppm_6*(2*k/nx_pppm_6);
qx = unitkx*kper;
snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6);
snx2 = snx*snx;
sx = exp(-qx*qx*inv2ew*inv2ew);
wx = 1.0;
argx = 0.5*qx*xprd/nx_pppm_6;
if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
wx *= wx;
sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6);
rtsqk = sqrt(sqk);
term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
greensfn_6[nn] = numerator*term*wx*wy*wz/denominator;
}
}
}
}
}
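/* ----------------------------------------------------------------------
   worked form of the dispersion influence function above: with
   b = inv2ew = 1/(2*g_ewald_6), kmag = sqrt(sqk) and S = sx*sy*sz,
     term = (1 - 2*sqk*b^2)*S + 2*sqk*kmag*b^3*sqrt(pi)*erfc(kmag*b)
   which is then scaled by numerator = -pi^(3/2)*g_ewald_6^3/3 and
   divided by the assignment-function denominator; a scalar sketch
   (name disp_gf_term is hypothetical):
------------------------------------------------------------------------- */
static double disp_gf_term(double sqk, double b, double S)
{
  const double kmag = sqrt(sqk);
  return (1.0 - 2.0*sqk*b*b)*S
    + 2.0*sqk*kmag*b*b*b*sqrt(MY_PI)*erfc(kmag*b);
}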
/* ----------------------------------------------------------------------
run the regular top-level compute method from plain PPPM, which will
have individual methods replaced by our threaded versions, and then
call the obligatory force reduction
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::compute(int eflag, int vflag)
{
PPPMDispTIP4P::compute(eflag,vflag);
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::particle_map_c(double dxinv, double dyinv,
double dzinv, double sft,
int ** part2grid, int nup,
int nlw, int nxlo_o,
int nylo_o, int nzlo_o,
int nxhi_o, int nyhi_o,
int nzhi_o)
{
// no local atoms => nothing to do
if (atom->nlocal == 0) return;
const int * _noalias const type = atom->type;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
const int nlocal = atom->nlocal;
const double delxinv = dxinv;
const double delyinv = dyinv;
const double delzinv = dzinv;
const double shift = sft;
const int nupper = nup;
const int nlower = nlw;
const int nxlo_out = nxlo_o;
const int nylo_out = nylo_o;
const int nzlo_out = nzlo_o;
const int nxhi_out = nxhi_o;
const int nyhi_out = nyhi_o;
const int nzhi_out = nzhi_o;
- if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
+ if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
int i, flag = 0;
#if defined(_OPENMP)
#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static)
#endif
for (i = 0; i < nlocal; i++) {
dbl3_t xM;
int iH1,iH2;
if (type[i] == typeO) {
find_M_thr(i,iH1,iH2,xM);
} else {
xM = x[i];
}
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
// add/subtract OFFSET to avoid int(-0.75) = 0 when we want it to be -1
const int nx = static_cast<int> ((xM.x-boxlox)*delxinv+shift) - OFFSET;
const int ny = static_cast<int> ((xM.y-boxloy)*delyinv+shift) - OFFSET;
const int nz = static_cast<int> ((xM.z-boxloz)*delzinv+shift) - OFFSET;
p2g[i].a = nx;
p2g[i].b = ny;
p2g[i].t = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
nz+nlower < nzlo_out || nz+nupper > nzhi_out)
flag++;
}
int flag_all;
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM");
}
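/* ----------------------------------------------------------------------
   sketch of the OFFSET trick above (helper name grid_index is
   hypothetical; shift is assumed to already carry the +OFFSET bias, as
   in the caller): static_cast<int> truncates toward zero, so
   int(-0.75) == 0 where the stencil logic needs -1; biasing by a large
   positive OFFSET before the cast and subtracting it afterwards makes
   the truncation act like floor() for all coordinates of interest
------------------------------------------------------------------------- */
static int grid_index(double coord, double lo, double delinv, double shift)
{
  return static_cast<int>((coord-lo)*delinv + shift) - OFFSET;
}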
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::particle_map(double dxinv, double dyinv,
double dzinv, double sft,
int ** part2grid, int nup,
int nlw, int nxlo_o,
int nylo_o, int nzlo_o,
int nxhi_o, int nyhi_o,
int nzhi_o)
{
// no local atoms => nothing to do
if (atom->nlocal == 0) return;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
const int nlocal = atom->nlocal;
const double delxinv = dxinv;
const double delyinv = dyinv;
const double delzinv = dzinv;
const double shift = sft;
const int nupper = nup;
const int nlower = nlw;
const int nxlo_out = nxlo_o;
const int nylo_out = nylo_o;
const int nzlo_out = nzlo_o;
const int nxhi_out = nxhi_o;
const int nyhi_out = nyhi_o;
const int nzhi_out = nzhi_o;
int i, flag = 0;
#if defined(_OPENMP)
#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static)
#endif
for (i = 0; i < nlocal; i++) {
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
// add/subtract OFFSET to avoid int(-0.75) = 0 when we want it to be -1
const int nx = static_cast<int> ((x[i].x-boxlox)*delxinv+shift) - OFFSET;
const int ny = static_cast<int> ((x[i].y-boxloy)*delyinv+shift) - OFFSET;
const int nz = static_cast<int> ((x[i].z-boxloz)*delzinv+shift) - OFFSET;
p2g[i].a = nx;
p2g[i].b = ny;
p2g[i].t = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
nz+nlower < nzlo_out || nz+nupper > nzhi_out)
flag++;
}
int flag_all;
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM");
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::make_rho_c()
{
// clear 3d density array
FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]);
memset(d,0,ngrid*sizeof(FFT_SCALAR));
// no local atoms => nothing else to do
const int nlocal = atom->nlocal;
if (nlocal == 0) return;
const int ix = nxhi_out - nxlo_out + 1;
const int iy = nyhi_out - nylo_out + 1;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
const double * _noalias const q = atom->q;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const int * _noalias const type = atom->type;
dbl3_t xM;
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
// determine range of grid points handled by this thread
int i,jfrom,jto,tid,iH1,iH2;
loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// loop over all local atoms for all threads
for (i = 0; i < nlocal; i++) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
// pre-screen whether this atom will ever come within
// reach of the data segment this thread is updating.
if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
|| ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;
if (type[i] == typeO) {
find_M_thr(i,iH1,iH2,xM);
} else {
xM = x[i];
}
const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz,order,rho_coeff);
const FFT_SCALAR z0 = delvolinv * q[i];
for (int n = nlower; n <= nupper; ++n) {
const int jn = (nz+n-nzlo_out)*ix*iy;
const FFT_SCALAR y0 = z0*r1d[2][n];
for (int m = nlower; m <= nupper; ++m) {
const int jm = jn+(ny+m-nylo_out)*ix;
const FFT_SCALAR x0 = y0*r1d[1][m];
for (int l = nlower; l <= nupper; ++l) {
const int jl = jm+nx+l-nxlo_out;
// make sure each thread only updates
// "his" elements of the density grid
if (jl >= jto) break;
if (jl < jfrom) continue;
d[jl] += x0*r1d[0][l];
}
}
}
}
}
}
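/* ----------------------------------------------------------------------
   sketch of the pre-screen above (helper name stencil_overlaps is
   hypothetical): an atom whose center z-index is nz only touches flat
   density indices in
   [ (nz+nlower-nzlo_out)*ix*iy, (nz+nupper-nzlo_out+1)*ix*iy ),
   so atoms whose window misses this thread's [jfrom,jto) slice are
   skipped before any interpolation work is done
------------------------------------------------------------------------- */
static bool stencil_overlaps(int nz, int nlower, int nupper, int nzlo_out,
                             int ix, int iy, int jfrom, int jto)
{
  const int lo = (nz+nlower-nzlo_out)*ix*iy;
  const int hi = (nz+nupper-nzlo_out+1)*ix*iy;
  return (lo < jto) && (hi >= jfrom);  // negation of the skip test above
}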
/* ----------------------------------------------------------------------
same as above for dispersion interaction with geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::make_rho_g()
{
// clear 3d density array
FFT_SCALAR * _noalias const d = &(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]);
memset(d,0,ngrid_6*sizeof(FFT_SCALAR));
// no local atoms => nothing else to do
const int nlocal = atom->nlocal;
if (nlocal == 0) return;
const int ix = nxhi_out_6 - nxlo_out_6 + 1;
const int iy = nyhi_out_6 - nylo_out_6 + 1;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0];
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
// determine range of grid points handled by this thread
int i,jfrom,jto,tid;
loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// loop over all local atoms for all threads
for (i = 0; i < nlocal; i++) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
// pre-screen whether this atom will ever come within
// reach of the data segment this thread is updating.
if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto)
|| ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue;
const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6;
const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6;
const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);
const int type = atom->type[i];
const double lj = B[type];
const FFT_SCALAR z0 = delvolinv_6 * lj;
for (int n = nlower_6; n <= nupper_6; ++n) {
const int jn = (nz+n-nzlo_out_6)*ix*iy;
const FFT_SCALAR y0 = z0*r1d[2][n];
for (int m = nlower_6; m <= nupper_6; ++m) {
const int jm = jn+(ny+m-nylo_out_6)*ix;
const FFT_SCALAR x0 = y0*r1d[1][m];
for (int l = nlower_6; l <= nupper_6; ++l) {
const int jl = jm+nx+l-nxlo_out_6;
// make sure each thread only updates
// "his" elements of the density grid
if (jl >= jto) break;
if (jl < jfrom) continue;
d[jl] += x0*r1d[0][l];
}
}
}
}
}
}
/* ----------------------------------------------------------------------
same as above for dispersion interaction with arithmetic mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::make_rho_a()
{
// clear 3d density array
FFT_SCALAR * _noalias const d0 = &(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]);
FFT_SCALAR * _noalias const d1 = &(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]);
FFT_SCALAR * _noalias const d2 = &(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]);
FFT_SCALAR * _noalias const d3 = &(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]);
FFT_SCALAR * _noalias const d4 = &(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]);
FFT_SCALAR * _noalias const d5 = &(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]);
FFT_SCALAR * _noalias const d6 = &(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]);
memset(d0,0,ngrid_6*sizeof(FFT_SCALAR));
memset(d1,0,ngrid_6*sizeof(FFT_SCALAR));
memset(d2,0,ngrid_6*sizeof(FFT_SCALAR));
memset(d3,0,ngrid_6*sizeof(FFT_SCALAR));
memset(d4,0,ngrid_6*sizeof(FFT_SCALAR));
memset(d5,0,ngrid_6*sizeof(FFT_SCALAR));
memset(d6,0,ngrid_6*sizeof(FFT_SCALAR));
// no local atoms => nothing else to do
const int nlocal = atom->nlocal;
if (nlocal == 0) return;
const int ix = nxhi_out_6 - nxlo_out_6 + 1;
const int iy = nyhi_out_6 - nylo_out_6 + 1;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0];
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
// determine range of grid points handled by this thread
int i,jfrom,jto,tid;
loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// loop over all local atoms for all threads
for (i = 0; i < nlocal; i++) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
// pre-screen whether this atom will ever come within
// reach of the data segment this thread is updating.
if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto)
|| ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue;
const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6;
const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6;
const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);
const int type = atom->type[i];
const double lj0 = B[7*type];
const double lj1 = B[7*type+1];
const double lj2 = B[7*type+2];
const double lj3 = B[7*type+3];
const double lj4 = B[7*type+4];
const double lj5 = B[7*type+5];
const double lj6 = B[7*type+6];
const FFT_SCALAR z0 = delvolinv_6;
for (int n = nlower_6; n <= nupper_6; ++n) {
const int jn = (nz+n-nzlo_out_6)*ix*iy;
const FFT_SCALAR y0 = z0*r1d[2][n];
for (int m = nlower_6; m <= nupper_6; ++m) {
const int jm = jn+(ny+m-nylo_out_6)*ix;
const FFT_SCALAR x0 = y0*r1d[1][m];
for (int l = nlower_6; l <= nupper_6; ++l) {
const int jl = jm+nx+l-nxlo_out_6;
// make sure each thread only updates
// "his" elements of the density grid
if (jl >= jto) break;
if (jl < jfrom) continue;
const double w = x0*r1d[0][l];
d0[jl] += w*lj0;
d1[jl] += w*lj1;
d2[jl] += w*lj2;
d3[jl] += w*lj3;
d4[jl] += w*lj4;
d5[jl] += w*lj5;
d6[jl] += w*lj6;
}
}
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ik
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_c_ik()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const double * _noalias const q = atom->q;
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const int * _noalias const type = atom->type;
const double qqrd2e = force->qqrd2e;
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
dbl3_t xM;
FFT_SCALAR x0,y0,z0,ekx,eky,ekz;
int i,ifrom,ito,tid,iH1,iH2,l,m,n,mx,my,mz;
loop_setup_thr(ifrom,ito,tid,nlocal,comm->nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
for (i = ifrom; i < ito; ++i) {
if (type[i] == typeO) {
find_M_thr(i,iH1,iH2,xM);
} else xM = x[i];
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = qqrd2e * scale * q[i];
if (type[i] != typeO) {
f[i].x += qfactor*ekx;
f[i].y += qfactor*eky;
if (slabflag != 2) f[i].z += qfactor*ekz;
} else {
const double fx = qfactor * ekx;
const double fy = qfactor * eky;
const double fz = qfactor * ekz;
f[i].x += fx*(1 - alpha);
f[i].y += fy*(1 - alpha);
if (slabflag != 2) f[i].z += fz*(1 - alpha);
f[iH1].x += 0.5*alpha*fx;
f[iH1].y += 0.5*alpha*fy;
if (slabflag != 2) f[iH1].z += 0.5*alpha*fz;
f[iH2].x += 0.5*alpha*fx;
f[iH2].y += 0.5*alpha*fy;
if (slabflag != 2) f[iH2].z += 0.5*alpha*fz;
}
}
} // end of parallel region
}
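/* ----------------------------------------------------------------------
   sketch of the TIP4P force projection above (helper name
   project_m_force is hypothetical): the field is sampled at the
   massless M site, and the resulting force component fM is
   redistributed so the oxygen keeps a (1-alpha) share while each
   hydrogen receives alpha/2
------------------------------------------------------------------------- */
static void project_m_force(double fM, double alpha,
                            double &fO, double &fH)
{
  fO = (1.0 - alpha)*fM;  // oxygen share
  fH = 0.5*alpha*fM;      // each hydrogen's share
}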
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ad
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_c_ad()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
const double hx_inv = nx_pppm/prd[0];
const double hy_inv = ny_pppm/prd[1];
const double hz_inv = nz_pppm/prd[2];
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const double * _noalias const q = atom->q;
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const int * _noalias const type = atom->type;
const double qqrd2e = force->qqrd2e;
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
double s1,s2,s3,sf;
dbl3_t xM;
FFT_SCALAR ekx,eky,ekz;
int i,ifrom,ito,tid,iH1,iH2,l,m,n,mx,my,mz;
loop_setup_thr(ifrom,ito,tid,nlocal,comm->nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
for (i = ifrom; i < ito; ++i) {
if (type[i] == typeO) {
find_M_thr(i,iH1,iH2,xM);
} else xM = x[i];
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz,order,rho_coeff);
compute_drho1d_thr(d1d,dx,dy,dz,order,drho_coeff);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
for (m = nlower; m <= nupper; m++) {
my = m+ny;
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
ekx += d1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
eky += r1d[0][l]*d1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
ekz += r1d[0][l]*r1d[1][m]*d1d[2][n]*u_brick[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
// convert E-field to force and subtract self forces
const double qi = q[i];
const double qfactor = qqrd2e * scale * qi;
s1 = x[i].x*hx_inv;
sf = sf_coeff[0]*sin(MY_2PI*s1);
sf += sf_coeff[1]*sin(MY_4PI*s1);
sf *= 2.0*qi;
const double fx = qfactor*(ekx - sf);
s2 = x[i].y*hy_inv;
sf = sf_coeff[2]*sin(MY_2PI*s2);
sf += sf_coeff[3]*sin(MY_4PI*s2);
sf *= 2.0*qi;
const double fy = qfactor*(eky - sf);
s3 = x[i].z*hz_inv;
sf = sf_coeff[4]*sin(MY_2PI*s3);
sf += sf_coeff[5]*sin(MY_4PI*s3);
sf *= 2.0*qi;
const double fz = qfactor*(ekz - sf);
if (type[i] != typeO) {
f[i].x += fx;
f[i].y += fy;
if (slabflag != 2) f[i].z += fz;
} else {
f[i].x += fx*(1 - alpha);
f[i].y += fy*(1 - alpha);
if (slabflag != 2) f[i].z += fz*(1 - alpha);
f[iH1].x += 0.5*alpha*fx;
f[iH1].y += 0.5*alpha*fy;
if (slabflag != 2) f[iH1].z += 0.5*alpha*fz;
f[iH2].x += 0.5*alpha*fx;
f[iH2].y += 0.5*alpha*fy;
if (slabflag != 2) f[iH2].z += 0.5*alpha*fz;
}
}
} // end of parallel region
}
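/* ----------------------------------------------------------------------
   sketch of the per-dimension self-force term subtracted above (helper
   name self_force is hypothetical): s is the particle coordinate in
   grid units, c2/c4 are the matching pair from sf_coeff[], and qi is
   the particle charge
------------------------------------------------------------------------- */
static double self_force(double s, double c2, double c4, double qi)
{
  return (c2*sin(MY_2PI*s) + c4*sin(MY_4PI*s)) * 2.0*qi;
}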
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for ik scheme and geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_g_ik()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const double * const * const x = atom->x;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/comm->nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
double * const * const f = thr->get_f();
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
int type;
double lj;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
ekx = eky = ekz = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
ekx -= x0*vdx_brick_g[mz][my][mx];
eky -= x0*vdy_brick_g[mz][my][mx];
ekz -= x0*vdz_brick_g[mz][my][mx];
}
}
}
// convert E-field to force
type = atom->type[i];
lj = B[type];
f[i][0] += lj*ekx;
f[i][1] += lj*eky;
f[i][2] += lj*ekz;
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for ad scheme and geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_g_ad()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const double * const * const x = atom->x;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
const double hx_inv = nx_pppm_6/xprd;
const double hy_inv = ny_pppm_6/yprd;
const double hz_inv = nz_pppm_6/zprd_slab;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/comm->nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
double * const * const f = thr->get_f();
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz;
FFT_SCALAR ekx,eky,ekz;
int type;
double lj;
double sf = 0.0;
double s1,s2,s3;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
compute_drho1d_thr(dr1d,dx,dy,dz, order_6, drho_coeff_6);
ekx = eky = ekz = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
ekx += dr1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx];
eky += r1d[0][l]*dr1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx];
ekz += r1d[0][l]*r1d[1][m]*dr1d[2][n]*u_brick_g[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
// convert E-field to force
type = atom->type[i];
lj = B[type];
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
sf *= 2*lj*lj;
f[i][0] += ekx*lj - sf;
sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
sf *= 2*lj*lj;
f[i][1] += eky*lj - sf;
sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
sf *= 2*lj*lj;
if (slabflag != 2) f[i][2] += ekz*lj - sf;
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial for dispersion
interaction and geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_g_peratom()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
const double * const * const x = atom->x;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/comm->nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
int type;
double lj;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
if (eflag_atom) u += x0*u_brick_g[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick_g[mz][my][mx];
v1 += x0*v1_brick_g[mz][my][mx];
v2 += x0*v2_brick_g[mz][my][mx];
v3 += x0*v3_brick_g[mz][my][mx];
v4 += x0*v4_brick_g[mz][my][mx];
v5 += x0*v5_brick_g[mz][my][mx];
}
}
}
}
type = atom->type[i];
lj = B[type]*0.5;
if (eflag_atom) eatom[i] += u*lj;
if (vflag_atom) {
vatom[i][0] += v0*lj;
vatom[i][1] += v1*lj;
vatom[i][2] += v2*lj;
vatom[i][3] += v3*lj;
vatom[i][4] += v4*lj;
vatom[i][5] += v5*lj;
}
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for ik scheme and arithmetic mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_a_ik()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const double * const * const x = atom->x;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/comm->nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
double * const * const f = thr->get_f();
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
FFT_SCALAR ekx6, eky6, ekz6;
int type;
double lj0,lj1,lj2,lj3,lj4,lj5,lj6;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
ekx0 = eky0 = ekz0 = ZEROF;
ekx1 = eky1 = ekz1 = ZEROF;
ekx2 = eky2 = ekz2 = ZEROF;
ekx3 = eky3 = ekz3 = ZEROF;
ekx4 = eky4 = ekz4 = ZEROF;
ekx5 = eky5 = ekz5 = ZEROF;
ekx6 = eky6 = ekz6 = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
ekx0 -= x0*vdx_brick_a0[mz][my][mx];
eky0 -= x0*vdy_brick_a0[mz][my][mx];
ekz0 -= x0*vdz_brick_a0[mz][my][mx];
ekx1 -= x0*vdx_brick_a1[mz][my][mx];
eky1 -= x0*vdy_brick_a1[mz][my][mx];
ekz1 -= x0*vdz_brick_a1[mz][my][mx];
ekx2 -= x0*vdx_brick_a2[mz][my][mx];
eky2 -= x0*vdy_brick_a2[mz][my][mx];
ekz2 -= x0*vdz_brick_a2[mz][my][mx];
ekx3 -= x0*vdx_brick_a3[mz][my][mx];
eky3 -= x0*vdy_brick_a3[mz][my][mx];
ekz3 -= x0*vdz_brick_a3[mz][my][mx];
ekx4 -= x0*vdx_brick_a4[mz][my][mx];
eky4 -= x0*vdy_brick_a4[mz][my][mx];
ekz4 -= x0*vdz_brick_a4[mz][my][mx];
ekx5 -= x0*vdx_brick_a5[mz][my][mx];
eky5 -= x0*vdy_brick_a5[mz][my][mx];
ekz5 -= x0*vdz_brick_a5[mz][my][mx];
ekx6 -= x0*vdx_brick_a6[mz][my][mx];
eky6 -= x0*vdy_brick_a6[mz][my][mx];
ekz6 -= x0*vdz_brick_a6[mz][my][mx];
}
}
}
// convert D-field to force
type = atom->type[i];
lj0 = B[7*type+6];
lj1 = B[7*type+5];
lj2 = B[7*type+4];
lj3 = B[7*type+3];
lj4 = B[7*type+2];
lj5 = B[7*type+1];
lj6 = B[7*type];
f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6;
f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6;
f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for ad scheme and arithmetic mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_a_ad()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const double * const * const x = atom->x;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
const double hx_inv = nx_pppm_6/xprd;
const double hy_inv = ny_pppm_6/yprd;
const double hz_inv = nz_pppm_6/zprd_slab;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/comm->nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
double * const * const f = thr->get_f();
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6());
int l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
FFT_SCALAR ekx6, eky6, ekz6;
int type;
double lj0,lj1,lj2,lj3,lj4,lj5,lj6;
double sf = 0.0;
double s1,s2,s3;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (int i = ifrom; i < ito; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
compute_drho1d_thr(dr1d,dx,dy,dz, order_6, drho_coeff_6);
ekx0 = eky0 = ekz0 = ZEROF;
ekx1 = eky1 = ekz1 = ZEROF;
ekx2 = eky2 = ekz2 = ZEROF;
ekx3 = eky3 = ekz3 = ZEROF;
ekx4 = eky4 = ekz4 = ZEROF;
ekx5 = eky5 = ekz5 = ZEROF;
ekx6 = eky6 = ekz6 = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = dr1d[0][l]*r1d[1][m]*r1d[2][n];
y0 = r1d[0][l]*dr1d[1][m]*r1d[2][n];
z0 = r1d[0][l]*r1d[1][m]*dr1d[2][n];
ekx0 += x0*u_brick_a0[mz][my][mx];
eky0 += y0*u_brick_a0[mz][my][mx];
ekz0 += z0*u_brick_a0[mz][my][mx];
ekx1 += x0*u_brick_a1[mz][my][mx];
eky1 += y0*u_brick_a1[mz][my][mx];
ekz1 += z0*u_brick_a1[mz][my][mx];
ekx2 += x0*u_brick_a2[mz][my][mx];
eky2 += y0*u_brick_a2[mz][my][mx];
ekz2 += z0*u_brick_a2[mz][my][mx];
ekx3 += x0*u_brick_a3[mz][my][mx];
eky3 += y0*u_brick_a3[mz][my][mx];
ekz3 += z0*u_brick_a3[mz][my][mx];
ekx4 += x0*u_brick_a4[mz][my][mx];
eky4 += y0*u_brick_a4[mz][my][mx];
ekz4 += z0*u_brick_a4[mz][my][mx];
ekx5 += x0*u_brick_a5[mz][my][mx];
eky5 += y0*u_brick_a5[mz][my][mx];
ekz5 += z0*u_brick_a5[mz][my][mx];
ekx6 += x0*u_brick_a6[mz][my][mx];
eky6 += y0*u_brick_a6[mz][my][mx];
ekz6 += z0*u_brick_a6[mz][my][mx];
}
}
}
ekx0 *= hx_inv;
eky0 *= hy_inv;
ekz0 *= hz_inv;
ekx1 *= hx_inv;
eky1 *= hy_inv;
ekz1 *= hz_inv;
ekx2 *= hx_inv;
eky2 *= hy_inv;
ekz2 *= hz_inv;
ekx3 *= hx_inv;
eky3 *= hy_inv;
ekz3 *= hz_inv;
ekx4 *= hx_inv;
eky4 *= hy_inv;
ekz4 *= hz_inv;
ekx5 *= hx_inv;
eky5 *= hy_inv;
ekz5 *= hz_inv;
ekx6 *= hx_inv;
eky6 *= hy_inv;
ekz6 *= hz_inv;
// convert D-field to force
type = atom->type[i];
lj0 = B[7*type+6];
lj1 = B[7*type+5];
lj2 = B[7*type+4];
lj3 = B[7*type+3];
lj4 = B[7*type+2];
lj5 = B[7*type+1];
lj6 = B[7*type];
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
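        // ad scheme self-force correction, applied per dimension below;
        // the prefactor combines the 7 per-type dispersion coefficients pairwise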
sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf;
sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf;
sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial for dispersion
interaction and arithmetic mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_a_peratom()
{
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
const double * const * const x = atom->x;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = nlocal;
const int idelta = 1 + inum/comm->nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = nlocal;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u0,v00,v10,v20,v30,v40,v50;
FFT_SCALAR u1,v01,v11,v21,v31,v41,v51;
FFT_SCALAR u2,v02,v12,v22,v32,v42,v52;
FFT_SCALAR u3,v03,v13,v23,v33,v43,v53;
FFT_SCALAR u4,v04,v14,v24,v34,v44,v54;
FFT_SCALAR u5,v05,v15,v25,v35,v45,v55;
FFT_SCALAR u6,v06,v16,v26,v36,v46,v56;
int type;
double lj0,lj1,lj2,lj3,lj4,lj5,lj6;
// this if protects against having more threads than local atoms
if (ifrom < nlocal) {
for (i = ifrom; i < ito; i++) {
nx = part2grid_6[i][0];
ny = part2grid_6[i][1];
nz = part2grid_6[i][2];
dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6);
u0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF;
u1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF;
u2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF;
u3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF;
u4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF;
u5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF;
u6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF;
for (n = nlower_6; n <= nupper_6; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower_6; m <= nupper_6; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower_6; l <= nupper_6; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
if (eflag_atom) {
u0 += x0*u_brick_a0[mz][my][mx];
u1 += x0*u_brick_a1[mz][my][mx];
u2 += x0*u_brick_a2[mz][my][mx];
u3 += x0*u_brick_a3[mz][my][mx];
u4 += x0*u_brick_a4[mz][my][mx];
u5 += x0*u_brick_a5[mz][my][mx];
u6 += x0*u_brick_a6[mz][my][mx];
}
if (vflag_atom) {
v00 += x0*v0_brick_a0[mz][my][mx];
v10 += x0*v1_brick_a0[mz][my][mx];
v20 += x0*v2_brick_a0[mz][my][mx];
v30 += x0*v3_brick_a0[mz][my][mx];
v40 += x0*v4_brick_a0[mz][my][mx];
v50 += x0*v5_brick_a0[mz][my][mx];
v01 += x0*v0_brick_a1[mz][my][mx];
v11 += x0*v1_brick_a1[mz][my][mx];
v21 += x0*v2_brick_a1[mz][my][mx];
v31 += x0*v3_brick_a1[mz][my][mx];
v41 += x0*v4_brick_a1[mz][my][mx];
v51 += x0*v5_brick_a1[mz][my][mx];
v02 += x0*v0_brick_a2[mz][my][mx];
v12 += x0*v1_brick_a2[mz][my][mx];
v22 += x0*v2_brick_a2[mz][my][mx];
v32 += x0*v3_brick_a2[mz][my][mx];
v42 += x0*v4_brick_a2[mz][my][mx];
v52 += x0*v5_brick_a2[mz][my][mx];
v03 += x0*v0_brick_a3[mz][my][mx];
v13 += x0*v1_brick_a3[mz][my][mx];
v23 += x0*v2_brick_a3[mz][my][mx];
v33 += x0*v3_brick_a3[mz][my][mx];
v43 += x0*v4_brick_a3[mz][my][mx];
v53 += x0*v5_brick_a3[mz][my][mx];
v04 += x0*v0_brick_a4[mz][my][mx];
v14 += x0*v1_brick_a4[mz][my][mx];
v24 += x0*v2_brick_a4[mz][my][mx];
v34 += x0*v3_brick_a4[mz][my][mx];
v44 += x0*v4_brick_a4[mz][my][mx];
v54 += x0*v5_brick_a4[mz][my][mx];
v05 += x0*v0_brick_a5[mz][my][mx];
v15 += x0*v1_brick_a5[mz][my][mx];
v25 += x0*v2_brick_a5[mz][my][mx];
v35 += x0*v3_brick_a5[mz][my][mx];
v45 += x0*v4_brick_a5[mz][my][mx];
v55 += x0*v5_brick_a5[mz][my][mx];
v06 += x0*v0_brick_a6[mz][my][mx];
v16 += x0*v1_brick_a6[mz][my][mx];
v26 += x0*v2_brick_a6[mz][my][mx];
v36 += x0*v3_brick_a6[mz][my][mx];
v46 += x0*v4_brick_a6[mz][my][mx];
v56 += x0*v5_brick_a6[mz][my][mx];
}
}
}
}
// accumulate per-atom energy/virial from the per-type dispersion coefficients
type = atom->type[i];
lj0 = B[7*type+6]*0.5;
lj1 = B[7*type+5]*0.5;
lj2 = B[7*type+4]*0.5;
lj3 = B[7*type+3]*0.5;
lj4 = B[7*type+2]*0.5;
lj5 = B[7*type+1]*0.5;
lj6 = B[7*type]*0.5;
if (eflag_atom)
eatom[i] += u0*lj0 + u1*lj1 + u2*lj2 +
u3*lj3 + u4*lj4 + u5*lj5 + u6*lj6;
if (vflag_atom) {
vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 +
v04*lj4 + v05*lj5 + v06*lj6;
vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 +
v14*lj4 + v15*lj5 + v16*lj6;
vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 +
v24*lj4 + v25*lj5 + v26*lj6;
vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 +
v34*lj4 + v35*lj5 + v36*lj6;
vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 +
v44*lj4 + v45*lj5 + v46*lj6;
vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 +
v54*lj4 + v55*lj5 + v56*lj6;
}
}
}
}
}
/* ----------------------------------------------------------------------
charge assignment into rho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
const FFT_SCALAR &dy, const FFT_SCALAR &dz,
const int ord, FFT_SCALAR * const * const rho_c)
{
int k,l;
FFT_SCALAR r1,r2,r3;
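  // k runs over the stencil offsets; the inner loop evaluates the
  // order-ord assignment polynomial at dx,dy,dz via Horner's rule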
for (k = (1-ord)/2; k <= ord/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = ord-1; l >= 0; l--) {
r1 = rho_c[l][k] + r1*dx;
r2 = rho_c[l][k] + r2*dy;
r3 = rho_c[l][k] + r3*dz;
}
r1d[0][k] = r1;
r1d[1][k] = r2;
r1d[2][k] = r3;
}
}
/* ----------------------------------------------------------------------
charge assignment into drho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::compute_drho1d_thr(FFT_SCALAR * const * const dr1d, const FFT_SCALAR &dx,
const FFT_SCALAR &dy, const FFT_SCALAR &dz,
const int ord, FFT_SCALAR * const * const drho_c)
{
int k,l;
FFT_SCALAR r1,r2,r3;
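  // derivative weights: the polynomial is one degree lower than for rho1d,
  // hence l starts at ord-2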
for (k = (1-ord)/2; k <= ord/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = ord-2; l >= 0; l--) {
r1 = drho_c[l][k] + r1*dx;
r2 = drho_c[l][k] + r2*dy;
r3 = drho_c[l][k] + r3*dz;
}
dr1d[0][k] = r1;
dr1d[1][k] = r2;
dr1d[2][k] = r3;
}
}
/* ----------------------------------------------------------------------
find 2 H atoms bonded to O atom i
compute position xM of fictitious charge site for O atom
also return local indices iH1,iH2 of H atoms
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::find_M_thr(int i, int &iH1, int &iH2, dbl3_t &xM)
{
iH1 = atom->map(atom->tag[i] + 1);
iH2 = atom->map(atom->tag[i] + 2);
if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing");
if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
double delx1 = x[iH1].x - x[i].x;
double dely1 = x[iH1].y - x[i].y;
double delz1 = x[iH1].z - x[i].z;
domain->minimum_image(delx1,dely1,delz1);
double delx2 = x[iH2].x - x[i].x;
double dely2 = x[iH2].y - x[i].y;
double delz2 = x[iH2].z - x[i].z;
domain->minimum_image(delx2,dely2,delz2);
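  // place M a fraction alpha of the way from O toward the midpoint
  // of the two minimum-image O-H vectors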
xM.x = x[i].x + alpha * 0.5 * (delx1 + delx2);
xM.y = x[i].y + alpha * 0.5 * (dely1 + dely2);
xM.z = x[i].z + alpha * 0.5 * (delz1 + delz2);
}
diff --git a/src/USER-OMP/pppm_tip4p_omp.cpp b/src/USER-OMP/pppm_tip4p_omp.cpp
index b2e344036..21da81312 100644
--- a/src/USER-OMP/pppm_tip4p_omp.cpp
+++ b/src/USER-OMP/pppm_tip4p_omp.cpp
@@ -1,810 +1,810 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "pppm_tip4p_omp.h"
#include "atom.h"
#include "comm.h"
#include "domain.h"
#include "error.h"
#include "fix_omp.h"
#include "force.h"
#include "memory.h"
#include "math_const.h"
#include "math_special.h"
#include <string.h>
#include <math.h>
#include "suffix.h"
using namespace LAMMPS_NS;
using namespace MathConst;
using namespace MathSpecial;
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#else
#define ZEROF 0.0
#endif
#define EPS_HOC 1.0e-7
#define OFFSET 16384
/* ---------------------------------------------------------------------- */
PPPMTIP4POMP::PPPMTIP4POMP(LAMMPS *lmp, int narg, char **arg) :
PPPMTIP4P(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
{
triclinic_support = 0;
suffix_flag |= Suffix::OMP;
}
/* ----------------------------------------------------------------------
allocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMTIP4POMP::allocate()
{
PPPMTIP4P::allocate();
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->init_pppm(order,memory);
}
}
/* ----------------------------------------------------------------------
free memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMTIP4POMP::deallocate()
{
PPPMTIP4P::deallocate();
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->init_pppm(-order,memory);
}
}
/* ----------------------------------------------------------------------
pre-compute modified (Hockney-Eastwood) Coulomb Green's function
------------------------------------------------------------------------- */
void PPPMTIP4POMP::compute_gf_ik()
{
const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
pow(-log(EPS_HOC),0.25));
const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
pow(-log(EPS_HOC),0.25));
const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
pow(-log(EPS_HOC),0.25));
const int numk = nxhi_fft - nxlo_fft + 1;
const int numl = nyhi_fft - nylo_fft + 1;
const int twoorder = 2*order;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
double snx,sny,snz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,dot1,dot2;
double numerator,denominator;
double sqk;
int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;
loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
for (n = nfrom; n < nto; ++n) {
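      // decompose the flat FFT index n into (k,l,m) coordinates
      // within the local FFT brick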
m = n / (numl*numk);
l = (n - m*numl*numk) / numk;
k = n - m*numl*numk - l*numk;
m += nzlo_fft;
l += nylo_fft;
k += nxlo_fft;
mper = m - nz_pppm*(2*m/nz_pppm);
snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
lper = l - ny_pppm*(2*l/ny_pppm);
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
kper = k - nx_pppm*(2*k/nx_pppm);
snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk != 0.0) {
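        // 12.5663706 = 4*pi (cf. MY_4PI used in compute_gf_ad())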
numerator = 12.5663706/sqk;
denominator = gf_denom(snx,sny,snz);
sum1 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
dot2 = qx*qx+qy*qy+qz*qz;
sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
}
}
}
greensfn[n] = numerator*sum1/denominator;
} else greensfn[n] = 0.0;
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
compute optimized Green's function for energy calculation
------------------------------------------------------------------------- */
void PPPMTIP4POMP::compute_gf_ad()
{
const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
const int numk = nxhi_fft - nxlo_fft + 1;
const int numl = nyhi_fft - nylo_fft + 1;
const int twoorder = 2*order;
double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0;
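  // per-thread partial sums of the self-force coefficients;
  // merged by the OpenMP reduction clause below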
#if defined(_OPENMP)
#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5)
#endif
{
double snx,sny,snz,sqk;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double numerator,denominator;
int k,l,m,kper,lper,mper,n,nfrom,nto,tid;
loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
for (n = nfrom; n < nto; ++n) {
m = n / (numl*numk);
l = (n - m*numl*numk) / numk;
k = n - m*numl*numk - l*numk;
m += nzlo_fft;
l += nylo_fft;
k += nxlo_fft;
mper = m - nz_pppm*(2*m/nz_pppm);
qz = unitkz*mper;
snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
lper = l - ny_pppm*(2*l/ny_pppm);
qy = unitky*lper;
sny = square(sin(0.5*qy*yprd/ny_pppm));
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
kper = k - nx_pppm*(2*k/nx_pppm);
qx = unitkx*kper;
snx = square(sin(0.5*qx*xprd/nx_pppm));
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
sqk = qx*qx + qy*qy + qz*qz;
if (sqk != 0.0) {
numerator = MY_4PI/sqk;
denominator = gf_denom(snx,sny,snz);
greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
sf0 += sf_precoeff1[n]*greensfn[n];
sf1 += sf_precoeff2[n]*greensfn[n];
sf2 += sf_precoeff3[n]*greensfn[n];
sf3 += sf_precoeff4[n]*greensfn[n];
sf4 += sf_precoeff5[n]*greensfn[n];
sf5 += sf_precoeff6[n]*greensfn[n];
} else {
greensfn[n] = 0.0;
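        // greensfn[n] is zero here, so these terms add nothing;
        // kept so both branches update the same sums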
sf0 += sf_precoeff1[n]*greensfn[n];
sf1 += sf_precoeff2[n]*greensfn[n];
sf2 += sf_precoeff3[n]*greensfn[n];
sf3 += sf_precoeff4[n]*greensfn[n];
sf4 += sf_precoeff5[n]*greensfn[n];
sf5 += sf_precoeff6[n]*greensfn[n];
}
}
thr->timer(Timer::KSPACE);
  } // end of parallel region
// compute the coefficients for the self-force correction
double prex, prey, prez, tmp[6];
prex = prey = prez = MY_PI/volume;
prex *= nx_pppm/xprd;
prey *= ny_pppm/yprd;
prez *= nz_pppm/zprd_slab;
tmp[0] = sf0 * prex;
tmp[1] = sf1 * prex*2;
tmp[2] = sf2 * prey;
tmp[3] = sf3 * prey*2;
tmp[4] = sf4 * prez;
tmp[5] = sf5 * prez*2;
// communicate values with other procs
MPI_Allreduce(tmp,sf_coeff,6,MPI_DOUBLE,MPI_SUM,world);
}
/* ----------------------------------------------------------------------
   run the regular top-level compute method from plain PPPM, which will
   have its individual methods replaced by our threaded versions, and
   then call the obligatory force reduction.
------------------------------------------------------------------------- */
void PPPMTIP4POMP::compute(int eflag, int vflag)
{
PPPMTIP4P::compute(eflag,vflag);
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void PPPMTIP4POMP::particle_map()
{
// no local atoms => nothing to do
if (atom->nlocal == 0) return;
const int * _noalias const type = atom->type;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
const int nlocal = atom->nlocal;
- if (!isfinite(boxlo[0]) || !isfinite(boxlo[1]) || !isfinite(boxlo[2]))
+ if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
int i, flag = 0;
#if defined(_OPENMP)
#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static)
#endif
for (i = 0; i < nlocal; i++) {
dbl3_t xM;
int iH1,iH2;
if (type[i] == typeO) {
find_M_thr(i,iH1,iH2,xM);
} else {
xM = x[i];
}
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// current particle coord can be outside global and local box
    // add/subtract OFFSET to avoid int(-0.75) = 0 when we want it to be -1
const int nx = static_cast<int> ((xM.x-boxlox)*delxinv+shift) - OFFSET;
const int ny = static_cast<int> ((xM.y-boxloy)*delyinv+shift) - OFFSET;
const int nz = static_cast<int> ((xM.z-boxloz)*delzinv+shift) - OFFSET;
p2g[i].a = nx;
p2g[i].b = ny;
p2g[i].t = nz;
// check that entire stencil around nx,ny,nz will fit in my 3d brick
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
nz+nlower < nzlo_out || nz+nupper > nzhi_out)
flag++;
}
int flag_all;
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM");
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void PPPMTIP4POMP::make_rho()
{
// clear 3d density array
FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]);
memset(d,0,ngrid*sizeof(FFT_SCALAR));
// no local atoms => nothing else to do
const int nlocal = atom->nlocal;
if (nlocal == 0) return;
const int ix = nxhi_out - nxlo_out + 1;
const int iy = nyhi_out - nylo_out + 1;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
const double * _noalias const q = atom->q;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const int * _noalias const type = atom->type;
dbl3_t xM;
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
// determine range of grid points handled by this thread
int i,jfrom,jto,tid,iH1,iH2;
loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// loop over all local atoms for all threads
for (i = 0; i < nlocal; i++) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
// pre-screen whether this atom will ever come within
      // reach of the data segment this thread is updating.
if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
|| ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;
if (type[i] == typeO) {
find_M_thr(i,iH1,iH2,xM);
} else {
xM = x[i];
}
const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
const FFT_SCALAR z0 = delvolinv * q[i];
for (int n = nlower; n <= nupper; ++n) {
const int jn = (nz+n-nzlo_out)*ix*iy;
const FFT_SCALAR y0 = z0*r1d[2][n];
for (int m = nlower; m <= nupper; ++m) {
const int jm = jn+(ny+m-nylo_out)*ix;
const FFT_SCALAR x0 = y0*r1d[1][m];
for (int l = nlower; l <= nupper; ++l) {
const int jl = jm+nx+l-nxlo_out;
// make sure each thread only updates
// "his" elements of the density grid
if (jl >= jto) break;
if (jl < jfrom) continue;
d[jl] += x0*r1d[0][l];
}
}
}
}
thr->timer(Timer::KSPACE);
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ik
------------------------------------------------------------------------- */
void PPPMTIP4POMP::fieldforce_ik()
{
const int nthreads = comm->nthreads;
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const double * _noalias const q = atom->q;
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const int * _noalias const type = atom->type;
const double qqrd2e = force->qqrd2e;
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
dbl3_t xM;
FFT_SCALAR x0,y0,z0,ekx,eky,ekz;
int i,ifrom,ito,tid,iH1,iH2,l,m,n,mx,my,mz;
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
for (i = ifrom; i < ito; ++i) {
if (type[i] == typeO) {
find_M_thr(i,iH1,iH2,xM);
} else xM = x[i];
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = qqrd2e * scale * q[i];
if (type[i] != typeO) {
f[i].x += qfactor*ekx;
f[i].y += qfactor*eky;
if (slabflag != 2) f[i].z += qfactor*ekz;
} else {
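        // O atom: the interpolated force acts on the M site;
        // give (1-alpha) to O and alpha/2 to each H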
const double fx = qfactor * ekx;
const double fy = qfactor * eky;
const double fz = qfactor * ekz;
f[i].x += fx*(1 - alpha);
f[i].y += fy*(1 - alpha);
if (slabflag != 2) f[i].z += fz*(1 - alpha);
f[iH1].x += 0.5*alpha*fx;
f[iH1].y += 0.5*alpha*fy;
if (slabflag != 2) f[iH1].z += 0.5*alpha*fz;
f[iH2].x += 0.5*alpha*fx;
f[iH2].y += 0.5*alpha*fy;
if (slabflag != 2) f[iH2].z += 0.5*alpha*fz;
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ad
------------------------------------------------------------------------- */
void PPPMTIP4POMP::fieldforce_ad()
{
const int nthreads = comm->nthreads;
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
const double hx_inv = nx_pppm/prd[0];
const double hy_inv = ny_pppm/prd[1];
const double hz_inv = nz_pppm/prd[2];
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const double * _noalias const q = atom->q;
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const int * _noalias const type = atom->type;
const double qqrd2e = force->qqrd2e;
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
double s1,s2,s3,sf;
dbl3_t xM;
FFT_SCALAR ekx,eky,ekz;
int i,ifrom,ito,tid,iH1,iH2,l,m,n,mx,my,mz;
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
for (i = ifrom; i < ito; ++i) {
if (type[i] == typeO) {
find_M_thr(i,iH1,iH2,xM);
} else xM = x[i];
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
compute_drho1d_thr(d1d,dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
for (m = nlower; m <= nupper; m++) {
my = m+ny;
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
ekx += d1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
eky += r1d[0][l]*d1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
ekz += r1d[0][l]*r1d[1][m]*d1d[2][n]*u_brick[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
      // convert E-field to force and subtract self forces
const double qi = q[i];
const double qfactor = qqrd2e * scale * qi;
s1 = x[i].x*hx_inv;
sf = sf_coeff[0]*sin(MY_2PI*s1);
sf += sf_coeff[1]*sin(MY_4PI*s1);
sf *= 2.0*qi;
const double fx = qfactor*(ekx - sf);
s2 = x[i].y*hy_inv;
sf = sf_coeff[2]*sin(MY_2PI*s2);
sf += sf_coeff[3]*sin(MY_4PI*s2);
sf *= 2.0*qi;
const double fy = qfactor*(eky - sf);
s3 = x[i].z*hz_inv;
sf = sf_coeff[4]*sin(MY_2PI*s3);
sf += sf_coeff[5]*sin(MY_4PI*s3);
sf *= 2.0*qi;
const double fz = qfactor*(ekz - sf);
if (type[i] != typeO) {
f[i].x += fx;
f[i].y += fy;
if (slabflag != 2) f[i].z += fz;
} else {
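        // O atom: redistribute the M-site force exactly as in fieldforce_ik()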
f[i].x += fx*(1 - alpha);
f[i].y += fy*(1 - alpha);
if (slabflag != 2) f[i].z += fz*(1 - alpha);
f[iH1].x += 0.5*alpha*fx;
f[iH1].y += 0.5*alpha*fy;
if (slabflag != 2) f[iH1].z += 0.5*alpha*fz;
f[iH2].x += 0.5*alpha*fx;
f[iH2].y += 0.5*alpha*fy;
if (slabflag != 2) f[iH2].z += 0.5*alpha*fz;
}
}
thr->timer(Timer::KSPACE);
} // end of parallel region
}
/* ----------------------------------------------------------------------
find 2 H atoms bonded to O atom i
compute position xM of fictitious charge site for O atom
also return local indices iH1,iH2 of H atoms
------------------------------------------------------------------------- */
void PPPMTIP4POMP::find_M_thr(int i, int &iH1, int &iH2, dbl3_t &xM)
{
iH1 = atom->map(atom->tag[i] + 1);
iH2 = atom->map(atom->tag[i] + 2);
if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing");
if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
double delx1 = x[iH1].x - x[i].x;
double dely1 = x[iH1].y - x[i].y;
double delz1 = x[iH1].z - x[i].z;
domain->minimum_image(delx1,dely1,delz1);
double delx2 = x[iH2].x - x[i].x;
double dely2 = x[iH2].y - x[i].y;
double delz2 = x[iH2].z - x[i].z;
domain->minimum_image(delx2,dely2,delz2);
xM.x = x[i].x + alpha * 0.5 * (delx1 + delx2);
xM.y = x[i].y + alpha * 0.5 * (dely1 + dely2);
xM.z = x[i].z + alpha * 0.5 * (delz1 + delz2);
}
/* ----------------------------------------------------------------------
charge assignment into rho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPMTIP4POMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
{
int k,l;
FFT_SCALAR r1,r2,r3;
for (k = (1-order)/2; k <= order/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = order-1; l >= 0; l--) {
r1 = rho_coeff[l][k] + r1*dx;
r2 = rho_coeff[l][k] + r2*dy;
r3 = rho_coeff[l][k] + r3*dz;
}
r1d[0][k] = r1;
r1d[1][k] = r2;
r1d[2][k] = r3;
}
}
/* ----------------------------------------------------------------------
charge assignment into drho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPMTIP4POMP::compute_drho1d_thr(FFT_SCALAR * const * const d1d, const FFT_SCALAR &dx,
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
{
int k,l;
FFT_SCALAR r1,r2,r3;
for (k = (1-order)/2; k <= order/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = order-2; l >= 0; l--) {
r1 = drho_coeff[l][k] + r1*dx;
r2 = drho_coeff[l][k] + r2*dy;
r3 = drho_coeff[l][k] + r3*dz;
}
d1d[0][k] = r1;
d1d[1][k] = r2;
d1d[2][k] = r3;
}
}
diff --git a/src/lmptype.h b/src/lmptype.h
index 7a63ee4e5..a8a696450 100644
--- a/src/lmptype.h
+++ b/src/lmptype.h
@@ -1,206 +1,208 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
// define integer data types used by LAMMPS and associated size limits
// smallint = variables for on-processor system (nlocal, nmax, etc)
// imageint = variables for atom image flags (image)
// tagint = variables for atom IDs and molecule IDs (tag,molecule)
// bigint = variables for total system (natoms, ntimestep, etc)
// smallint must be an int, as defined by C compiler
// imageint can be 32-bit or 64-bit int, must be >= smallint
// tagint can be 32-bit or 64-bit int, must be >= smallint
// bigint can be 32-bit or 64-bit int, must be >= imageint,tagint
// MPI_LMP_BIGINT = MPI data type corresponding to a bigint
#ifndef LMP_LMPTYPE_H
#define LMP_LMPTYPE_H
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <limits.h>
#include <stdint.h>
#include <inttypes.h>
// grrr - IBM Power6 does not provide this def in their system header files
#ifndef PRId64
#define PRId64 "ld"
#endif
namespace LAMMPS_NS {
// enum used for KOKKOS host/device flags
enum ExecutionSpace{Host,Device};
// reserve 2 hi bits in molecular system neigh list for special bonds flag
// max local + ghost atoms per processor = 2^30 - 1
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
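// NEIGHMASK = 2^30-1 keeps the low 30 bits (local neighbor index);
// the 2 special-bonds bits sit above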
// default to 32-bit smallint and other ints, 64-bit bigint
#if !defined(LAMMPS_SMALLSMALL) && !defined(LAMMPS_BIGBIG) && !defined(LAMMPS_SMALLBIG)
#define LAMMPS_SMALLBIG
#endif
// allow user override of LONGLONG to LONG, necessary for some machines/MPI
#ifdef LAMMPS_LONGLONG_TO_LONG
#define MPI_LL MPI_LONG
#define ATOLL atoll
#else
#define MPI_LL MPI_LONG_LONG
#define ATOLL atol
#endif
// for atomic problems that exceed 2 billion (2^31) atoms
// 32-bit smallint/imageint/tagint, 64-bit bigint
#ifdef LAMMPS_SMALLBIG
typedef int smallint;
typedef int imageint;
typedef int tagint;
typedef int64_t bigint;
#define MAXSMALLINT INT_MAX
#define MAXTAGINT INT_MAX
#define MAXBIGINT INT64_MAX
#define MPI_LMP_TAGINT MPI_INT
#define MPI_LMP_BIGINT MPI_LL
#define TAGINT_FORMAT "%d"
#define BIGINT_FORMAT "%" PRId64
#define ATOTAGINT atoi
#define ATOBIGINT ATOLL
#define IMGMASK 1023
#define IMGMAX 512
#define IMGBITS 10
#define IMG2BITS 20
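// 10 image bits per dimension: IMGMASK = 2^10-1, IMGMAX = 2^9
// (flags are stored offset by IMGMAX)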
#endif
// for molecular problems that exceed 2 billion (2^31) atoms
// or problems where atoms wrap around the periodic box more than 512 times
// 32-bit smallint, 64-bit imageint/tagint/bigint
#ifdef LAMMPS_BIGBIG
typedef int smallint;
typedef int64_t imageint;
typedef int64_t tagint;
typedef int64_t bigint;
#define MAXSMALLINT INT_MAX
#define MAXTAGINT INT64_MAX
#define MAXBIGINT INT64_MAX
#define MPI_LMP_TAGINT MPI_LL
#define MPI_LMP_BIGINT MPI_LL
#define TAGINT_FORMAT "%" PRId64
#define BIGINT_FORMAT "%" PRId64
#define ATOTAGINT ATOLL
#define ATOBIGINT ATOLL
#define IMGMASK 2097151
#define IMGMAX 1048576
#define IMGBITS 21
#define IMG2BITS 42
#endif
// for machines that do not support 64-bit ints
// 32-bit smallint/imageint/tagint/bigint
#ifdef LAMMPS_SMALLSMALL
typedef int smallint;
typedef int imageint;
typedef int tagint;
typedef int bigint;
#define MAXSMALLINT INT_MAX
#define MAXTAGINT INT_MAX
#define MAXBIGINT INT_MAX
#define MPI_LMP_TAGINT MPI_INT
#define MPI_LMP_BIGINT MPI_INT
#define TAGINT_FORMAT "%d"
#define BIGINT_FORMAT "%d"
#define ATOTAGINT atoi
#define ATOBIGINT atoi
#define IMGMASK 1023
#define IMGMAX 512
#define IMGBITS 10
#define IMG2BITS 20
#endif
}
// preprocessor macros for compiler-specific settings
// clear previous definitions to avoid redefinition warning
#ifdef _alignvar
#undef _alignvar
#endif
#ifdef _noalias
#undef _noalias
#endif
// define stack variable alignment
#if defined(__INTEL_COMPILER)
#define _alignvar(expr,val) __declspec(align(val)) expr
#elif defined(__GNUC__)
#define _alignvar(expr,val) expr __attribute((aligned(val)))
#else
#define _alignvar(expr,val) expr
#endif
// declaration to lift aliasing restrictions
#if defined(__INTEL_COMPILER)
#define _noalias restrict
#elif defined(__GNUC__)
#define _noalias __restrict
#else
#define _noalias
#endif
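+// single remap point for isfinite(); lets platforms without a usable
+// bare isfinite() redefine it (rationale inferred; see the matching
+// change in pppm_tip4p_omp.cpp above)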
+#define ISFINITE(x) isfinite(x)
+
// settings to enable LAMMPS to build under Windows
#ifdef _WIN32
#include "lmpwindows.h"
#endif
#endif