TestSynchronic.cpp
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Tue, Jul 1, 14:37

TestSynchronic.cpp
View Options

	/*

	Copyright (c) 2014, NVIDIA Corporation
	All rights reserved.

	Redistribution and use in source and binary forms, with or without modification,
	are permitted provided that the following conditions are met:

	1. Redistributions of source code must retain the above copyright notice, this
	list of conditions and the following disclaimer.

	2. Redistributions in binary form must reproduce the above copyright notice,
	this list of conditions and the following disclaimer in the documentation
	and/or other materials provided with the distribution.

	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
	ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
	INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
	BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
	OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
	OF THE POSSIBILITY OF SUCH DAMAGE.

	*/

	//#undef _WIN32_WINNT
	//#define _WIN32_WINNT 0x0602

	#if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__APPLE__)

	// Skip for now

	#else

	#include <gtest/gtest.h>

	#ifdef USEOMP
	#include <omp.h>
	#endif

	#include <iostream>
	#include <sstream>
	#include <algorithm>
	#include <string>
	#include <vector>
	#include <map>
	#include <cstring>
	#include <ctime>

	//#include <details/config>
	//#undef __SYNCHRONIC_COMPATIBLE

	#include <impl/Kokkos_Synchronic.hpp>
	#include <impl/Kokkos_Synchronic_n3998.hpp>

	#include "TestSynchronic.hpp"

	// Uncomment to allow test to dump output
	//#define VERBOSE_TEST

	namespace Test {

	// Step-down table for the thread-count sweep: entry [x-1] is the thread count
	// that FOR_GAUNTLET tries after x. The 0 stored at index 0 terminates the sweep.
	unsigned next_table[] =
	{
		 0,  1,  2,  3,  4,  4,  6,  6, // 0-7
		 8,  8,  8,  8, 12, 12, 12, 12, // 8-15
		16, 16, 16, 16, 16, 16, 16, 16, // 16-23
		24, 24, 24, 24, 24, 24, 24, 24, // 24-31
		32, 32, 32, 32, 32, 32, 32, 32, // 32-39
		40, 40, 40, 40, 40, 40, 40, 40, // 40-47
		48, 48, 48, 48, 48, 48, 48, 48, // 48-55
		56, 56, 56, 56, 56, 56, 56, 56, // 56-63
	};

	// Loop header that walks x over the thread counts to benchmark.
	// x starts at min(hardware_concurrency()*8, number of entries in next_table) and is
	// replaced by next_table[x-1] after every pass; the loop ends once x becomes 0.
	// If hardware_concurrency() reports 0 the starting value is 0 and the body never runs.
	// Change the starting expression to alter how far the system is oversubscribed.
	#define FOR_GAUNTLET(x) for(unsigned x = (std::min)(std::thread::hardware_concurrency()*8,unsigned(sizeof(next_table)/sizeof(unsigned))); x; x = next_table[x-1])

	//set this to override the benchmark of barriers to use OMP barriers instead of n3998 std::barrier
	//#define USEOMP

	#if defined(__SYNCHRONIC_COMPATIBLE)
	#define PREFIX "futex-"
	#else
	#define PREFIX "backoff-"
	#endif

	// Generator drawn from by time_item(). This test uses a custom Mersenne twister
	// to eliminate implementation variation.
	MersenneTwister mt;

	// NOTE(review): dummya/dummyb are not referenced in the visible part of this file.
	int dummya = 1, dummyb =1;

	// Scratch state read and written by time_nil(): dummy1 is the plain int that the
	// timed loop tests and AND-s, dummy2 accumulates it, dummy3 holds the inner loop bound.
	int dummy1 = 1;
	std::atomic<int> dummy2(1);
	std::atomic<int> dummy3(1);

	// Returns the average clock() time, in seconds, of one mt.integer() call,
	// measured over `count` consecutive calls on the file-level generator.
	double time_item(int const count = (int)1E8) {

		clock_t const begin_ticks = clock();

		int drawn = 0;
		while(drawn < count) {
			mt.integer();
			++drawn;
		}

		clock_t const finish_ticks = clock();

		// Convert ticks to seconds first, then average over the requested count.
		double const total_seconds = (finish_ticks - begin_ticks) / double(CLOCKS_PER_SEC);
		return total_seconds / count;
	}
	// Times a loop over the dummy globals and returns the elapsed clock() seconds
	// divided by `count`.
	// dummy3 is set to `count` and read back as the inner loop bound. dummy1 is only
	// ever AND-ed here, so once it reaches zero every remaining outer round is skipped.
	double time_nil(int const count = (int)1E08) {

		clock_t const begin_ticks = clock();

		dummy3.store(count);
		int const outer_rounds = (int)1E6;
		for(int round = 0; round < outer_rounds; ++round) {
			if(dummy1 == 0)
				continue;

			// Inner bound is re-read from the atomic on every active round.
			int const inner_limit = dummy3.load();
			int step = 1;
			while(step < inner_limit) {
				dummy1 = dummy1 & step; // one unit of "work"
				++step;
			}
			dummy2.fetch_add(dummy1, std::memory_order_relaxed);
		}

		clock_t const finish_ticks = clock();
		double const total_seconds = (finish_ticks - begin_ticks) / double(CLOCKS_PER_SEC);

		return total_seconds / count;
	}


	// Per-thread body of the mutex benchmark.
	// Each of the num_iterations rounds does an unlocked work phase on mtnc, bumps the
	// iteration counter t, and then (unless skip is set) takes m and does a locked work
	// phase on mtc. wnc / wc receive the nominal unit counts of the two phases.
	// Both work loops start at 1, so they make one draw fewer than the count they record.
	template <class mutex_type>
	void testmutex_inner(mutex_type& m, std::atomic<int>& t,std::atomic<int>& wc,std::atomic<int>& wnc, int const num_iterations,
	                     int const num_items_critical, int const num_items_noncritical, MersenneTwister& mtc, MersenneTwister& mtnc, bool skip) {

		for(int round = 0; round < num_iterations; ++round) {

			// Phase 1: work performed outside the lock.
			if(num_items_noncritical != 0) {
				int const unlocked_units = num_items_noncritical;
				int done = 1;
				while(done < unlocked_units) {
					mtnc.integer();
					++done;
				}
				wnc.fetch_add(unlocked_units, std::memory_order_relaxed);
			}

			t.fetch_add(1, std::memory_order_relaxed);

			// The baseline pass never touches the mutex.
			if(skip)
				continue;

			// Phase 2: the lock is taken even when there is no critical work to do.
			std::unique_lock<mutex_type> guard(m);
			if(num_items_critical == 0)
				continue;

			int const locked_units = num_items_critical;
			for(int done = 1; done < locked_units; ++done)
				mtc.integer();
			wc.fetch_add(locked_units, std::memory_order_relaxed);
		}
	}
	// Benchmarks mutex_type across the FOR_GAUNTLET thread counts and appends one
	// per-iteration overhead figure per thread count to
	// results["<name> (f=<critical_fraction>,d=<critical_duration>)"].
	//
	// critical_duration is converted into a number of work items using the measured
	// per-item time; critical_fraction fixes the ratio of locked to unlocked work.
	// Every thread count is run twice: pass 0 with locking skipped (baseline) and
	// pass 1 with locking. Throws std::string if the two passes disagree on the
	// iteration count or on the unlocked work count.
	template <class mutex_type>
	void testmutex_outer(std::map<std::string,std::vector<double>>& results, std::string const& name, double critical_fraction, double critical_duration) {

	// Key under which this configuration's timings are stored.
	std::ostringstream truename;
	truename << name << " (f=" << critical_fraction << ",d=" << critical_duration << ")";

	std::vector<double>& data = results[truename.str()];

	double const workItemTime = time_item() ,
	nilTime = time_nil();

	// Critical work: zero when no duration is requested, otherwise the larger of the
	// requested duration and 100x the time_nil() figure, both expressed in work items.
	// Non-critical work is scaled from it so that critical_fraction of the items are locked.
	int const num_items_critical = (critical_duration <= 0 ? 0 : (std::max)( int(critical_duration / workItemTime + 0.5), int(100 * nilTime / workItemTime + 0.5))),
	num_items_noncritical = (num_items_critical <= 0 ? 0 : int( ( 1 - critical_fraction ) * num_items_critical / critical_fraction + 0.5 ));

	FOR_GAUNTLET(num_threads) {

	//Kokkos::Impl::portable_sleep(std::chrono::microseconds(2000000));

	// Iterations per thread. With work items configured, the count is derived from
	// 1/(8*workItemTime) split over the items and threads; with no work items it is
	// derived from 1/workItemTime divided by a multiple of num_threads squared.
	// NOTE(review): both __SYNCHRONIC_JUST_YIELD branches below are currently identical.
	int const num_iterations = (num_items_critical + num_items_noncritical != 0) ?
	#ifdef __SYNCHRONIC_JUST_YIELD
	int( 1 / ( 8 * workItemTime ) / (num_items_critical + num_items_noncritical) / num_threads + 0.5 ) :
	#else
	int( 1 / ( 8 * workItemTime ) / (num_items_critical + num_items_noncritical) / num_threads + 0.5 ) :
	#endif
	#ifdef WIN32
	int( 1 / workItemTime / (20 * num_threads * num_threads) );
	#else
	int( 1 / workItemTime / (200 * num_threads * num_threads) );
	#endif

	#ifdef VERBOSE_TEST
	std::cerr << "running " << truename.str() << " #" << num_threads << ", " << num_iterations << " * " << num_items_noncritical << "\n" << std::flush;
	#endif


	// Index 0 = baseline pass (locking skipped), index 1 = locked pass.
	// t: iterations performed, wc: critical work units, wnc: non-critical work units.
	std::atomic<int> t[2], wc[2], wnc[2];

	clock_t start[2], end[2];
	for(int pass = 0; pass < 2; ++pass) {

	t[pass] = 0;
	wc[pass] = 0;
	wnc[pass] = 0;

	// Re-seed identically before each pass and build one generator pair per thread.
	// NOTE(review): presumably MersenneTwister's constructor draws its seed from rand() - confirm.
	srand(num_threads);
	std::vector<MersenneTwister> randomsnc(num_threads),
	randomsc(num_threads);

	mutex_type m;

	start[pass] = clock();
	#ifdef USEOMP
	omp_set_num_threads(num_threads);
	std::atomic<int> _j(0);
	#pragma omp parallel
	{
	// Each OMP thread claims a distinct generator index.
	int const j = _j.fetch_add(1,std::memory_order_relaxed);
	testmutex_inner(m, t[pass], wc[pass], wnc[pass], num_iterations, num_items_critical, num_items_noncritical, randomsc[j], randomsnc[j], pass==0);
	num_threads = omp_get_num_threads();
	}
	#else
	// Launch num_threads workers, then join and free each one.
	std::vector<std::thread*> threads(num_threads);
	for(unsigned j = 0; j < num_threads; ++j)
	threads[j] = new std::thread([&,j](){
	testmutex_inner(m, t[pass], wc[pass], wnc[pass], num_iterations, num_items_critical, num_items_noncritical, randomsc[j], randomsnc[j], pass==0);
	}
	);
	for(unsigned j = 0; j < num_threads; ++j) {
	threads[j]->join();
	delete threads[j];
	}
	#endif
	end[pass] = clock();
	}
	// Both passes must have done the same number of iterations and unlocked work units.
	if(t[0] != t[1]) throw std::string("mismatched iteration counts");
	if(wnc[0] != wnc[1]) throw std::string("mismatched work item counts");

	// Overhead per iteration = locked pass - baseline pass - estimated time of the
	// critical work items themselves.
	double elapsed_seconds_0 = (end[0] - start[0]) / double(CLOCKS_PER_SEC),
	elapsed_seconds_1 = (end[1] - start[1]) / double(CLOCKS_PER_SEC);
	double time = (elapsed_seconds_1 - elapsed_seconds_0 - wc[1]*workItemTime) / num_iterations;

	data.push_back(time);
	#ifdef VERBOSE_TEST
	std::cerr << truename.str() << " : " << num_threads << "," << elapsed_seconds_1 / num_iterations << " - " << elapsed_seconds_0 / num_iterations << " - " << wc[1]*workItemTime/num_iterations << " = " << time << " \n";
	#endif
	}
	}

	// Per-thread body of the barrier benchmark.
	//
	// b                      barrier shared by all num_threads workers
	// j                      index of this worker; workers whose low index bit is clear run
	//                        num_iterations_odd phases, the others num_iterations_even
	// t, w                   shared counters: phases completed and work units recorded
	// num_items_noncritical  mean work units per phase (0 disables the work)
	// arg_mt                 this worker's private generator
	// skip                   baseline pass: do the work but never touch the barrier
	//
	// When not skipping, the shared phase counter t is checked on both sides of the
	// barrier and a gtest failure is recorded if a thread ran ahead or fell behind.
	template <class barrier_type>
	void testbarrier_inner(barrier_type& b, int const num_threads, int const j, std::atomic<int>& t,std::atomic<int>& w,
	int const num_iterations_odd, int const num_iterations_even,
	int const num_items_noncritical, MersenneTwister& arg_mt, bool skip) {

	for(int k = 0; k < (std::max)(num_iterations_even,num_iterations_odd); ++k) {

	// This worker has finished its own share of phases: leave the barrier for good.
	if(k >= (~j & 0x1 ? num_iterations_odd : num_iterations_even )) {
	if(!skip)
	b.arrive_and_drop();
	break;
	}

	if(num_items_noncritical) {
	// Do some work between barrier phases
	int workunits = (int) (arg_mt.poissonInterval((float)num_items_noncritical) + 0.5f);
	for (int i = 1; i < workunits; i++)
	arg_mt.integer(); // Do one work unit
	w.fetch_add(workunits,std::memory_order_relaxed);
	}

	t.fetch_add(1,std::memory_order_relaxed);

	if(!skip) {
	// Value t must have once every worker has finished phase k: the
	// (num_threads>>1)+(num_threads&1) workers with a clear low index bit contribute
	// min(k+1, num_iterations_odd) each, the remaining num_threads>>1 workers
	// contribute min(k+1, num_iterations_even) each.
	int const thiscount = (std::min)(k+1,num_iterations_odd)*((num_threads>>1)+(num_threads&1)) + (std::min)(k+1,num_iterations_even)*(num_threads>>1);
	if(t.load(std::memory_order_relaxed) > thiscount) {
	std::cerr << "FAILURE: some threads have run ahead of the barrier (" << t.load(std::memory_order_relaxed) << ">" << thiscount << ").\n";
	EXPECT_TRUE(false);
	}
	#ifdef USEOMP
	#pragma omp barrier
	#else
	b.arrive_and_wait();
	#endif
	if(t.load(std::memory_order_relaxed) < thiscount) {
	std::cerr << "FAILURE: some threads have fallen behind the barrier (" << t.load(std::memory_order_relaxed) << "<" << thiscount << ").\n";
	EXPECT_TRUE(false);
	}
	}
	}
	}
	// Benchmarks barrier_type across the FOR_GAUNTLET thread counts and appends one
	// per-phase overhead figure per thread count to results[name].
	//
	// barrier_frequency is truncated to the number of barrier phases per run;
	// phase_duration is converted into work items using the measured per-item time.
	// With randomIterations set, the two per-thread phase counts are drawn from
	// local_mt.poissonInterval() instead of being equal to that number.
	// Every thread count is run twice: pass 0 without the barrier (baseline) and
	// pass 1 with it. Throws std::string if the two passes disagree on the phase
	// count or on the work count.
	template <class barrier_type>
	void testbarrier_outer(std::map<std::string,std::vector<double>>& results, std::string const& name, double barrier_frequency, double phase_duration, bool randomIterations = false) {

	std::vector<double>& data = results[name];

	double const workItemTime = time_item();
	int const num_items_noncritical = int( phase_duration / workItemTime + 0.5 );

	FOR_GAUNTLET(num_threads) {

	int const num_iterations = int( barrier_frequency );
	#ifdef VERBOSE_TEST
	std::cerr << "running " << name << " #" << num_threads << ", " << num_iterations << " * " << num_items_noncritical << "\r" << std::flush;
	#endif

	// NOTE(review): presumably MersenneTwister's constructor draws its seed from rand() - confirm.
	srand(num_threads);

	// Phase counts handed to testbarrier_inner for the two groups of workers.
	MersenneTwister local_mt;
	int const num_iterations_odd = randomIterations ? int(local_mt.poissonInterval((float)num_iterations)+0.5f) : num_iterations,
	num_iterations_even = randomIterations ? int(local_mt.poissonInterval((float)num_iterations)+0.5f) : num_iterations;

	// Index 0 = baseline pass (barrier skipped), index 1 = barrier pass.
	// t: phases performed, w: work units recorded.
	std::atomic<int> t[2], w[2];
	std::chrono::time_point<std::chrono::high_resolution_clock> start[2], end[2];
	for(int pass = 0; pass < 2; ++pass) {

	t[pass] = 0;
	w[pass] = 0;

	// Re-seed identically before each pass and build one generator per thread.
	srand(num_threads);
	std::vector<MersenneTwister> randoms(num_threads);

	barrier_type b(num_threads);

	start[pass] = std::chrono::high_resolution_clock::now();
	#ifdef USEOMP
	omp_set_num_threads(num_threads);
	std::atomic<int> _j(0);
	#pragma omp parallel
	{
	// Each OMP thread claims a distinct worker index.
	int const j = _j.fetch_add(1,std::memory_order_relaxed);
	testbarrier_inner(b, num_threads, j, t[pass], w[pass], num_iterations_odd, num_iterations_even, num_items_noncritical, randoms[j], pass==0);
	num_threads = omp_get_num_threads();
	}
	#else
	// Launch num_threads workers, then join and free each one.
	std::vector<std::thread*> threads(num_threads);
	for(unsigned j = 0; j < num_threads; ++j)
	threads[j] = new std::thread([&,j](){
	testbarrier_inner(b, num_threads, j, t[pass], w[pass], num_iterations_odd, num_iterations_even, num_items_noncritical, randoms[j], pass==0);
	});
	for(unsigned j = 0; j < num_threads; ++j) {
	threads[j]->join();
	delete threads[j];
	}
	#endif
	end[pass] = std::chrono::high_resolution_clock::now();
	}

	// Both passes must have done the same number of phases and work units.
	if(t[0] != t[1]) throw std::string("mismatched iteration counts");
	if(w[0] != w[1]) throw std::string("mismatched work item counts");

	// The longer of the two phase counts is used as the divisor.
	int const phases = (std::max)(num_iterations_odd, num_iterations_even);

	// Overhead per phase = barrier pass - baseline pass, in seconds.
	std::chrono::duration<double> elapsed_seconds_0 = end[0]-start[0],
	elapsed_seconds_1 = end[1]-start[1];
	double const time = (elapsed_seconds_1.count() - elapsed_seconds_0.count()) / phases;

	data.push_back(time);
	#ifdef VERBOSE_TEST
	std::cerr << name << " : " << num_threads << "," << elapsed_seconds_1.count() / phases << " - " << elapsed_seconds_0.count() / phases << " = " << time << " \n";
	#endif
	}
	}

	// Runs testmutex_outer once per mutex type in the pack, in order, pairing the
	// i-th type with the i-th entry of the `name` array.
	template <class... T>
	struct mutex_tester;

	// Last (or only) mutex type: benchmark it under the first remaining name.
	template <class F>
	struct mutex_tester<F> {
		static void run(std::map<std::string,std::vector<double>>& results, std::string const name[], double critical_fraction, double critical_duration) {
			testmutex_outer<F>(results, name[0], critical_fraction, critical_duration);
		}
	};

	// Two or more mutex types: benchmark the head, then recurse on the tail with
	// the name array advanced by one entry.
	template <class F, class... T>
	struct mutex_tester<F,T...> {
		static void run(std::map<std::string,std::vector<double>>& results, std::string const name[], double critical_fraction, double critical_duration) {
			testmutex_outer<F>(results, name[0], critical_fraction, critical_duration);
			mutex_tester<T...>::run(results, name + 1, critical_fraction, critical_duration);
		}
	};

	// gtest entry point for the mutex benchmark.
	// For every active (critical_fraction, critical_duration) test point it runs
	// testmutex_outer for each mutex type listed below and, under VERBOSE_TEST, prints
	// the collected timings as CSV. A std::string thrown by the benchmark is reported
	// and recorded as a test failure.
	TEST( synchronic, main )
	{
	//warm up
	time_item();

	//measure up
	#ifdef VERBOSE_TEST
	std::cerr << "measuring work item speed...\r";
	std::cerr << "work item speed is " << time_item() << " per item, nil is " << time_nil() << "\n";
	#endif
	try {

	// Only the first test point is active; the others are kept commented out.
	std::pair<double,double> testpoints[] = { {1, 0}, /*{1E-1, 10E-3}, {5E-1, 2E-6}, {3E-1, 50E-9},*/ };
	for(auto x : testpoints ) {

	std::map<std::string,std::vector<double>> results;

	//testbarrier_outer<std::barrier>(results, PREFIX"bar 1khz 100us", 1E3, x.second);

	// Must stay in the same order as the mutex types passed to mutex_tester below.
	std::string const names[] = {
	PREFIX"tkt", PREFIX"mcs", PREFIX"ttas", PREFIX"std"
	#ifdef WIN32
	,PREFIX"srw"
	#endif
	};

	//run -->

	mutex_tester<
	ticket_mutex, mcs_mutex, ttas_mutex, std::mutex
	#ifdef WIN32
	,srw_mutex
	#endif
	>::run(results, names, x.first, x.second);

	//<-- run

	#ifdef VERBOSE_TEST
	// CSV dump: one header row of series names, then one row per thread count.
	std::cout << "threads";
	for(auto & i : results)
	std::cout << ",\"" << i.first << '\"';
	std::cout << std::endl;
	int j = 0;
	FOR_GAUNTLET(num_threads) {
	std::cout << num_threads;
	for(auto & i : results)
	std::cout << ',' << i.second[j];
	std::cout << std::endl;
	++j;
	}
	#endif
	}
	}
	catch(std::string const& e) {
	std::cerr << "EXCEPTION : " << e << std::endl;
	EXPECT_TRUE( false );
	}
	}

	} // namespace Test

	#endif

TestSynchronic.cppNo OneTemporaryActions

File Metadata

TestSynchronic.cppView Options

Event Timeline

TestSynchronic.cpp
No OneTemporary
Actions

TestSynchronic.cpp
View Options