Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F92139138
algorithm.c
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Nov 17, 17:19
Size
2 KB
Mime Type
text/x-c
Expires
Tue, Nov 19, 17:19 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
22382072
Attached To
R10834 Project_multiproc
algorithm.c
View Options
/*
============================================================================
Filename : algorithm.c
Author : Your names go here
SCIPER : Your SCIPER numbers
============================================================================
*/
#include <math.h>
#include <stddef.h>
#ifdef _OPENMP
#include <omp.h>
#endif
/*
 * Row-major accessors for a square grid with `length` cells per row.
 * They expand to uses of the identifiers `input` / `output` and `length`,
 * so those names must be in scope wherever the macros are used.
 */
#define INPUT(I,J) input[(I)*length+(J)]
#define OUTPUT(I,J) output[(I)*length+(J)]
/* NOTE(review): not referenced in the visible part of this file; meaning unconfirmed. */
#define CACHE_SIZE 8
/*
 * Splits the flat row-major index K into row I and column J, using the
 * `length` visible at the point of use. K is parenthesized so that an
 * expression argument such as `a + b` is divided as a whole.
 *
 * The expansion is two statements and already ends with ';' (kept for
 * compatibility with existing uses): do not use it as the unbraced body of
 * an if/else or loop, and note that I and K are each evaluated twice.
 */
#define COORD(I,J,K) (I) = (K) / length; (J) = (K) - (I) * length;
/*
 * Runs `iterations` passes of a 3x3 averaging stencil over a square grid of
 * length x length doubles stored row-major.
 *
 * In each pass, every interior cell (i, j) with 1 <= i, j <= length-2 of the
 * destination buffer receives the mean of the nine cells of the 3x3
 * neighbourhood centred on (i, j) in the source buffer. Two groups of cells
 * are never written and therefore keep whatever each buffer already held:
 *   - the border (row/column 0 and row/column length-1);
 *   - the four cells whose row and column are both length/2-1 or length/2.
 *
 * After each pass the source and destination roles are exchanged. The swap
 * acts on this function's local copies of the pointers only, so the caller's
 * pointers are unchanged: the most recently written grid is in the caller's
 * `output` buffer when `iterations` is odd and in the caller's `input` buffer
 * when it is even. Both buffers are written to when iterations >= 2.
 *
 * input      : first grid buffer (source of the first pass), length*length doubles
 * output     : second grid buffer (destination of the first pass), same size
 * threads    : thread count handed to omp_set_num_threads(); has no effect
 *              when the file is built without OpenMP
 * length     : cells per row/column; nothing is written when length < 3
 * iterations : number of passes; nothing is written when iterations <= 0
 */
void simulate(double *input, double *output, int threads, int length, int iterations)
{
#ifdef _OPENMP
    omp_set_num_threads(threads);
#else
    (void)threads; /* serial build: the pragma below is ignored as well */
#endif

    const int mid = length / 2;
    /* Index arithmetic is done in size_t so that i*length + j cannot
     * overflow int on large grids. Only used when length >= 3. */
    const size_t stride = (size_t)length;

    for (int n = 0; n < iterations; n++)
    {
        /* Each (i, j) writes a distinct destination cell and only reads the
         * source buffer, so iterations of the collapsed loop nest are
         * independent of one another. */
        #pragma omp parallel for collapse(2)
        for (int i = 1; i < length - 1; i++)
        {
            for (int j = 1; j < length - 1; j++)
            {
                /* Leave the four central cells untouched. */
                if ((i == mid - 1 || i == mid) && (j == mid - 1 || j == mid))
                {
                    continue;
                }

                const size_t centre = (size_t)i * stride + (size_t)j;
                const size_t up = centre - stride;
                const size_t down = centre + stride;

                /* Sum each row of the neighbourhood first, then the three
                 * row sums, to keep the original order of additions. */
                const double row_sums[3] = {
                    input[up - 1] + input[up] + input[up + 1],
                    input[centre - 1] + input[centre] + input[centre + 1],
                    input[down - 1] + input[down] + input[down + 1]
                };

                output[centre] = (row_sums[0] + row_sums[1] + row_sums[2]) / 9;
            }
        }

        /* The grid just written becomes the source of the next pass. */
        double *swap = input;
        input = output;
        output = swap;
    }
}
Event Timeline
Log In to Comment