Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F87601758
algorithm.c
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Oct 13, 15:33
Size
2 KB
Mime Type
text/x-c
Expires
Tue, Oct 15, 15:33 (2 d)
Engine
blob
Format
Raw Data
Handle
21595400
Attached To
R10834 Project_multiproc
algorithm.c
View Options
/*
============================================================================
Filename : algorithm.c
Author : Your names go here
SCIPER : Your SCIPER numbers
============================================================================
*/
#include <math.h>
#include <string.h>
#ifdef _OPENMP
#include <omp.h>
#endif
/*
 * Row-major accessors for the two length x length grids.
 * They expand to the identifiers `input`, `output` and `length` of the
 * enclosing scope. The index is computed in size_t so that I * length cannot
 * overflow `int` on large grids (I and J must be non-negative).
 */
#define INPUT(I,J) input[(size_t)(I) * (size_t)length + (size_t)(J)]
#define OUTPUT(I,J) output[(size_t)(I) * (size_t)length + (size_t)(J)]
#define CACHE_SIZE 8
/* Splits the flat index K into row I and column J (uses `length` from scope). */
#define COORD(I,J,K) do { (I) = (K) / length; (J) = (K) - (I) * length; } while (0)

/*
 * Runs `iterations` steps of a 3x3 mean filter over a square grid.
 *
 * Each step writes, for every interior cell (1 <= i, j <= length - 2) except
 * the four cells around the grid centre, the average of that cell and its
 * eight neighbours read from the current source grid. The two buffers swap
 * roles after every step.
 *
 * Parameters:
 *   input      - grid holding the initial state; used as scratch space and
 *                overwritten when iterations >= 2.
 *   output     - second grid; on return it holds the state after
 *                `iterations` steps (for iterations <= 0 that is a copy of
 *                `input`).
 *   threads    - number of OpenMP threads to request; ignored when the file
 *                is built without OpenMP or when the value is not positive.
 *   length     - side length of the grid; both buffers must hold
 *                length * length doubles.
 *   iterations - number of steps to run.
 *
 * The border cells and the four centre cells are never written by the
 * stencil, so the caller must store the desired values for them in BOTH
 * buffers before calling (presumably they act as fixed boundary / source
 * values - confirm with the caller).
 */
void simulate(double *input, double *output, int threads, int length, int iterations)
{
    /* Remember which buffer the caller will read; the swaps below only
     * exchange the local copies of the two pointers. */
    double *const caller_output = output;
    const int mid = length / 2;

#ifdef _OPENMP
    if (threads > 0) {
        omp_set_num_threads(threads);
    }
#else
    (void)threads; /* serial build: the pragma below is ignored */
#endif

    for (int n = 0; n < iterations; n++) {
        /* Every (i, j) writes a distinct OUTPUT cell and only reads INPUT,
         * so the iterations of the collapsed nest are independent. */
#pragma omp parallel for collapse(2)
        for (int i = 1; i < length - 1; i++) {
            for (int j = 1; j < length - 1; j++) {
                /* Leave the 2x2 block around the centre untouched. */
                if ((i == mid - 1 || i == mid) && (j == mid - 1 || j == mid)) {
                    continue;
                }

                /* Sum the neighbourhood one grid row at a time so that the
                 * reads walk memory in row-major order. */
                const double row_above = INPUT(i - 1, j - 1) + INPUT(i - 1, j) + INPUT(i - 1, j + 1);
                const double row_here  = INPUT(i, j - 1)     + INPUT(i, j)     + INPUT(i, j + 1);
                const double row_below = INPUT(i + 1, j - 1) + INPUT(i + 1, j) + INPUT(i + 1, j + 1);

                OUTPUT(i, j) = (row_above + row_here + row_below) / 9;
            }
        }

        /* The grid just written becomes the source of the next step. */
        double *temp = input;
        input = output;
        output = temp;
    }

    /* After the final swap `input` points at the newest grid. When that is
     * not the caller's output buffer (even iteration counts, or no iteration
     * at all) copy it over so the result is always found in `output`. */
    if (input != caller_output && length > 0) {
        memcpy(caller_output, input,
               (size_t)length * (size_t)length * sizeof *caller_output);
    }
}
Event Timeline
Log In to Comment