Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F85439839
algorithm_Ancarola.c
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Sep 29, 06:05
Size
2 KB
Mime Type
text/x-c
Expires
Tue, Oct 1, 06:05 (2 d)
Engine
blob
Format
Raw Data
Handle
21181617
Attached To
R10834 Project_multiproc
algorithm_Ancarola.c
View Options
/*
============================================================================
Filename : algorithm.c
Author : Your names go here
SCIPER : Your SCIPER numbers
============================================================================
*/
#include <math.h>
#include <stddef.h>
#ifdef _OPENMP
#include <omp.h>
#endif
/*
 * Row-major accessors for the square grids. They expand against the local
 * names `input`, `output` and `length` at the point of use. The row index is
 * widened to ptrdiff_t before multiplying so that the flat offset is not
 * computed in int (which overflows once length * length exceeds INT_MAX).
 */
#define INPUT(I,J) input[(ptrdiff_t)(I) * length + (J)]
#define OUTPUT(I,J) output[(ptrdiff_t)(I) * length + (J)]
/* NOTE(review): not referenced anywhere in the visible source. */
#define CACHE_SIZE 8
/*
 * Split the flat index K into row I and column J (uses the local `length`).
 * K is parenthesized so that an expression argument such as `a + b` is
 * divided as a whole. The expansion is still two statements with a trailing
 * semicolon, so do not use it as the unbraced body of an if/else/loop.
 */
#define COORD(I,J,K) (I) = (K) / length; (J) = (K) - (I) * length;
/*
 * Apply `iterations` passes of a 3x3 box average to a square grid.
 *
 * input      - length * length doubles, row-major; read on the first pass.
 * output     - length * length doubles, row-major; written on the first pass.
 * threads    - requested OpenMP thread count; only used when compiled with
 *              OpenMP, and ignored when it is not positive.
 * length     - side of the grid, in cells.
 * iterations - number of passes to run.
 *
 * On every pass each interior cell (1 <= i, j <= length - 2) of the
 * destination receives the mean of the 3x3 neighbourhood around the same
 * cell of the source. Two groups of cells are never written: the outermost
 * rows/columns, and the four cells with i and j in {length/2 - 1, length/2}.
 * Whatever the destination buffer already holds there is kept, so the caller
 * must have initialised those cells in BOTH buffers.
 *
 * The two buffers trade roles after every pass (only the local pointers are
 * swapped). Consequently the result of the last pass is stored in the
 * caller's `output` buffer when `iterations` is odd and in the caller's
 * `input` buffer when it is even; with zero iterations nothing is written.
 */
void simulate(double *input, double *output, int threads, int length, int iterations)
{
#ifdef _OPENMP
    /* A non-positive thread count is implementation-defined for
     * omp_set_num_threads, so keep the runtime default in that case. */
    if (threads > 0) {
        omp_set_num_threads(threads);
    }
#else
    (void)threads; /* serial build: the pragma below is ignored as well */
#endif

    /* Upper index of the 2x2 centre block that is never overwritten. */
    const int mid = length / 2;

    for (int n = 0; n < iterations; n++) {
        /* The two loops stay perfectly nested so that collapse(2) is valid;
         * every variable declared in the body is private to its thread. */
        #pragma omp parallel for collapse(2)
        for (int i = 1; i < length - 1; i++) {
            for (int j = 1; j < length - 1; j++) {
                if ((i == mid - 1 || i == mid) && (j == mid - 1 || j == mid)) {
                    continue;
                }

                /* Widen before multiplying: i * length can exceed INT_MAX. */
                const ptrdiff_t at = (ptrdiff_t)i * length + j;
                const double *centre = input + at;
                const double *above = centre - length;
                const double *below = centre + length;

                /* Row sums first, then their total: the same evaluation
                 * order as the original, so rounding is unchanged. */
                const double top = above[-1] + above[0] + above[1];
                const double middle = centre[-1] + centre[0] + centre[1];
                const double bottom = below[-1] + below[0] + below[1];

                output[at] = (top + middle + bottom) / 9;
            }
        }

        /* The grid just written becomes the source of the next pass. */
        double *temp = input;
        input = output;
        output = temp;
    }
}
Event Timeline
Log In to Comment