/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
#define KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <cstdlib>
#include <cmath>
#include <limits>
#include <Kokkos_Pair.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <BoxElemFixture.hpp>
#include <HexElement.hpp>
#include <CGSolve.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
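//----------------------------------------------------------------------------
// Overview (descriptive comment added for orientation): this header collects
// the functors used by the FENL finite-element example:
//
//   NodeNodeGraph        - builds the node-node CRS sparsity pattern of the
//                          jacobian from the element->node connectivity, plus
//                          an element -> graph-entry index map.
//   NodeElemGatherFill   - builds a node -> (element, local node) graph and
//                          gather-adds per-element contributions into the
//                          global residual and jacobian.
//   ElementComputation   - computes element residuals and jacobians for the
//                          example's nonlinear problem (see the formulas in
//                          contributeResidualJacobian), either scattering them
//                          atomically or storing them per element.
//   DirichletComputation - imposes Dirichlet boundary conditions on the
//                          solution, residual, and jacobian.
//----------------------------------------------------------------------------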
namespace Kokkos {
namespace Example {
namespace FENL {

template< class ElemNodeIdView , class CrsGraphType , unsigned ElemNode >
class NodeNodeGraph {
public:

  typedef typename ElemNodeIdView::execution_space  execution_space ;
  typedef pair<unsigned,unsigned>                    key_type ;

  typedef Kokkos::UnorderedMap< key_type , void , execution_space >  SetType ;
  typedef typename CrsGraphType::row_map_type::non_const_type        RowMapType ;
  typedef Kokkos::View< unsigned , execution_space >                 UnsignedValue ;

  // Static dimensions of 0 generate compiler warnings or errors.
  typedef Kokkos::View< unsigned*[ElemNode][ElemNode] , execution_space >  ElemGraphType ;

  struct TagFillNodeSet {};
  struct TagScanNodeCount {};
  struct TagFillGraphEntries {};
  struct TagSortGraphEntries {};
  struct TagFillElementGraph {};

private:

  enum PhaseType { FILL_NODE_SET ,
                   SCAN_NODE_COUNT ,
                   FILL_GRAPH_ENTRIES ,
                   SORT_GRAPH_ENTRIES ,
                   FILL_ELEMENT_GRAPH };

  const unsigned        node_count ;
  const ElemNodeIdView  elem_node_id ;
  UnsignedValue         row_total ;
  RowMapType            row_count ;
  RowMapType            row_map ;
  SetType               node_node_set ;
  PhaseType             phase ;

public:

  CrsGraphType   graph ;
  ElemGraphType  elem_graph ;

  struct Times {
    double ratio ;
    double fill_node_set ;
    double scan_node_count ;
    double fill_graph_entries ;
    double sort_graph_entries ;
    double fill_element_graph ;
  };

  NodeNodeGraph( const ElemNodeIdView & arg_elem_node_id ,
                 const unsigned         arg_node_count ,
                 Times                & results )
    : node_count( arg_node_count )
    , elem_node_id( arg_elem_node_id )
    , row_total( "row_total" )
    , row_count( Kokkos::ViewAllocateWithoutInitializing("row_count") , node_count ) // will deep_copy to 0 inside loop
    , row_map( "graph_row_map" , node_count + 1 )
    , node_node_set()
    , phase( FILL_NODE_SET )
    , graph()
    , elem_graph()
  {
    //--------------------------------
    // Guess at capacity required for the map:

    Kokkos::Impl::Timer wall_clock ;

    wall_clock.reset();
    phase = FILL_NODE_SET ;

    // upper bound on the capacity
    size_t set_capacity = ( 28ull * node_count ) / 2 ;

    unsigned failed_insert_count = 0 ;

    do {
      // Zero the row count to restart the fill
      Kokkos::deep_copy( row_count , 0u );

      node_node_set = SetType( ( set_capacity += failed_insert_count ) );

      // May be larger than requested:
      set_capacity = node_node_set.capacity();

      Kokkos::parallel_reduce( Kokkos::RangePolicy<execution_space,TagFillNodeSet>( 0 , elem_node_id.dimension_0() )
                             , *this
                             , failed_insert_count );

    } while ( failed_insert_count );

    execution_space::fence();
    results.ratio = (double) node_node_set.size() / (double) node_node_set.capacity();
    results.fill_node_set = wall_clock.seconds();
    //--------------------------------

    wall_clock.reset();
    phase = SCAN_NODE_COUNT ;

    // Exclusive scan of row_count into row_map
    // including the final total in the 'node_count + 1' position.
    // Zero the 'row_count' values.
    Kokkos::parallel_scan( node_count , *this );

    // Zero the row count for the fill:
    Kokkos::deep_copy( row_count , 0u );

    unsigned graph_entry_count = 0 ;

    Kokkos::deep_copy( graph_entry_count , row_total );

    // Assign graph's row_map and allocate graph's entries
    graph.row_map = row_map ;
    graph.entries = typename CrsGraphType::entries_type( "graph_entries" , graph_entry_count );

    //--------------------------------
    // Fill graph's entries from the (node,node) set.

    execution_space::fence();
    results.scan_node_count = wall_clock.seconds();

    wall_clock.reset();
    phase = FILL_GRAPH_ENTRIES ;
    Kokkos::parallel_for( node_node_set.capacity() , *this );

    execution_space::fence();
    results.fill_graph_entries = wall_clock.seconds();

    //--------------------------------
    // Done with the temporary sets and arrays
    wall_clock.reset();
    phase = SORT_GRAPH_ENTRIES ;

    row_total = UnsignedValue();
    row_count = RowMapType();
    row_map   = RowMapType();
    node_node_set.clear();

    //--------------------------------

    Kokkos::parallel_for( node_count , *this );

    execution_space::fence();
    results.sort_graph_entries = wall_clock.seconds();

    //--------------------------------
    // Element-to-graph mapping:
    wall_clock.reset();
    phase = FILL_ELEMENT_GRAPH ;

    elem_graph = ElemGraphType( "elem_graph" , elem_node_id.dimension_0() );

    Kokkos::parallel_for( elem_node_id.dimension_0() , *this );

    execution_space::fence();
    results.fill_element_graph = wall_clock.seconds();
  }

  //------------------------------------
  // parallel_for: create map and count row length

  KOKKOS_INLINE_FUNCTION
  void operator()( const TagFillNodeSet & , unsigned ielem , unsigned & count ) const
  {
    // Loop over element's (row_local_node,col_local_node) pairs:
    for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.dimension_1() ; ++row_local_node ) {

      const unsigned row_node = elem_node_id( ielem , row_local_node );

      for ( unsigned col_local_node = row_local_node ; col_local_node < elem_node_id.dimension_1() ; ++col_local_node ) {

        const unsigned col_node = elem_node_id( ielem , col_local_node );

        // If either node is locally owned then insert the pair into the unordered map:
        if ( row_node < row_count.dimension_0() || col_node < row_count.dimension_0() ) {

          const key_type key = ( row_node < col_node ) ? make_pair( row_node , col_node )
                                                       : make_pair( col_node , row_node ) ;

          const typename SetType::insert_result result = node_node_set.insert( key );

          // A successful insert: the first time this pair was added
          if ( result.success() ) {

            // If row node is owned then increment count
            if ( row_node < row_count.dimension_0() ) { atomic_fetch_add( & row_count( row_node ) , 1 ); }

            // If column node is owned and not equal to row node then increment count
            if ( col_node < row_count.dimension_0() && col_node != row_node ) { atomic_fetch_add( & row_count( col_node ) , 1 ); }
          }
          else if ( result.failed() ) {
            ++count ;
          }
        }
      }
    }
  }

  KOKKOS_INLINE_FUNCTION
  void fill_graph_entries( const unsigned iset ) const
  {
    if ( node_node_set.valid_at( iset ) ) {
      // Add each entry to the graph entries.

      const key_type key = node_node_set.key_at( iset ) ;
      const unsigned row_node = key.first ;
      const unsigned col_node = key.second ;

      if ( row_node < row_count.dimension_0() ) {
        const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , 1 );
        graph.entries( offset ) = col_node ;
      }

      if ( col_node < row_count.dimension_0() && col_node != row_node ) {
        const unsigned offset = graph.row_map( col_node ) + atomic_fetch_add( & row_count( col_node ) , 1 );
        graph.entries( offset ) = row_node ;
      }
    }
  }

  KOKKOS_INLINE_FUNCTION
  void sort_graph_entries( const unsigned irow ) const
  {
    const unsigned row_beg = graph.row_map( irow );
    const unsigned row_end = graph.row_map( irow + 1 );
    for ( unsigned i = row_beg + 1 ; i < row_end ; ++i ) {
      const unsigned col = graph.entries( i );
      unsigned j = i ;
      for ( ; row_beg < j && col < graph.entries( j - 1 ) ; --j ) {
        graph.entries( j ) = graph.entries( j - 1 );
      }
      graph.entries( j ) = col ;
    }
  }

  KOKKOS_INLINE_FUNCTION
  void fill_elem_graph_map( const unsigned ielem ) const
  {
    for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.dimension_1() ; ++row_local_node ) {

      const unsigned row_node = elem_node_id( ielem , row_local_node );

      for ( unsigned col_local_node = 0 ; col_local_node < elem_node_id.dimension_1() ; ++col_local_node ) {

        const unsigned col_node = elem_node_id( ielem , col_local_node );

        unsigned entry = ~0u ;

        if ( row_node + 1 < graph.row_map.dimension_0() ) {

          const unsigned entry_end = graph.row_map( row_node + 1 );

          entry = graph.row_map( row_node );

          for ( ; entry < entry_end && graph.entries( entry ) != col_node ; ++entry );

          if ( entry == entry_end ) entry = ~0u ;
        }

        elem_graph( ielem , row_local_node , col_local_node ) = entry ;
      }
    }
  }

  KOKKOS_INLINE_FUNCTION
  void operator()( const unsigned iwork ) const
  {
    /*
    if ( phase == FILL_NODE_SET ) {
      operator()( TagFillNodeSet() , iwork );
    }
    else */
    if ( phase == FILL_GRAPH_ENTRIES ) {
      fill_graph_entries( iwork );
    }
    else if ( phase == SORT_GRAPH_ENTRIES ) {
      sort_graph_entries( iwork );
    }
    else if ( phase == FILL_ELEMENT_GRAPH ) {
      fill_elem_graph_map( iwork );
    }
  }

  //------------------------------------
  // parallel_scan: row offsets

  typedef unsigned value_type ;

  KOKKOS_INLINE_FUNCTION
  void operator()( const unsigned irow , unsigned & update , const bool final ) const
  {
    // exclusive scan
    if ( final ) { row_map( irow ) = update ; }

    update += row_count( irow );

    if ( final ) {
      if ( irow + 1 == row_count.dimension_0() ) {
        row_map( irow + 1 ) = update ;
        row_total()         = update ;
      }
    }
  }

  // For the reduce phase:
  KOKKOS_INLINE_FUNCTION
  void init( const TagFillNodeSet & , unsigned & update ) const { update = 0 ; }

  KOKKOS_INLINE_FUNCTION
  void join( const TagFillNodeSet &
           , volatile unsigned & update
           , volatile const unsigned & input ) const { update += input ; }

  // For the scan phase:
  KOKKOS_INLINE_FUNCTION
  void init( unsigned & update ) const { update = 0 ; }

  KOKKOS_INLINE_FUNCTION
  void join( volatile unsigned & update
           , volatile const unsigned & input ) const { update += input ; }

  //------------------------------------
};

} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
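//----------------------------------------------------------------------------
// Usage sketch (illustrative comment only, not compiled): how a driver might
// build the jacobian sparsity with NodeNodeGraph above.  'FixtureType',
// 'MatrixType', and the local variable names are assumptions standing in for
// the BoxElemFixture and CrsMatrix types used elsewhere in this example.
//
//   typedef Kokkos::Example::FENL::NodeNodeGraph<
//             typename FixtureType::elem_node_type ,
//             typename MatrixType::StaticCrsGraphType ,
//             FixtureType::ElemNode > NodeNodeGraphType ;
//
//   typename NodeNodeGraphType::Times graph_times ;
//
//   // The constructor runs every phase: fill the (node,node) set, scan the
//   // row counts into offsets, fill and sort the CRS entries, and build the
//   // element -> graph-entry map.
//   const NodeNodeGraphType mesh_to_graph( fixture.elem_node() ,
//                                          fixture.node_count_owned() ,
//                                          graph_times );
//
//   // The driver then allocates its sparse jacobian over mesh_to_graph.graph
//   // and keeps mesh_to_graph.elem_graph for element assembly.
//----------------------------------------------------------------------------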
namespace Kokkos {
namespace Example {
namespace FENL {

template< class ElemCompType >
class NodeElemGatherFill {
public:

  typedef typename ElemCompType::execution_space     execution_space ;
  typedef typename ElemCompType::vector_type         vector_type ;
  typedef typename ElemCompType::sparse_matrix_type  sparse_matrix_type ;
  typedef typename ElemCompType::elem_node_type      elem_node_type ;
  typedef typename ElemCompType::elem_vectors_type   elem_vectors_type ;
  typedef typename ElemCompType::elem_matrices_type  elem_matrices_type ;
  typedef typename ElemCompType::elem_graph_type     elem_graph_type ;

  static const unsigned ElemNodeCount = ElemCompType::ElemNodeCount ;

  //------------------------------------

private:

  typedef Kokkos::StaticCrsGraph< unsigned[2] , execution_space >  CrsGraphType ;
  typedef typename CrsGraphType::row_map_type::non_const_type      RowMapType ;
  typedef Kokkos::View< unsigned , execution_space >               UnsignedValue ;

  enum PhaseType { FILL_NODE_COUNT ,
                   SCAN_NODE_COUNT ,
                   FILL_GRAPH_ENTRIES ,
                   SORT_GRAPH_ENTRIES ,
                   GATHER_FILL };

  const elem_node_type   elem_node_id ;
  const elem_graph_type  elem_graph ;
  UnsignedValue          row_total ;
  RowMapType             row_count ;
  RowMapType             row_map ;
  CrsGraphType           graph ;
  vector_type            residual ;
  sparse_matrix_type     jacobian ;
  elem_vectors_type      elem_residual ;
  elem_matrices_type     elem_jacobian ;
  PhaseType              phase ;

public:

  NodeElemGatherFill()
    : elem_node_id()
    , elem_graph()
    , row_total()
    , row_count()
    , row_map()
    , graph()
    , residual()
    , jacobian()
    , elem_residual()
    , elem_jacobian()
    , phase( FILL_NODE_COUNT )
    {}

  NodeElemGatherFill( const NodeElemGatherFill & rhs )
    : elem_node_id(  rhs.elem_node_id )
    , elem_graph(    rhs.elem_graph )
    , row_total(     rhs.row_total )
    , row_count(     rhs.row_count )
    , row_map(       rhs.row_map )
    , graph(         rhs.graph )
    , residual(      rhs.residual )
    , jacobian(      rhs.jacobian )
    , elem_residual( rhs.elem_residual )
    , elem_jacobian( rhs.elem_jacobian )
    , phase(         rhs.phase )
    {}

  NodeElemGatherFill( const elem_node_type     & arg_elem_node_id ,
                      const elem_graph_type    & arg_elem_graph ,
                      const vector_type        & arg_residual ,
                      const sparse_matrix_type & arg_jacobian ,
                      const elem_vectors_type  & arg_elem_residual ,
                      const elem_matrices_type & arg_elem_jacobian )
    : elem_node_id( arg_elem_node_id )
    , elem_graph( arg_elem_graph )
    , row_total( "row_total" )
    , row_count( "row_count" , arg_residual.dimension_0() )
    , row_map( "graph_row_map" , arg_residual.dimension_0() + 1 )
    , graph()
    , residual( arg_residual )
    , jacobian( arg_jacobian )
    , elem_residual( arg_elem_residual )
    , elem_jacobian( arg_elem_jacobian )
    , phase( FILL_NODE_COUNT )
  {
    //--------------------------------
    // Count node->element relations

    phase = FILL_NODE_COUNT ;

    Kokkos::parallel_for( elem_node_id.dimension_0() , *this );

    //--------------------------------

    phase = SCAN_NODE_COUNT ;

    // Exclusive scan of row_count into row_map
    // including the final total in the 'node_count + 1' position.
    // Zero the 'row_count' values.
    Kokkos::parallel_scan( residual.dimension_0() , *this );

    // Zero the row count for the fill:
    Kokkos::deep_copy( row_count , typename RowMapType::value_type(0) );

    unsigned graph_entry_count = 0 ;

    Kokkos::deep_copy( graph_entry_count , row_total );

    // Assign graph's row_map and allocate graph's entries
    graph.row_map = row_map ;

    typedef typename CrsGraphType::entries_type graph_entries_type ;

    graph.entries = graph_entries_type( "graph_entries" , graph_entry_count );

    //--------------------------------
    // Fill graph's entries from the element->node relations.

    phase = FILL_GRAPH_ENTRIES ;

    Kokkos::deep_copy( row_count , 0u );
    Kokkos::parallel_for( elem_node_id.dimension_0() , *this );

    execution_space::fence();

    //--------------------------------
    // Done with the temporary sets and arrays

    row_total = UnsignedValue();
    row_count = RowMapType();
    row_map   = RowMapType();

    //--------------------------------

    phase = SORT_GRAPH_ENTRIES ;

    Kokkos::parallel_for( residual.dimension_0() , *this );

    execution_space::fence();

    phase = GATHER_FILL ;
  }

  void apply() const
  {
    Kokkos::parallel_for( residual.dimension_0() , *this );
  }

  //------------------------------------
  //------------------------------------
  // parallel_for: Count node->element pairs

  KOKKOS_INLINE_FUNCTION
  void fill_node_count( const unsigned ielem ) const
  {
    for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.dimension_1() ; ++row_local_node ) {
      const unsigned row_node = elem_node_id( ielem , row_local_node );
      if ( row_node < row_count.dimension_0() ) {
        atomic_fetch_add( & row_count( row_node ) , 1 );
      }
    }
  }

  KOKKOS_INLINE_FUNCTION
  void fill_graph_entries( const unsigned ielem ) const
  {
    for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.dimension_1() ; ++row_local_node ) {

      const unsigned row_node = elem_node_id( ielem , row_local_node );

      if ( row_node < row_count.dimension_0() ) {

        const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , 1 );

        graph.entries( offset , 0 ) = ielem ;
        graph.entries( offset , 1 ) = row_local_node ;
      }
    }
  }

  KOKKOS_INLINE_FUNCTION
  void sort_graph_entries( const unsigned irow ) const
  {
    const unsigned row_beg = graph.row_map( irow );
    const unsigned row_end = graph.row_map( irow + 1 );
    for ( unsigned i = row_beg + 1 ; i < row_end ; ++i ) {
      const unsigned elem  = graph.entries( i , 0 );
      const unsigned local = graph.entries( i , 1 );
      unsigned j = i ;
      for ( ; row_beg < j && elem < graph.entries( j - 1 , 0 ) ; --j ) {
        graph.entries( j , 0 ) = graph.entries( j - 1 , 0 );
        graph.entries( j , 1 ) = graph.entries( j - 1 , 1 );
      }
      graph.entries( j , 0 ) = elem ;
      graph.entries( j , 1 ) = local ;
    }
  }

  //------------------------------------

  KOKKOS_INLINE_FUNCTION
  void gather_fill( const unsigned irow ) const
  {
    const unsigned node_elem_begin = graph.row_map( irow );
    const unsigned node_elem_end   = graph.row_map( irow + 1 );

    // for each element that a node belongs to
    for ( unsigned i = node_elem_begin ; i < node_elem_end ; i++ ) {

      const unsigned elem_id   = graph.entries( i , 0 );
      const unsigned row_index = graph.entries( i , 1 );

      residual( irow ) += elem_residual( elem_id , row_index );

      // for each node in a particular related element
      // gather the contents of the element stiffness
      // matrix that belong in irow
      for ( unsigned j = 0 ; j < ElemNodeCount ; ++j ) {
        const unsigned A_index = elem_graph( elem_id , row_index , j );

        jacobian.coeff( A_index ) += elem_jacobian( elem_id , row_index , j );
      }
    }
  }

  //------------------------------------

  KOKKOS_INLINE_FUNCTION
  void operator()( const unsigned iwork ) const
  {
    if ( phase == FILL_NODE_COUNT ) {
      fill_node_count( iwork );
    }
    else if ( phase == FILL_GRAPH_ENTRIES ) {
      fill_graph_entries( iwork );
    }
    else if ( phase == SORT_GRAPH_ENTRIES ) {
      sort_graph_entries( iwork );
    }
    else if ( phase == GATHER_FILL ) {
      gather_fill( iwork );
    }
  }

  //------------------------------------
  // parallel_scan: row offsets

  typedef unsigned value_type ;

  KOKKOS_INLINE_FUNCTION
  void operator()( const unsigned irow , unsigned & update , const bool final ) const
  {
    // exclusive scan
    if ( final ) { row_map( irow ) = update ; }

    update += row_count( irow );

    if ( final ) {
      if ( irow + 1 == row_count.dimension_0() ) {
        row_map( irow + 1 ) = update ;
        row_total()         = update ;
      }
    }
  }

  KOKKOS_INLINE_FUNCTION
  void init( unsigned & update ) const { update = 0 ; }

  KOKKOS_INLINE_FUNCTION
  void join( volatile unsigned & update
           , const volatile unsigned & input ) const { update += input ; }
};

} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
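//----------------------------------------------------------------------------
// Usage sketch (illustrative comment only): NodeElemGatherFill is the
// gather-add alternative to atomic assembly.  ElementComputation first stores
// per-element residuals and jacobians; this functor then scatters them into
// the global residual vector and sparse matrix.  The variable names below are
// assumptions; the constructor signature is the one declared above.
//
//   typedef Kokkos::Example::FENL::NodeElemGatherFill< ElemCompType > GatherFillType ;
//
//   // The constructor builds the node -> (element, local node) graph once.
//   const GatherFillType gatherfill( fixture.elem_node() ,
//                                    mesh_to_graph.elem_graph ,
//                                    residual , jacobian ,
//                                    elemcomp.elem_residuals ,
//                                    elemcomp.elem_jacobians );
//
//   elemcomp.apply();    // per-element fill
//   gatherfill.apply();  // gather-add into residual and jacobian
//----------------------------------------------------------------------------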
namespace Kokkos {
namespace Example {
namespace FENL {

template< class FiniteElementMeshType , class SparseMatrixType >
class ElementComputation ;

template< class ExecSpace , BoxElemPart::ElemOrder Order , class CoordinateMap , typename ScalarType >
class ElementComputation<
  Kokkos::Example::BoxElemFixture< ExecSpace , Order , CoordinateMap > ,
  Kokkos::Example::CrsMatrix< ScalarType , ExecSpace > >
{
public:

  typedef Kokkos::Example::BoxElemFixture< ExecSpace , Order , CoordinateMap >  mesh_type ;
  typedef Kokkos::Example::HexElement_Data< mesh_type::ElemNode >               element_data_type ;

  typedef Kokkos::Example::CrsMatrix< ScalarType , ExecSpace >  sparse_matrix_type ;
  typedef typename sparse_matrix_type::StaticCrsGraphType       sparse_graph_type ;

  typedef ExecSpace   execution_space ;
  typedef ScalarType  scalar_type ;

  static const unsigned SpatialDim       = element_data_type::spatial_dimension ;
  static const unsigned TensorDim        = SpatialDim * SpatialDim ;
  static const unsigned ElemNodeCount    = element_data_type::element_node_count ;
  static const unsigned FunctionCount    = element_data_type::function_count ;
  static const unsigned IntegrationCount = element_data_type::integration_count ;

  //------------------------------------

  typedef typename mesh_type::node_coord_type  node_coord_type ;
  typedef typename mesh_type::elem_node_type   elem_node_type ;

  typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space >  elem_matrices_type ;
  typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space >                 elem_vectors_type ;

  typedef Kokkos::View< scalar_type* , execution_space >  vector_type ;

  typedef typename NodeNodeGraph< elem_node_type , sparse_graph_type , ElemNodeCount >::ElemGraphType  elem_graph_type ;

  //------------------------------------
  //------------------------------------
  // Computational data:

  const element_data_type   elem_data ;
  const elem_node_type      elem_node_ids ;
  const node_coord_type     node_coords ;
  const elem_graph_type     elem_graph ;
  const elem_matrices_type  elem_jacobians ;
  const elem_vectors_type   elem_residuals ;
  const vector_type         solution ;
  const vector_type         residual ;
  const sparse_matrix_type  jacobian ;
  const scalar_type         coeff_K ;

  ElementComputation( const ElementComputation & rhs )
    : elem_data()
    , elem_node_ids(  rhs.elem_node_ids )
    , node_coords(    rhs.node_coords )
    , elem_graph(     rhs.elem_graph )
    , elem_jacobians( rhs.elem_jacobians )
    , elem_residuals( rhs.elem_residuals )
    , solution(       rhs.solution )
    , residual(       rhs.residual )
    , jacobian(       rhs.jacobian )
    , coeff_K(        rhs.coeff_K )
    {}

  // If the element->sparse_matrix graph is provided then perform atomic updates.
  // Otherwise fill per-element contributions for subsequent gather-add into a residual and jacobian.
  ElementComputation( const mesh_type          & arg_mesh ,
                      const scalar_type          arg_coeff_K ,
                      const vector_type        & arg_solution ,
                      const elem_graph_type    & arg_elem_graph ,
                      const sparse_matrix_type & arg_jacobian ,
                      const vector_type        & arg_residual )
    : elem_data()
    , elem_node_ids( arg_mesh.elem_node() )
    , node_coords(   arg_mesh.node_coord() )
    , elem_graph(    arg_elem_graph )
    , elem_jacobians()
    , elem_residuals()
    , solution( arg_solution )
    , residual( arg_residual )
    , jacobian( arg_jacobian )
    , coeff_K(  arg_coeff_K )
    {}

  ElementComputation( const mesh_type   & arg_mesh ,
                      const scalar_type   arg_coeff_K ,
                      const vector_type & arg_solution )
    : elem_data()
    , elem_node_ids( arg_mesh.elem_node() )
    , node_coords(   arg_mesh.node_coord() )
    , elem_graph()
    , elem_jacobians( "elem_jacobians" , arg_mesh.elem_count() )
    , elem_residuals( "elem_residuals" , arg_mesh.elem_count() )
    , solution( arg_solution )
    , residual()
    , jacobian()
    , coeff_K( arg_coeff_K )
    {}

  //------------------------------------

  void apply() const
  {
    parallel_for( elem_node_ids.dimension_0() , *this );
  }

  //------------------------------------

  static const unsigned FLOPS_transform_gradients =
    /* Jacobian */           FunctionCount * TensorDim * 2 +
    /* Inverse jacobian */   TensorDim * 6 + 6 +
    /* Gradient transform */ FunctionCount * 15 ;

  KOKKOS_INLINE_FUNCTION
  float transform_gradients( const float grad[][ FunctionCount ] , // Gradient of bases master element
                             const double x[] ,
                             const double y[] ,
                             const double z[] ,
                             float dpsidx[] ,
                             float dpsidy[] ,
                             float dpsidz[] ) const
  {
    enum { j11 = 0 , j12 = 1 , j13 = 2 ,
           j21 = 3 , j22 = 4 , j23 = 5 ,
           j31 = 6 , j32 = 7 , j33 = 8 };

    // Jacobian accumulation:

    double J[ TensorDim ] = { 0 , 0 , 0 ,  0 , 0 , 0 ,  0 , 0 , 0 };

    for ( unsigned i = 0 ; i < FunctionCount ; ++i ) {
      const double x1 = x[i] ;
      const double x2 = y[i] ;
      const double x3 = z[i] ;

      const float g1 = grad[0][i] ;
      const float g2 = grad[1][i] ;
      const float g3 = grad[2][i] ;

      J[j11] += g1 * x1 ;
      J[j12] += g1 * x2 ;
      J[j13] += g1 * x3 ;

      J[j21] += g2 * x1 ;
      J[j22] += g2 * x2 ;
      J[j23] += g2 * x3 ;

      J[j31] += g3 * x1 ;
      J[j32] += g3 * x2 ;
      J[j33] += g3 * x3 ;
    }

    // Inverse jacobian:

    float invJ[ TensorDim ] = {
      static_cast<float>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
      static_cast<float>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
      static_cast<float>( J[j12] * J[j23] - J[j13] * J[j22] ) ,

      static_cast<float>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
      static_cast<float>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
      static_cast<float>( J[j13] * J[j21] - J[j11] * J[j23] ) ,

      static_cast<float>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
      static_cast<float>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
      static_cast<float>( J[j11] * J[j22] - J[j12] * J[j21] ) };

    const float detJ = J[j11] * invJ[j11] +
                       J[j21] * invJ[j12] +
                       J[j31] * invJ[j13] ;

    const float detJinv = 1.0 / detJ ;

    for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }

    // Transform gradients:

    for ( unsigned i = 0 ; i < FunctionCount ; ++i ) {
      const float g0 = grad[0][i];
      const float g1 = grad[1][i];
      const float g2 = grad[2][i];

      dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
      dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
      dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
    }

    return detJ ;
  }

  KOKKOS_INLINE_FUNCTION
  void contributeResidualJacobian( const float coeff_k ,
                                   const double dof_values[] ,
                                   const float dpsidx[] ,
                                   const float dpsidy[] ,
                                   const float dpsidz[] ,
                                   const float detJ ,
                                   const float integ_weight ,
                                   const float bases_vals[] ,
                                   double elem_res[] ,
                                   double elem_mat[][ FunctionCount ] ) const
  {
    double value_at_pt = 0 ;
    double gradx_at_pt = 0 ;
    double grady_at_pt = 0 ;
    double gradz_at_pt = 0 ;

    for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
      value_at_pt += dof_values[m] * bases_vals[m] ;
      gradx_at_pt += dof_values[m] * dpsidx[m] ;
      grady_at_pt += dof_values[m] * dpsidy[m] ;
      gradz_at_pt += dof_values[m] * dpsidz[m] ;
    }

    const scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ;
    const double res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
    const double mat_val = 2.0 * value_at_pt * detJ * integ_weight ;

    // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
    // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$

    for ( unsigned m = 0 ; m < FunctionCount ; ++m ) {
      double * const mat = elem_mat[m] ;
      const float bases_val_m = bases_vals[m];
      const float dpsidx_m    = dpsidx[m] ;
      const float dpsidy_m    = dpsidy[m] ;
      const float dpsidz_m    = dpsidz[m] ;

      elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt +
                                       dpsidy_m * grady_at_pt +
                                       dpsidz_m * gradz_at_pt ) +
                     res_val * bases_val_m ;

      for ( unsigned n = 0 ; n < FunctionCount ; n++ ) {

        mat[n] += k_detJ_weight * ( dpsidx_m * dpsidx[n] +
                                    dpsidy_m * dpsidy[n] +
                                    dpsidz_m * dpsidz[n] ) +
                  mat_val * bases_val_m * bases_vals[n];
      }
    }
  }

  KOKKOS_INLINE_FUNCTION
  void operator()( const unsigned ielem ) const
  {
    // Gather nodal coordinates and solution vector:

    double x[ FunctionCount ] ;
    double y[ FunctionCount ] ;
    double z[ FunctionCount ] ;
    double val[ FunctionCount ] ;
    unsigned node_index[ ElemNodeCount ];

    for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
      const unsigned ni = elem_node_ids( ielem , i );

      node_index[i] = ni ;

      x[i] = node_coords( ni , 0 );
      y[i] = node_coords( ni , 1 );
      z[i] = node_coords( ni , 2 );

      val[i] = solution( ni );
    }

    double elem_vec[ FunctionCount ] ;
    double elem_mat[ FunctionCount ][ FunctionCount ] ;

    for ( unsigned i = 0 ; i < FunctionCount ; i++ ) {
      elem_vec[i] = 0 ;
      for ( unsigned j = 0 ; j < FunctionCount ; j++ ) {
        elem_mat[i][j] = 0 ;
      }
    }

    for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
      float dpsidx[ FunctionCount ] ;
      float dpsidy[ FunctionCount ] ;
      float dpsidz[ FunctionCount ] ;

      const float detJ =
        transform_gradients( elem_data.gradients[i] , x , y , z ,
                             dpsidx , dpsidy , dpsidz );

      contributeResidualJacobian( coeff_K ,
                                  val , dpsidx , dpsidy , dpsidz ,
                                  detJ ,
                                  elem_data.weights[i] ,
                                  elem_data.values[i] ,
                                  elem_vec , elem_mat );
    }
#if 0
if ( 1 == ielem ) {
printf("ElemResidual { %f %f %f %f %f %f %f %f }\n",
elem_vec[0], elem_vec[1], elem_vec[2], elem_vec[3],
elem_vec[4], elem_vec[5], elem_vec[6], elem_vec[7]);
printf("ElemJacobian {\n");
for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
printf(" { %f %f %f %f %f %f %f %f }\n",
elem_mat[j][0], elem_mat[j][1], elem_mat[j][2], elem_mat[j][3],
elem_mat[j][4], elem_mat[j][5], elem_mat[j][6], elem_mat[j][7]);
}
printf("}\n");
}
#endif
    if ( ! residual.dimension_0() ) {
      for ( unsigned i = 0 ; i < FunctionCount ; i++ ) {
        elem_residuals( ielem , i ) = elem_vec[i] ;

        for ( unsigned j = 0 ; j < FunctionCount ; j++ ) {
          elem_jacobians( ielem , i , j ) = elem_mat[i][j] ;
        }
      }
    }
    else {
      for ( unsigned i = 0 ; i < FunctionCount ; i++ ) {
        const unsigned row = node_index[i] ;
        if ( row < residual.dimension_0() ) {
          atomic_fetch_add( & residual( row ) , elem_vec[i] );

          for ( unsigned j = 0 ; j < FunctionCount ; j++ ) {
            const unsigned entry = elem_graph( ielem , i , j );
            if ( entry != ~0u ) {
              atomic_fetch_add( & jacobian.coeff( entry ) , elem_mat[i][j] );
            }
          }
        }
      }
    }
  }
}; /* ElementComputation */
//----------------------------------------------------------------------------
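//----------------------------------------------------------------------------
// Usage sketch (illustrative comment only): the atomic-assembly mode of
// ElementComputation above.  When the element -> sparse-matrix graph from
// NodeNodeGraph is supplied, apply() scatters each element's residual and
// jacobian directly with atomic_fetch_add, so no separate gather pass is
// needed.  Variable names are assumptions; the constructor signature is the
// one declared above.
//
//   const Kokkos::Example::FENL::ElementComputation< FixtureType , MatrixType >
//     elemcomp( fixture , coeff_K , solution ,
//               mesh_to_graph.elem_graph , jacobian , residual );
//
//   elemcomp.apply();  // one parallel_for over elements assembles R and J
//----------------------------------------------------------------------------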
template< class FixtureType , class SparseMatrixType >
class DirichletComputation ;

template< class ExecSpace , BoxElemPart::ElemOrder Order , class CoordinateMap , typename ScalarType >
class DirichletComputation<
  Kokkos::Example::BoxElemFixture< ExecSpace , Order , CoordinateMap > ,
  Kokkos::Example::CrsMatrix< ScalarType , ExecSpace > >
{
public:

  typedef Kokkos::Example::BoxElemFixture< ExecSpace , Order , CoordinateMap >  mesh_type ;
  typedef typename mesh_type::node_coord_type                                   node_coord_type ;
  typedef typename node_coord_type::value_type                                  scalar_coord_type ;

  typedef Kokkos::Example::CrsMatrix< ScalarType , ExecSpace >  sparse_matrix_type ;
  typedef typename sparse_matrix_type::StaticCrsGraphType       sparse_graph_type ;

  typedef ExecSpace   execution_space ;
  typedef ScalarType  scalar_type ;

  //------------------------------------

  typedef Kokkos::View< scalar_type* , execution_space >  vector_type ;

  //------------------------------------
  // Computational data:

  const node_coord_type     node_coords ;
  const vector_type         solution ;
  const sparse_matrix_type  jacobian ;
  const vector_type         residual ;
  const scalar_type         bc_lower_value ;
  const scalar_type         bc_upper_value ;
  const scalar_coord_type   bc_lower_limit ;
  const scalar_coord_type   bc_upper_limit ;
  const unsigned            bc_plane ;
  const unsigned            node_count ;
        bool                init ;

  DirichletComputation( const mesh_type          & arg_mesh ,
                        const vector_type        & arg_solution ,
                        const sparse_matrix_type & arg_jacobian ,
                        const vector_type        & arg_residual ,
                        const unsigned             arg_bc_plane ,
                        const scalar_type          arg_bc_lower_value ,
                        const scalar_type          arg_bc_upper_value )
    : node_coords( arg_mesh.node_coord() )
    , solution( arg_solution )
    , jacobian( arg_jacobian )
    , residual( arg_residual )
    , bc_lower_value( arg_bc_lower_value )
    , bc_upper_value( arg_bc_upper_value )
    , bc_lower_limit( std::numeric_limits<scalar_coord_type>::epsilon() )
    , bc_upper_limit( scalar_coord_type(1) - std::numeric_limits<scalar_coord_type>::epsilon() )
    , bc_plane( arg_bc_plane )
    , node_count( arg_mesh.node_count_owned() )
    , init( false )
  {
    parallel_for( node_count , *this );
    init = true ;
  }

  void apply() const
  {
    parallel_for( node_count , *this );
  }

  //------------------------------------

  KOKKOS_INLINE_FUNCTION
  void operator()( const unsigned inode ) const
  {
    // Apply dirichlet boundary condition on the Solution and Residual vectors.
    // To maintain the symmetry of the original global stiffness matrix,
    // zero out the columns that correspond to boundary conditions, and
    // update the residual vector accordingly

    const unsigned iBeg = jacobian.graph.row_map[inode];
    const unsigned iEnd = jacobian.graph.row_map[inode+1];

    const scalar_coord_type c = node_coords( inode , bc_plane );
    const bool bc_lower = c <= bc_lower_limit ;
    const bool bc_upper = bc_upper_limit <= c ;

    if ( ! init ) {
      solution( inode ) = bc_lower ? bc_lower_value : (
                          bc_upper ? bc_upper_value : 0 );
    }
    else {
      if ( bc_lower || bc_upper ) {

        residual( inode ) = 0 ;

        // zero each value on the row, and leave a one
        // on the diagonal
        for ( unsigned i = iBeg ; i < iEnd ; ++i ) {
          jacobian.coeff( i ) = int(inode) == int( jacobian.graph.entries( i ) ) ? 1 : 0 ;
        }
      }
      else {

        // Find any columns that are boundary conditions.
        // Clear them and adjust the residual vector
        for ( unsigned i = iBeg ; i < iEnd ; ++i ) {
          const unsigned cnode = jacobian.graph.entries( i ) ;
          const scalar_coord_type cc = node_coords( cnode , bc_plane );

          if ( ( cc <= bc_lower_limit ) || ( bc_upper_limit <= cc ) ) {
            jacobian.coeff( i ) = 0 ;
          }
        }
      }
    }
  }
};

} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
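//----------------------------------------------------------------------------
// Usage sketch (illustrative comment only): imposing the boundary conditions
// with DirichletComputation above.  With bc_plane == 0 the constrained nodes
// are those whose x coordinate sits at the lower or upper limit of the unit
// box.  Variable names are assumptions; the constructor signature is the one
// declared above.
//
//   const Kokkos::Example::FENL::DirichletComputation< FixtureType , MatrixType >
//     dirichlet( fixture , solution , jacobian , residual ,
//                0 /* bc_plane */ , bc_lower_value , bc_upper_value );
//
//   // The constructor writes the boundary values into 'solution'; after each
//   // assembly, apply() zeroes the boundary rows/columns of the jacobian and
//   // the corresponding residual entries.
//   dirichlet.apply();
//----------------------------------------------------------------------------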
/* A Cuda-specific specialization for the element computation functor. */
#if defined( __CUDACC__ )
// #include <NonlinearElement_Cuda.hpp>
#endif
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP */