Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F98470626
neighbor_cpu.ptx
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Jan 13, 12:15
Size
4 KB
Mime Type
text/x-asm
Expires
Wed, Jan 15, 12:15 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
23590037
Attached To
rLAMMPS lammps
neighbor_cpu.ptx
View Options
.version 2.3
.target sm_20
.address_size 64
// compiled with /usr/local/cuda/open64/lib//be
// nvopencc 4.0 built on 2011-05-12
//-----------------------------------------------------------
// Compiling /tmp/tmpxft_00009a34_00000000-9_lal_neighbor_cpu.cpp3.i (/home/sjplimp/ccBI#.V8lyjI)
//-----------------------------------------------------------
//-----------------------------------------------------------
// Options:
//-----------------------------------------------------------
// Target:ptx, ISA:sm_20, Endian:little, Pointer Size:64
// -O3 (Optimization level)
// -g0 (Debug level)
// -m2 (Report advisories)
//-----------------------------------------------------------
.file 1 "<command-line>"
.file 2 "/tmp/tmpxft_00009a34_00000000-8_lal_neighbor_cpu.cudafe2.gpu"
.file 3 "/usr/lib/gcc/x86_64-redhat-linux/4.4.5/include/stddef.h"
.file 4 "/usr/local/cuda/include/crt/device_runtime.h"
.file 5 "/usr/local/cuda/include/host_defines.h"
.file 6 "/usr/local/cuda/include/builtin_types.h"
.file 7 "/usr/local/cuda/include/device_types.h"
.file 8 "/usr/local/cuda/include/driver_types.h"
.file 9 "/usr/local/cuda/include/surface_types.h"
.file 10 "/usr/local/cuda/include/texture_types.h"
.file 11 "/usr/local/cuda/include/vector_types.h"
.file 12 "/usr/local/cuda/include/device_launch_parameters.h"
.file 13 "/usr/local/cuda/include/crt/storage_class.h"
.file 14 "/usr/include/bits/types.h"
.file 15 "/usr/include/time.h"
.file 16 "lal_neighbor_cpu.cu"
.file 17 "/usr/local/cuda/include/common_functions.h"
.file 18 "/usr/local/cuda/include/math_functions.h"
.file 19 "/usr/local/cuda/include/math_constants.h"
.file 20 "/usr/local/cuda/include/device_functions.h"
.file 21 "/usr/local/cuda/include/sm_11_atomic_functions.h"
.file 22 "/usr/local/cuda/include/sm_12_atomic_functions.h"
.file 23 "/usr/local/cuda/include/sm_13_double_functions.h"
.file 24 "/usr/local/cuda/include/sm_20_atomic_functions.h"
.file 25 "/usr/local/cuda/include/sm_20_intrinsics.h"
.file 26 "/usr/local/cuda/include/surface_functions.h"
.file 27 "/usr/local/cuda/include/texture_fetch_functions.h"
.file 28 "/usr/local/cuda/include/math_functions_dbl_ptx3.h"
//-----------------------------------------------------------
// kernel_unpack(dev_nbor, dev_ij, inum, t_per_atom)
//
// Copies a run of 32-bit words from dev_ij into dev_nbor, writing the
// destination with a stride of inum * t_per_atom words.
//
// Per-thread pseudo-code (all indices in 32-bit words):
//   ii     = ctaid.x * (ntid.x / t_per_atom) + tid.x / t_per_atom
//   offset = tid.x & (t_per_atom - 1)
//   if (ii >= inum) exit
//   cnt    = dev_nbor[ii + inum]
//   start  = dev_nbor[ii + 2*inum]
//   src    = dev_ij + start + offset
//   end    = dev_ij + start + cnt
//   dst    = &dev_nbor[ii + 2*inum] + ii*(t_per_atom - 1) + offset
//   for ( ; src < end; src += 1, dst += inum * t_per_atom) *dst = *src
//
// Parameters:
//   dev_nbor    (.u64) base address; read and written with ld/st.global
//   dev_ij      (.u64) base address; read with ld.global
//   inum        (.s32) upper bound on ii, also used in the index math
//   t_per_atom  (.s32) divisor for the thread/block indices and mask
//                      source (t_per_atom - 1)
//
// NOTE(review): offset is computed with a bit mask, so it presumably
// expects t_per_atom to be a power of two; t_per_atom is also used as a
// divisor with no zero check. Confirm both against the host-side caller.
//
// Changes relative to the compiler output: sign-extensions whose results
// were never read and the unused loop trip-count computation were
// removed, and the loop-invariant byte stride is now computed once before
// the loop. Live register names are unchanged; the .reg counts are left
// as emitted (unused virtual registers are harmless).
//-----------------------------------------------------------
.entry kernel_unpack (
    .param .u64 __cudaparm_kernel_unpack_dev_nbor,
    .param .u64 __cudaparm_kernel_unpack_dev_ij,
    .param .s32 __cudaparm_kernel_unpack_inum,
    .param .s32 __cudaparm_kernel_unpack_t_per_atom)
{
    .reg .u32 %r<19>;
    .reg .u64 %rd<33>;
    .reg .pred %p<5>;
    .loc 16 21 0
$LDWbegin_kernel_unpack:
    // ii (%r8) = tid.x / t_per_atom + ctaid.x * (ntid.x / t_per_atom)
    ld.param.s32 %r1, [__cudaparm_kernel_unpack_t_per_atom];
    cvt.s32.u32 %r2, %tid.x;
    div.s32 %r3, %r2, %r1;
    cvt.s32.u32 %r4, %ntid.x;
    div.s32 %r5, %r4, %r1;
    cvt.s32.u32 %r6, %ctaid.x;
    mul.lo.s32 %r7, %r6, %r5;
    add.s32 %r8, %r3, %r7;

    // Threads with ii >= inum have nothing to do.
    ld.param.s32 %r9, [__cudaparm_kernel_unpack_inum];
    setp.ge.s32 %p1, %r8, %r9;
    @%p1 bra $Lt_0_2050;

    .loc 16 30 0
    // %rd6 = &dev_nbor[ii + inum]   (index sum done in 64 bits)
    cvt.s64.s32 %rd1, %r9;
    ld.param.u64 %rd2, [__cudaparm_kernel_unpack_dev_nbor];
    cvt.s64.s32 %rd3, %r8;
    add.u64 %rd4, %rd3, %rd1;
    mul.lo.u64 %rd5, %rd4, 4;
    add.u64 %rd6, %rd2, %rd5;
    // %rd8 = &dev_nbor[ii + 2*inum]
    mul.wide.s32 %rd7, %r9, 4;
    add.u64 %rd8, %rd6, %rd7;
    // %rd12 = dev_ij + start, where start = dev_nbor[ii + 2*inum]
    ld.param.u64 %rd9, [__cudaparm_kernel_unpack_dev_ij];
    ld.global.s32 %r10, [%rd8+0];
    mul.wide.s32 %rd11, %r10, 4;
    add.u64 %rd12, %rd9, %rd11;

    .loc 16 31 0
    // %rd15 = end of the source run = %rd12 + cnt, cnt = dev_nbor[ii + inum]
    ld.global.s32 %r11, [%rd6+0];
    mul.wide.s32 %rd14, %r11, 4;
    add.u64 %rd15, %rd12, %rd14;

    .loc 16 33 0
    // offset (%r13) = tid.x & (t_per_atom - 1)
    // %rd18 (dst)   = %rd8 + 4 * (offset + ii * (t_per_atom - 1))
    // The word index is formed with 32-bit arithmetic, then widened.
    sub.s32 %r12, %r1, 1;
    and.b32 %r13, %r12, %r2;
    mul.lo.s32 %r14, %r12, %r8;
    add.s32 %r15, %r13, %r14;
    mul.wide.s32 %rd17, %r15, 4;
    add.u64 %rd18, %rd8, %rd17;

    .loc 16 34 0
    // %rd21 (src) = %rd12 + 4 * offset; skip the loop when src >= end.
    mul.wide.s32 %rd20, %r13, 4;
    add.u64 %rd21, %rd12, %rd20;
    setp.ge.u64 %p2, %rd21, %rd15;
    @%p2 bra $Lt_0_2562;

    // Destination stride in bytes: 4 * (inum * t_per_atom). The product
    // is 32-bit (mul.lo.s32) before widening. Loop-invariant, so it is
    // computed once here rather than on every iteration.
    mul.lo.s32 %r16, %r9, %r1;
    mul.wide.s32 %rd31, %r16, 4;
$Lt_0_3074:
    //<loop> Loop body line 34, nesting depth: 1, estimated iterations: unknown
    .loc 16 37 0
    // *dst = *src
    ld.global.s32 %r17, [%rd21+0];
    st.global.s32 [%rd18+0], %r17;
    .loc 16 38 0
    // dst += stride; src += one word. src and end are both %rd12 plus a
    // multiple of 4 and src < end on entry, so src reaches end exactly
    // and the != test terminates the loop.
    add.u64 %rd18, %rd18, %rd31;
    add.u64 %rd21, %rd21, 4;
    setp.ne.u64 %p3, %rd21, %rd15;
    @%p3 bra $Lt_0_3074;
$Lt_0_2562:
$Lt_0_2050:
    .loc 16 41 0
    exit;
$LDWend_kernel_unpack:
} // kernel_unpack
Event Timeline
Log In to Comment