Page MenuHomec4science

neighbor_cpu.ptx
No OneTemporary

File Metadata

Created
Mon, Jan 13, 12:15

neighbor_cpu.ptx

.version 2.3
.target sm_20
.address_size 64
// compiled with /usr/local/cuda/open64/lib//be
// nvopencc 4.0 built on 2011-05-12
//-----------------------------------------------------------
// Compiling /tmp/tmpxft_00009a34_00000000-9_lal_neighbor_cpu.cpp3.i (/home/sjplimp/ccBI#.V8lyjI)
//-----------------------------------------------------------
//-----------------------------------------------------------
// Options:
//-----------------------------------------------------------
// Target:ptx, ISA:sm_20, Endian:little, Pointer Size:64
// -O3 (Optimization level)
// -g0 (Debug level)
// -m2 (Report advisories)
//-----------------------------------------------------------
.file 1 "<command-line>"
.file 2 "/tmp/tmpxft_00009a34_00000000-8_lal_neighbor_cpu.cudafe2.gpu"
.file 3 "/usr/lib/gcc/x86_64-redhat-linux/4.4.5/include/stddef.h"
.file 4 "/usr/local/cuda/include/crt/device_runtime.h"
.file 5 "/usr/local/cuda/include/host_defines.h"
.file 6 "/usr/local/cuda/include/builtin_types.h"
.file 7 "/usr/local/cuda/include/device_types.h"
.file 8 "/usr/local/cuda/include/driver_types.h"
.file 9 "/usr/local/cuda/include/surface_types.h"
.file 10 "/usr/local/cuda/include/texture_types.h"
.file 11 "/usr/local/cuda/include/vector_types.h"
.file 12 "/usr/local/cuda/include/device_launch_parameters.h"
.file 13 "/usr/local/cuda/include/crt/storage_class.h"
.file 14 "/usr/include/bits/types.h"
.file 15 "/usr/include/time.h"
.file 16 "lal_neighbor_cpu.cu"
.file 17 "/usr/local/cuda/include/common_functions.h"
.file 18 "/usr/local/cuda/include/math_functions.h"
.file 19 "/usr/local/cuda/include/math_constants.h"
.file 20 "/usr/local/cuda/include/device_functions.h"
.file 21 "/usr/local/cuda/include/sm_11_atomic_functions.h"
.file 22 "/usr/local/cuda/include/sm_12_atomic_functions.h"
.file 23 "/usr/local/cuda/include/sm_13_double_functions.h"
.file 24 "/usr/local/cuda/include/sm_20_atomic_functions.h"
.file 25 "/usr/local/cuda/include/sm_20_intrinsics.h"
.file 26 "/usr/local/cuda/include/surface_functions.h"
.file 27 "/usr/local/cuda/include/texture_fetch_functions.h"
.file 28 "/usr/local/cuda/include/math_functions_dbl_ptx3.h"
//-----------------------------------------------------------
// kernel_unpack
//
// Compiler-generated PTX; the .loc directives refer to file 16
// ("lal_neighbor_cpu.cu"). Each thread copies a run of 32-bit values from
// global memory at dev_ij into global memory at dev_nbor, writing them
// with a fixed stride.
//
// Parameters:
//   dev_nbor   (.u64) base address; read and written as 4-byte elements
//   dev_ij     (.u64) base address; read as 4-byte elements
//   inum       (.s32) upper bound on the per-thread index ii, and the
//                     spacing between the dev_nbor sections read below
//   t_per_atom (.s32) divisor used to derive ii from the thread/block ids
//
// Equivalent pseudo-C of what the instructions below compute
// (all indices are in 4-byte elements):
//   ii     = tid.x / t_per_atom + ctaid.x * (ntid.x / t_per_atom);
//   if (ii >= inum) return;
//   count  = dev_nbor[inum + ii];
//   start  = dev_nbor[2*inum + ii];
//   offset = tid.x & (t_per_atom - 1);
//   src    = dev_ij + start + offset;
//   end    = dev_ij + start + count;
//   dst    = dev_nbor + 2*inum + ii + ii*(t_per_atom - 1) + offset;
//   stride = inum * t_per_atom;
//   if (src < end) do { *dst = *src; dst += stride; src += 1; } while (src != end);
//
// NOTE(review): t_per_atom == 0 is not guarded before the two div.s32
//   instructions -- presumably the host never passes 0; confirm at the
//   launch site.
// NOTE(review): offset is computed with a bit mask, which equals
//   tid.x % t_per_atom only when t_per_atom is a power of two --
//   presumably that is required by the caller; confirm.
// NOTE(review): threads that share the same ii but have different offset
//   values all walk up to the same end pointer one element at a time, so
//   their source ranges overlap -- confirm this is intended in the .cu
//   source.
//-----------------------------------------------------------
.entry kernel_unpack (
.param .u64 __cudaparm_kernel_unpack_dev_nbor,
.param .u64 __cudaparm_kernel_unpack_dev_ij,
.param .s32 __cudaparm_kernel_unpack_inum,
.param .s32 __cudaparm_kernel_unpack_t_per_atom)
{
// Virtual register pools used by this kernel.
.reg .u32 %r<19>;
.reg .u64 %rd<33>;
.reg .pred %p<5>;
.loc 16 21 0
$LDWbegin_kernel_unpack:
// --- Compute ii (%r8) and exit early when ii >= inum ---
// %r1 = t_per_atom, %r2 = tid.x
ld.param.s32 %r1, [__cudaparm_kernel_unpack_t_per_atom];
cvt.s32.u32 %r2, %tid.x;
// %r3 = tid.x / t_per_atom (signed division)
div.s32 %r3, %r2, %r1;
// %r5 = ntid.x / t_per_atom
cvt.s32.u32 %r4, %ntid.x;
div.s32 %r5, %r4, %r1;
// %r8 = ii = %r3 + ctaid.x * %r5
cvt.s32.u32 %r6, %ctaid.x;
mul.lo.s32 %r7, %r6, %r5;
add.s32 %r8, %r3, %r7;
// %r9 = inum; threads with ii >= inum do no work
ld.param.s32 %r9, [__cudaparm_kernel_unpack_inum];
setp.ge.s32 %p1, %r8, %r9;
@%p1 bra $Lt_0_2050;
.loc 16 30 0
// --- Address arithmetic into dev_nbor (byte addresses, 4 bytes/element) ---
// %rd6 = &dev_nbor[inum + ii]
cvt.s64.s32 %rd1, %r9;
ld.param.u64 %rd2, [__cudaparm_kernel_unpack_dev_nbor];
cvt.s64.s32 %rd3, %r8;
add.u64 %rd4, %rd3, %rd1;
mul.lo.u64 %rd5, %rd4, 4;
add.u64 %rd6, %rd2, %rd5;
// %rd8 = %rd6 + 4*inum = &dev_nbor[2*inum + ii]
mul.wide.s32 %rd7, %r9, 4;
add.u64 %rd8, %rd6, %rd7;
// %r10 = start = dev_nbor[2*inum + ii]; %rd12 = dev_ij + 4*start
ld.param.u64 %rd9, [__cudaparm_kernel_unpack_dev_ij];
ld.global.s32 %r10, [%rd8+0];
// %rd10 is not read again in this kernel (the scaled form %rd11 is used).
cvt.s64.s32 %rd10, %r10;
mul.wide.s32 %rd11, %r10, 4;
add.u64 %rd12, %rd9, %rd11;
.loc 16 31 0
// %r11 = count = dev_nbor[inum + ii]; %rd15 = end = %rd12 + 4*count
ld.global.s32 %r11, [%rd6+0];
// %rd13 is not read again in this kernel.
cvt.s64.s32 %rd13, %r11;
mul.wide.s32 %rd14, %r11, 4;
add.u64 %rd15, %rd12, %rd14;
.loc 16 33 0
// %r13 = offset = (t_per_atom - 1) & tid.x
sub.s32 %r12, %r1, 1;
and.b32 %r13, %r12, %r2;
// %r15 = offset + (t_per_atom - 1) * ii
mul.lo.s32 %r14, %r12, %r8;
add.s32 %r15, %r13, %r14;
// %rd16 is not read again in this kernel.
cvt.s64.s32 %rd16, %r15;
// %rd18 = dst = &dev_nbor[2*inum + ii] + 4*%r15
mul.wide.s32 %rd17, %r15, 4;
add.u64 %rd18, %rd8, %rd17;
.loc 16 34 0
// %rd19 is not read again in this kernel.
cvt.s64.s32 %rd19, %r13;
// %rd21 = src = %rd12 + 4*offset
mul.wide.s32 %rd20, %r13, 4;
add.u64 %rd21, %rd12, %rd20;
// Skip the loop entirely when src >= end (unsigned address compare).
setp.ge.u64 %p2, %rd21, %rd15;
@%p2 bra $Lt_0_2562;
// %rd22..%rd29: ((end - src) + 3) divided by 4 with a sign-bias correction,
// i.e. a loop trip count. Neither %rd28 nor %rd29 is read afterwards in
// this kernel; the loop below terminates on a pointer compare instead.
sub.u64 %rd22, %rd15, %rd21;
add.u64 %rd23, %rd22, 3;
shr.s64 %rd24, %rd23, 63;
mov.s64 %rd25, 3;
and.b64 %rd26, %rd24, %rd25;
add.s64 %rd27, %rd26, %rd23;
shr.s64 %rd28, %rd27, 2;
// %r16 = stride (in elements) = inum * t_per_atom
mul.lo.s32 %r16, %r9, %r1;
mov.s64 %rd29, %rd28;
$Lt_0_3074:
//<loop> Loop body line 34, nesting depth: 1, estimated iterations: unknown
.loc 16 37 0
// Copy one 32-bit value: *dst = *src
ld.global.s32 %r17, [%rd21+0];
st.global.s32 [%rd18+0], %r17;
.loc 16 38 0
// %rd30 is not read; %rd31 = 4*stride is recomputed every iteration
// although %r16 does not change inside the loop.
cvt.s64.s32 %rd30, %r16;
mul.wide.s32 %rd31, %r16, 4;
// dst += stride elements; src += 1 element
add.u64 %rd18, %rd18, %rd31;
add.u64 %rd21, %rd21, 4;
// Loop until src == end. src and end are both %rd12 plus a multiple of 4
// and src < end on entry, so stepping by 4 reaches end exactly.
setp.ne.u64 %p3, %rd21, %rd15;
@%p3 bra $Lt_0_3074;
// Common exit for the empty-list case and the ii >= inum case.
$Lt_0_2562:
$Lt_0_2050:
.loc 16 41 0
exit;
$LDWend_kernel_unpack:
} // kernel_unpack

Event Timeline