KJB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
util_ptx.h
Go to the documentation of this file.
/*
 * util_ptx: PTX assembly text for the utility kernels, stored as a single C
 * string assembled from adjacent string literals (one literal per PTX line).
 * According to the comments embedded in the text itself, it was emitted by
 * nvopencc 3.1 at -O3 for PTX ISA 1.4 / sm_10 with 64-bit pointers.
 * The text is compiler output, so it is best regenerated from the CUDA
 * source rather than edited by hand.
 * NOTE(review): presumably passed to a run-time PTX module loader by code
 * outside this file -- confirm against the caller.
 */
1 static const char* util_ptx =
/* PTX module header: ISA version and target architecture. */
2 " .version 1.4\n"
3 " .target sm_10, map_f64_to_f32\n"
/* Compiler banner and option summary, kept as PTX comments in the string. */
4 " // compiled with /usr/local/cuda/open64/lib//be\n"
5 " // nvopencc 3.1 built on 2010-06-07\n"
6 "\n"
7 " //-----------------------------------------------------------\n"
8 " // Compiling /tmp/tmpxft_00005c16_00000000-7_util.cpp3.i (/tmp/ccBI#.kEa25g)\n"
9 " //-----------------------------------------------------------\n"
10 "\n"
11 " //-----------------------------------------------------------\n"
12 " // Options:\n"
13 " //-----------------------------------------------------------\n"
14 " // Target:ptx, ISA:sm_10, Endian:little, Pointer Size:64\n"
15 " // -O3 (Optimization level)\n"
16 " // -g0 (Debug level)\n"
17 " // -m2 (Report advisories)\n"
18 " //-----------------------------------------------------------\n"
19 "\n"
/*
 * Debug file table. The first number of every .loc directive further down
 * is an index into this table; index 16 is util.cu.
 */
20 " .file 1 \"<command-line>\"\n"
21 " .file 2 \"/tmp/tmpxft_00005c16_00000000-6_util.cudafe2.gpu\"\n"
22 " .file 3 \"/usr/lib/gcc/x86_64-linux-gnu/4.4.3/include/stddef.h\"\n"
23 " .file 4 \"/usr/local/cuda/bin/../include/crt/device_runtime.h\"\n"
24 " .file 5 \"/usr/local/cuda/bin/../include/host_defines.h\"\n"
25 " .file 6 \"/usr/local/cuda/bin/../include/builtin_types.h\"\n"
26 " .file 7 \"/usr/local/cuda/bin/../include/device_types.h\"\n"
27 " .file 8 \"/usr/local/cuda/bin/../include/driver_types.h\"\n"
28 " .file 9 \"/usr/local/cuda/bin/../include/surface_types.h\"\n"
29 " .file 10 \"/usr/local/cuda/bin/../include/texture_types.h\"\n"
30 " .file 11 \"/usr/local/cuda/bin/../include/vector_types.h\"\n"
31 " .file 12 \"/usr/local/cuda/bin/../include/device_launch_parameters.h\"\n"
32 " .file 13 \"/usr/local/cuda/bin/../include/crt/storage_class.h\"\n"
33 " .file 14 \"/usr/include/bits/types.h\"\n"
34 " .file 15 \"/usr/include/time.h\"\n"
35 " .file 16 \"util.cu\"\n"
36 " .file 17 \"/usr/local/cuda/bin/../include/common_functions.h\"\n"
37 " .file 18 \"/usr/local/cuda/bin/../include/math_functions.h\"\n"
38 " .file 19 \"/usr/local/cuda/bin/../include/math_constants.h\"\n"
39 " .file 20 \"/usr/local/cuda/bin/../include/device_functions.h\"\n"
40 " .file 21 \"/usr/local/cuda/bin/../include/sm_11_atomic_functions.h\"\n"
41 " .file 22 \"/usr/local/cuda/bin/../include/sm_12_atomic_functions.h\"\n"
42 " .file 23 \"/usr/local/cuda/bin/../include/sm_13_double_functions.h\"\n"
43 " .file 24 \"/usr/local/cuda/bin/../include/sm_20_atomic_functions.h\"\n"
44 " .file 25 \"/usr/local/cuda/bin/../include/sm_20_intrinsics.h\"\n"
45 " .file 26 \"/usr/local/cuda/bin/../include/surface_functions.h\"\n"
46 " .file 27 \"/usr/local/cuda/bin/../include/texture_fetch_functions.h\"\n"
47 " .file 28 \"/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h\"\n"
48 "\n"
49 "\n"
/*
 * Kernel ow_ew_multiply_uint_float(data1, data2, n): in-place element-wise
 * multiply of a u32 array by an f32 array. Each thread computes
 * index = ctaid.x * ntid.x + tid.x and, when index < n, replaces the u32 at
 * data1[index] with data1[index] * data2[index]. data1 and data2 are 64-bit
 * global addresses; consecutive elements are 4 bytes apart.
 */
50 " .entry ow_ew_multiply_uint_float (\n"
51 " .param .u64 __cudaparm_ow_ew_multiply_uint_float_data1,\n"
52 " .param .u64 __cudaparm_ow_ew_multiply_uint_float_data2,\n"
53 " .param .u32 __cudaparm_ow_ew_multiply_uint_float_n)\n"
54 " {\n"
55 " .reg .u16 %rh<4>;\n"
56 " .reg .u32 %r<8>;\n"
57 " .reg .u64 %rd<8>;\n"
58 " .reg .f32 %f<5>;\n"
59 " .reg .pred %p<3>;\n"
60 " .loc 16 75 0\n"
61 "$LDWbegin_ow_ew_multiply_uint_float:\n"
62 " .loc 16 77 0\n"
/* Thread index: %r3 = ctaid.x * ntid.x + tid.x. */
63 " mov.u16 %rh1, %ctaid.x;\n"
64 " mov.u16 %rh2, %ntid.x;\n"
65 " mul.wide.u16 %r1, %rh1, %rh2;\n"
66 " cvt.u32.u16 %r2, %tid.x;\n"
67 " add.u32 %r3, %r2, %r1;\n"
/* Bounds guard: threads with n <= index branch directly to exit. */
68 " ld.param.u32 %r4, [__cudaparm_ow_ew_multiply_uint_float_n];\n"
69 " setp.le.u32 %p1, %r4, %r3;\n"
70 " @%p1 bra $Lt_0_1026;\n"
71 " .loc 16 68 0\n"
/* %rd2 = index * 4 (byte offset); %rd4 = address of data1[index]. */
72 " cvt.u64.u32 %rd1, %r3;\n"
73 " mul.wide.u32 %rd2, %r3, 4;\n"
74 " ld.param.u64 %rd3, [__cudaparm_ow_ew_multiply_uint_float_data1];\n"
75 " add.u64 %rd4, %rd3, %rd2;\n"
76 " ld.global.u32 %r5, [%rd4+0];\n"
77 " .loc 16 69 0\n"
/*
 * Convert the u32 operand to f32 (round to nearest), multiply by the f32
 * loaded from data2[index], then convert the product back to u32 rounding
 * toward zero.
 */
78 " cvt.rn.f32.u32 %f1, %r5;\n"
79 " ld.param.u64 %rd5, [__cudaparm_ow_ew_multiply_uint_float_data2];\n"
80 " add.u64 %rd6, %rd5, %rd2;\n"
81 " ld.global.f32 %f2, [%rd6+0];\n"
82 " mul.f32 %f3, %f1, %f2;\n"
83 " cvt.rzi.u32.f32 %r6, %f3;\n"
84 " .loc 16 70 0\n"
/* Write the result over the original data1[index]. */
85 " st.global.u32 [%rd4+0], %r6;\n"
86 "$Lt_0_1026:\n"
87 " .loc 16 78 0\n"
88 " exit;\n"
89 "$LDWend_ow_ew_multiply_uint_float:\n"
90 " } // ow_ew_multiply_uint_float\n"
91 "\n"
/*
 * Kernel ow_ew_multiply_float(data1, data2, n): in-place element-wise f32
 * multiply. Each thread computes index = ctaid.x * ntid.x + tid.x and, when
 * index < n, replaces data1[index] with data1[index] * data2[index].
 * data1 and data2 are 64-bit global addresses; consecutive elements are
 * 4 bytes apart.
 */
92 " .entry ow_ew_multiply_float (\n"
93 " .param .u64 __cudaparm_ow_ew_multiply_float_data1,\n"
94 " .param .u64 __cudaparm_ow_ew_multiply_float_data2,\n"
95 " .param .u32 __cudaparm_ow_ew_multiply_float_n)\n"
96 " {\n"
97 " .reg .u16 %rh<4>;\n"
98 " .reg .u32 %r<6>;\n"
99 " .reg .u64 %rd<8>;\n"
100 " .reg .f32 %f<5>;\n"
101 " .reg .pred %p<3>;\n"
102 " .loc 16 81 0\n"
103 "$LDWbegin_ow_ew_multiply_float:\n"
104 " .loc 16 83 0\n"
/* Thread index: %r3 = ctaid.x * ntid.x + tid.x. */
105 " mov.u16 %rh1, %ctaid.x;\n"
106 " mov.u16 %rh2, %ntid.x;\n"
107 " mul.wide.u16 %r1, %rh1, %rh2;\n"
108 " cvt.u32.u16 %r2, %tid.x;\n"
109 " add.u32 %r3, %r2, %r1;\n"
/* Bounds guard: threads with n <= index branch directly to exit. */
110 " ld.param.u32 %r4, [__cudaparm_ow_ew_multiply_float_n];\n"
111 " setp.le.u32 %p1, %r4, %r3;\n"
112 " @%p1 bra $Lt_1_1026;\n"
113 " .loc 16 70 0\n"
/* %rd2 = index * 4 (byte offset), shared by both array accesses. */
114 " cvt.u64.u32 %rd1, %r3;\n"
115 " mul.wide.u32 %rd2, %r3, 4;\n"
116 " ld.param.u64 %rd3, [__cudaparm_ow_ew_multiply_float_data1];\n"
117 " add.u64 %rd4, %rd3, %rd2;\n"
118 " ld.global.f32 %f1, [%rd4+0];\n"
119 " ld.param.u64 %rd5, [__cudaparm_ow_ew_multiply_float_data2];\n"
120 " add.u64 %rd6, %rd5, %rd2;\n"
121 " ld.global.f32 %f2, [%rd6+0];\n"
/* Multiply and write the product over the original data1[index]. */
122 " mul.f32 %f3, %f1, %f2;\n"
123 " st.global.f32 [%rd4+0], %f3;\n"
124 "$Lt_1_1026:\n"
125 " .loc 16 84 0\n"
126 " exit;\n"
127 "$LDWend_ow_ew_multiply_float:\n"
128 " } // ow_ew_multiply_float\n"
129 "\n"
/*
 * Kernel ow_ew_multiply_uint(data1, data2, n): in-place element-wise u32
 * multiply. Each thread computes index = ctaid.x * ntid.x + tid.x and, when
 * index < n, replaces data1[index] with the low 32 bits of
 * data1[index] * data2[index]. data1 and data2 are 64-bit global addresses;
 * consecutive elements are 4 bytes apart.
 */
130 " .entry ow_ew_multiply_uint (\n"
131 " .param .u64 __cudaparm_ow_ew_multiply_uint_data1,\n"
132 " .param .u64 __cudaparm_ow_ew_multiply_uint_data2,\n"
133 " .param .u32 __cudaparm_ow_ew_multiply_uint_n)\n"
134 " {\n"
135 " .reg .u16 %rh<4>;\n"
136 " .reg .u32 %r<9>;\n"
137 " .reg .u64 %rd<8>;\n"
138 " .reg .pred %p<3>;\n"
139 " .loc 16 87 0\n"
140 "$LDWbegin_ow_ew_multiply_uint:\n"
141 " .loc 16 89 0\n"
/* Thread index: %r3 = ctaid.x * ntid.x + tid.x. */
142 " mov.u16 %rh1, %ctaid.x;\n"
143 " mov.u16 %rh2, %ntid.x;\n"
144 " mul.wide.u16 %r1, %rh1, %rh2;\n"
145 " cvt.u32.u16 %r2, %tid.x;\n"
146 " add.u32 %r3, %r2, %r1;\n"
/* Bounds guard: threads with n <= index branch directly to exit. */
147 " ld.param.u32 %r4, [__cudaparm_ow_ew_multiply_uint_n];\n"
148 " setp.le.u32 %p1, %r4, %r3;\n"
149 " @%p1 bra $Lt_2_1026;\n"
150 " .loc 16 70 0\n"
/* %rd2 = index * 4 (byte offset), shared by both array accesses. */
151 " cvt.u64.u32 %rd1, %r3;\n"
152 " mul.wide.u32 %rd2, %r3, 4;\n"
153 " ld.param.u64 %rd3, [__cudaparm_ow_ew_multiply_uint_data1];\n"
154 " add.u64 %rd4, %rd3, %rd2;\n"
155 " ld.global.u32 %r5, [%rd4+0];\n"
156 " ld.param.u64 %rd5, [__cudaparm_ow_ew_multiply_uint_data2];\n"
157 " add.u64 %rd6, %rd5, %rd2;\n"
158 " ld.global.u32 %r6, [%rd6+0];\n"
/* mul.lo keeps only the low 32 bits of the product. */
159 " mul.lo.u32 %r7, %r5, %r6;\n"
160 " st.global.u32 [%rd4+0], %r7;\n"
161 "$Lt_2_1026:\n"
162 " .loc 16 90 0\n"
163 " exit;\n"
164 "$LDWend_ow_ew_multiply_uint:\n"
165 " } // ow_ew_multiply_uint\n"
166 "\n"
/*
 * Kernel ow_ew_multiply_int(data1, data2, n): in-place element-wise s32
 * multiply. Each thread computes index = ctaid.x * ntid.x + tid.x and, when
 * index < n, replaces data1[index] with the low 32 bits of
 * data1[index] * data2[index]. data1 and data2 are 64-bit global addresses;
 * consecutive elements are 4 bytes apart.
 */
167 " .entry ow_ew_multiply_int (\n"
168 " .param .u64 __cudaparm_ow_ew_multiply_int_data1,\n"
169 " .param .u64 __cudaparm_ow_ew_multiply_int_data2,\n"
170 " .param .u32 __cudaparm_ow_ew_multiply_int_n)\n"
171 " {\n"
172 " .reg .u16 %rh<4>;\n"
173 " .reg .u32 %r<9>;\n"
174 " .reg .u64 %rd<8>;\n"
175 " .reg .pred %p<3>;\n"
176 " .loc 16 93 0\n"
177 "$LDWbegin_ow_ew_multiply_int:\n"
178 " .loc 16 95 0\n"
/* Thread index: %r3 = ctaid.x * ntid.x + tid.x. */
179 " mov.u16 %rh1, %ctaid.x;\n"
180 " mov.u16 %rh2, %ntid.x;\n"
181 " mul.wide.u16 %r1, %rh1, %rh2;\n"
182 " cvt.u32.u16 %r2, %tid.x;\n"
183 " add.u32 %r3, %r2, %r1;\n"
/* Bounds guard (unsigned compare): threads with n <= index exit. */
184 " ld.param.u32 %r4, [__cudaparm_ow_ew_multiply_int_n];\n"
185 " setp.le.u32 %p1, %r4, %r3;\n"
186 " @%p1 bra $Lt_3_1026;\n"
187 " .loc 16 70 0\n"
/* %rd2 = index * 4 (byte offset), shared by both array accesses. */
188 " cvt.u64.u32 %rd1, %r3;\n"
189 " mul.wide.u32 %rd2, %r3, 4;\n"
190 " ld.param.u64 %rd3, [__cudaparm_ow_ew_multiply_int_data1];\n"
191 " add.u64 %rd4, %rd3, %rd2;\n"
192 " ld.global.s32 %r5, [%rd4+0];\n"
193 " ld.param.u64 %rd5, [__cudaparm_ow_ew_multiply_int_data2];\n"
194 " add.u64 %rd6, %rd5, %rd2;\n"
195 " ld.global.s32 %r6, [%rd6+0];\n"
/* mul.lo keeps only the low 32 bits of the signed product. */
196 " mul.lo.s32 %r7, %r5, %r6;\n"
197 " st.global.s32 [%rd4+0], %r7;\n"
198 "$Lt_3_1026:\n"
199 " .loc 16 96 0\n"
200 " exit;\n"
201 "$LDWend_ow_ew_multiply_int:\n"
202 " } // ow_ew_multiply_int\n"
203 "\n"
/*
 * Kernel detect_changes_float(data, n): in-place marking of f32 elements
 * that differ from the element before them. With
 * index = ctaid.x * ntid.x + tid.x:
 *   - the thread with index 0 stores 0.0 to data[0];
 *   - any other thread with index + 1 < n loads data[index] and
 *     data[index + 1], then stores 0.0 to data[index + 1] if the two compare
 *     equal and 1.0 otherwise.
 * NOTE(review): the index-0 path stores to data[0] without reading n, and no
 * thread stores to data[1] (index 0 writes data[0], index 1 writes data[2])
 * -- confirm both are intended.
 * NOTE(review): each thread overwrites data[index + 1], which the thread
 * with the next index also loads, and this kernel contains no
 * synchronization -- results may depend on execution order; confirm against
 * util.cu.
 */
204 " .entry detect_changes_float (\n"
205 " .param .u64 __cudaparm_detect_changes_float_data,\n"
206 " .param .u32 __cudaparm_detect_changes_float_n)\n"
207 " {\n"
208 " .reg .u16 %rh<4>;\n"
209 " .reg .u32 %r<8>;\n"
210 " .reg .u64 %rd<10>;\n"
211 " .reg .f32 %f<8>;\n"
212 " .reg .pred %p<5>;\n"
213 " .loc 16 132 0\n"
214 "$LDWbegin_detect_changes_float:\n"
215 " .loc 16 134 0\n"
/* Thread index: %r3 = ctaid.x * ntid.x + tid.x. */
216 " mov.u16 %rh1, %ctaid.x;\n"
217 " mov.u16 %rh2, %ntid.x;\n"
218 " mul.wide.u16 %r1, %rh1, %rh2;\n"
219 " cvt.u32.u16 %r2, %tid.x;\n"
220 " add.u32 %r3, %r2, %r1;\n"
/* Threads with a nonzero index skip the first-element path. */
221 " mov.u32 %r4, 0;\n"
222 " setp.ne.u32 %p1, %r3, %r4;\n"
223 " @%p1 bra $Lt_4_3074;\n"
224 " .loc 16 114 0\n"
/* Index 0 only: store 0.0 to data[0], then jump to exit. */
225 " mov.f32 %f1, 0f00000000; // 0\n"
226 " ld.param.u64 %rd1, [__cudaparm_detect_changes_float_data];\n"
227 " cvt.u64.u32 %rd2, %r3;\n"
228 " mul.wide.u32 %rd3, %r3, 4;\n"
229 " add.u64 %rd4, %rd1, %rd3;\n"
230 " st.global.f32 [%rd4+0], %f1;\n"
231 " bra.uni $LDWendi__Z23detect_changes_dispatchIfEvPT_j_214_1;\n"
232 "$Lt_4_3074:\n"
/* Skip when n <= index + 1, i.e. there is no element after this one. */
233 " ld.param.u32 %r5, [__cudaparm_detect_changes_float_n];\n"
234 " add.u32 %r6, %r3, 1;\n"
235 " setp.le.u32 %p2, %r5, %r6;\n"
236 " @%p2 bra $Lt_4_3586;\n"
237 " .loc 16 120 0\n"
/* %rd8 = address of data[index]; offset +4 addresses data[index + 1]. */
238 " ld.param.u64 %rd5, [__cudaparm_detect_changes_float_data];\n"
239 " cvt.u64.u32 %rd6, %r3;\n"
240 " mul.wide.u32 %rd7, %r3, 4;\n"
241 " add.u64 %rd8, %rd5, %rd7;\n"
242 " mov.f32 %f2, 0f00000000; // 0\n"
243 " mov.f32 %f3, 0f3f800000; // 1\n"
244 " ld.global.f32 %f4, [%rd8+0];\n"
245 " ld.global.f32 %f5, [%rd8+4];\n"
/* Select 0.0 when the neighbours compare equal, 1.0 otherwise. */
246 " setp.eq.f32 %p3, %f4, %f5;\n"
247 " selp.f32 %f6, %f2, %f3, %p3;\n"
248 " st.global.f32 [%rd8+4], %f6;\n"
249 "$Lt_4_3586:\n"
250 "$LDWendi__Z23detect_changes_dispatchIfEvPT_j_214_1:\n"
251 " .loc 16 135 0\n"
252 " exit;\n"
253 "$LDWend_detect_changes_float:\n"
254 " } // detect_changes_float\n"
255 "\n"
/*
 * Kernel detect_changes_int(data, n): in-place marking of s32 elements that
 * differ from the element before them. With
 * index = ctaid.x * ntid.x + tid.x:
 *   - the thread with index 0 stores 0 to data[0];
 *   - any other thread with index + 1 < n loads data[index] and
 *     data[index + 1], then stores 1 to data[index + 1] if they differ and
 *     0 otherwise.
 * NOTE(review): the index-0 path stores to data[0] without reading n, and no
 * thread stores to data[1] (index 0 writes data[0], index 1 writes data[2])
 * -- confirm both are intended.
 * NOTE(review): each thread overwrites data[index + 1], which the thread
 * with the next index also loads, and this kernel contains no
 * synchronization -- results may depend on execution order; confirm against
 * util.cu.
 */
256 " .entry detect_changes_int (\n"
257 " .param .u64 __cudaparm_detect_changes_int_data,\n"
258 " .param .u32 __cudaparm_detect_changes_int_n)\n"
259 " {\n"
260 " .reg .u16 %rh<4>;\n"
261 " .reg .u32 %r<13>;\n"
262 " .reg .u64 %rd<10>;\n"
263 " .reg .pred %p<4>;\n"
264 " .loc 16 138 0\n"
265 "$LDWbegin_detect_changes_int:\n"
266 " .loc 16 140 0\n"
/* Thread index: %r3 = ctaid.x * ntid.x + tid.x. */
267 " mov.u16 %rh1, %ctaid.x;\n"
268 " mov.u16 %rh2, %ntid.x;\n"
269 " mul.wide.u16 %r1, %rh1, %rh2;\n"
270 " cvt.u32.u16 %r2, %tid.x;\n"
271 " add.u32 %r3, %r2, %r1;\n"
/* Threads with a nonzero index skip the first-element path. */
272 " mov.u32 %r4, 0;\n"
273 " setp.ne.u32 %p1, %r3, %r4;\n"
274 " @%p1 bra $Lt_5_3074;\n"
275 " .loc 16 114 0\n"
/* Index 0 only: store 0 to data[0], then jump to exit. */
276 " mov.s32 %r5, 0;\n"
277 " ld.param.u64 %rd1, [__cudaparm_detect_changes_int_data];\n"
278 " cvt.u64.u32 %rd2, %r3;\n"
279 " mul.wide.u32 %rd3, %r3, 4;\n"
280 " add.u64 %rd4, %rd1, %rd3;\n"
281 " st.global.s32 [%rd4+0], %r5;\n"
282 " bra.uni $LDWendi__Z23detect_changes_dispatchIiEvPT_j_215_1;\n"
283 "$Lt_5_3074:\n"
/* Skip when n <= index + 1, i.e. there is no element after this one. */
284 " ld.param.u32 %r6, [__cudaparm_detect_changes_int_n];\n"
285 " add.u32 %r7, %r3, 1;\n"
286 " setp.le.u32 %p2, %r6, %r7;\n"
287 " @%p2 bra $Lt_5_3586;\n"
288 " .loc 16 120 0\n"
/* %rd8 = address of data[index]; offset +4 addresses data[index + 1]. */
289 " ld.param.u64 %rd5, [__cudaparm_detect_changes_int_data];\n"
290 " cvt.u64.u32 %rd6, %r3;\n"
291 " mul.wide.u32 %rd7, %r3, 4;\n"
292 " add.u64 %rd8, %rd5, %rd7;\n"
293 " ld.global.s32 %r8, [%rd8+0];\n"
294 " ld.global.s32 %r9, [%rd8+4];\n"
/*
 * set.ne with an integer destination yields all-ones when the operands
 * differ and 0 otherwise; negating turns all-ones (-1) into 1.
 */
295 " set.ne.u32.s32 %r10, %r8, %r9;\n"
296 " neg.s32 %r11, %r10;\n"
297 " st.global.s32 [%rd8+4], %r11;\n"
298 "$Lt_5_3586:\n"
299 "$LDWendi__Z23detect_changes_dispatchIiEvPT_j_215_1:\n"
300 " .loc 16 141 0\n"
301 " exit;\n"
302 "$LDWend_detect_changes_int:\n"
303 " } // detect_changes_int\n"
304 "\n"
/*
 * Kernel detect_changes_uint(data, n): in-place marking of u32 elements that
 * differ from the element before them. With
 * index = ctaid.x * ntid.x + tid.x:
 *   - the thread with index 0 stores 0 to data[0];
 *   - any other thread with index + 1 < n loads data[index] and
 *     data[index + 1], then stores 1 to data[index + 1] if they differ and
 *     0 otherwise.
 * NOTE(review): the index-0 path stores to data[0] without reading n, and no
 * thread stores to data[1] (index 0 writes data[0], index 1 writes data[2])
 * -- confirm both are intended.
 * NOTE(review): each thread overwrites data[index + 1], which the thread
 * with the next index also loads, and this kernel contains no
 * synchronization -- results may depend on execution order; confirm against
 * util.cu.
 */
305 " .entry detect_changes_uint (\n"
306 " .param .u64 __cudaparm_detect_changes_uint_data,\n"
307 " .param .u32 __cudaparm_detect_changes_uint_n)\n"
308 " {\n"
309 " .reg .u16 %rh<4>;\n"
310 " .reg .u32 %r<13>;\n"
311 " .reg .u64 %rd<10>;\n"
312 " .reg .pred %p<4>;\n"
313 " .loc 16 145 0\n"
314 "$LDWbegin_detect_changes_uint:\n"
315 " .loc 16 147 0\n"
/* Thread index: %r3 = ctaid.x * ntid.x + tid.x. */
316 " mov.u16 %rh1, %ctaid.x;\n"
317 " mov.u16 %rh2, %ntid.x;\n"
318 " mul.wide.u16 %r1, %rh1, %rh2;\n"
319 " cvt.u32.u16 %r2, %tid.x;\n"
320 " add.u32 %r3, %r2, %r1;\n"
/* Threads with a nonzero index skip the first-element path. */
321 " mov.u32 %r4, 0;\n"
322 " setp.ne.u32 %p1, %r3, %r4;\n"
323 " @%p1 bra $Lt_6_3074;\n"
324 " .loc 16 114 0\n"
/* Index 0 only: store 0 to data[0], then jump to exit. */
325 " mov.u32 %r5, 0;\n"
326 " ld.param.u64 %rd1, [__cudaparm_detect_changes_uint_data];\n"
327 " cvt.u64.u32 %rd2, %r3;\n"
328 " mul.wide.u32 %rd3, %r3, 4;\n"
329 " add.u64 %rd4, %rd1, %rd3;\n"
330 " st.global.u32 [%rd4+0], %r5;\n"
331 " bra.uni $LDWendi__Z23detect_changes_dispatchIjEvPT_j_216_1;\n"
332 "$Lt_6_3074:\n"
/* Skip when n <= index + 1, i.e. there is no element after this one. */
333 " ld.param.u32 %r6, [__cudaparm_detect_changes_uint_n];\n"
334 " add.u32 %r7, %r3, 1;\n"
335 " setp.le.u32 %p2, %r6, %r7;\n"
336 " @%p2 bra $Lt_6_3586;\n"
337 " .loc 16 120 0\n"
/* %rd8 = address of data[index]; offset +4 addresses data[index + 1]. */
338 " ld.param.u64 %rd5, [__cudaparm_detect_changes_uint_data];\n"
339 " cvt.u64.u32 %rd6, %r3;\n"
340 " mul.wide.u32 %rd7, %r3, 4;\n"
341 " add.u64 %rd8, %rd5, %rd7;\n"
342 " ld.global.u32 %r8, [%rd8+0];\n"
343 " ld.global.u32 %r9, [%rd8+4];\n"
/*
 * set.ne with an integer destination yields all-ones when the operands
 * differ and 0 otherwise; negating turns all-ones (-1) into 1.
 */
344 " set.ne.u32.u32 %r10, %r8, %r9;\n"
345 " neg.s32 %r11, %r10;\n"
346 " st.global.u32 [%rd8+4], %r11;\n"
347 "$Lt_6_3586:\n"
348 "$LDWendi__Z23detect_changes_dispatchIjEvPT_j_216_1:\n"
349 " .loc 16 148 0\n"
350 " exit;\n"
351 "$LDWend_detect_changes_uint:\n"
352 " } // detect_changes_uint\n"
353 "\n"
/* End of the PTX text; the semicolon closes the util_ptx definition. */
354 ;