KJB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
fft_internal.h
Go to the documentation of this file.
1 
2 //
3 // File: fft_internal.h
4 //
5 // Version: <1.0>
6 //
7 // Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
8 // in consideration of your agreement to the following terms, and your use,
9 // installation, modification or redistribution of this Apple software
10 // constitutes acceptance of these terms. If you do not agree with these
11 // terms, please do not use, install, modify or redistribute this Apple
12 // software.
13 //
14 // In consideration of your agreement to abide by the following terms, and
15 // subject to these terms, Apple grants you a personal, non - exclusive
16 // license, under Apple's copyrights in this original Apple software ( the
17 // "Apple Software" ), to use, reproduce, modify and redistribute the Apple
18 // Software, with or without modifications, in source and / or binary forms;
19 // provided that if you redistribute the Apple Software in its entirety and
20 // without modifications, you must retain this notice and the following text
21 // and disclaimers in all such redistributions of the Apple Software. Neither
22 // the name, trademarks, service marks or logos of Apple Inc. may be used to
23 // endorse or promote products derived from the Apple Software without specific
24 // prior written permission from Apple. Except as expressly stated in this
25 // notice, no other rights or licenses, express or implied, are granted by
26 // Apple herein, including but not limited to any patent rights that may be
27 // infringed by your derivative works or by other works in which the Apple
28 // Software may be incorporated.
29 //
30 // The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
31 // WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
32 // WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
33 // PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
34 // ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
35 //
36 // IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
37 // CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 // INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
40 // AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
41 // UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
42 // OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 //
44 // Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
45 //
47 
48 
49 #ifndef __CLFFT_INTERNAL_H
50 #define __CLFFT_INTERNAL_H
51 
52 #ifdef KJB_HAVE_OPENCL
53 #include "wrap_clfft/clFFT.h"
54 #include <string>
55 #include <sstream>
56 
57 #warning "[Code police] Do not put 'using namespace' in global scope of header."
58 using namespace std;
59 
// Direction tag attached to an FFT kernel. The enumerators name the x, y
// and z directions; the tag is stored in cl_fft_kernel_info::dir and is
// passed to FFT1D(). Enumerator values are the implicit 0, 1, 2.
typedef enum kernel_dir_t
{
    cl_fft_kernel_x,
    cl_fft_kernel_y,
    cl_fft_kernel_z
} cl_fft_kernel_dir;
66 
67 typedef struct kernel_info_t
68 {
69  cl_kernel kernel;
70  char *kernel_name;
71  size_t lmem_size;
72  size_t num_workgroups;
73  size_t num_xforms_per_workgroup;
74  size_t num_workitems_per_workgroup;
75  cl_fft_kernel_dir dir;
76  int in_place_possible;
77  kernel_info_t *next;
78 }cl_fft_kernel_info;
79 
80 typedef struct
81 {
82  // context in which fft resources are created and kernels are executed
83  cl_context context;
84 
85  // size of signal
86  clFFT_Dim3 n;
87 
88  // dimension of transform ... must be either 1D, 2D or 3D
89  clFFT_Dimension dim;
90 
91  // data format ... must be either interleaved or plannar
92  clFFT_DataFormat format;
93 
94  // string containing kernel source. Generated at runtime based on
95  // n, dim, format and other parameters
96  string *kernel_string;
97 
98  // CL program containing source and kernel this particular
99  // n, dim, data format
100  cl_program program;
101 
102  // linked list of kernels which needs to be executed for this fft
103  cl_fft_kernel_info *kernel_info;
104 
105  // number of kernels
106  int num_kernels;
107 
108  // twist kernel for virtualizing fft of very large sizes that do not
109  // fit in GPU global memory
110  cl_kernel twist_kernel;
111 
112  // flag indicating if temporary intermediate buffer is needed or not.
113  // this depends on fft kernels being executed and if transform is
114  // in-place or out-of-place. e.g. Local memory fft (say 1D 1024 ...
115  // one that does not require global transpose do not need temporary buffer)
116  // 2D 1024x1024 out-of-place fft however do require intermediate buffer.
117  // If temp buffer is needed, its allocation is lazy i.e. its not allocated
118  // until its needed
119  cl_int temp_buffer_needed;
120 
121  // Batch size is runtime parameter and size of temporary buffer (if needed)
122  // depends on batch size. Allocation of temporary buffer is lazy i.e. its
123  // only created when needed. Once its created at first call of clFFT_Executexxx
124  // it is not allocated next time if next time clFFT_Executexxx is called with
125  // batch size different than the first call. last_batch_size caches the last
126  // batch size with which this plan is used so that we dont keep allocating/deallocating
127  // temp buffer if same batch size is used again and again.
128  size_t last_batch_size;
129 
130  // temporary buffer for interleaved plan
131  cl_mem tempmemobj;
132 
133  // temporary buffer for planner plan. Only one of tempmemobj or
134  // (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending
135  // data format of plan (plannar or interleaved)
136  cl_mem tempmemobj_real, tempmemobj_imag;
137 
138  // Maximum size of signal for which local memory transposed based
139  // fft is sufficient i.e. no global mem transpose (communication)
140  // is needed
141  size_t max_localmem_fft_size;
142 
143  // Maximum work items per work group allowed. This, along with max_radix below controls
144  // maximum local memory being used by fft kernels of this plan. Set to 256 by default
145  size_t max_work_item_per_workgroup;
146 
147  // Maximum base radix for local memory fft ... this controls the maximum register
148  // space used by work items. Currently defaults to 16
149  size_t max_radix;
150 
151  // Device depended parameter that tells how many work-items need to be read consecutive
152  // values to make sure global memory access by work-items of a work-group result in
153  // coalesced memory access to utilize full bandwidth e.g. on NVidia tesla, this is 16
154  size_t min_mem_coalesce_width;
155 
156  // Number of local memory banks. This is used to geneate kernel with local memory
157  // transposes with appropriate padding to avoid bank conflicts to local memory
158  // e.g. on NVidia it is 16.
159  size_t num_local_mem_banks;
160 }cl_fft_plan;
161 
162 void FFT1D(cl_fft_plan *plan, cl_fft_kernel_dir dir);
163 
164 
165 #endif /* KJB_HAVE_OPENCL */
166 #endif