PSF Estimation with CPSO
|
00001 /* 00002 * cpso.hpp 00003 * 00004 * Created on: 27/05/2012 00005 * Author: Peter Frank Perroni (pfperroni@inf.ufpr.br) 00006 */ 00007 00008 00009 #ifndef _CPSO_HPP_ 00010 #define _CPSO_HPP_ 00011 00012 #include <CL/cl.h> 00013 #include <CL/opencl.h> 00014 #include <CL/cl_platform.h> 00015 #include <CL/cl_ext.h> 00016 #include <vector_types.h> 00017 #include <string.h> 00018 #include <cmath> 00019 #include <stdio.h> 00020 #include <stdlib.h> 00021 #include <complex> 00022 #include <iostream> 00023 #include <map> 00024 #include "viennacl/vector.hpp" 00025 #include "viennacl/fft.hpp" 00026 #include "util.hpp" 00027 #include "cl_factory.hpp" 00028 #include "psf.hpp" 00029 #include "TimeTracker.hpp" 00030 #include "Profiling.hpp" 00031 00032 using namespace std; 00033 00039 class CPSO { 00040 private: 00041 static map<cl_context, cl_mem*> static_references; 00042 static omp_lock_t mutex, mutex_fft; 00043 static int seq; 00044 static bool lock_initialized; 00045 static int *diffraction_mask; 00046 00050 int UID; 00051 00052 // OpenCl section. 00053 //---------------- 00054 // Variables. 00055 cl_mem cl_coefs, cl_sum, cl_conobj, cl_cost, cl_mismatch; 00056 cl_mem cl_speed, cl_pbest, cl_gbest, cl_pbest_value, cl_gbest_value, cl_reset_search, cl_swarm_dim, 00057 cl_rand_ctx, cl_w, cl_c1, cl_c2, cl_best_coefs; 00058 cl_mem cl_fft_psfe, cl_fft_conobj, cl_pupil, cl_phase, cl_focus; 00059 cl_mem cl_object, cl_image, cl_best_phase, cl_best_psf, cl_best_psfe, cl_best_conobj, cl_best_fft_conobj, 00060 cl_best_fft_psfe, cl_fft_object, cl_fft_image, cl_debug_info, cl_fft_original_psf; 00061 00062 // Constant values. 00063 cl_mem cl_phase_mask, cl_zernikes, cl_diffraction_mask; 00064 00065 // FFT. 00066 viennacl::vector<WORD> *vn_object, *vn_image, *vn_fft_object, *vn_fft_image, *vn_fft_conobj, 00067 *vn_fft_original_psf; 00068 00069 // Environment control. 
00070 cl_int status; 00071 cl_context context; 00072 cl_command_queue command_queue; 00073 cl_program program; 00074 cl_device_id device; 00075 clQueue *queue; 00076 kernel_set *kernels; 00077 00078 // Host section. 00079 //-------------- 00080 bool in_use, has_startup_coefs, has_psf_original; 00081 int threads, phase_size, img_size, img_sizeh, img_area, img_areah, size_fft, z_size, n_zernikes, 00082 n_particles, n_swarms, n_psf_evals, max_evals; 00083 int convergence_stable_cycle; 00084 WORD w, psf_range, reset_at, gbest_cost; 00085 WORD *startup_coefs; 00086 FFT_TYPE *original_psf_fft; // To be used during Validation process only, for comparison with the calculated one. 00087 00088 PSF **psf; 00089 00090 char *buffer; 00091 00092 // Methods. 00093 //--------- 00094 cl_context createContext(); 00095 00096 cl_command_queue createCommandQueue(cl_context context, cl_device_id *device); 00097 00098 cl_program createProgram(cl_context context, cl_device_id device); 00099 00100 void initialize_cl(); 00101 00102 void allocate_data(int _phase_size, int image_size, int _n_zernikes, int _psf_range, 00103 WORD _w, WORD c1, WORD c2, WORD _reset_at, int _n_particles, int _n_swarms); 00104 00105 void startup(TimeTracker **trackers); 00106 00107 static void startup_locks(); 00108 00109 static void destroy_locks(); 00110 00111 void setStableCycle(int cycle) { convergence_stable_cycle = cycle; } 00112 00113 FFT_TYPE calcMismatch(TimeTracker **tracker, cl_mem _cl_fft_conobj); 00114 00115 public: 00116 CPSO(double* _zernikes, int *_phase_mask, int *_diffraction_mask, int _phase_size, int _image_size, int _n_zernikes, 00117 int _psf_range, WORD _w, WORD c1, WORD c2, WORD _reset_at, int _n_particles, int _n_swarms); 00118 00119 ~CPSO(); 00120 00121 void set_images(double *object, double *image); 00122 00123 static void store_static_data(cl_context _context, int _n_zernikes, int _phase_size, int _image_size, double* zernikes, 00124 int *phase_mask, int *_diffraction_mask); 00125 00126 
static void clear_static_data(); 00127 00128 void initialize_gpu(); 00129 00130 void finalize_cl(); 00131 00132 int getStableCycle(){ return convergence_stable_cycle; } 00133 00134 PSF** getPsfs(){ return psf; } 00135 00136 WORD getMinCost(); 00137 00138 int getBestPsfPos(); 00139 00140 void getBestCoefs(WORD* _coefs); 00141 00142 void getBestPsf(WORD *psf); 00143 00144 void getBestPsfe(WORD *psfe); 00145 00146 void getBestPhase(WORD *phase); 00147 00148 void getBestConvolvedObject(WORD *conobj); 00149 00150 void getBestConvolvedObjectFFT(FFT_TYPE *conobj_fft); 00151 00152 void getBestPsfeFFT(FFT_TYPE *psfe_fft); 00153 00154 void getObjectFFT(FFT_TYPE *obj_fft); 00155 00156 void getImageFFT(FFT_TYPE *img_fft); 00157 00158 cl_context getContext(){ return context; } 00159 00160 cl_command_queue getCommandQueue(){ return command_queue; } 00161 00162 int getPhaseSize(){ return phase_size; } 00163 00164 int getImageSize(){ return img_size; } 00165 00166 int getFftSize(){ return size_fft; } 00167 00168 int getNZernikes() { return n_zernikes; } 00169 00170 int getPsfRange() { return psf_range; } 00171 00172 int getNPsfEvals() { return n_psf_evals; } 00173 00174 void setMaxEvaluations(int _max_evals) { max_evals = _max_evals; } 00175 00176 void replicateValue(WORD value, int sz, cl_mem cl_ref); 00177 00178 void setW(WORD _w); 00179 00180 void setC1(WORD _c1); 00181 00182 void setC2(WORD _c2); 00183 00184 void setOriginalPsf(WORD *original_psf); 00185 00186 double calcPsfDifferences(); 00187 00188 cl_mem getFftObject(){ return cl_fft_object; } 00189 00190 cl_mem getFftImage() {return cl_fft_image; } 00191 00192 void run(TimeTracker **trackers, int n_cycles); 00193 00194 void runPsf(TimeTracker **trackers); 00195 00196 void runPsf(TimeTracker **trackers, int n_psfs); 00197 00198 void generatePhase(TimeTracker **tracker, int n_psfs); 00199 00200 void makePsf(TimeTracker **tracker, int n_psfs); 00201 00202 void convolveObj(TimeTracker **tracker, int n_psfs); 00203 00204 void 
calcCost(TimeTracker **tracker, int n_psfs); 00205 00206 FFT_TYPE calcDifference(TimeTracker **tracker, WORD *img, WORD *img_diff); 00207 00208 void runCPSO(TimeTracker **tracker, int n_cycles); 00209 00210 void reduce_squares(TimeTracker **tracker, int n_reductions, int reduction_width, cl_mem square, cl_mem sum, WORD* result); 00211 00212 FFT_TYPE validatePsf(double *psf); 00213 00214 void commitBestValues(); 00215 00216 void testFft(int sz, FFT_TYPE *values); 00217 00218 void testFft(int sz); 00219 00220 void testReduction(int sz); 00221 00222 void testReduction(int sz, WORD *values); 00223 00224 static WORD getRandCoef(double range); 00225 00226 void copyToDeviceAsFloat(cl_command_queue command_queue, cl_mem dest, double *values, int size); 00227 00228 void copyToHostAsDouble(cl_command_queue command_queue, cl_mem src, double *values, int size); 00229 00230 const char* last_cuda_error(); 00231 00232 void lock(); 00233 00234 void release(); 00235 00236 bool isInUse(); 00237 00238 WORD getGBestCost(); 00239 00240 void setStartupCoefs(WORD *coefs); 00241 00242 void saveFirstResult(); 00243 00244 static double calc_mean(double *image, int img_area); 00245 00246 static double calc_variance(double *image, int img_area); 00247 00248 static double calc_stddev(double *image, int img_area); 00249 00250 static void generateRandomCoefs(WORD *coefs, int n_zernikes, double range); 00251 00252 static void generateNormalDistrRandomCoefs(WORD *coefs, int n_zernikes, double range); 00253 00254 static double randNormalDistribution(double mean, double std_dev); 00255 00265 template<class T> 00266 static void invertPsf(T *psf, T *psf_inv, int psf_size){ 00267 int half = psf_size / 2; 00268 /* Original PSF 00269 * _________ 00270 * | A | B | 00271 * |-------| 00272 * | C | D | 00273 * ========= 00274 */ 00275 for(int c, l=0; l < half; l++){ 00276 for(c=0; c < half; c++){ 00277 /* PSF Inverted. 
00278 * _________ 00279 * | D | C | 00280 * |-------| 00281 * | B | A | 00282 * ========= 00283 */ 00284 psf_inv[((l + half) * psf_size) + half + c] = psf[(l * psf_size) + c]; // A -> D 00285 psf_inv[((l + half) * psf_size) + c] = psf[(l * psf_size) + half + c]; // B -> C 00286 psf_inv[(l * psf_size) + half + c] = psf[((l + half) * psf_size) + c]; // C -> C 00287 psf_inv[(l * psf_size) + c] = psf[((l + half) * psf_size) + half + c]; // D -> B 00288 } 00289 } 00290 } 00291 }; 00292 00293 #endif /* _CPSO_HPP_ */