PSFEstimationwithCPSO
cpso.hpp
00001 /*
00002  * cpso.hpp
00003  *
00004  *  Created on: 27/05/2012
00005  *  Author: Peter Frank Perroni (pfperroni@inf.ufpr.br)
00006  */
00007 
00008 
00009 #ifndef _CPSO_HPP_
00010 #define _CPSO_HPP_
00011 
00012 #include <CL/cl.h>
00013 #include <CL/opencl.h>
00014 #include <CL/cl_platform.h>
00015 #include <CL/cl_ext.h>
00016 #include <vector_types.h>
00017 #include <string.h>
00018 #include <cmath>
00019 #include <stdio.h>
00020 #include <stdlib.h>
00021 #include <complex>
00022 #include <iostream>
00023 #include <map>
00024 #include "viennacl/vector.hpp"
00025 #include "viennacl/fft.hpp"
00026 #include "util.hpp"
00027 #include "cl_factory.hpp"
00028 #include "psf.hpp"
00029 #include "TimeTracker.hpp"
00030 #include "Profiling.hpp"
00031 
00032 using namespace std;
00033 
00039 class CPSO {
00040 private:
00041         static map<cl_context, cl_mem*> static_references;
00042         static omp_lock_t mutex, mutex_fft;
00043         static int seq;
00044         static bool lock_initialized;
00045         static int *diffraction_mask;
00046 
00050         int UID;
00051 
00052         // OpenCL section.
00053         //----------------
00054         // Variables.
00055         cl_mem cl_coefs, cl_sum, cl_conobj, cl_cost, cl_mismatch;
00056         cl_mem cl_speed, cl_pbest, cl_gbest, cl_pbest_value, cl_gbest_value, cl_reset_search, cl_swarm_dim,
00057                 cl_rand_ctx, cl_w, cl_c1, cl_c2, cl_best_coefs;
00058         cl_mem cl_fft_psfe, cl_fft_conobj, cl_pupil, cl_phase, cl_focus;
00059         cl_mem cl_object, cl_image, cl_best_phase, cl_best_psf, cl_best_psfe, cl_best_conobj, cl_best_fft_conobj,
00060                 cl_best_fft_psfe, cl_fft_object, cl_fft_image, cl_debug_info, cl_fft_original_psf;
00061 
00062         // Constant values.
00063         cl_mem cl_phase_mask, cl_zernikes, cl_diffraction_mask;
00064 
00065         // FFT.
00066         viennacl::vector<WORD> *vn_object, *vn_image, *vn_fft_object, *vn_fft_image, *vn_fft_conobj,
00067                 *vn_fft_original_psf;
00068 
00069         // Environment control.
00070         cl_int status;
00071         cl_context context;
00072         cl_command_queue command_queue;
00073         cl_program program;
00074         cl_device_id device;
00075         clQueue *queue;
00076         kernel_set *kernels;
00077 
00078         // Host section.
00079         //--------------
00080         bool in_use, has_startup_coefs, has_psf_original;
00081         int threads, phase_size, img_size, img_sizeh, img_area, img_areah, size_fft, z_size, n_zernikes,
00082                 n_particles, n_swarms, n_psf_evals, max_evals;
00083         int convergence_stable_cycle;
00084         WORD w, psf_range, reset_at, gbest_cost;
00085         WORD *startup_coefs;
00086         FFT_TYPE *original_psf_fft; // To be used during Validation process only, for comparison with the calculated one.
00087 
00088         PSF **psf;
00089 
00090         char *buffer;
00091 
00092         // Methods.
00093         //---------
00094         cl_context createContext();
00095 
00096         cl_command_queue createCommandQueue(cl_context context, cl_device_id *device);
00097 
00098         cl_program createProgram(cl_context context, cl_device_id device);
00099 
00100         void initialize_cl();
00101 
00102         void allocate_data(int _phase_size, int image_size, int _n_zernikes, int _psf_range,
00103                         WORD _w, WORD c1, WORD c2, WORD _reset_at, int _n_particles, int _n_swarms);
00104 
00105         void startup(TimeTracker **trackers);
00106 
00107         static void startup_locks();
00108 
00109         static void destroy_locks();
00110 
00111         void setStableCycle(int cycle) { convergence_stable_cycle = cycle; }
00112 
00113         FFT_TYPE calcMismatch(TimeTracker **tracker, cl_mem _cl_fft_conobj);
00114 
00115 public:
00116         CPSO(double* _zernikes, int *_phase_mask, int *_diffraction_mask, int _phase_size, int _image_size, int _n_zernikes,
00117                         int _psf_range, WORD _w, WORD c1, WORD c2, WORD _reset_at, int _n_particles, int _n_swarms);
00118 
00119         ~CPSO();
00120 
00121         void set_images(double *object, double *image);
00122 
00123         static void store_static_data(cl_context _context, int _n_zernikes, int _phase_size, int _image_size, double* zernikes,
00124                         int *phase_mask, int *_diffraction_mask);
00125 
00126         static void clear_static_data();
00127 
00128         void initialize_gpu();
00129 
00130         void finalize_cl();
00131 
00132         int getStableCycle(){ return convergence_stable_cycle; }
00133 
00134         PSF** getPsfs(){ return psf; }
00135 
00136         WORD getMinCost();
00137 
00138         int getBestPsfPos();
00139 
00140         void getBestCoefs(WORD* _coefs);
00141 
00142         void getBestPsf(WORD *psf);
00143 
00144         void getBestPsfe(WORD *psfe);
00145 
00146         void getBestPhase(WORD *phase);
00147 
00148         void getBestConvolvedObject(WORD *conobj);
00149 
00150         void getBestConvolvedObjectFFT(FFT_TYPE *conobj_fft);
00151 
00152         void getBestPsfeFFT(FFT_TYPE *psfe_fft);
00153 
00154         void getObjectFFT(FFT_TYPE *obj_fft);
00155 
00156         void getImageFFT(FFT_TYPE *img_fft);
00157 
00158         cl_context getContext(){ return context; }
00159 
00160         cl_command_queue getCommandQueue(){ return command_queue; }
00161 
00162         int getPhaseSize(){ return phase_size; }
00163 
00164         int getImageSize(){ return img_size; }
00165 
00166         int getFftSize(){ return size_fft; }
00167 
00168         int getNZernikes() { return n_zernikes; }
00169 
00170         int getPsfRange() { return psf_range; }
00171 
00172         int getNPsfEvals() { return n_psf_evals; }
00173 
00174         void setMaxEvaluations(int _max_evals) { max_evals = _max_evals; }
00175 
00176         void replicateValue(WORD value, int sz, cl_mem cl_ref);
00177 
00178         void setW(WORD _w);
00179 
00180         void setC1(WORD _c1);
00181 
00182         void setC2(WORD _c2);
00183 
00184         void setOriginalPsf(WORD *original_psf);
00185 
00186         double calcPsfDifferences();
00187 
00188         cl_mem getFftObject(){ return cl_fft_object; }
00189 
00190         cl_mem getFftImage() {return cl_fft_image; }
00191 
00192         void run(TimeTracker **trackers, int n_cycles);
00193 
00194         void runPsf(TimeTracker **trackers);
00195 
00196         void runPsf(TimeTracker **trackers, int n_psfs);
00197 
00198         void generatePhase(TimeTracker **tracker, int n_psfs);
00199 
00200         void makePsf(TimeTracker **tracker, int n_psfs);
00201 
00202         void convolveObj(TimeTracker **tracker, int n_psfs);
00203 
00204         void calcCost(TimeTracker **tracker, int n_psfs);
00205 
00206         FFT_TYPE calcDifference(TimeTracker **tracker, WORD *img, WORD *img_diff);
00207 
00208         void runCPSO(TimeTracker **tracker, int n_cycles);
00209 
00210         void reduce_squares(TimeTracker **tracker, int n_reductions, int reduction_width, cl_mem square, cl_mem sum, WORD* result);
00211 
00212         FFT_TYPE validatePsf(double *psf);
00213 
00214         void commitBestValues();
00215 
00216         void testFft(int sz, FFT_TYPE *values);
00217 
00218         void testFft(int sz);
00219 
00220         void testReduction(int sz);
00221 
00222         void testReduction(int sz, WORD *values);
00223 
00224         static WORD getRandCoef(double range);
00225 
00226         void copyToDeviceAsFloat(cl_command_queue command_queue, cl_mem dest, double *values, int size);
00227 
00228         void copyToHostAsDouble(cl_command_queue command_queue, cl_mem src, double *values, int size);
00229 
00230         const char* last_cuda_error();
00231 
00232         void lock();
00233 
00234         void release();
00235 
00236         bool isInUse();
00237 
00238         WORD getGBestCost();
00239 
00240         void setStartupCoefs(WORD *coefs);
00241 
00242         void saveFirstResult();
00243 
00244         static double calc_mean(double *image, int img_area);
00245 
00246         static double calc_variance(double *image, int img_area);
00247 
00248         static double calc_stddev(double *image, int img_area);
00249 
00250         static void generateRandomCoefs(WORD *coefs, int n_zernikes, double range);
00251 
00252         static void generateNormalDistrRandomCoefs(WORD *coefs, int n_zernikes, double range);
00253 
00254         static double randNormalDistribution(double mean, double std_dev);
00255 
00265         template<class T>
00266         static void invertPsf(T *psf, T *psf_inv, int psf_size){
00267                 int half = psf_size / 2;
00268                 /* Original PSF
00269                  *    _________
00270                  *    | A | B |
00271                  *    |-------|
00272                  *    | C | D |
00273                  *    =========
00274                  */
00275                 for(int c, l=0; l < half; l++){
00276                         for(c=0; c < half; c++){
00277                                 /* PSF Inverted.
00278                                  *    _________
00279                                  *    | D | C |
00280                                  *    |-------|
00281                                  *    | B | A |
00282                                  *    =========
00283                                  */
00284                                 psf_inv[((l + half) * psf_size) + half + c] = psf[(l * psf_size) + c]; // A -> D
00285                                 psf_inv[((l + half) * psf_size) + c] = psf[(l * psf_size) + half + c]; // B -> C
00286                                 psf_inv[(l * psf_size) + half + c] = psf[((l + half) * psf_size) + c]; // C -> C
00287                                 psf_inv[(l * psf_size) + c] = psf[((l + half) * psf_size) + half + c]; // D  -> B
00288                         }
00289                 }
00290         }
00291 };
00292 
00293 #endif /* _CPSO_HPP_ */
 All Classes Functions