PSFEstimationwithCPSO
debug.hpp
00001 /*
00002  * debug.hpp
00003  *
00004  *  Created on: 17/07/2012
00005  *  Author: Peter Frank Perroni (pfperroni@inf.ufpr.br)
00006  */
00007 
00008 #ifndef _DEBUG_HPP_
00009 #define _DEBUG_HPP_
00010 
00011 #include <CL/cl.h>
00012 #include <CL/opencl.h>
00013 #include <CL/cl_platform.h>
00014 #include <CL/cl_ext.h>
00015 #include <vector_types.h>
00016 #include <cmath>
00017 #include <float.h>
00018 #include <stdio.h>
00019 #include <stdlib.h>
00020 #include <complex>
00021 #include "util.hpp"
00022 #include "Profiling.hpp"
00023 
00024 #ifdef _DEBUG_
/*
 * TRACE(msg): when the debug level _DEBUG_ equals DEBUG_TRACE, `msg` (a value or a
 * chain of `<<` operands) is formatted into a temporary string stream and the
 * resulting text is handed to Debug::traceInfo(). For every other debug level the
 * macro expands to nothing.
 */
#if _DEBUG_ != DEBUG_TRACE
#define TRACE(msg)
#else
#define TRACE(msg) { \
	std::ostringstream _trace_stream; \
	_trace_stream << msg; \
	Debug::traceInfo(_trace_stream.str().c_str()); \
}
#endif
/*
 * Debug-enabled front ends. Each macro forwards its arguments unchanged to the
 * Debug method of the matching name; T is the element type used to print the buffer.
 */
#define DEBUG_MSG(msg) Debug::debug(msg)
#define DEBUG(T, command_queue, label, cl_ptr, rows, n_vars) Debug::debug<T>(command_queue, label, cl_ptr, rows, n_vars)
#define DEBUG_COMPLEX(command_queue, label, cl_ptr, rows, n_vars) Debug::debugComplex(command_queue, label, cl_ptr, rows, n_vars)
#define DEBUG_HOST(T, label, ptr, rows, n_vars) Debug::debugHost<T>(label, ptr, rows, n_vars)
#define DEBUG_HOST_COMPLEX(label, ptr, rows, n_vars) Debug::debugHostComplex(label, ptr, rows, n_vars)
/*
 * Streams `label` followed by the object `ptr` points to. `ptr` is parenthesized so
 * that an expression argument (e.g. `p + 1`) is dereferenced as a whole. `label` is
 * deliberately left bare so that a `<<` chain can still be passed. The trailing `;`
 * is part of the macro.
 */
#define DEBUG_VIENNA_CL(label, ptr) std::cout << label << *(ptr) << std::endl;
/* Dumps a 2D device buffer as d1 consecutive slices of one row with d2 values each. */
#define DEBUG_2D(T, command_queue, label, cl_ptr, d1, d2) DEBUG_3D(T, command_queue, label, cl_ptr, d2, 1, d1)
00041 
/*
 * DEBUG_3D(T, command_queue, label, cl_ptr, d1, d2, d3)
 *
 * Dumps a device buffer as d3 consecutive slices of (d1 * d2) elements of type T.
 * Each slice is copied to the host with clMemcpyDeviceToHostOffset() and printed
 * through DEBUG_HOST with d2 rows and d1 values per row, labelled "<label>[<slice>]".
 *
 * - d3 is parenthesized so that an expression argument keeps its meaning inside
 *   the loop condition.
 * - The slice offset is computed in size_t to avoid overflowing int arithmetic.
 * - The label stream is emptied on every iteration instead of only rewinding it.
 *
 * NOTE(review): the slice is staged in a variable-length array, which is a compiler
 * extension in C++ and lives on the stack; very large slices may exhaust it.
 */
#define DEBUG_3D(T, command_queue, label, cl_ptr, d1, d2, d3) { \
	std::ostringstream _slice_label; \
	int _matrix_size = (d1) * (d2); \
	T _matrix[_matrix_size]; \
	for(int _i = 0; _i < (d3); _i++){ \
		_slice_label.str(""); \
		_slice_label << label << "[" << _i << "]"; \
		clMemcpyDeviceToHostOffset(command_queue, _matrix, cl_ptr, (size_t)_i * _matrix_size * sizeof(T), _matrix_size * sizeof(T)); \
		DEBUG_HOST(T, _slice_label.str().c_str(), _matrix, d2, d1); \
	} \
}
00053 
/*
 * DEBUG_COMPLEX2D(command_queue, label, cl_ptr, d1, d2)
 *
 * Dumps a 2D complex device buffer as d2 consecutive slices of one row with d1
 * values each, by delegating to DEBUG_COMPLEX3D.
 *
 * A sixth argument was historically required here but never used, while the
 * disabled (non-debug) variant of this macro takes only five. The trailing `...`
 * accepts and ignores that extra argument so both call forms compile.
 *
 * NOTE(review): the dimension order passed on (d1, 1, d2) is the opposite of the
 * one DEBUG_2D uses (d2, 1, d1) - confirm which layout callers expect.
 */
#define DEBUG_COMPLEX2D(command_queue, label, cl_ptr, d1, d2, ...) DEBUG_COMPLEX3D(command_queue, label, cl_ptr, d1, 1, d2)
00055 
/*
 * DEBUG_COMPLEX3D(command_queue, label, cl_ptr, d1, d2, d3)
 *
 * Complex counterpart of DEBUG_3D: dumps a device buffer of FFT_TYPE elements as d3
 * consecutive slices of (d1 * d2) elements. Each slice is copied to the host with
 * clMemcpyDeviceToHostOffset() and printed through DEBUG_HOST_COMPLEX with d2 rows
 * and d1 values per row, labelled "<label>[<slice>]".
 *
 * - d3 is parenthesized so that an expression argument keeps its meaning inside
 *   the loop condition.
 * - The slice offset is computed in size_t to avoid overflowing int arithmetic.
 * - The label stream is emptied on every iteration instead of only rewinding it.
 *
 * NOTE(review): the slice is staged in a variable-length array, which is a compiler
 * extension in C++ and lives on the stack; very large slices may exhaust it.
 */
#define DEBUG_COMPLEX3D(command_queue, label, cl_ptr, d1, d2, d3) { \
	std::ostringstream _slice_label; \
	int _matrix_size = (d1) * (d2); \
	FFT_TYPE _matrix[_matrix_size]; \
	for(int _i = 0; _i < (d3); _i++){ \
		_slice_label.str(""); \
		_slice_label << label << "[" << _i << "]"; \
		clMemcpyDeviceToHostOffset(command_queue, _matrix, cl_ptr, (size_t)_i * _matrix_size * SIZEOF_FFTTYPE, _matrix_size * SIZEOF_FFTTYPE); \
		DEBUG_HOST_COMPLEX(_slice_label.str().c_str(), _matrix, d2, d1); \
	} \
}
00067 
00068 #else
/*
 * Debugging disabled: every debug macro expands to nothing, so the calls (and the
 * evaluation of their arguments) vanish from the build.
 *
 * DEBUG_COMPLEX2D ends in `...` because its debug-enabled variant has historically
 * taken one extra, unused argument; accepting it here lets the same call compile
 * whether or not debugging is enabled.
 *
 * NOTE(review): DEBUG_FFT is only defined in this disabled branch of the header;
 * confirm where (or whether) its debug-enabled variant is provided.
 */
#define TRACE(msg)
#define DEBUG_MSG(msg)
#define DEBUG(T, command_queue, label, cl_ptr, rows, n_vars)
#define DEBUG_COMPLEX(command_queue, label, cl_ptr, rows, n_vars)
#define DEBUG_HOST(T, label, ptr, rows, n_vars)
#define DEBUG_HOST_COMPLEX(label, ptr, rows, n_vars)
#define DEBUG_VIENNA_CL(label, ptr)
#define DEBUG_FFT(command_queue, cl_fft, sz)
#define DEBUG_2D(T, command_queue, label, cl_ptr, d1, d2)
#define DEBUG_3D(T, command_queue, label, cl_ptr, d1, d2, d3)
#define DEBUG_COMPLEX2D(command_queue, label, cl_ptr, d1, d2, ...)
#define DEBUG_COMPLEX3D(command_queue, label, cl_ptr, d1, d2, d3)
00081 #endif
00082 
00083 
/*
 * ERROR_DETAILS(msg, file, line): formats
 *     Error: [<msg>] at <file>:<line>
 * followed by a line break, and passes the text to Debug::traceError().
 * `msg` may be a single value or a chain of `<<` operands.
 */
#define ERROR_DETAILS(msg, file, line) { \
	std::ostringstream _error_text; \
	_error_text << "Error: [" << msg << "] at " << file << ":" << line << std::endl; \
	Debug::traceError(_error_text.str().c_str()); \
}

/* ERROR(msg): ERROR_DETAILS tagged with the source location of the macro call. */
#define ERROR(msg) ERROR_DETAILS(msg, __FILE__, __LINE__)
00091 
/*
 * CHECK_CL_STATE(cmd): runs the OpenCL call `cmd`.
 *
 * Without _CL_STATE_ the call is executed as-is and its result is ignored.
 * With _CL_STATE_ the returned status is compared against CL_SUCCESS; on a mismatch
 * the status code and the text of the call are reported through ERROR_DETAILS and
 * the process terminates with exit code 1.
 */
#ifndef _CL_STATE_
#define CHECK_CL_STATE(cmd) cmd;
#else
#define CHECK_CL_STATE(cmd) \
	{ \
		const cl_int _cl_result = cmd; \
		if(CL_SUCCESS != _cl_result){ \
			ERROR_DETAILS("OpenCL call failed (error " << _cl_result << "): " << STRINGIFY(cmd), __FILE__, __LINE__); \
			exit(1); \
		} \
	}
#endif
00104 
/*
 * CHECKSUM / CHECKSUM_COMPLEX: with _CHECKSUM_ defined they forward to
 * Debug::checkSum<T>() / Debug::checkSumComplex(), adding the source location of
 * the macro call; otherwise they expand to nothing.
 */
#ifndef _CHECKSUM_
#define CHECKSUM(T, command_queue, cl_ptr, size, compareTo)
#define CHECKSUM_COMPLEX(command_queue, cl_ptr, size, compareTo)
#else
#define CHECKSUM(T, command_queue, cl_ptr, size, compareTo) \
	Debug::checkSum<T>(command_queue, cl_ptr, size, compareTo, __FILE__, __LINE__)
#define CHECKSUM_COMPLEX(command_queue, cl_ptr, size, compareTo) \
	Debug::checkSumComplex(command_queue, cl_ptr, size, compareTo, __FILE__, __LINE__)
#endif
00112 
/*
 * Time-tracking hooks. With _TRACK_TIME_ defined, TRACK executes `statement`, and
 * RESUME_TRACKING / PAUSE_TRACKING call resume() / pause() on tracker[pos].
 * Without it, all three expand to nothing.
 */
#ifndef _TRACK_TIME_
#define TRACK(statement)
#define RESUME_TRACKING(tracker, pos)
#define PAUSE_TRACKING(tracker, pos)
#else
#define TRACK(statement) statement;
#define RESUME_TRACKING(tracker, pos) tracker[pos]->resume();
#define PAUSE_TRACKING(tracker, pos) tracker[pos]->pause();
#endif
00122 
00127 class Debug{
00128         // For the methods that includes additional method call inside itself,
00129         // which in turn could call a second mutex.
00130         static omp_lock_t mutex1;
00131         // For the methods that does not include additional method calls inside itself.
00132         static omp_lock_t mutex2;
00133         static bool lock_initialized;
00134 
00135         // Pre-allocate the debug buffer to avoid memory fragmentation.
00136         static char buffer[DEFAULT_BUFFER_SIZE];
00137 
00138 public:
00139 
00143         static void startup_locks(){
00144                 if(!lock_initialized){
00145                         omp_init_lock(&mutex1);
00146                         omp_init_lock(&mutex2);
00147                         lock_initialized = true;
00148                 }
00149         }
00150 
00154         static void destroy_locks(){
00155                 if(lock_initialized){
00156                         omp_destroy_lock(&mutex1);
00157                         omp_destroy_lock(&mutex2);
00158                         lock_initialized = false;
00159                 }
00160         }
00161 
00167         static void debug(const char *msg){
00168                 omp_set_lock(&mutex2);
00169                 std::cout << msg << std::endl;
00170                 omp_unset_lock(&mutex2);
00171         }
00172 
00182         template<class T>
00183         static void debug(cl_command_queue command_queue, const char* label, cl_mem cl_ptr, int rows, int n_vars) {
00184                 omp_set_lock(&mutex1);
00185                 T *debug_info = (T*)buffer;
00186                 clMemcpyDeviceToHost(command_queue, debug_info, cl_ptr, rows * n_vars * sizeof(T));
00187                 cout << label << ":" << endl;
00188                 for (int j, i = 0; i < rows; i++) {
00189                         for (j = 0; j < n_vars; j++) {
00190                                 if (j > 0) {
00191                                         cout << ' ';
00192                                 }
00193                                 cout << debug_info[i * n_vars + j];
00194                         }
00195                         cout << endl;
00196                 }
00197                 omp_unset_lock(&mutex1);
00198         }
00199 
00208         template<class T>
00209         static void debugHost(const char* label, void* ptr, int rows, int n_vars) {
00210                 omp_set_lock(&mutex2);
00211                 T *debug_info = (T*)ptr;
00212                 cout << label << ":" << endl;
00213                 for (int j, i = 0; i < rows; i++) {
00214                         for (j = 0; j < n_vars; j++) {
00215                                 if (j > 0) {
00216                                         cout << ' ';
00217                                 }
00218                                 cout << debug_info[i * n_vars + j];
00219                         }
00220                         cout << endl;
00221                 }
00222                 omp_unset_lock(&mutex2);
00223         }
00224 
00234         static void debugComplex(cl_command_queue command_queue, const char* label, cl_mem cl_ptr, int rows, int n_vars) {
00235                 omp_set_lock(&mutex1);
00236                 FFT_TYPE *debug_info = (FFT_TYPE*)buffer;
00237                 clMemcpyDeviceToHost(command_queue, debug_info, cl_ptr, rows * n_vars * SIZEOF_FFTTYPE);
00238                 cout << label << ":" << endl;
00239                 for (int j, i = 0; i < rows; i++) {
00240                         for (j = 0; j < n_vars; j++) {
00241                                 if (j > 0) {
00242                                         cout << ' ';
00243                                 }
00244                                 cout << debug_info[i * n_vars + j].x << "," << debug_info[i * n_vars + j].y;
00245                         }
00246                         cout << endl;
00247                 }
00248                 cout << endl;
00249                 omp_unset_lock(&mutex1);
00250         }
00251 
00260         static void debugHostComplex(const char* label, FFT_TYPE* ptr, int rows, int n_vars) {
00261                 omp_set_lock(&mutex2);
00262                 cout << label << ":" << endl;
00263                 for (int j, i = 0; i < rows; i++) {
00264                         for (j = 0; j < n_vars; j++) {
00265                                 if (j > 0) {
00266                                         cout << ' ';
00267                                 }
00268                                 cout << ptr[i * n_vars + j].x << "," << ptr[i * n_vars + j].y;
00269                         }
00270                         cout << endl;
00271                 }
00272                 cout << endl;
00273                 omp_unset_lock(&mutex2);
00274         }
00275 
00281         static void traceInfo(const char *msg){
00282                 omp_set_lock(&mutex2);
00283                 std::cout << "Trace: [" << msg << "]" << std::endl;
00284                 omp_unset_lock(&mutex2);
00285         }
00286 
00292         static void traceError(const char *msg){
00293                 omp_set_lock(&mutex2);
00294                 std::cerr << "Trace: [" << msg << "]" << std::endl;
00295                 omp_unset_lock(&mutex2);
00296         }
00297 
00308         template<class T>
00309         static void checkSum(cl_command_queue command_queue, cl_mem cl_ptr, int size, double compareTo, const char *file, int line) {
00310                 omp_set_lock(&mutex1);
00311                 T *values = (T*)buffer;
00312                 double sum = 0;
00313                 clMemcpyDeviceToHost(command_queue, values, cl_ptr, size * sizeof(T));
00314                 for (int i = 0; i < size; i++) {
00315                         sum += values[i];
00316                 }
00317                 omp_unset_lock(&mutex1);
00318 
00319         #ifdef _DOUBLE_WORD_
00320                 if((float)sum != (float)compareTo) {
00321                         ERROR("CHECKSUM ERROR: correct=" << sum << ", found=" << compareTo);
00322                         exit(1);
00323                 }
00324         #else
00325                 int error = 0;
00326                 // This is Entirely ad-hoc, since the GPU floats will never match with CPU calculations.
00327                 // Thus, this is much more to have some reasonable approximation from the expected result.
00328                 if (sum >= 1e+8 && abs(sum / 100000 - compareTo / 100000) > 1)
00329                         error = 1;
00330                 if (sum < 1e+8 && sum >= 1e+7 && abs(sum / 10000 - compareTo / 10000) > 1)
00331                         error = 2;
00332                 if (sum < 1e+7 && sum >= 1e+6 && abs(sum / 1000 - compareTo / 1000) > 1)
00333                         error = 3;
00334                 if (sum < 1e+6 && sum >= 1e+5 && abs(sum / 100 - compareTo / 100) > 1)
00335                         error = 4;
00336                 if (sum < 1e+5 && sum >= 1e+4 && abs(sum / 10 - compareTo / 10) > 1)
00337                         error = 5;
00338                 if (sum < 1e+4 && sum >= 1e+3 && abs(sum - compareTo) > 1)
00339                         error = 6;
00340                 if (sum < 1e+3 && sum >= 1e+2 && abs(sum * 10 - compareTo * 10) > 1)
00341                         error = 7;
00342                 if (sum < 1e+2 && sum >= 1e+1 && abs(sum * 100 - compareTo * 100) > 1)
00343                         error = 8;
00344                 if (sum < 1e+1 && abs(sum * 1000 - compareTo * 1000) > 1)
00345                         error = 9;
00346                 if (error) {
00347                         ERROR_DETAILS("CHECKSUM ERROR: correct=" << (WORD)sum << ", found=" << (WORD)compareTo << ", error=" << error, file, line);
00348                         exit(1);
00349                 }
00350         #endif
00351         }
00352 
00363         static void checkSumComplex(cl_command_queue command_queue, cl_mem cl_ptr, int size, FFT_TYPE compareTo, const char *file, int line) {
00364                 omp_set_lock(&mutex1);
00365                 FFT_TYPE *values = (FFT_TYPE*)buffer;
00366                 FFT_TYPE sum = {0, 0};
00367                 clMemcpyDeviceToHost(command_queue, values, cl_ptr, size * SIZEOF_FFTTYPE);
00368                 for (int i = 0; i < size; i++) {
00369                         sum.x += values[i].x;
00370                         sum.y += values[i].y;
00371                 }
00372                 omp_unset_lock(&mutex1);
00373                 // Not sure if the comparison below will work for float2 data types.
00374                 if(sum.x != compareTo.x || sum.x != compareTo.x) {
00375                         ERROR_DETAILS("CHECKSUM ERROR: correct=" << sum.x << "," << sum.y
00376                                 << "; found=" << compareTo.x << "," << compareTo.y, file, line);
00377                         exit(1);
00378                 }
00379         }
00380 };
00381 
00382 #endif /* _DEBUG_HPP_ */
 All Classes Functions