PSF Estimation with CPSO
|
00001 /* 00002 * debug.hpp 00003 * 00004 * Created on: 17/07/2012 00005 * Author: Peter Frank Perroni (pfperroni@inf.ufpr.br) 00006 */ 00007 00008 #ifndef _DEBUG_HPP_ 00009 #define _DEBUG_HPP_ 00010 00011 #include <CL/cl.h> 00012 #include <CL/opencl.h> 00013 #include <CL/cl_platform.h> 00014 #include <CL/cl_ext.h> 00015 #include <vector_types.h> 00016 #include <cmath> 00017 #include <float.h> 00018 #include <stdio.h> 00019 #include <stdlib.h> 00020 #include <complex> 00021 #include "util.hpp" 00022 #include "Profiling.hpp" 00023 00024 #ifdef _DEBUG_ 00025 #if _DEBUG_ == DEBUG_TRACE 00026 #define TRACE(msg) { \ 00027 ostringstream _oss; \ 00028 _oss << msg; \ 00029 Debug::traceInfo(_oss.str().c_str()); \ 00030 } 00031 #else 00032 #define TRACE(msg) 00033 #endif 00034 #define DEBUG_MSG(msg) Debug::debug(msg) 00035 #define DEBUG(T, command_queue, label, cl_ptr, rows, n_vars) Debug::debug<T>(command_queue, label, cl_ptr, rows, n_vars) 00036 #define DEBUG_COMPLEX(command_queue, label, cl_ptr, rows, n_vars) Debug::debugComplex(command_queue, label, cl_ptr, rows, n_vars) 00037 #define DEBUG_HOST(T, label, ptr, rows, n_vars) Debug::debugHost<T>(label, ptr, rows, n_vars) 00038 #define DEBUG_HOST_COMPLEX(label, ptr, rows, n_vars) Debug::debugHostComplex(label, ptr, rows, n_vars) 00039 #define DEBUG_VIENNA_CL(label, ptr) std::cout << label << *ptr << std::endl; 00040 #define DEBUG_2D(T, command_queue, label, cl_ptr, d1, d2) DEBUG_3D(T, command_queue, label, cl_ptr, d2, 1, d1) 00041 00042 #define DEBUG_3D(T, command_queue, label, cl_ptr, d1, d2, d3) { \ 00043 ostringstream oss; \ 00044 int _matrix_size = (d1) * (d2); \ 00045 T _matrix[_matrix_size]; \ 00046 for(int _i=0; _i < d3; _i++){ \ 00047 oss << label << "[" << _i << "]"; \ 00048 clMemcpyDeviceToHostOffset(command_queue, _matrix, cl_ptr, _i * _matrix_size * sizeof(T), _matrix_size * sizeof(T)); \ 00049 DEBUG_HOST(T, oss.str().c_str(), _matrix, d2, d1); \ 00050 oss.seekp(0); \ 00051 } \ 00052 } 00053 00054 #define 
DEBUG_COMPLEX2D(command_queue, label, cl_ptr, d1, d2, d3) DEBUG_COMPLEX3D(command_queue, label, cl_ptr, d1, 1, d2) 00055 00056 #define DEBUG_COMPLEX3D(command_queue, label, cl_ptr, d1, d2, d3) { \ 00057 ostringstream oss; \ 00058 int _matrix_size = (d1) * (d2); \ 00059 FFT_TYPE _matrix[_matrix_size]; \ 00060 for(int _i=0; _i < d3; _i++){ \ 00061 oss << label << "[" << _i << "]"; \ 00062 clMemcpyDeviceToHostOffset(command_queue, _matrix, cl_ptr, _i * _matrix_size * SIZEOF_FFTTYPE, _matrix_size * SIZEOF_FFTTYPE); \ 00063 DEBUG_HOST_COMPLEX(oss.str().c_str(), _matrix, d2, d1); \ 00064 oss.seekp(0); \ 00065 } \ 00066 } 00067 00068 #else 00069 #define TRACE(msg) 00070 #define DEBUG_MSG(msg) 00071 #define DEBUG(T, command_queue, label, cl_ptr, rows, n_vars) 00072 #define DEBUG_COMPLEX(command_queue, label, cl_ptr, rows, n_vars) 00073 #define DEBUG_HOST(T, label, ptr, rows, n_vars) 00074 #define DEBUG_HOST_COMPLEX(label, ptr, rows, n_vars) 00075 #define DEBUG_VIENNA_CL(label, ptr) 00076 #define DEBUG_FFT(command_queue, cl_fft, sz) 00077 #define DEBUG_2D(T, command_queue, label, cl_ptr, d1, d2) 00078 #define DEBUG_3D(T, command_queue, label, cl_ptr, d1, d2, d3) 00079 #define DEBUG_COMPLEX2D(command_queue, label, cl_ptr, d1, d2) 00080 #define DEBUG_COMPLEX3D(command_queue, label, cl_ptr, d1, d2, d3) 00081 #endif 00082 00083 00084 #define ERROR_DETAILS(msg, file, line) { \ 00085 ostringstream _oss; \ 00086 _oss << "Error: [" << msg << "] at " << file << ":" << line << std::endl; \ 00087 Debug::traceError(_oss.str().c_str()); \ 00088 } 00089 00090 #define ERROR(msg) ERROR_DETAILS(msg, __FILE__, __LINE__) 00091 00092 #ifdef _CL_STATE_ 00093 #define CHECK_CL_STATE(cmd) \ 00094 { \ 00095 cl_int _status = cmd; \ 00096 if(_status != CL_SUCCESS){ \ 00097 ERROR_DETAILS("OpenCL call failed (error " << _status << "): " << STRINGIFY(cmd), __FILE__, __LINE__); \ 00098 exit(1); \ 00099 } \ 00100 } 00101 #else 00102 #define CHECK_CL_STATE(cmd) cmd; 00103 #endif 00104 00105 #ifdef 
_CHECKSUM_ 00106 #define CHECKSUM(T, command_queue, cl_ptr, size, compareTo) Debug::checkSum<T>(command_queue, cl_ptr, size, compareTo, __FILE__, __LINE__) 00107 #define CHECKSUM_COMPLEX(command_queue, cl_ptr, size, compareTo) Debug::checkSumComplex(command_queue, cl_ptr, size, compareTo, __FILE__, __LINE__) 00108 #else 00109 #define CHECKSUM(T, command_queue, cl_ptr, size, compareTo) 00110 #define CHECKSUM_COMPLEX(command_queue, cl_ptr, size, compareTo) 00111 #endif 00112 00113 #ifdef _TRACK_TIME_ 00114 #define TRACK(statement) statement; 00115 #define RESUME_TRACKING(tracker, pos) tracker[pos]->resume(); 00116 #define PAUSE_TRACKING(tracker, pos) tracker[pos]->pause(); 00117 #else 00118 #define TRACK(statement) 00119 #define RESUME_TRACKING(tracker, pos) 00120 #define PAUSE_TRACKING(tracker, pos) 00121 #endif 00122 00127 class Debug{ 00128 // For the methods that includes additional method call inside itself, 00129 // which in turn could call a second mutex. 00130 static omp_lock_t mutex1; 00131 // For the methods that does not include additional method calls inside itself. 00132 static omp_lock_t mutex2; 00133 static bool lock_initialized; 00134 00135 // Pre-allocate the debug buffer to avoid memory fragmentation. 
00136 static char buffer[DEFAULT_BUFFER_SIZE]; 00137 00138 public: 00139 00143 static void startup_locks(){ 00144 if(!lock_initialized){ 00145 omp_init_lock(&mutex1); 00146 omp_init_lock(&mutex2); 00147 lock_initialized = true; 00148 } 00149 } 00150 00154 static void destroy_locks(){ 00155 if(lock_initialized){ 00156 omp_destroy_lock(&mutex1); 00157 omp_destroy_lock(&mutex2); 00158 lock_initialized = false; 00159 } 00160 } 00161 00167 static void debug(const char *msg){ 00168 omp_set_lock(&mutex2); 00169 std::cout << msg << std::endl; 00170 omp_unset_lock(&mutex2); 00171 } 00172 00182 template<class T> 00183 static void debug(cl_command_queue command_queue, const char* label, cl_mem cl_ptr, int rows, int n_vars) { 00184 omp_set_lock(&mutex1); 00185 T *debug_info = (T*)buffer; 00186 clMemcpyDeviceToHost(command_queue, debug_info, cl_ptr, rows * n_vars * sizeof(T)); 00187 cout << label << ":" << endl; 00188 for (int j, i = 0; i < rows; i++) { 00189 for (j = 0; j < n_vars; j++) { 00190 if (j > 0) { 00191 cout << ' '; 00192 } 00193 cout << debug_info[i * n_vars + j]; 00194 } 00195 cout << endl; 00196 } 00197 omp_unset_lock(&mutex1); 00198 } 00199 00208 template<class T> 00209 static void debugHost(const char* label, void* ptr, int rows, int n_vars) { 00210 omp_set_lock(&mutex2); 00211 T *debug_info = (T*)ptr; 00212 cout << label << ":" << endl; 00213 for (int j, i = 0; i < rows; i++) { 00214 for (j = 0; j < n_vars; j++) { 00215 if (j > 0) { 00216 cout << ' '; 00217 } 00218 cout << debug_info[i * n_vars + j]; 00219 } 00220 cout << endl; 00221 } 00222 omp_unset_lock(&mutex2); 00223 } 00224 00234 static void debugComplex(cl_command_queue command_queue, const char* label, cl_mem cl_ptr, int rows, int n_vars) { 00235 omp_set_lock(&mutex1); 00236 FFT_TYPE *debug_info = (FFT_TYPE*)buffer; 00237 clMemcpyDeviceToHost(command_queue, debug_info, cl_ptr, rows * n_vars * SIZEOF_FFTTYPE); 00238 cout << label << ":" << endl; 00239 for (int j, i = 0; i < rows; i++) { 00240 for (j = 0; 
j < n_vars; j++) { 00241 if (j > 0) { 00242 cout << ' '; 00243 } 00244 cout << debug_info[i * n_vars + j].x << "," << debug_info[i * n_vars + j].y; 00245 } 00246 cout << endl; 00247 } 00248 cout << endl; 00249 omp_unset_lock(&mutex1); 00250 } 00251 00260 static void debugHostComplex(const char* label, FFT_TYPE* ptr, int rows, int n_vars) { 00261 omp_set_lock(&mutex2); 00262 cout << label << ":" << endl; 00263 for (int j, i = 0; i < rows; i++) { 00264 for (j = 0; j < n_vars; j++) { 00265 if (j > 0) { 00266 cout << ' '; 00267 } 00268 cout << ptr[i * n_vars + j].x << "," << ptr[i * n_vars + j].y; 00269 } 00270 cout << endl; 00271 } 00272 cout << endl; 00273 omp_unset_lock(&mutex2); 00274 } 00275 00281 static void traceInfo(const char *msg){ 00282 omp_set_lock(&mutex2); 00283 std::cout << "Trace: [" << msg << "]" << std::endl; 00284 omp_unset_lock(&mutex2); 00285 } 00286 00292 static void traceError(const char *msg){ 00293 omp_set_lock(&mutex2); 00294 std::cerr << "Trace: [" << msg << "]" << std::endl; 00295 omp_unset_lock(&mutex2); 00296 } 00297 00308 template<class T> 00309 static void checkSum(cl_command_queue command_queue, cl_mem cl_ptr, int size, double compareTo, const char *file, int line) { 00310 omp_set_lock(&mutex1); 00311 T *values = (T*)buffer; 00312 double sum = 0; 00313 clMemcpyDeviceToHost(command_queue, values, cl_ptr, size * sizeof(T)); 00314 for (int i = 0; i < size; i++) { 00315 sum += values[i]; 00316 } 00317 omp_unset_lock(&mutex1); 00318 00319 #ifdef _DOUBLE_WORD_ 00320 if((float)sum != (float)compareTo) { 00321 ERROR("CHECKSUM ERROR: correct=" << sum << ", found=" << compareTo); 00322 exit(1); 00323 } 00324 #else 00325 int error = 0; 00326 // This is Entirely ad-hoc, since the GPU floats will never match with CPU calculations. 00327 // Thus, this is much more to have some reasonable approximation from the expected result. 
00328 if (sum >= 1e+8 && abs(sum / 100000 - compareTo / 100000) > 1) 00329 error = 1; 00330 if (sum < 1e+8 && sum >= 1e+7 && abs(sum / 10000 - compareTo / 10000) > 1) 00331 error = 2; 00332 if (sum < 1e+7 && sum >= 1e+6 && abs(sum / 1000 - compareTo / 1000) > 1) 00333 error = 3; 00334 if (sum < 1e+6 && sum >= 1e+5 && abs(sum / 100 - compareTo / 100) > 1) 00335 error = 4; 00336 if (sum < 1e+5 && sum >= 1e+4 && abs(sum / 10 - compareTo / 10) > 1) 00337 error = 5; 00338 if (sum < 1e+4 && sum >= 1e+3 && abs(sum - compareTo) > 1) 00339 error = 6; 00340 if (sum < 1e+3 && sum >= 1e+2 && abs(sum * 10 - compareTo * 10) > 1) 00341 error = 7; 00342 if (sum < 1e+2 && sum >= 1e+1 && abs(sum * 100 - compareTo * 100) > 1) 00343 error = 8; 00344 if (sum < 1e+1 && abs(sum * 1000 - compareTo * 1000) > 1) 00345 error = 9; 00346 if (error) { 00347 ERROR_DETAILS("CHECKSUM ERROR: correct=" << (WORD)sum << ", found=" << (WORD)compareTo << ", error=" << error, file, line); 00348 exit(1); 00349 } 00350 #endif 00351 } 00352 00363 static void checkSumComplex(cl_command_queue command_queue, cl_mem cl_ptr, int size, FFT_TYPE compareTo, const char *file, int line) { 00364 omp_set_lock(&mutex1); 00365 FFT_TYPE *values = (FFT_TYPE*)buffer; 00366 FFT_TYPE sum = {0, 0}; 00367 clMemcpyDeviceToHost(command_queue, values, cl_ptr, size * SIZEOF_FFTTYPE); 00368 for (int i = 0; i < size; i++) { 00369 sum.x += values[i].x; 00370 sum.y += values[i].y; 00371 } 00372 omp_unset_lock(&mutex1); 00373 // Not sure if the comparison below will work for float2 data types. 00374 if(sum.x != compareTo.x || sum.x != compareTo.x) { 00375 ERROR_DETAILS("CHECKSUM ERROR: correct=" << sum.x << "," << sum.y 00376 << "; found=" << compareTo.x << "," << compareTo.y, file, line); 00377 exit(1); 00378 } 00379 } 00380 }; 00381 00382 #endif /* _DEBUG_HPP_ */