PSFEstimationwithCPSO
|
00001 /* 00002 * cl_factory.cpp 00003 * 00004 * Created on: 16/07/2012 00005 * Author: Peter Frank Perroni (pfperroni@inf.ufpr.br) 00006 */ 00007 #include "cl_factory.hpp" 00008 00009 cl_context *clFactory::context; 00010 omp_lock_t clFactory::mutex; 00011 omp_lock_t clFactory::mutex_queue; 00012 int clFactory::n_contexts; 00013 int clFactory::n_queues; 00014 int clFactory::curr_queue; 00015 int clFactory::curr_context; 00016 vector<clQueue*> clFactory::queues; 00017 map<cl_context, context_t*> clFactory::contexts; 00018 map<cl_device_id, omp_lock_t*> clFactory::devices; 00019 00029 void clFactory::initialize_gpu(vector<int> *_devices){ 00030 omp_init_lock(&mutex); 00031 omp_init_lock(&mutex_queue); 00032 00033 // Read the .cl file. 00034 char *src = readCl(); 00035 00036 // Create the OpenCL contexts. 00037 createContext(_devices); 00038 00039 // Create the command queues. 00040 char cBuffer[1024]; 00041 int i, j; 00042 context_t *_context; 00043 cl_program program; 00044 kernel_set *kernels; 00045 omp_lock_t *_mutex; 00046 n_queues = 0; 00047 map<cl_context, context_t*>::iterator iter; 00048 for(iter=contexts.begin(); iter != contexts.end(); iter++){ 00049 _context = iter->second; 00050 createCommandQueue(_context); 00051 if((int)queues.size() == n_queues){ 00052 continue; 00053 } 00054 00055 // Create the OpenCL program from .cl kernel source for every device available inside the context. 00056 if ((program=createProgram(_context->context, _context->devices[0], src)) == NULL) { 00057 // If cannot be compiled in any device, stop the program. 00058 for(i=0; i < n_queues; i++){ 00059 clReleaseCommandQueue(queues[i]->getCommandQueue()); 00060 } 00061 for(i=0; i < n_contexts; i++){ 00062 clReleaseContext(context[i]); 00063 } 00064 for(iter=contexts.begin(); iter != contexts.end(); iter++){ 00065 delete iter->second->devices; 00066 delete iter->second; 00067 } 00068 delete context; 00069 delete src; 00070 exit(1); 00071 } 00072 // Initialize the basic cl environment (like kernels). 00073 kernels = initialize_cl(_context->context, program); 00074 // Set the correct kernels and mutex for the newly created queues. 00075 for(j=n_queues; j < (int)queues.size(); j++){ 00076 _mutex = devices[queues[j]->getDevice()]; 00077 queues[j]->kernels = clone_kernels(kernels, _mutex); 00078 } 00079 n_queues = queues.size(); 00080 } 00081 delete src; 00082 00083 if (n_queues == 0) { 00084 for(i=0; i < n_contexts; i++){ 00085 clReleaseContext(context[i]); 00086 } 00087 for(iter=contexts.begin(); iter != contexts.end(); iter++){ 00088 delete iter->second->devices; 00089 delete iter->second; 00090 } 00091 delete context; 00092 printf("No device found that matches the provided parameters!\n"); 00093 exit(1); 00094 } 00095 00096 // Print the device information. 00097 // Note that only 1 command queue is created for every device during startup. 00098 for(i=0; i < n_queues; i++){ 00099 getDeviceName(queues[i], cBuffer, 1024); 00100 printf("Queues created on Device [%s]\n", cBuffer); 00101 } 00102 00103 // Initialize the FFT context. 00104 // Use the first Context and its command queue for the FFT. 00105 viennacl::ocl::setup_context(0, queues[0]->getContext(), queues[0]->getDevice(), queues[0]->getCommandQueue()); 00106 // Retain one additional reference for the first Context and for its command queue, 00107 // to avoid finalization errors. 00108 clRetainContext(queues[0]->getContext()); 00109 clRetainCommandQueue(queues[0]->getCommandQueue()); 00110 } 00111 00112 00124 void clFactory::createContext(vector<int> *_devices) { 00125 cl_int errNum; 00126 cl_uint numPlatforms; 00127 cl_platform_id platformIds[CL_PLATFORMS_TO_TRY]; 00128 00129 // First, select an OpenCL platform to run on. 00130 // This code will try up to CL_PLATFORMS_TO_TRY available platforms. 00131 errNum = clGetPlatformIDs(CL_PLATFORMS_TO_TRY, platformIds, &numPlatforms); 00132 if (errNum != CL_SUCCESS || numPlatforms <= 0) { 00133 std::cerr << "Failed to find any OpenCL platforms." << std::endl; 00134 exit(1); 00135 } 00136 if(numPlatforms > CL_PLATFORMS_TO_TRY) numPlatforms = CL_PLATFORMS_TO_TRY; 00137 00138 // Create OpenCL contexts on the platform for the primary device type (GPU or CPU). 00139 errNum = CL_DEVICE_NOT_FOUND; 00140 cl_context _cl_context[numPlatforms]; 00141 int n_found_contexts = 0; 00142 int i; 00143 for(i=0; i < (int)numPlatforms; i++){ 00144 cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platformIds[i], 0 }; 00145 cl_context _context = clCreateContextFromType(contextProperties, PRIMARY_DEVICE_TYPE, NULL, NULL, &errNum); 00146 if(errNum == CL_SUCCESS){ 00147 _cl_context[n_found_contexts++] = _context; 00148 } 00149 } 00150 // If all primary devices have failed, try to create it on the secondary device type. 00151 if(n_found_contexts == 0){ 00152 for(i=0; i < (int)numPlatforms && errNum != CL_SUCCESS; i++){ 00153 cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platformIds[i], 0 }; 00154 cl_context _context = clCreateContextFromType(contextProperties, SECONDARY_DEVICE_TYPE, NULL, NULL, &errNum); 00155 if(errNum == CL_SUCCESS){ 00156 _cl_context[n_found_contexts++] = _context; 00157 } 00158 } 00159 } 00160 if (n_found_contexts == 0) { 00161 std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl; 00162 exit(1); 00163 } 00164 00165 // Determine the devices that will compose each context. 00166 context_t *_context; 00167 int j, k, new_count, curr_device = 1; 00168 for(i=0; i < n_found_contexts; i++){ 00169 _context = new context_t; 00170 getDevices(_cl_context[i], _context); 00171 // Only store the context if it contains at least 1 device available. 00172 if(_context->n_devices > 0){ 00173 vector<cl_device_id> saved_devices; 00174 // If filter the devices. 00175 if(_devices != NULL){ 00176 // For all devices within this context. 00177 for(j=0; j < _context->n_devices; j++, curr_device++){ 00178 // For all devices to be filtered. 00179 for(k=0; k < (int)_devices->size(); k++){ 00180 if(curr_device == _devices->at(k)){ 00181 saved_devices.push_back(_context->devices[j]); 00182 } 00183 } 00184 } 00185 new_count = saved_devices.size(); 00186 // If any device within this context shall be used. 00187 if(new_count > 0){ 00188 // If any device was discarded, rebuild the pointer. 00189 if(new_count != _context->n_devices){ 00190 _context->n_devices = new_count; 00191 delete _context->devices; 00192 _context->devices = new cl_device_id[_context->n_devices]; 00193 for(j=0; j < _context->n_devices; j++){ 00194 _context->devices[j] = saved_devices[j]; 00195 } 00196 } 00197 } 00198 else{ 00199 // Ignore this context. 00200 delete _context->devices; 00201 delete _context; 00202 continue; 00203 } 00204 } 00205 n_contexts++; 00206 contexts[_cl_context[i]] = _context; 00207 } 00208 else{ 00209 delete _context; 00210 } 00211 } 00212 // Store the cl_context's separately for easier manipulation. 00213 context = new cl_context[n_contexts]; 00214 map<cl_context, context_t*>::iterator iter; 00215 for(i=0, iter=contexts.begin(); iter != contexts.end(); iter++, i++){ 00216 context[i] = iter->first; 00217 } 00218 } 00219 00226 void clFactory::createCommandQueue(context_t *_context) { 00227 cl_int errNum; 00228 omp_lock_t *_mutex; 00229 cl_command_queue commandQueue = NULL; 00230 cl_command_queue_properties prop = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; 00231 #ifdef _PROFILING_ 00232 prop |= CL_QUEUE_PROFILING_ENABLE; 00233 #endif 00234 00235 for(int i=0; i < _context->n_devices; i++){ 00236 commandQueue = clCreateCommandQueue(_context->context, _context->devices[i], prop, &errNum); 00237 if (errNum != CL_SUCCESS) { 00238 std::cerr << "Failed to create command queue, errno=" << errNum << "." << endl; 00239 } 00240 else if (commandQueue != NULL) { 00241 // Add the command queue to the pool of queues. 00242 queues.push_back(new clQueue(_context->context, _context->devices[i], commandQueue)); 00243 // Create one single mutex per device. 00244 if((_mutex = devices[_context->devices[i]]) == NULL){ 00245 _mutex = new omp_lock_t; 00246 omp_init_lock(_mutex); 00247 devices[_context->devices[i]] = _mutex; 00248 } 00249 } 00250 } 00251 } 00252 00258 char* clFactory::readCl(){ 00259 FILE *fp = fopen("src/kippe_aux.cl", "r"); 00260 if (!fp) { 00261 cerr << "Failed to open .cl for reading." << std::endl; 00262 return NULL; 00263 } 00264 fseek(fp, 0, SEEK_END); 00265 int sz1 = ftell(fp); 00266 rewind(fp); 00267 char *src = new char[sz1]; 00268 int sz2 = fread(src, 1, sz1, fp); 00269 fclose(fp); 00270 if (sz1 != sz2) { 00271 cerr << "Fail reading .cl." << std::endl; 00272 delete src; 00273 return NULL; 00274 } 00275 return src; 00276 } 00277 00287 cl_program clFactory::createProgram(cl_context _context, cl_device_id _device, char *src) { 00288 cl_int errNum; 00289 // Create a cl_program into the context, meaning that every device present inside the context will have the compiled kernels. 00290 cl_program program = clCreateProgramWithSource(_context, 1, (const char**) &src, NULL, &errNum); 00291 if (program == NULL || errNum != CL_SUCCESS) { 00292 cerr << "Failed to create CL program from source (" << errNum << ")." << endl; 00293 return NULL; 00294 } 00295 00296 //Passing parameters to the cl file. 00297 ostringstream oss; 00298 #ifdef _DOUBLE_WORD_ 00299 oss << "-D_DOUBLE_WORD_ -DWORD=double -DFFT_TYPE=double2 "; 00300 #else 00301 oss << "-DWORD=float -DFFT_TYPE=float2 "; 00302 #endif 00303 oss << "-DREDUCTION_BLOCKSZ=" << REDUCTION_BLOCKSZ << " -DREDUCTION_NBLOCKS=" << REDUCTION_NBLOCKS 00304 << " -DPSF_CORNER=" << PSF_CORNER << " -DPSF_CENTER=" << PSF_CENTER << " -DPSF_EXTRACT=" << PSF_EXTRACT 00305 << " -DWORD_MAX=" << WORD_MAX << " -DINT_MAX=" << INT_MAX << " -DRAND_MAX=" << RAND_MAX; 00306 00307 // Build the program inside the devices under the context. 00308 errNum = clBuildProgram(program, 0, NULL, oss.str().c_str(), NULL, NULL); 00309 if (errNum != CL_SUCCESS) { 00310 size_t ret_val_size; 00311 // Get the error message from inside the device. 00312 //---------------------------------------------- 00313 // Check the error message size. 00314 clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); 00315 // Alocates only the memory needed. 00316 char *build_log = (char*)malloc(ret_val_size+1); 00317 // Read the error text. 00318 clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); 00319 00320 // The \0 is not mandatory in the OpenCl specification, thus we add it here for safety. 00321 build_log[ret_val_size] = '\0'; 00322 00323 printf("BUILD LOG: \n%s\n", build_log); 00324 free(build_log); 00325 00326 return NULL; 00327 } 00328 return program; 00329 } 00330 00339 cl_kernel clFactory::createKernel(cl_program program, const char *kernel_name) { 00340 cl_kernel kernel = clCreateKernel(program, kernel_name, NULL); 00341 if (kernel == NULL){ 00342 std::cerr << "Failed to create kernel" << kernel_name << endl; 00343 finalize_cl(); 00344 exit(1); 00345 } 00346 return kernel; 00347 } 00348 00360 kernel_set* clFactory::initialize_cl(cl_context context, cl_program program) { 00361 kernel_set *kernels = new kernel_set; 00362 kernels->program = program; 00363 kernels->cl_generate_phase.kernel_instance = createKernel(program, "cl_generate_phase"); 00364 kernels->cl_generate_phase.mutex = &kernels->mutex; // For now, let's keep one single kernel mutex for the entire context. 00365 kernels->cl_power_spec.kernel_instance = createKernel(program, "cl_power_spec"); 00366 kernels->cl_power_spec.mutex = &kernels->mutex; 00367 kernels->cl_reduce.kernel_instance = createKernel(program, "cl_reduce"); 00368 kernels->cl_reduce.mutex = &kernels->mutex; 00369 kernels->cl_multiply_complexarr.kernel_instance = createKernel(program, "cl_multiply_complexarr"); 00370 kernels->cl_multiply_complexarr.mutex = &kernels->mutex; 00371 kernels->cl_resize_psf.kernel_instance = createKernel(program, "cl_resize_psf"); 00372 kernels->cl_resize_psf.mutex = &kernels->mutex; 00373 kernels->cl_multiply_doublearr.kernel_instance = createKernel(program, "cl_multiply_doublearr"); 00374 kernels->cl_multiply_doublearr.mutex = &kernels->mutex; 00375 kernels->cl_multiply_fftw_complex_arrays.kernel_instance = createKernel(program, "cl_multiply_fftw_complex_arrays"); 00376 kernels->cl_multiply_fftw_complex_arrays.mutex = &kernels->mutex; 00377 kernels->cl_calc_cost.kernel_instance = createKernel(program, "cl_calc_cost"); 00378 kernels->cl_calc_cost.mutex = &kernels->mutex; 00379 kernels->cl_calc_mismatch.kernel_instance = createKernel(program, "cl_calc_mismatch"); 00380 kernels->cl_calc_mismatch.mutex = &kernels->mutex; 00381 kernels->cl_adjust_fft.kernel_instance = createKernel(program, "cl_adjust_fft"); 00382 kernels->cl_adjust_fft.mutex = &kernels->mutex; 00383 kernels->cl_real2complex.kernel_instance = createKernel(program, "cl_real2complex"); 00384 kernels->cl_real2complex.mutex = &kernels->mutex; 00385 kernels->cl_complex2real.kernel_instance = createKernel(program, "cl_complex2real"); 00386 kernels->cl_complex2real.mutex = &kernels->mutex; 00387 kernels->cl_cpso.kernel_instance = createKernel(program, "cl_cpso"); 00388 kernels->cl_cpso.mutex = &kernels->mutex; 00389 kernels->cl_real.kernel_instance = createKernel(program, "cl_real"); 00390 kernels->cl_real.mutex = &kernels->mutex; 00391 00392 // Store the kernels separately so they can be finalized at the end. 00393 contexts[context]->kernels = kernels; 00394 return kernels; 00395 } 00396 00407 kernel_set* clFactory::clone_kernels(kernel_set *kernels, omp_lock_t *_mutex) { 00408 kernel_set *new_kernels = new kernel_set; 00409 new_kernels->mutex = *_mutex; 00410 new_kernels->program = kernels->program; 00411 new_kernels->cl_generate_phase.kernel_instance = kernels->cl_generate_phase.kernel_instance; 00412 new_kernels->cl_generate_phase.mutex = _mutex; 00413 new_kernels->cl_power_spec.kernel_instance = kernels->cl_power_spec.kernel_instance; 00414 new_kernels->cl_power_spec.mutex = _mutex; 00415 new_kernels->cl_reduce.kernel_instance = kernels->cl_reduce.kernel_instance; 00416 new_kernels->cl_reduce.mutex = _mutex; 00417 new_kernels->cl_multiply_complexarr.kernel_instance = kernels->cl_multiply_complexarr.kernel_instance; 00418 new_kernels->cl_multiply_complexarr.mutex = _mutex; 00419 new_kernels->cl_resize_psf.kernel_instance = kernels->cl_resize_psf.kernel_instance; 00420 new_kernels->cl_resize_psf.mutex = _mutex; 00421 new_kernels->cl_multiply_doublearr.kernel_instance = kernels->cl_multiply_doublearr.kernel_instance; 00422 new_kernels->cl_multiply_doublearr.mutex = _mutex; 00423 new_kernels->cl_multiply_fftw_complex_arrays.kernel_instance = kernels->cl_multiply_fftw_complex_arrays.kernel_instance; 00424 new_kernels->cl_multiply_fftw_complex_arrays.mutex = _mutex; 00425 new_kernels->cl_calc_cost.kernel_instance = kernels->cl_calc_cost.kernel_instance; 00426 new_kernels->cl_calc_cost.mutex = _mutex; 00427 new_kernels->cl_calc_mismatch.kernel_instance = kernels->cl_calc_mismatch.kernel_instance; 00428 new_kernels->cl_calc_mismatch.mutex = _mutex; 00429 new_kernels->cl_adjust_fft.kernel_instance = kernels->cl_adjust_fft.kernel_instance; 00430 new_kernels->cl_adjust_fft.mutex = _mutex; 00431 new_kernels->cl_real2complex.kernel_instance = kernels->cl_real2complex.kernel_instance; 00432 new_kernels->cl_real2complex.mutex = _mutex; 00433 new_kernels->cl_complex2real.kernel_instance = kernels->cl_complex2real.kernel_instance; 00434 new_kernels->cl_complex2real.mutex = _mutex; 00435 new_kernels->cl_cpso.kernel_instance = kernels->cl_cpso.kernel_instance; 00436 new_kernels->cl_cpso.mutex = _mutex; 00437 new_kernels->cl_real.kernel_instance = kernels->cl_real.kernel_instance; 00438 new_kernels->cl_real.mutex = _mutex; 00439 00440 return new_kernels; 00441 } 00442 00446 void clFactory::finalize_cl() { 00447 int i; 00448 for(i=0; i < n_queues; i++){ 00449 clReleaseCommandQueue(queues[i]->getCommandQueue()); 00450 } 00451 00452 context_t *_context; 00453 kernel_set *kernels; 00454 map<cl_context, context_t*>::iterator iter; 00455 for(iter=contexts.begin(); iter != contexts.end(); iter++){ 00456 // Release the kernels. 00457 _context = iter->second; 00458 kernels = _context->kernels; 00459 clReleaseKernel(kernels->cl_generate_phase.kernel_instance); 00460 clReleaseKernel(kernels->cl_power_spec.kernel_instance); 00461 clReleaseKernel(kernels->cl_reduce.kernel_instance); 00462 clReleaseKernel(kernels->cl_multiply_complexarr.kernel_instance); 00463 clReleaseKernel(kernels->cl_resize_psf.kernel_instance); 00464 clReleaseKernel(kernels->cl_multiply_doublearr.kernel_instance); 00465 clReleaseKernel(kernels->cl_multiply_fftw_complex_arrays.kernel_instance); 00466 clReleaseKernel(kernels->cl_calc_cost.kernel_instance); 00467 clReleaseKernel(kernels->cl_adjust_fft.kernel_instance); 00468 clReleaseKernel(kernels->cl_real2complex.kernel_instance); 00469 clReleaseKernel(kernels->cl_complex2real.kernel_instance); 00470 clReleaseKernel(kernels->cl_cpso.kernel_instance); 00471 clReleaseKernel(kernels->cl_real.kernel_instance); 00472 00473 // Release the program. 00474 clReleaseProgram(kernels->program); 00475 00476 delete _context->devices; 00477 delete _context->kernels; 00478 00479 // Release the context. 00480 clReleaseContext(_context->context); 00481 omp_destroy_lock(&kernels->mutex); 00482 00483 delete _context; 00484 } 00485 00486 omp_lock_t *_mutex; 00487 map<cl_device_id, omp_lock_t*>::iterator iter2; 00488 for(iter2=devices.begin(); iter2 != devices.end(); iter2++){ 00489 _mutex = iter2->second; 00490 omp_destroy_lock(_mutex); 00491 delete _mutex; 00492 } 00493 } 00494 00501 void clFactory::startup(){ 00502 startup(NULL); 00503 } 00504 00512 void clFactory::startup(vector<int> *_devices){ 00513 omp_set_lock(&mutex); 00514 n_queues = 0; 00515 n_contexts = 0; 00516 initialize_gpu(_devices); 00517 curr_queue = 0; // The first queue is exclusive for FFT use, so curr_queue is already 0 (meaning the 0 position is in use). 00518 curr_context = 0; 00519 omp_unset_lock(&mutex); 00520 } 00521 00527 void clFactory::shutdown(){ 00528 omp_set_lock(&mutex); 00529 finalize_cl(); 00530 delete context; 00531 for(int i=0; i < n_queues; i++){ 00532 delete queues[i]; 00533 } 00534 omp_unset_lock(&mutex); 00535 00536 omp_destroy_lock(&mutex); 00537 omp_destroy_lock(&mutex_queue); 00538 } 00539 00548 clQueue* clFactory::getQueue(){ 00549 // If the contexts were not yet initialized. 00550 if(n_contexts == 0){ 00551 startup(); // No device filter. 00552 if(n_contexts == 0){ 00553 return NULL; 00554 } 00555 } 00556 int _curr_queue; 00557 00558 omp_set_lock(&mutex_queue); 00559 // If it's necessary to create more queues. 00560 if(++curr_queue == n_queues){ 00561 do{ 00562 // Round-robin over the contexts. 00563 if(++curr_context == n_contexts){ 00564 curr_context = 0; 00565 } 00566 // Create one additional queue for every device present in the next context. 00567 createCommandQueue(contexts[context[curr_context]]); 00568 }while(n_queues == (int)queues.size()); 00569 // Set the correct kernels for the newly created queues. 00570 kernel_set *kernels = contexts[context[curr_context]]->kernels; 00571 omp_lock_t *_mutex; 00572 for(int i=n_queues; i < (int)queues.size(); i++){ 00573 // Set the correct mutex based on the device where the queue was created. 00574 _mutex = devices[queues[i]->getDevice()]; 00575 queues[i]->kernels = clone_kernels(kernels, _mutex); 00576 } 00577 n_queues = queues.size(); 00578 } 00579 _curr_queue = curr_queue; 00580 omp_unset_lock(&mutex_queue); 00581 00582 return queues[_curr_queue]; 00583 } 00584 00590 void clFactory::disposeQueue(clQueue *clqueue){ 00591 omp_set_lock(&mutex_queue); 00592 for(int i=0; i < n_queues; ++i) { 00593 if(queues[i] == clqueue){ 00594 // Move the queue to the last position in the pool, recycling it. 00595 queues.erase(queues.begin() + i); 00596 queues.push_back(clqueue); 00597 curr_queue--; 00598 break; 00599 } 00600 } 00601 omp_unset_lock(&mutex_queue); 00602 } 00603 00611 void clFactory::getDeviceName(clQueue* queue, char *deviceName, int nameSize){ 00612 omp_set_lock(&mutex_queue); 00613 size_t sz; 00614 clGetDeviceInfo(queue->getDevice(), CL_DEVICE_NAME, nameSize, deviceName, &sz); 00615 deviceName[sz] = '\0'; 00616 omp_unset_lock(&mutex_queue); 00617 } 00618 00624 int clFactory::getNumAvailableContexts(){ 00625 return n_contexts; 00626 } 00627 00633 int clFactory::getNumAvailableDevices(){ 00634 return devices.size(); 00635 } 00636 00642 int clFactory::countDevicesFromHardware() { 00643 cl_int errNum; 00644 size_t deviceBufferSize = -1; 00645 int n_devices = 0; 00646 00647 for(int i=0; i < n_contexts; i++){ 00648 // First get the size of the devices buffer 00649 errNum = clGetContextInfo(context[i], CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize); 00650 if (errNum == CL_SUCCESS) { 00651 n_devices += deviceBufferSize / sizeof(cl_device_id); 00652 } 00653 } 00654 00655 return n_devices; 00656 } 00657 00664 void clFactory::getDevices(cl_context _context, context_t *context_data){ 00665 context_data->context = _context; 00666 00667 cl_int errNum; 00668 size_t deviceBufferSize = -1; 00669 00670 // First get the size of the devices buffer 00671 errNum = clGetContextInfo(_context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize); 00672 if (errNum != CL_SUCCESS || deviceBufferSize <= 0) { 00673 context_data->n_devices = 0; 00674 return; 00675 } 00676 00677 // Allocate memory for the devices buffer 00678 context_data->n_devices = deviceBufferSize / sizeof(cl_device_id); 00679 context_data->devices = new cl_device_id[context_data->n_devices]; 00680 // Get the device IDs available in the context (notice that such ids are more like internal pointers but not sequential numbers). 00681 errNum = clGetContextInfo(_context, CL_CONTEXT_DEVICES, deviceBufferSize, context_data->devices, NULL); 00682 if (errNum != CL_SUCCESS) { 00683 context_data->n_devices = 0; 00684 delete context_data->devices; 00685 } 00686 }