PSFEstimationwithCPSO
cl_factory.cpp
00001 /*
00002  * cl_factory.cpp
00003  *
00004  *  Created on: 16/07/2012
00005  *  Author: Peter Frank Perroni (pfperroni@inf.ufpr.br)
00006  */
00007 #include "cl_factory.hpp"
00008 
00009 cl_context *clFactory::context;
00010 omp_lock_t clFactory::mutex;
00011 omp_lock_t clFactory::mutex_queue;
00012 int clFactory::n_contexts;
00013 int clFactory::n_queues;
00014 int clFactory::curr_queue;
00015 int clFactory::curr_context;
00016 vector<clQueue*> clFactory::queues;
00017 map<cl_context, context_t*> clFactory::contexts;
00018 map<cl_device_id, omp_lock_t*> clFactory::devices;
00019 
00029 void clFactory::initialize_gpu(vector<int> *_devices){
00030         omp_init_lock(&mutex);
00031         omp_init_lock(&mutex_queue);
00032 
00033         // Read the .cl file.
00034         char *src = readCl();
00035 
00036         // Create the OpenCL contexts.
00037         createContext(_devices);
00038 
00039         // Create the command queues.
00040         char cBuffer[1024];
00041         int i, j;
00042         context_t *_context;
00043         cl_program program;
00044         kernel_set *kernels;
00045         omp_lock_t *_mutex;
00046         n_queues = 0;
00047         map<cl_context, context_t*>::iterator iter;
00048         for(iter=contexts.begin(); iter != contexts.end(); iter++){
00049                 _context = iter->second;
00050                 createCommandQueue(_context);
00051                 if((int)queues.size() == n_queues){
00052                         continue;
00053                 }
00054 
00055                 // Create the OpenCL program from .cl kernel source for every device available inside the context.
00056                 if ((program=createProgram(_context->context, _context->devices[0], src)) == NULL) {
00057                         // If cannot be compiled in any device, stop the program.
00058                         for(i=0; i < n_queues; i++){
00059                                 clReleaseCommandQueue(queues[i]->getCommandQueue());
00060                         }
00061                         for(i=0; i < n_contexts; i++){
00062                                 clReleaseContext(context[i]);
00063                         }
00064                         for(iter=contexts.begin(); iter != contexts.end(); iter++){
00065                                 delete iter->second->devices;
00066                                 delete iter->second;
00067                         }
00068                         delete context;
00069                         delete src;
00070                         exit(1);
00071                 }
00072                 // Initialize the basic cl environment (like kernels).
00073                 kernels = initialize_cl(_context->context, program);
00074                 // Set the correct kernels and mutex for the newly created queues.
00075                 for(j=n_queues; j < (int)queues.size(); j++){
00076                         _mutex = devices[queues[j]->getDevice()];
00077                         queues[j]->kernels = clone_kernels(kernels, _mutex);
00078                 }
00079                 n_queues = queues.size();
00080         }
00081         delete src;
00082 
00083         if (n_queues == 0) {
00084                 for(i=0; i < n_contexts; i++){
00085                         clReleaseContext(context[i]);
00086                 }
00087                 for(iter=contexts.begin(); iter != contexts.end(); iter++){
00088                         delete iter->second->devices;
00089                         delete iter->second;
00090                 }
00091                 delete context;
00092                 printf("No device found that matches the provided parameters!\n");
00093                 exit(1);
00094         }
00095 
00096         // Print the device information.
00097         // Note that only 1 command queue is created for every device during startup.
00098         for(i=0; i < n_queues; i++){
00099                 getDeviceName(queues[i], cBuffer, 1024);
00100                 printf("Queues created on Device [%s]\n", cBuffer);
00101         }
00102 
00103         // Initialize the FFT context.
00104         // Use the first Context and its command queue for the FFT.
00105         viennacl::ocl::setup_context(0, queues[0]->getContext(), queues[0]->getDevice(), queues[0]->getCommandQueue());
00106         // Retain one additional reference for the first Context and for its command queue,
00107         // to avoid finalization errors.
00108         clRetainContext(queues[0]->getContext());
00109         clRetainCommandQueue(queues[0]->getCommandQueue());
00110 }
00111 
00112 
00124 void clFactory::createContext(vector<int> *_devices) {
00125         cl_int errNum;
00126         cl_uint numPlatforms;
00127         cl_platform_id platformIds[CL_PLATFORMS_TO_TRY];
00128 
00129         // First, select an OpenCL platform to run on.
00130         // This code will try up to CL_PLATFORMS_TO_TRY available platforms.
00131         errNum = clGetPlatformIDs(CL_PLATFORMS_TO_TRY, platformIds, &numPlatforms);
00132         if (errNum != CL_SUCCESS || numPlatforms <= 0) {
00133                 std::cerr << "Failed to find any OpenCL platforms." << std::endl;
00134                 exit(1);
00135         }
00136         if(numPlatforms > CL_PLATFORMS_TO_TRY) numPlatforms = CL_PLATFORMS_TO_TRY;
00137 
00138         // Create OpenCL contexts on the platform for the primary device type (GPU or CPU).
00139         errNum = CL_DEVICE_NOT_FOUND;
00140         cl_context _cl_context[numPlatforms];
00141         int n_found_contexts = 0;
00142         int i;
00143         for(i=0; i < (int)numPlatforms; i++){
00144                 cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platformIds[i], 0 };
00145                 cl_context _context = clCreateContextFromType(contextProperties, PRIMARY_DEVICE_TYPE, NULL, NULL, &errNum);
00146                 if(errNum == CL_SUCCESS){
00147                         _cl_context[n_found_contexts++] = _context;
00148                 }
00149         }
00150         // If all primary devices have failed, try to create it on the secondary device type.
00151         if(n_found_contexts == 0){
00152                 for(i=0; i < (int)numPlatforms && errNum != CL_SUCCESS; i++){
00153                         cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platformIds[i], 0 };
00154                         cl_context _context = clCreateContextFromType(contextProperties, SECONDARY_DEVICE_TYPE, NULL, NULL, &errNum);
00155                         if(errNum == CL_SUCCESS){
00156                                 _cl_context[n_found_contexts++] = _context;
00157                         }
00158                 }
00159         }
00160         if (n_found_contexts == 0) {
00161                 std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;
00162                 exit(1);
00163         }
00164 
00165         // Determine the devices that will compose each context.
00166         context_t *_context;
00167         int j, k, new_count, curr_device = 1;
00168         for(i=0; i < n_found_contexts; i++){
00169                 _context = new context_t;
00170                 getDevices(_cl_context[i], _context);
00171                 // Only store the context if it contains at least 1 device available.
00172                 if(_context->n_devices > 0){
00173                         vector<cl_device_id> saved_devices;
00174                         // If filter the devices.
00175                         if(_devices != NULL){
00176                                 // For all devices within this context.
00177                                 for(j=0; j < _context->n_devices; j++, curr_device++){
00178                                         // For all devices to be filtered.
00179                                         for(k=0; k < (int)_devices->size(); k++){
00180                                                 if(curr_device == _devices->at(k)){
00181                                                         saved_devices.push_back(_context->devices[j]);
00182                                                 }
00183                                         }
00184                                 }
00185                                 new_count = saved_devices.size();
00186                                 // If any device within this context shall be used.
00187                                 if(new_count > 0){
00188                                         // If any device was discarded, rebuild the pointer.
00189                                         if(new_count != _context->n_devices){
00190                                                 _context->n_devices = new_count;
00191                                                 delete _context->devices;
00192                                                 _context->devices = new cl_device_id[_context->n_devices];
00193                                                 for(j=0; j < _context->n_devices; j++){
00194                                                         _context->devices[j] = saved_devices[j];
00195                                                 }
00196                                         }
00197                                 }
00198                                 else{
00199                                         // Ignore this context.
00200                                         delete _context->devices;
00201                                         delete _context;
00202                                         continue;
00203                                 }
00204                         }
00205                         n_contexts++;
00206                         contexts[_cl_context[i]] = _context;
00207                 }
00208                 else{
00209                         delete _context;
00210                 }
00211         }
00212         // Store the cl_context's separately for easier manipulation.
00213         context = new cl_context[n_contexts];
00214         map<cl_context, context_t*>::iterator iter;
00215         for(i=0, iter=contexts.begin(); iter != contexts.end(); iter++, i++){
00216                 context[i] = iter->first;
00217         }
00218 }
00219 
00226 void clFactory::createCommandQueue(context_t *_context) {
00227         cl_int errNum;
00228         omp_lock_t *_mutex;
00229         cl_command_queue commandQueue = NULL;
00230         cl_command_queue_properties prop = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
00231 #ifdef _PROFILING_
00232         prop |= CL_QUEUE_PROFILING_ENABLE;
00233 #endif
00234 
00235         for(int i=0; i < _context->n_devices; i++){
00236                 commandQueue = clCreateCommandQueue(_context->context, _context->devices[i], prop, &errNum);
00237                 if (errNum != CL_SUCCESS) {
00238                         std::cerr << "Failed to create command queue, errno=" << errNum << "." << endl;
00239                 }
00240                 else if (commandQueue != NULL) {
00241                         // Add the command queue to the pool of queues.
00242                         queues.push_back(new clQueue(_context->context, _context->devices[i], commandQueue));
00243                         // Create one single mutex per device.
00244                         if((_mutex = devices[_context->devices[i]]) == NULL){
00245                                 _mutex = new omp_lock_t;
00246                                 omp_init_lock(_mutex);
00247                                 devices[_context->devices[i]] = _mutex;
00248                         }
00249                 }
00250         }
00251 }
00252 
00258 char* clFactory::readCl(){
00259         FILE *fp = fopen("src/kippe_aux.cl", "r");
00260         if (!fp) {
00261                 cerr << "Failed to open .cl for reading." << std::endl;
00262                 return NULL;
00263         }
00264         fseek(fp, 0, SEEK_END);
00265         int sz1 = ftell(fp);
00266         rewind(fp);
00267         char *src = new char[sz1];
00268         int sz2 = fread(src, 1, sz1, fp);
00269         fclose(fp);
00270         if (sz1 != sz2) {
00271                 cerr << "Fail reading .cl." << std::endl;
00272                 delete src;
00273                 return NULL;
00274         }
00275         return src;
00276 }
00277 
00287 cl_program clFactory::createProgram(cl_context _context, cl_device_id _device, char *src) {
00288         cl_int errNum;
00289         // Create a cl_program into the context, meaning that every device present inside the context will have the compiled kernels.
00290         cl_program program = clCreateProgramWithSource(_context, 1, (const char**) &src, NULL, &errNum);
00291         if (program == NULL || errNum != CL_SUCCESS) {
00292                 cerr << "Failed to create CL program from source (" << errNum << ")." << endl;
00293                 return NULL;
00294         }
00295 
00296         //Passing parameters to the cl file.
00297         ostringstream oss;
00298 #ifdef _DOUBLE_WORD_
00299         oss << "-D_DOUBLE_WORD_ -DWORD=double -DFFT_TYPE=double2 ";
00300 #else
00301         oss << "-DWORD=float -DFFT_TYPE=float2 ";
00302 #endif
00303         oss << "-DREDUCTION_BLOCKSZ=" << REDUCTION_BLOCKSZ << " -DREDUCTION_NBLOCKS=" << REDUCTION_NBLOCKS
00304                         << " -DPSF_CORNER=" << PSF_CORNER << " -DPSF_CENTER=" << PSF_CENTER << " -DPSF_EXTRACT=" << PSF_EXTRACT
00305                         << " -DWORD_MAX=" << WORD_MAX << " -DINT_MAX=" << INT_MAX << " -DRAND_MAX=" << RAND_MAX;
00306 
00307         // Build the program inside the devices under the context.
00308         errNum = clBuildProgram(program, 0, NULL, oss.str().c_str(), NULL, NULL);
00309         if (errNum != CL_SUCCESS) {
00310                 size_t ret_val_size;
00311                 // Get the error message from inside the device.
00312                 //----------------------------------------------
00313                 // Check the error message size.
00314                 clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
00315                 // Alocates only the memory needed.
00316                 char *build_log = (char*)malloc(ret_val_size+1);
00317                 // Read the error text.
00318                 clGetProgramBuildInfo(program, _device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
00319 
00320                 // The \0 is not mandatory in the OpenCl specification, thus we add it here for safety.
00321                 build_log[ret_val_size] = '\0';
00322 
00323                 printf("BUILD LOG: \n%s\n", build_log);
00324                 free(build_log);
00325 
00326                 return NULL;
00327         }
00328         return program;
00329 }
00330 
00339 cl_kernel clFactory::createKernel(cl_program program, const char *kernel_name) {
00340         cl_kernel kernel = clCreateKernel(program, kernel_name, NULL);
00341         if (kernel == NULL){
00342                 std::cerr << "Failed to create kernel" << kernel_name << endl;
00343                 finalize_cl();
00344                 exit(1);
00345         }
00346         return kernel;
00347 }
00348 
00360 kernel_set* clFactory::initialize_cl(cl_context context, cl_program program) {
00361         kernel_set *kernels = new kernel_set;
00362         kernels->program = program;
00363         kernels->cl_generate_phase.kernel_instance = createKernel(program, "cl_generate_phase");
00364         kernels->cl_generate_phase.mutex = &kernels->mutex;  // For now, let's keep one single kernel mutex for the entire context.
00365         kernels->cl_power_spec.kernel_instance = createKernel(program, "cl_power_spec");
00366         kernels->cl_power_spec.mutex = &kernels->mutex;
00367         kernels->cl_reduce.kernel_instance = createKernel(program, "cl_reduce");
00368         kernels->cl_reduce.mutex = &kernels->mutex;
00369         kernels->cl_multiply_complexarr.kernel_instance = createKernel(program, "cl_multiply_complexarr");
00370         kernels->cl_multiply_complexarr.mutex = &kernels->mutex;
00371         kernels->cl_resize_psf.kernel_instance = createKernel(program, "cl_resize_psf");
00372         kernels->cl_resize_psf.mutex = &kernels->mutex;
00373         kernels->cl_multiply_doublearr.kernel_instance = createKernel(program, "cl_multiply_doublearr");
00374         kernels->cl_multiply_doublearr.mutex = &kernels->mutex;
00375         kernels->cl_multiply_fftw_complex_arrays.kernel_instance = createKernel(program, "cl_multiply_fftw_complex_arrays");
00376         kernels->cl_multiply_fftw_complex_arrays.mutex = &kernels->mutex;
00377         kernels->cl_calc_cost.kernel_instance = createKernel(program, "cl_calc_cost");
00378         kernels->cl_calc_cost.mutex = &kernels->mutex;
00379         kernels->cl_calc_mismatch.kernel_instance = createKernel(program, "cl_calc_mismatch");
00380         kernels->cl_calc_mismatch.mutex = &kernels->mutex;
00381         kernels->cl_adjust_fft.kernel_instance = createKernel(program, "cl_adjust_fft");
00382         kernels->cl_adjust_fft.mutex = &kernels->mutex;
00383         kernels->cl_real2complex.kernel_instance = createKernel(program, "cl_real2complex");
00384         kernels->cl_real2complex.mutex = &kernels->mutex;
00385         kernels->cl_complex2real.kernel_instance = createKernel(program, "cl_complex2real");
00386         kernels->cl_complex2real.mutex = &kernels->mutex;
00387         kernels->cl_cpso.kernel_instance = createKernel(program, "cl_cpso");
00388         kernels->cl_cpso.mutex = &kernels->mutex;
00389         kernels->cl_real.kernel_instance = createKernel(program, "cl_real");
00390         kernels->cl_real.mutex = &kernels->mutex;
00391 
00392         // Store the kernels separately so they can be finalized at the end.
00393         contexts[context]->kernels = kernels;
00394         return kernels;
00395 }
00396 
00407 kernel_set* clFactory::clone_kernels(kernel_set *kernels, omp_lock_t *_mutex) {
00408         kernel_set *new_kernels = new kernel_set;
00409         new_kernels->mutex = *_mutex;
00410         new_kernels->program = kernels->program;
00411         new_kernels->cl_generate_phase.kernel_instance = kernels->cl_generate_phase.kernel_instance;
00412         new_kernels->cl_generate_phase.mutex = _mutex;
00413         new_kernels->cl_power_spec.kernel_instance = kernels->cl_power_spec.kernel_instance;
00414         new_kernels->cl_power_spec.mutex = _mutex;
00415         new_kernels->cl_reduce.kernel_instance = kernels->cl_reduce.kernel_instance;
00416         new_kernels->cl_reduce.mutex = _mutex;
00417         new_kernels->cl_multiply_complexarr.kernel_instance = kernels->cl_multiply_complexarr.kernel_instance;
00418         new_kernels->cl_multiply_complexarr.mutex = _mutex;
00419         new_kernels->cl_resize_psf.kernel_instance = kernels->cl_resize_psf.kernel_instance;
00420         new_kernels->cl_resize_psf.mutex = _mutex;
00421         new_kernels->cl_multiply_doublearr.kernel_instance = kernels->cl_multiply_doublearr.kernel_instance;
00422         new_kernels->cl_multiply_doublearr.mutex = _mutex;
00423         new_kernels->cl_multiply_fftw_complex_arrays.kernel_instance = kernels->cl_multiply_fftw_complex_arrays.kernel_instance;
00424         new_kernels->cl_multiply_fftw_complex_arrays.mutex = _mutex;
00425         new_kernels->cl_calc_cost.kernel_instance = kernels->cl_calc_cost.kernel_instance;
00426         new_kernels->cl_calc_cost.mutex = _mutex;
00427         new_kernels->cl_calc_mismatch.kernel_instance = kernels->cl_calc_mismatch.kernel_instance;
00428         new_kernels->cl_calc_mismatch.mutex = _mutex;
00429         new_kernels->cl_adjust_fft.kernel_instance = kernels->cl_adjust_fft.kernel_instance;
00430         new_kernels->cl_adjust_fft.mutex = _mutex;
00431         new_kernels->cl_real2complex.kernel_instance = kernels->cl_real2complex.kernel_instance;
00432         new_kernels->cl_real2complex.mutex = _mutex;
00433         new_kernels->cl_complex2real.kernel_instance = kernels->cl_complex2real.kernel_instance;
00434         new_kernels->cl_complex2real.mutex = _mutex;
00435         new_kernels->cl_cpso.kernel_instance = kernels->cl_cpso.kernel_instance;
00436         new_kernels->cl_cpso.mutex = _mutex;
00437         new_kernels->cl_real.kernel_instance = kernels->cl_real.kernel_instance;
00438         new_kernels->cl_real.mutex = _mutex;
00439 
00440         return new_kernels;
00441 }
00442 
00446 void clFactory::finalize_cl() {
00447         int i;
00448         for(i=0; i < n_queues; i++){
00449                 clReleaseCommandQueue(queues[i]->getCommandQueue());
00450         }
00451 
00452         context_t *_context;
00453         kernel_set *kernels;
00454         map<cl_context, context_t*>::iterator iter;
00455         for(iter=contexts.begin(); iter != contexts.end(); iter++){
00456                 // Release the kernels.
00457                 _context = iter->second;
00458                 kernels = _context->kernels;
00459                 clReleaseKernel(kernels->cl_generate_phase.kernel_instance);
00460                 clReleaseKernel(kernels->cl_power_spec.kernel_instance);
00461                 clReleaseKernel(kernels->cl_reduce.kernel_instance);
00462                 clReleaseKernel(kernels->cl_multiply_complexarr.kernel_instance);
00463                 clReleaseKernel(kernels->cl_resize_psf.kernel_instance);
00464                 clReleaseKernel(kernels->cl_multiply_doublearr.kernel_instance);
00465                 clReleaseKernel(kernels->cl_multiply_fftw_complex_arrays.kernel_instance);
00466                 clReleaseKernel(kernels->cl_calc_cost.kernel_instance);
00467                 clReleaseKernel(kernels->cl_adjust_fft.kernel_instance);
00468                 clReleaseKernel(kernels->cl_real2complex.kernel_instance);
00469                 clReleaseKernel(kernels->cl_complex2real.kernel_instance);
00470                 clReleaseKernel(kernels->cl_cpso.kernel_instance);
00471                 clReleaseKernel(kernels->cl_real.kernel_instance);
00472 
00473                 // Release the program.
00474                 clReleaseProgram(kernels->program);
00475 
00476                 delete _context->devices;
00477                 delete _context->kernels;
00478 
00479                 // Release the context.
00480                 clReleaseContext(_context->context);
00481                 omp_destroy_lock(&kernels->mutex);
00482 
00483                 delete _context;
00484         }
00485 
00486         omp_lock_t *_mutex;
00487         map<cl_device_id, omp_lock_t*>::iterator iter2;
00488         for(iter2=devices.begin(); iter2 != devices.end(); iter2++){
00489                 _mutex = iter2->second;
00490                 omp_destroy_lock(_mutex);
00491                 delete _mutex;
00492         }
00493 }
00494 
00501 void clFactory::startup(){
00502         startup(NULL);
00503 }
00504 
00512 void clFactory::startup(vector<int> *_devices){
00513         omp_set_lock(&mutex);
00514         n_queues = 0;
00515         n_contexts = 0;
00516         initialize_gpu(_devices);
00517         curr_queue = 0; // The first queue is exclusive for FFT use, so curr_queue is already 0 (meaning the 0 position is in use).
00518         curr_context = 0;
00519         omp_unset_lock(&mutex);
00520 }
00521 
00527 void clFactory::shutdown(){
00528         omp_set_lock(&mutex);
00529         finalize_cl();
00530         delete context;
00531         for(int i=0; i < n_queues; i++){
00532                 delete queues[i];
00533         }
00534         omp_unset_lock(&mutex);
00535 
00536         omp_destroy_lock(&mutex);
00537         omp_destroy_lock(&mutex_queue);
00538 }
00539 
00548 clQueue* clFactory::getQueue(){
00549         // If the contexts were not yet initialized.
00550         if(n_contexts == 0){
00551                 startup(); // No device filter.
00552                 if(n_contexts == 0){
00553                         return NULL;
00554                 }
00555         }
00556         int _curr_queue;
00557 
00558         omp_set_lock(&mutex_queue);
00559         // If it's necessary to create more queues.
00560         if(++curr_queue == n_queues){
00561                 do{
00562                         // Round-robin over the contexts.
00563                         if(++curr_context == n_contexts){
00564                                 curr_context = 0;
00565                         }
00566                         // Create one additional queue for every device present in the next context.
00567                         createCommandQueue(contexts[context[curr_context]]);
00568                 }while(n_queues == (int)queues.size());
00569                 // Set the correct kernels for the newly created queues.
00570                 kernel_set *kernels = contexts[context[curr_context]]->kernels;
00571                 omp_lock_t *_mutex;
00572                 for(int i=n_queues; i < (int)queues.size(); i++){
00573                         // Set the correct mutex based on the device where the queue was created.
00574                         _mutex = devices[queues[i]->getDevice()];
00575                         queues[i]->kernels = clone_kernels(kernels, _mutex);
00576                 }
00577                 n_queues = queues.size();
00578         }
00579         _curr_queue = curr_queue;
00580         omp_unset_lock(&mutex_queue);
00581 
00582         return queues[_curr_queue];
00583 }
00584 
00590 void clFactory::disposeQueue(clQueue *clqueue){
00591         omp_set_lock(&mutex_queue);
00592         for(int i=0; i < n_queues; ++i) {
00593                 if(queues[i] == clqueue){
00594                         // Move the queue to the last position in the pool, recycling it.
00595                         queues.erase(queues.begin() + i);
00596                         queues.push_back(clqueue);
00597                         curr_queue--;
00598                         break;
00599                 }
00600         }
00601         omp_unset_lock(&mutex_queue);
00602 }
00603 
00611 void clFactory::getDeviceName(clQueue* queue, char *deviceName, int nameSize){
00612         omp_set_lock(&mutex_queue);
00613         size_t sz;
00614         clGetDeviceInfo(queue->getDevice(), CL_DEVICE_NAME, nameSize, deviceName, &sz);
00615         deviceName[sz] = '\0';
00616         omp_unset_lock(&mutex_queue);
00617 }
00618 
00624 int clFactory::getNumAvailableContexts(){
00625         return n_contexts;
00626 }
00627 
00633 int clFactory::getNumAvailableDevices(){
00634         return devices.size();
00635 }
00636 
00642 int clFactory::countDevicesFromHardware() {
00643         cl_int errNum;
00644         size_t deviceBufferSize = -1;
00645         int n_devices = 0;
00646 
00647         for(int i=0; i < n_contexts; i++){
00648                 // First get the size of the devices buffer
00649                 errNum = clGetContextInfo(context[i], CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
00650                 if (errNum == CL_SUCCESS) {
00651                         n_devices += deviceBufferSize / sizeof(cl_device_id);
00652                 }
00653         }
00654 
00655         return n_devices;
00656 }
00657 
00664 void clFactory::getDevices(cl_context _context, context_t *context_data){
00665         context_data->context = _context;
00666 
00667         cl_int errNum;
00668         size_t deviceBufferSize = -1;
00669 
00670         // First get the size of the devices buffer
00671         errNum = clGetContextInfo(_context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
00672         if (errNum != CL_SUCCESS || deviceBufferSize <= 0) {
00673                 context_data->n_devices = 0;
00674                 return;
00675         }
00676 
00677         // Allocate memory for the devices buffer
00678         context_data->n_devices = deviceBufferSize / sizeof(cl_device_id);
00679         context_data->devices = new cl_device_id[context_data->n_devices];
00680         // Get the device IDs available in the context (notice that such ids are more like internal pointers but not sequential numbers).
00681         errNum = clGetContextInfo(_context, CL_CONTEXT_DEVICES, deviceBufferSize, context_data->devices, NULL);
00682         if (errNum != CL_SUCCESS) {
00683                 context_data->n_devices = 0;
00684                 delete context_data->devices;
00685         }
00686 }
 All Classes Functions