/* * render_opencl.c * * Created on: 26.01.2018 * Author: Superleo1810 */ #include "render_opencl.h" void idle_opencl_dummy(void) { glutPostRedisplay(); } void init_opencl(OpenCLConfig *config) { x_min_s_cl = -2.0; x_max_s_cl = 1.0; y_min_s_cl = -1.0; y_max_s_cl = 1.0; x_min_cl = x_min_s_cl; x_max_cl = x_max_s_cl; y_min_cl = y_min_s_cl; y_max_cl = y_max_s_cl; config_opencl = config; output = (cl_uint *) malloc((config_opencl->width) * (config_opencl->height) * sizeof(cl_uchar4)); context = NULL; width_cl = config_opencl->width; // Leave it in, cl needs different endianness cl_platform_id platform_id; cl_uint ret_num_devices; cl_uint ret_num_platforms; size_t device_list_size; cl_program program; size_t cl_src_sz; clGetPlatformIDs(1, &platform_id, &ret_num_platforms); clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices); context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); ret = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_list_size); devices = (cl_device_id *)malloc(device_list_size); num_devices = (cl_uint)(device_list_size/sizeof(cl_device_id)); clGetContextInfo(context, CL_CONTEXT_DEVICES, device_list_size, devices, NULL); FILE *fp; char *cl_src, *path, *flags = (char *)malloc(200 * sizeof(char)); flags[0] = '\0'; switch(config_opencl->fpu) { case OPENCL_FPU_32: switch(config_opencl->set_func) { case SFUNC_JULIA: path = "cl/julia32.cl"; break; case SFUNC_MANDELBROT: default: path = "cl/mandelbrot32.cl"; break; } break; case OPENCL_FPU_64: switch(config_opencl->set_func) { case SFUNC_JULIA: path = "cl/julia64.cl"; break; case SFUNC_MANDELBROT: default: path = "cl/mandelbrot64.cl"; break; } int khrFP64 = 0; int amdFP64 = 0; for (cl_uint i = 0; i < num_devices; i++) { char deviceExtensions[8192]; ret = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(deviceExtensions), deviceExtensions, 0); if (strstr(deviceExtensions, "cl_khr_fp64")) { khrFP64++; } else { if (strstr(deviceExtensions, "cl_amd_fp64")) { amdFP64++; } } } if (khrFP64 == num_devices) { flags = strcat(flags, "-D KHR_DP_EXTENSION "); } else if (amdFP64 == num_devices) { flags = strcat(flags, ""); } break; case OPENCL_FPU_128: printf("128 bit precision not implemented yet\n"); break; } cl_src = (char *)malloc(MAX_SOURCE_SIZE * sizeof(char)); fp = fopen(path, "r"); cl_src_sz = fread(cl_src, 1, MAX_SOURCE_SIZE, fp); fclose(fp); for (cl_uint i = 0; i < num_devices; i++) { cl_command_queue_properties prop = 0; // if (sampleArgs->timing) // { // prop |= CL_QUEUE_PROFILING_ENABLE; // } commandQueue[i] = clCreateCommandQueue(context, devices[i], prop, &ret); outputBuffer[i] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, (sizeof(cl_uint) * (config_opencl->width) * (config_opencl->height)) / num_devices, NULL, &ret); } if (config_opencl->fma) { flags = strcat(flags, "-D MUL_ADD=fma "); } else { flags = strcat(flags, "-D MUL_ADD=mad "); } printf("flags: %s\n", flags); program = clCreateProgramWithSource(context, 1, (const char **)&cl_src, (const size_t *)&cl_src_sz, &ret); ret = clBuildProgram(program, num_devices, devices, flags, NULL, NULL); for (cl_uint i = 0; i < num_devices; i++) { kernel_vector[i] = clCreateKernel(program, "calculate", &ret); } } void render_opencl(void) { cl_event events[MAX_DEVICES]; cl_int eventStatus = CL_QUEUED; size_t globalThreads[1]; size_t localThreads[1]; size_t kernelWorkGroupSize; cl_kernel kernel; cl_double y_max_t; cl_float y_max_t_f; cl_double x_delta = ((x_max_cl - x_min_cl) / (double) config_opencl->width); cl_double y_delta = -((y_max_cl - y_min_cl) / (double) config_opencl->height); cl_float x_delta_f = (float) x_delta; cl_float y_delta_f = (float) y_delta; cl_float x_min_f = (float) x_min_cl; globalThreads[0] = ((config_opencl->width) * (config_opencl->height)) / num_devices; localThreads[0] = 256; globalThreads[0] >>= 2; for (cl_uint i = 0; i < num_devices; i++) { kernel = kernel_vector[i]; ret = clGetKernelWorkGroupInfo(kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0); if ((cl_uint) (localThreads[0]) > kernelWorkGroupSize) { localThreads[0] = kernelWorkGroupSize; } y_max_t = (((y_min_cl + y_max_cl) / 2.0) + (y_max_cl - y_min_cl) / 2.0 - ((double) i * (y_max_cl - y_min_cl)) / (double) num_devices); ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &outputBuffer[i]); y_max_t_f = (float) y_max_t; //printf("x_delta: %f, y_delta: %f, x_delta_f: %f, y_delta_f: %f, x_min_f: %f, y_max_t: %f, y_max_t_f: %f\n", x_delta, y_delta, x_delta_f, y_delta_f, x_min_f, y_max_t, y_max_t_f); switch (config_opencl->fpu) { case OPENCL_FPU_32: // lel ret = clSetKernelArg(kernel, 1, sizeof(cl_float), (void *) &x_min_f); ret = clSetKernelArg(kernel, 2, sizeof(cl_float), (void *) &y_max_t_f); ret = clSetKernelArg(kernel, 3, sizeof(cl_float), (void *) &x_delta_f); ret = clSetKernelArg(kernel, 4, sizeof(cl_float), (void *) &y_delta_f); break; case OPENCL_FPU_64: ret = clSetKernelArg(kernel, 1, sizeof(cl_double), (void *) &x_min_cl); ret = clSetKernelArg(kernel, 2, sizeof(cl_double), (void *) &y_max_t); ret = clSetKernelArg(kernel, 3, sizeof(cl_double), (void *) &x_delta); ret = clSetKernelArg(kernel, 4, sizeof(cl_double), (void *) &y_delta); break; case OPENCL_FPU_128: break; } ret = clSetKernelArg(kernel, 5, sizeof(cl_uint), (void *) &config_opencl->iterations); ret = clSetKernelArg(kernel, 6, sizeof(cl_int), (void *) &width_cl); ret = clEnqueueNDRangeKernel(commandQueue[i], kernel, 1, NULL, globalThreads, localThreads, 0, NULL, &events[i]); } for (cl_uint i = 0; i < num_devices; i++) { ret = clFlush(commandQueue[i]); } for (cl_uint i = 0; i < num_devices; i++) { ret = clWaitForEvents(1, &events[num_devices - i - 1]); ret = clReleaseEvent(events[num_devices - i - 1]); } for (cl_uint i = 0; i < num_devices; i++) { ret = clEnqueueReadBuffer(commandQueue[i], outputBuffer[i], CL_FALSE, 0, (config_opencl->width * config_opencl->height * sizeof(cl_int)) / num_devices, config_opencl->arr + (config_opencl->width * config_opencl->height / num_devices) * i, 0, NULL, &events[i]); } for (cl_uint i = 0; i < num_devices; i++) { ret = clFlush(commandQueue[i]); } for (cl_uint i = 0; i < num_devices; i++) { ret = clWaitForEvents(1, &events[num_devices - i - 1]); ret = clReleaseEvent(events[num_devices - i - 1]); } glBindTexture(GL_TEXTURE_2D, config_opencl->tex); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, config_opencl->width, config_opencl->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, config_opencl->arr); glBindTexture(GL_TEXTURE_2D, 0); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glBindTexture(GL_TEXTURE_2D, config_opencl->tex); glEnable(GL_TEXTURE_2D); glBegin(GL_QUADS); glTexCoord2i(0, 0); glVertex2i(0, 0); glTexCoord2i(0, 1); glVertex2i(0, config_opencl->height); glTexCoord2i(1, 1); glVertex2i(config_opencl->width, config_opencl->height); glTexCoord2i(1, 0); glVertex2i(config_opencl->width, 0); glEnd(); glDisable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, 0); glutSwapBuffers(); } void idle_opencl(void) { int t = 0, delta = 0; do { t = glutGet(GLUT_ELAPSED_TIME); delta = t - t_old_opencl; } while (delta < 16); // TODO: Hardcoded FPS t_old_opencl = t; //glGenTextures(1, &tex); glBindTexture(GL_TEXTURE_2D, config_opencl->tex); //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, config_opencl->width, config_opencl->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, config_opencl->arr); glBindTexture(GL_TEXTURE_2D, 0); cl_ft += (config_opencl->speed * (delta / 1000.0)); x_min_cl = x_min_s_cl + config_opencl->zoom_func(cl_ft, (cl_double) 2.0 + config_opencl->to_x); y_min_cl = y_min_s_cl + config_opencl->zoom_func(cl_ft, (cl_double) 1.0 + config_opencl->to_y); x_max_cl = x_max_s_cl - config_opencl->zoom_func(cl_ft, (cl_double) 1.0 - config_opencl->to_x); y_max_cl = y_max_s_cl - config_opencl->zoom_func(cl_ft, (cl_double) 1.0 - config_opencl->to_y); glutPostRedisplay(); } void keyboard_opencl(unsigned char key, int mouseX, int mouseY) { switch (key) { case 'i': config_opencl->iterations++; break; case 'd': config_opencl->iterations--; break; } } void mouse_opencl(int button, int state, int x, int y) { if (state == GLUT_DOWN) { switch (button) { case GLUT_LEFT_BUTTON: if (config_opencl->speed < 0) config_opencl->speed = (-1) * config_opencl->speed; break; case GLUT_RIGHT_BUTTON: if (config_opencl->speed > 0) config_opencl->speed = (-1) * config_opencl->speed; break; } config_opencl->to_x = x_min_cl + ((d64) x * (x_max_cl - x_min_cl)) / config_opencl->width; config_opencl->to_y = y_min_cl + ((d64) y * (y_max_cl - y_min_cl)) / config_opencl->height; t_old_opencl = glutGet(GLUT_ELAPSED_TIME); glutIdleFunc(idle_opencl); } else if(state == GLUT_UP) { glutIdleFunc(idle_opencl_dummy); } }