aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/cl/mandelbrot32.cl196
-rw-r--r--src/cl/mandelbrot64.cl230
-rw-r--r--src/defs.h8
-rw-r--r--src/mandelbrot-zoom.c112
-rw-r--r--src/mandelbrot-zoom.h5
-rw-r--r--src/render.c111
-rw-r--r--src/render.h44
-rw-r--r--src/render_cpu.c99
-rw-r--r--src/render_cpu.h61
-rw-r--r--src/render_opencl.c297
-rw-r--r--src/render_opencl.h75
-rw-r--r--src/sets.c14
-rw-r--r--src/sets.h16
13 files changed, 1086 insertions, 182 deletions
diff --git a/src/cl/mandelbrot32.cl b/src/cl/mandelbrot32.cl
new file mode 100644
index 0000000..cda933f
--- /dev/null
+++ b/src/cl/mandelbrot32.cl
@@ -0,0 +1,196 @@
+__kernel void calculate(__global uchar4 * mandelbrotImage, const float posx, const float posy, const float stepSizeX, const float stepSizeY, const uint maxIterations, const uint width)
+{
+ int tid = get_global_id(0);
+
+ int i = tid % (width / 4);
+ int j = tid / (width / 4);
+
+ int4 veci = {4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3};
+ int4 vecj = {j, j, j, j};
+
+ float4 x0;
+ x0.s0 = (float)(posx + stepSizeX * (float)veci.s0);
+ x0.s1 = (float)(posx + stepSizeX * (float)veci.s1);
+ x0.s2 = (float)(posx + stepSizeX * (float)veci.s2);
+ x0.s3 = (float)(posx + stepSizeX * (float)veci.s3);
+ float4 y0;
+ y0.s0 = (float)(posy + stepSizeY * (float)vecj.s0);
+ y0.s1 = (float)(posy + stepSizeY * (float)vecj.s1);
+ y0.s2 = (float)(posy + stepSizeY * (float)vecj.s2);
+ y0.s3 = (float)(posy + stepSizeY * (float)vecj.s3);
+
+ float4 x = x0;
+ float4 y = y0;
+
+ uint iter=0;
+ float4 tmp;
+ int4 stay;
+ int4 ccount = 0;
+
+ stay.s0 = (x.s0 * x.s0 + y.s0 * y.s0) <= 4.0f;
+ stay.s1 = (x.s1 * x.s1 + y.s1 * y.s1) <= 4.0f;
+ stay.s2 = (x.s2 * x.s2 + y.s2 * y.s2) <= 4.0f;
+ stay.s3 = (x.s3 * x.s3 + y.s3 * y.s3) <= 4.0f;
+ float4 savx = x;
+ float4 savy = y;
+ for(iter=0; (stay.s0 | stay.s1 | stay.s2 | stay.s3) && (iter < maxIterations); iter+= 16)
+ {
+ x = savx;
+ y = savy;
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0f * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0f * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0f * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0f * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0f * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0f * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0f * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0f * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0f * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0f * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0f * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0f * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0f * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0f * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0f * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0f * tmp, y, y0);
+
+ stay.s0 = (x.s0 * x.s0 + y.s0 * y.s0) <= 4.0f;
+ stay.s1 = (x.s1 * x.s1 + y.s1 * y.s1) <= 4.0f;
+ stay.s2 = (x.s2 * x.s2 + y.s2 * y.s2) <= 4.0f;
+ stay.s3 = (x.s3 * x.s3 + y.s3 * y.s3) <= 4.0f;
+
+ savx.s0 = (stay.s0 ? x.s0 : savx.s0);
+ savx.s1 = (stay.s1 ? x.s1 : savx.s1);
+ savx.s2 = (stay.s2 ? x.s2 : savx.s2);
+ savx.s3 = (stay.s3 ? x.s3 : savx.s3);
+ savy.s0 = (stay.s0 ? y.s0 : savy.s0);
+ savy.s1 = (stay.s1 ? y.s1 : savy.s1);
+ savy.s2 = (stay.s2 ? y.s2 : savy.s2);
+ savy.s3 = (stay.s3 ? y.s3 : savy.s3);
+ ccount += stay*16;
+ }
+ // Handle remainder
+ if (!(stay.s0 & stay.s1 & stay.s2 & stay.s3))
+ {
+ iter = 16;
+ do
+ {
+ x = savx;
+ y = savy;
+ stay.s0 = ((x.s0 * x.s0 + y.s0 * y.s0) <= 4.0f) &&
+ (ccount.s0 < maxIterations);
+ stay.s1 = ((x.s1 * x.s1 + y.s1 * y.s1) <= 4.0f) &&
+ (ccount.s1 < maxIterations);
+ stay.s2 = ((x.s2 * x.s2 + y.s2 * y.s2) <= 4.0f) &&
+ (ccount.s2 < maxIterations);
+ stay.s3 = ((x.s3 * x.s3 + y.s3 * y.s3) <= 4.0f) &&
+ (ccount.s3 < maxIterations);
+ tmp = x;
+ x = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0f * tmp, y, y0);
+ ccount += stay;
+ iter--;
+ savx.s0 = (stay.s0 ? x.s0 : savx.s0);
+ savx.s1 = (stay.s1 ? x.s1 : savx.s1);
+ savx.s2 = (stay.s2 ? x.s2 : savx.s2);
+ savx.s3 = (stay.s3 ? x.s3 : savx.s3);
+ savy.s0 = (stay.s0 ? y.s0 : savy.s0);
+ savy.s1 = (stay.s1 ? y.s1 : savy.s1);
+ savy.s2 = (stay.s2 ? y.s2 : savy.s2);
+ savy.s3 = (stay.s3 ? y.s3 : savy.s3);
+ } while ((stay.s0 | stay.s1 | stay.s2 | stay.s3) && iter);
+ }
+ x = savx;
+ y = savy;
+ float4 fc = convert_float4(ccount);
+ fc.s0 = (float)ccount.s0 + 1 -
+ native_log2(native_log2(x.s0 * x.s0 + y.s0 * y.s0));
+ fc.s1 = (float)ccount.s1 + 1 -
+ native_log2(native_log2(x.s1 * x.s1 + y.s1 * y.s1));
+ fc.s2 = (float)ccount.s2 + 1 -
+ native_log2(native_log2(x.s2 * x.s2 + y.s2 * y.s2));
+ fc.s3 = (float)ccount.s3 + 1 -
+ native_log2(native_log2(x.s3 * x.s3 + y.s3 * y.s3));
+
+ float c = fc.s0 * 2.0f * 3.1416f / 256.0f;
+ uchar4 color[4];
+ color[0].s0 = ((1.0f + native_cos(c)) * 0.5f) * 255;
+ color[0].s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
+ color[0].s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
+ color[0].s3 = 0xff;
+ if (ccount.s0 == maxIterations)
+ {
+ color[0].s0 = 0;
+ color[0].s1 = 0;
+ color[0].s2 = 0;
+ }
+ mandelbrotImage[4 * tid] = color[0];
+ c = fc.s1 * 2.0f * 3.1416f / 256.0f;
+ color[1].s0 = ((1.0f + native_cos(c)) * 0.5f) * 255;
+ color[1].s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
+ color[1].s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
+ color[1].s3 = 0xff;
+ if (ccount.s1 == maxIterations)
+ {
+ color[1].s0 = 0;
+ color[1].s1 = 0;
+ color[1].s2 = 0;
+ }
+ mandelbrotImage[4 * tid + 1] = color[1];
+ c = fc.s2 * 2.0f * 3.1416f / 256.0f;
+ color[2].s0 = ((1.0f + native_cos(c)) * 0.5f) * 255;
+ color[2].s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
+ color[2].s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
+ color[2].s3 = 0xff;
+ if (ccount.s2 == maxIterations)
+ {
+ color[2].s0 = 0;
+ color[2].s1 = 0;
+ color[2].s2 = 0;
+ }
+ mandelbrotImage[4 * tid + 2] = color[2];
+ c = fc.s3 * 2.0f * 3.1416f / 256.0f;
+ color[3].s0 = ((1.0f + native_cos(c)) * 0.5f) * 255;
+ color[3].s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
+ color[3].s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
+ color[3].s3 = 0xff;
+ if (ccount.s3 == maxIterations)
+ {
+ color[3].s0 = 0;
+ color[3].s1 = 0;
+ color[3].s2 = 0;
+ }
+ mandelbrotImage[4 * tid + 3] = color[3];
+} \ No newline at end of file
diff --git a/src/cl/mandelbrot64.cl b/src/cl/mandelbrot64.cl
new file mode 100644
index 0000000..be38b4a
--- /dev/null
+++ b/src/cl/mandelbrot64.cl
@@ -0,0 +1,230 @@
+#ifdef KHR_DP_EXTENSION
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#else
+#pragma OPENCL EXTENSION cl_amd_fp64 : enable
+#endif
+
+__kernel void calculate(__global uchar4 * mandelbrotImage, const double posx, const double posy, const double stepSizeX, const double stepSizeY, const uint maxIterations, const uint width)
+{
+ int tid = get_global_id(0);
+
+ int i = tid % (width / 4);
+ int j = tid / (width / 4);
+
+ int4 veci = {4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3};
+ int4 vecj = {j, j, j, j};
+
+ double4 x0;
+ x0.s0 = (double)(posx + stepSizeX * (double)veci.s0);
+ x0.s1 = (double)(posx + stepSizeX * (double)veci.s1);
+ x0.s2 = (double)(posx + stepSizeX * (double)veci.s2);
+ x0.s3 = (double)(posx + stepSizeX * (double)veci.s3);
+ double4 y0;
+ y0.s0 = (double)(posy + stepSizeY * (double)vecj.s0);
+ y0.s1 = (double)(posy + stepSizeY * (double)vecj.s1);
+ y0.s2 = (double)(posy + stepSizeY * (double)vecj.s2);
+ y0.s3 = (double)(posy + stepSizeY * (double)vecj.s3);
+
+ double4 x = x0;
+ double4 y = y0;
+
+ uint iter=0;
+ double4 tmp;
+ int4 stay;
+ int4 ccount = 0;
+
+ stay.s0 = (x.s0 * x.s0 + y.s0 * y.s0) <= 4.0;
+ stay.s1 = (x.s1 * x.s1 + y.s1 * y.s1) <= 4.0;
+ stay.s2 = (x.s2 * x.s2 + y.s2 * y.s2) <= 4.0;
+ stay.s3 = (x.s3 * x.s3 + y.s3 * y.s3) <= 4.0;
+ double4 savx = x;
+ double4 savy = y;
+ for(iter=0; (stay.s0 | stay.s1 | stay.s2 | stay.s3) && (iter < maxIterations); iter+= 16)
+ {
+ x = savx;
+ y = savy;
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0)); // tmp = x * x + x0 - y * y;
+ y = MUL_ADD(2.0 * x, y, y0); //y = 2.0 * x * y + y0;
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));// x = tmp * tmp + x0 - y * y;
+ y = MUL_ADD(2.0 * tmp, y, y0); //y = 2.0 * tmp * y + y0;
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0 * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0 * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0 * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0 * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0 * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0 * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0 * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0 * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0 * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0 * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0 * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0 * tmp, y, y0);
+
+ // Two iterations
+ tmp = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0 * x, y, y0);
+ x = MUL_ADD(-y, y, MUL_ADD(tmp, tmp, x0));
+ y = MUL_ADD(2.0 * tmp, y, y0);
+
+ stay.s0 = (x.s0 * x.s0 + y.s0 * y.s0) <= 4.0;
+ stay.s1 = (x.s1 * x.s1 + y.s1 * y.s1) <= 4.0;
+ stay.s2 = (x.s2 * x.s2 + y.s2 * y.s2) <= 4.0;
+ stay.s3 = (x.s3 * x.s3 + y.s3 * y.s3) <= 4.0;
+
+ savx.s0 = (stay.s0 ? x.s0 : savx.s0);
+ savx.s1 = (stay.s1 ? x.s1 : savx.s1);
+ savx.s2 = (stay.s2 ? x.s2 : savx.s2);
+ savx.s3 = (stay.s3 ? x.s3 : savx.s3);
+ savy.s0 = (stay.s0 ? y.s0 : savy.s0);
+ savy.s1 = (stay.s1 ? y.s1 : savy.s1);
+ savy.s2 = (stay.s2 ? y.s2 : savy.s2);
+ savy.s3 = (stay.s3 ? y.s3 : savy.s3);
+ ccount += stay*16;
+ }
+ // Handle remainder
+ if (!(stay.s0 & stay.s1 & stay.s2 & stay.s3))
+ {
+ iter = 16;
+ do
+ {
+ x = savx;
+ y = savy;
+ stay.s0 = ((x.s0 * x.s0 + y.s0 * y.s0) <= 4.0) &&
+ (ccount.s0 < maxIterations);
+ stay.s1 = ((x.s1 * x.s1 + y.s1 * y.s1) <= 4.0) &&
+ (ccount.s1 < maxIterations);
+ stay.s2 = ((x.s2 * x.s2 + y.s2 * y.s2) <= 4.0) &&
+ (ccount.s2 < maxIterations);
+ stay.s3 = ((x.s3 * x.s3 + y.s3 * y.s3) <= 4.0) &&
+ (ccount.s3 < maxIterations);
+ tmp = x;
+ x = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
+ y = MUL_ADD(2.0 * tmp, y, y0); //y = 2.0 * tmp * y + y0;
+ ccount += stay;
+ iter--;
+ savx.s0 = (stay.s0 ? x.s0 : savx.s0);
+ savx.s1 = (stay.s1 ? x.s1 : savx.s1);
+ savx.s2 = (stay.s2 ? x.s2 : savx.s2);
+ savx.s3 = (stay.s3 ? x.s3 : savx.s3);
+ savy.s0 = (stay.s0 ? y.s0 : savy.s0);
+ savy.s1 = (stay.s1 ? y.s1 : savy.s1);
+ savy.s2 = (stay.s2 ? y.s2 : savy.s2);
+ savy.s3 = (stay.s3 ? y.s3 : savy.s3);
+ } while ((stay.s0 | stay.s1 | stay.s2 | stay.s3) && iter);
+ }
+ x = savx;
+ y = savy;
+ double4 fc = convert_double4(ccount);
+ fc.s0 = (double)ccount.s0 + 1 -
+ native_log2(native_log2(x.s0 * x.s0 + y.s0 * y.s0));
+ fc.s1 = (double)ccount.s1 + 1 -
+ native_log2(native_log2(x.s1 * x.s1 + y.s1 * y.s1));
+ fc.s2 = (double)ccount.s2 + 1 -
+ native_log2(native_log2(x.s2 * x.s2 + y.s2 * y.s2));
+ fc.s3 = (double)ccount.s3 + 1 -
+ native_log2(native_log2(x.s3 * x.s3 + y.s3 * y.s3));
+
+ double c = fc.s0 * 2.0 * 3.1416 / 256.0;
+ uchar4 color[4];
+ color[0].s0 = ((1.0 + native_cos(c)) * 0.5) * 255;
+ color[0].s1 = ((1.0 + native_cos(2.0 * c + 2.0 * 3.1416 / 3.0)) * 0.5) * 255;
+ color[0].s2 = ((1.0 + native_cos(c - 2.0 * 3.1416 / 3.0)) * 0.5) * 255;
+ color[0].s3 = 0xff;
+ if (ccount.s0 == maxIterations)
+ {
+ color[0].s0 = 0;
+ color[0].s1 = 0;
+ color[0].s2 = 0;
+ }
+ if (bench)
+ {
+ color[0].s0 = ccount.s0 & 0xff;
+ color[0].s1 = (ccount.s0 & 0xff00) >> 8;
+ color[0].s2 = (ccount.s0 & 0xff0000) >> 16;
+ color[0].s3 = (ccount.s0 & 0xff000000) >> 24;
+ }
+ mandelbrotImage[4 * tid] = color[0];
+ c = fc.s1 * 2.0 * 3.1416 / 256.0;
+ color[1].s0 = ((1.0 + native_cos(c)) * 0.5) * 255;
+ color[1].s1 = ((1.0 + native_cos(2.0 * c + 2.0 * 3.1416 / 3.0)) * 0.5) * 255;
+ color[1].s2 = ((1.0 + native_cos(c - 2.0 * 3.1416 / 3.0)) * 0.5) * 255;
+ color[1].s3 = 0xff;
+ if (ccount.s1 == maxIterations)
+ {
+ color[1].s0 = 0;
+ color[1].s1 = 0;
+ color[1].s2 = 0;
+ }
+ if (bench)
+ {
+ color[1].s0 = ccount.s1 & 0xff;
+ color[1].s1 = (ccount.s1 & 0xff00) >> 8;
+ color[1].s2 = (ccount.s1 & 0xff0000) >> 16;
+ color[1].s3 = (ccount.s1 & 0xff000000) >> 24;
+ }
+ mandelbrotImage[4 * tid + 1] = color[1];
+ c = fc.s2 * 2.0 * 3.1416 / 256.0;
+ color[2].s0 = ((1.0 + native_cos(c)) * 0.5) * 255;
+ color[2].s1 = ((1.0 + native_cos(2.0 * c + 2.0 * 3.1416 / 3.0)) * 0.5) * 255;
+ color[2].s2 = ((1.0 + native_cos(c - 2.0 * 3.1416 / 3.0)) * 0.5) * 255;
+ color[2].s3 = 0xff;
+ if (ccount.s2 == maxIterations)
+ {
+ color[2].s0 = 0;
+ color[2].s1 = 0;
+ color[2].s2 = 0;
+ }
+ if (bench)
+ {
+ color[2].s0 = ccount.s2 & 0xff;
+ color[2].s1 = (ccount.s2 & 0xff00) >> 8;
+ color[2].s2 = (ccount.s2 & 0xff0000) >> 16;
+ color[2].s3 = (ccount.s2 & 0xff000000) >> 24;
+ }
+ mandelbrotImage[4 * tid + 2] = color[2];
+ c = fc.s3 * 2.0 * 3.1416 / 256.0;
+ color[3].s0 = ((1.0 + native_cos(c)) * 0.5) * 255;
+ color[3].s1 = ((1.0 + native_cos(2.0 * c + 2.0 * 3.1416 / 3.0)) * 0.5) * 255;
+ color[3].s2 = ((1.0 + native_cos(c - 2.0 * 3.1416 / 3.0)) * 0.5) * 255;
+ color[3].s3 = 0xff;
+ if (ccount.s3 == maxIterations)
+ {
+ color[3].s0 = 0;
+ color[3].s1 = 0;
+ color[3].s2 = 0;
+ }
+ if (bench)
+ {
+ color[3].s0 = ccount.s3 & 0xff;
+ color[3].s1 = (ccount.s3 & 0xff00) >> 8;
+ color[3].s2 = (ccount.s3 & 0xff0000) >> 16;
+ color[3].s3 = (ccount.s3 & 0xff000000) >> 24;
+ }
+ mandelbrotImage[4 * tid + 3] = color[3];
+} \ No newline at end of file
diff --git a/src/defs.h b/src/defs.h
index 00b51bd..fc0c91d 100644
--- a/src/defs.h
+++ b/src/defs.h
@@ -8,6 +8,8 @@
#ifndef DEFS_H_
#define DEFS_H_
+#include <stdio.h>
+
#ifndef NULL
#define NULL 0
#endif
@@ -19,6 +21,10 @@
#define FALSE 0
#endif
+// Rendering modes, u8
+#define MODE_CPU 0
+#define MODE_OPENCL 1
+
typedef unsigned char u8;
typedef signed char s8;
@@ -31,6 +37,8 @@ typedef signed long int s32;
typedef unsigned long long u64;
typedef signed long long s64;
+typedef long double d64;
+
//typedef u8 bool;
#endif /* DEFS_H_ */
diff --git a/src/mandelbrot-zoom.c b/src/mandelbrot-zoom.c
index 9410734..72b54ba 100644
--- a/src/mandelbrot-zoom.c
+++ b/src/mandelbrot-zoom.c
@@ -17,6 +17,7 @@ int main(int argc, char **argv)
ui_settings.settings = GTK_WINDOW(gtk_builder_get_object(builder, "settings"));
//gtk_builder_connect_signals(builder, NULL);
+ ui_settings.setCombo = GTK_COMBO_BOX_TEXT(gtk_builder_get_object(builder, "setCombo"));
ui_settings.iterationsSp = GTK_SPIN_BUTTON(gtk_builder_get_object(builder, "iterationsSp"));
ui_settings.threadsSp = GTK_SPIN_BUTTON(gtk_builder_get_object(builder, "threadsSp"));
ui_settings.colorFromBtn = GTK_BUTTON(gtk_builder_get_object(builder, "colorFromBtn"));
@@ -24,6 +25,10 @@ int main(int argc, char **argv)
ui_settings.zoomToXEntry = GTK_ENTRY(gtk_builder_get_object(builder, "zoomToXEntry"));
ui_settings.zoomToYEntry = GTK_ENTRY(gtk_builder_get_object(builder, "zoomToYEntry"));
ui_settings.speedEntry = GTK_ENTRY(gtk_builder_get_object(builder, "speedEntry"));
+ ui_settings.modeCPURd = GTK_RADIO_BUTTON(gtk_builder_get_object(builder, "modeCPURd"));
+ ui_settings.modeGPURd = GTK_RADIO_BUTTON(gtk_builder_get_object(builder, "modeGPURd"));
+ ui_settings.fpuCombo = GTK_COMBO_BOX_TEXT(gtk_builder_get_object(builder, "fpuCombo"));
+ ui_settings.fmaCb = GTK_COMBO_BOX_TEXT(gtk_builder_get_object(builder, "fmaCb"));
ui_settings.exportCb = GTK_CHECK_BUTTON(gtk_builder_get_object(builder, "exportCb"));
ui_settings.gifRd = GTK_RADIO_BUTTON(gtk_builder_get_object(builder, "gifRd"));
ui_settings.widthSp = GTK_SPIN_BUTTON(gtk_builder_get_object(builder, "widthSp"));
@@ -60,17 +65,15 @@ int main(int argc, char **argv)
gtk_spin_button_set_range(ui_settings.fpsVideoSp, 1, 60);
gtk_spin_button_set_range(ui_settings.bitrateSp, 1, 65535);
- g_signal_connect(ui_settings.iterationsSp, "value-changed", G_CALLBACK(on_iterationsSp_valueChanged), NULL);
- g_signal_connect(ui_settings.threadsSp, "value-changed", G_CALLBACK(on_threadsSp_valueChanged), NULL);
+ gtk_combo_box_text_append(ui_settings.setCombo, NULL, "Mandelbrot");
+ gtk_combo_box_text_append(ui_settings.setCombo, NULL, "Julia");
+
+ gtk_combo_box_text_append(ui_settings.fpuCombo, NULL, "32 bit");
+ gtk_combo_box_text_append(ui_settings.fpuCombo, NULL, "64 bit");
+ gtk_combo_box_text_append(ui_settings.fpuCombo, NULL, "128 bit");
+
g_signal_connect(ui_settings.colorFromBtn, "clicked", G_CALLBACK(on_colorFromBtn_clicked), NULL);
g_signal_connect(ui_settings.colorToBtn, "clicked", G_CALLBACK(on_colorToBtn_clicked), NULL);
- g_signal_connect(ui_settings.exportCb, "toggled", G_CALLBACK(on_exportCb_toggled), NULL);
- g_signal_connect(ui_settings.widthSp, "value-changed", G_CALLBACK(on_widthSp_valueChanged), NULL);
- g_signal_connect(ui_settings.heightSp, "value-changed", G_CALLBACK(on_heightSp_valueChanged), NULL);
- g_signal_connect(ui_settings.fpsRenderSp, "value-changed", G_CALLBACK(on_fpsRenderSp_valueChanged), NULL);
- g_signal_connect(ui_settings.fpsVideoSp, "value-changed", G_CALLBACK(on_fpsVideoSp_valueChanged), NULL);
- g_signal_connect(ui_settings.bitrateSp, "value-changed", G_CALLBACK(on_bitrateSp_valueChanged), NULL);
- g_signal_connect(ui_settings.exportTf, "changed", G_CALLBACK(on_exportTf_changed), NULL);
g_signal_connect(ui_settings.startBtn, "clicked", G_CALLBACK(on_startBtn_clicked), NULL);
g_signal_connect(ui_settings.exitBtn, "clicked", G_CALLBACK(on_exitBtn_clicked), NULL);
@@ -80,16 +83,6 @@ int main(int argc, char **argv)
return 0;
}
-void on_iterationsSp_valueChanged()
-{
- config.iterations = gtk_spin_button_get_value(ui_settings.iterationsSp);
-}
-
-void on_threadsSp_valueChanged()
-{
- config.threads = gtk_spin_button_get_value(ui_settings.threadsSp);
-}
-
void on_colorFromBtn_clicked()
{
currentColor = COLOR_FROM;
@@ -102,53 +95,74 @@ void on_colorToBtn_clicked()
gtk_dialog_run(ui_settings.colorDialog);
}
-void on_exportCb_toggled()
+void on_startBtn_clicked()
{
- config.video = gtk_toggle_button_get_active(ui_settings.exportCb);
-}
+ config.config_cpu.iterations = gtk_spin_button_get_value(ui_settings.iterationsSp);
+ config.config_opencl.iterations = gtk_spin_button_get_value(ui_settings.iterationsSp);
+
+ config.config_cpu.threads = gtk_spin_button_get_value(ui_settings.threadsSp);
+
+ config.config_opencl.fma = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(ui_settings.fmaCb));
+
+ config.video = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(ui_settings.exportCb));
-void on_exportTf_changed()
-{
config.path = CHAR_PTR(gtk_entry_get_text(ui_settings.exportTf));
-}
-void on_widthSp_valueChanged()
-{
config.width = gtk_spin_button_get_value(ui_settings.widthSp);
-}
+ config.config_cpu.width = gtk_spin_button_get_value(ui_settings.widthSp);
+ config.config_opencl.width = gtk_spin_button_get_value(ui_settings.widthSp);
-void on_heightSp_valueChanged()
-{
config.height = gtk_spin_button_get_value(ui_settings.heightSp);
-}
+ config.config_cpu.height = gtk_spin_button_get_value(ui_settings.heightSp);
+ config.config_opencl.height = gtk_spin_button_get_value(ui_settings.heightSp);
-void on_fpsRenderSp_valueChanged()
-{
- config.renderFPS = gtk_spin_button_get_value(ui_settings.fpsRenderSp);
-}
+ config.config_cpu.renderFPS = gtk_spin_button_get_value(ui_settings.fpsRenderSp);
+ config.config_opencl.renderFPS = gtk_spin_button_get_value(ui_settings.fpsRenderSp);
-void on_fpsVideoSp_valueChanged()
-{
config.videoFPS = gtk_spin_button_get_value(ui_settings.fpsVideoSp);
-}
-void on_bitrateSp_valueChanged()
-{
config.bitrate = gtk_spin_button_get_value(ui_settings.bitrateSp);
-}
-void on_startBtn_clicked()
-{
double x, y, speed;
sscanf(gtk_entry_get_text(ui_settings.zoomToXEntry), "%lf", &x);
sscanf(gtk_entry_get_text(ui_settings.zoomToYEntry), "%lf", &y);
sscanf(gtk_entry_get_text(ui_settings.speedEntry), "%lf", &speed);
- config.to_x = x;
- config.to_y = y;
- config.speed = speed;
- printf("config {\n\t.iterations = %u\n\t.tox = %f\n\t.toy = %f\n\t.video = %u\n\t.filetype = %u\n\t.width = %u\n\t.height = %u\n\t.renderFPS = %u\n\t.videoFPS = %u\n\t.bitrate = %u\n\t.path = %s\n}\n", config.iterations, x, y, config.video, 0, config.width, config.height, config.renderFPS, config.videoFPS, config.bitrate, config.path);
- render_init(&config, mandelbrot_r);
- render_show();
+ config.config_cpu.to_x = x;
+ config.config_cpu.to_y = y;
+ config.config_cpu.speed = speed;
+ config.config_opencl.to_x = x;
+ config.config_opencl.to_y = y;
+ config.config_opencl.speed = speed;
+
+ config.mode = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(ui_settings.modeGPURd));
+ //config.mode = MODE_CPU;
+
+ if (strcmp(gtk_combo_box_text_get_active_text(ui_settings.setCombo), "Mandelbrot") == 0)
+ {
+ config.config_cpu.set_func = mandelbrot_r;
+ config.config_opencl.set_func = SFUNC_MANDELBROT;
+ }
+ else if (strcmp(gtk_combo_box_text_get_active_text(ui_settings.setCombo), "Julia") == 0)
+ {
+ config.config_cpu.set_func = julia;
+ config.config_opencl.set_func = SFUNC_JULIA;
+ }
+
+ if (strcmp(gtk_combo_box_text_get_active_text(ui_settings.fpuCombo), "32 bit") == 0)
+ {
+ config.config_opencl.fpu = OPENCL_FPU_32;
+ }
+ else if (strcmp(gtk_combo_box_text_get_active_text(ui_settings.fpuCombo), "64 bit") == 0)
+ {
+ config.config_opencl.fpu = OPENCL_FPU_64;
+ }
+ else if (strcmp(gtk_combo_box_text_get_active_text(ui_settings.fpuCombo), "128 bit") == 0)
+ {
+ config.config_opencl.fpu = OPENCL_FPU_128;
+ }
+
+ init_render(&config);
+ show_render();
}
void on_exitBtn_clicked()
diff --git a/src/mandelbrot-zoom.h b/src/mandelbrot-zoom.h
index a57deba..8309885 100644
--- a/src/mandelbrot-zoom.h
+++ b/src/mandelbrot-zoom.h
@@ -16,6 +16,7 @@
typedef struct Ui_settings {
GtkWindow *settings;
+ GtkComboBoxText *setCombo;
GtkSpinButton *iterationsSp;
GtkSpinButton *threadsSp;
GtkButton *colorFromBtn;
@@ -23,6 +24,10 @@ typedef struct Ui_settings {
GtkEntry *zoomToXEntry;
GtkEntry *zoomToYEntry;
GtkEntry *speedEntry;
+ GtkRadioButton *modeCPURd;
+ GtkRadioButton *modeGPURd;
+ GtkComboBoxText *fpuCombo;
+ GtkCheckButton *fmaCb;
GtkCheckButton *exportCb;
GtkRadioButton *gifRd;
GtkSpinButton *widthSp;
diff --git a/src/render.c b/src/render.c
index 55d514b..191dcd9 100644
--- a/src/render.c
+++ b/src/render.c
@@ -9,121 +9,60 @@
#define HAVE_STRUCT_TIMESPEC
#include <pthread.h>
-void render_init(Config *config, u32 (*sfunc) (long double, long double, u32))
+void init_render(Config *config)
{
- delta = glutGet(GLUT_ELAPSED_TIME);
- x_min_s = -2.0;
- x_max_s = 1.0;
- y_min_s= -1.0;
- y_max_s = 1.0;
- x_min = x_min_s;
- x_max = x_max_s;
- y_min = y_min_s;
- y_max = y_max_s;
_config = config;
- _sfunc = sfunc;
s_arr = (u32 *) malloc((_config->width) * (_config->height) * sizeof(u32));
- calculate(x_min, y_min, x_max, y_max, _sfunc, s_arr);
+ _config->config_cpu.arr = s_arr;
+ _config->config_opencl.arr = s_arr;
//glutInit(0, NULL);
glutInitWindowPosition(0, 0);
glutInitWindowSize(_config->width, _config->height);
glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE | GLUT_DEPTH);
glutCreateWindow("Renderer");
- glutDisplayFunc(gl_render);
- glutIdleFunc(gl_idle);
glGenTextures(1, &tex);
glBindTexture(GL_TEXTURE_2D, tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, _config->width, _config->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, s_arr);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, _config->width, _config->height, 0,
+ GL_RGBA, GL_UNSIGNED_BYTE, s_arr);
glBindTexture(GL_TEXTURE_2D, 0);
//glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, _config->width, _config->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, s_arr);
glMatrixMode(GL_PROJECTION);
glOrtho(0, _config->width, 0, _config->height, -1, 1);
glMatrixMode(GL_MODELVIEW);
- glutMainLoop();
-}
-
-void render_show()
-{
-}
+ _config->config_cpu.tex = tex;
+ _config->config_opencl.tex = tex;
+ _config->config_cpu.zoom_func = zoom_func;
+ _config->config_opencl.zoom_func = zoom_func;
-void gl_render(void)
-{
- glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
- glBindTexture(GL_TEXTURE_2D, tex);
- glEnable(GL_TEXTURE_2D);
- glBegin(GL_QUADS);
- glTexCoord2i(0, 0); glVertex2i(0, 0);
- glTexCoord2i(0, 1); glVertex2i(0, _config->height);
- glTexCoord2i(1, 1); glVertex2i(_config->width, _config->height);
- glTexCoord2i(1, 0); glVertex2i(_config->width, 0);
- glEnd();
- glDisable(GL_TEXTURE_2D);
- glBindTexture(GL_TEXTURE_2D, 0);
+ switch(_config->mode)
+ {
+ case MODE_CPU:
+ init_cpu(&_config->config_cpu);
+ glutDisplayFunc(render_cpu);
+ glutIdleFunc(idle_cpu);
+ break;
+ case MODE_OPENCL:
+ init_opencl(&_config->config_opencl);
+ glutDisplayFunc(render_opencl);
+ glutIdleFunc(idle_opencl);
+ break;
+ }
-// glBegin(GL_TRIANGLES);
-// glVertex3f(-0.5,-0.5,0.0);
-// glVertex3f(0.5,0.0,0.0);
-// glVertex3f(0.0,0.5,0.0);
-// glEnd();
+ glutMainLoop();
- glutSwapBuffers();
}
-void calculate(long double x_min, long double y_min, long double x_max, long double y_max, u32 (*sfunc) (long double, long double, u32), u32 *arr)
+void show_render()
{
- pthread_t thread;
- ThreadArgs *args = (ThreadArgs *) malloc(_config->threads * sizeof(ThreadArgs));
- for(u8 i = 0; i < _config->threads; i++)
- {
- args[i] = (ThreadArgs) { .tc = _config->threads, .tid = i, .x_min = x_min, .y_min = y_min, .x_max = x_max, .y_max = y_max, .sfunc = sfunc, .arr = arr };
- pthread_create(&thread, NULL, calculate_t, (void *)&args[i]);
- }
- pthread_join(thread, NULL);
-}
-void calculate_t(void *args)
-{
- ThreadArgs *_args = (ThreadArgs *)args;
- long double x_math, y_math;
- u32 iterations;
- for (u32 y = (_config->height/_args->tc)*(_args->tid); y < _config->height; y++)
- {
- for (u32 x = 0; x < _config->width; x++)
- {
- x_math = _args->x_min + ((long double) x * (_args->x_max - _args->x_min)) / _config->width;
- y_math = _args->y_min + ((long double) (_config->height - y) * (_args->y_max - _args->y_min)) / _config->height;
- iterations = _args->sfunc(x_math, y_math, _config->iterations);
- _args->arr[COORDS(x, y, _config->width)] = (((1<<24)-1)*iterations)/_config->iterations;
- }
- }
}
-long double zoom_func(long double ft, long double s)
+d64 zoom_func(d64 ft, d64 s)
{
return (s - expl(-ft));
}
-
-void gl_idle(void)
-{
- calculate(x_min, y_min, x_max, y_max, _sfunc, s_arr);
- //glGenTextures(1, &tex);
- glBindTexture(GL_TEXTURE_2D, tex);
- //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, _config->width, _config->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, s_arr);
- glBindTexture(GL_TEXTURE_2D, 0);
- int t = glutGet(GLUT_ELAPSED_TIME);
- dt = (t - delta) / 1000.0;
- delta = t;
- ft+=(_config->speed*dt);
- x_min = x_min_s + zoom_func(ft, (long double)2.0 + _config->to_x);
- y_min = y_min_s + zoom_func(ft, (long double)1.0 + _config->to_y);
- x_max = x_max_s - zoom_func(ft, (long double)1.0 - _config->to_x);
- y_max = y_max_s - zoom_func(ft, (long double)1.0 - _config->to_y);
- glutPostRedisplay();
-}
diff --git a/src/render.h b/src/render.h
index dde79be..a50027a 100644
--- a/src/render.h
+++ b/src/render.h
@@ -8,60 +8,32 @@
#ifndef RENDER_H_
#define RENDER_H_
-#define COORDS(x, y, width) ((y)*(width)+(x))
-
#include "defs.h"
-#include <stdlib.h>
-#include <GL/glut.h>
-#include <pthread.h>
+#include "render_opencl.h"
+#include "render_cpu.h"
#include <math.h>
typedef struct config {
- u32 iterations;
- u8 threads;
- u32 colorFrom;
- u32 colorTo;
- long double to_x;
- long double to_y;
- long double speed;
+ CpuConfig config_cpu;
+ OpenCLConfig config_opencl;
+ u8 mode;
u8 video;
u8 filetype;
u16 width;
u16 height;
- u8 renderFPS;
u8 videoFPS;
u32 bitrate;
const char *path;
// TODO: key mapping als option in die struct
} Config;
-typedef struct t_args {
- u8 tc;
- u8 tid;
- long double x_min;
- long double x_max;
- long double y_min;
- long double y_max;
- u32 (*sfunc) (long double, long double, u32);
- u32 *arr;
-} ThreadArgs;
-
Config *_config;
-u32 (*_sfunc) (long double, long double, u32);
u32 *s_arr;
GLuint tex;
-u32 rendercnt;
-long double x_min, x_max, y_min, y_max;
-long double x_min_s, x_max_s, y_min_s, y_max_s;
-int delta;
-long double dt, ft;
-void render_init(Config *config, u32 (*sfunc) (long double, long double, u32));
-void render_show();
-void gl_render(void);
-void gl_idle(void);
+d64 zoom_func(d64 ft, d64 s);
-void calculate(long double x_min, long double y_min, long double x_max, long double y_max, u32 (*sfunc) (long double, long double, u32), u32 *arr);
-void calculate_t(void *args);
+void init_render(Config *config);
+void show_render();
#endif /* RENDER_H_ */
diff --git a/src/render_cpu.c b/src/render_cpu.c
new file mode 100644
index 0000000..5065d4d
--- /dev/null
+++ b/src/render_cpu.c
@@ -0,0 +1,99 @@
+/*
+ * render.c
+ *
+ * Created on: 15.01.2018
+ * Author: Superleo1810
+ */
+
+#include "render_cpu.h"
+#define HAVE_STRUCT_TIMESPEC
+
+void init_cpu(CpuConfig *config)
+{
+ config_cpu = config;
+ x_min_s = -2.0;
+ x_max_s = 1.0;
+ y_min_s = -1.0;
+ y_max_s = 1.0;
+ x_min = x_min_s;
+ x_max = x_max_s;
+ y_min = y_min_s;
+ y_max = y_max_s;
+
+ calculate(x_min, y_min, x_max, y_max, config_cpu->set_func, config_cpu->arr);
+}
+
+void render_cpu(void)
+{
+ glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+ glBindTexture(GL_TEXTURE_2D, config_cpu->tex);
+ glEnable(GL_TEXTURE_2D);
+
+ glBegin(GL_QUADS);
+ glTexCoord2i(0, 0); glVertex2i(0, 0);
+ glTexCoord2i(0, 1); glVertex2i(0, config_cpu->height);
+ glTexCoord2i(1, 1); glVertex2i(config_cpu->width, config_cpu->height);
+ glTexCoord2i(1, 0); glVertex2i(config_cpu->width, 0);
+ glEnd();
+
+ glDisable(GL_TEXTURE_2D);
+ glBindTexture(GL_TEXTURE_2D, 0);
+ glutSwapBuffers();
+}
+
+void calculate(d64 x_min, d64 y_min, d64 x_max, d64 y_max, u32 (*sfunc) (d64, d64, u32), u32 *arr)
+{
+ pthread_t thread;
+ ThreadArgs *args = (ThreadArgs *) malloc(config_cpu->threads * sizeof(ThreadArgs));
+ for(u8 i = 0; i < config_cpu->threads; i++)
+ {
+ args[i] = (ThreadArgs) { .tc = config_cpu->threads, .tid = i, .x_min = x_min, .y_min = y_min, .x_max = x_max, .y_max = y_max, .sfunc = sfunc, .arr = arr };
+ pthread_create(&thread, NULL, calculate_t, (void *)&args[i]);
+ }
+ pthread_join(thread, NULL);
+ free(args);
+}
+
+void calculate_t(void *args)
+{
+ ThreadArgs *_args = (ThreadArgs *)args;
+ d64 x_math, y_math;
+ u32 iterations;
+ for (u32 y = (config_cpu->height/_args->tc)*(_args->tid); y < config_cpu->height; y++)
+ {
+ for (u32 x = 0; x < config_cpu->width; x++)
+ {
+ x_math = _args->x_min + ((d64) x * (_args->x_max - _args->x_min)) / config_cpu->width;
+ y_math = _args->y_min + ((d64) (config_cpu->height - y) * (_args->y_max - _args->y_min)) / config_cpu->height;
+ iterations = _args->sfunc(x_math, y_math, config_cpu->iterations);
+ _args->arr[COORDS(x, y, config_cpu->width)] = (((1<<24)-1)*iterations)/config_cpu->iterations;
+ }
+ }
+}
+
+void idle_cpu(void)
+{
+ static int t_old;
+ int t = 0, delta = 0;
+ do
+ {
+ t = glutGet(GLUT_ELAPSED_TIME);
+ delta = t - t_old;
+ }
+ while(delta < 16); // TODO: Hardcoded FPS
+ t_old = t;
+
+ calculate(x_min, y_min, x_max, y_max, config_cpu->set_func, config_cpu->arr);
+ //glGenTextures(1, &tex);
+ glBindTexture(GL_TEXTURE_2D, config_cpu->tex);
+ //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, config_cpu->width, config_cpu->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, config_cpu->arr);
+ glBindTexture(GL_TEXTURE_2D, 0);
+ ft+=(config_cpu->speed*(delta/1000.0));
+ x_min = x_min_s + config_cpu->zoom_func(ft, (d64)2.0 + config_cpu->to_x);
+ y_min = y_min_s + config_cpu->zoom_func(ft, (d64)1.0 + config_cpu->to_y);
+ x_max = x_max_s - config_cpu->zoom_func(ft, (d64)1.0 - config_cpu->to_x);
+ y_max = y_max_s - config_cpu->zoom_func(ft, (d64)1.0 - config_cpu->to_y);
+ glutPostRedisplay();
+}
diff --git a/src/render_cpu.h b/src/render_cpu.h
new file mode 100644
index 0000000..7abbdc4
--- /dev/null
+++ b/src/render_cpu.h
@@ -0,0 +1,61 @@
+/*
+ * render_cpu.h
+ *
+ * Created on: 25.01.2018
+ * Author: Superleo1810
+ */
+
+#ifndef RENDER_CPU_H_
+#define RENDER_CPU_H_
+
+#include "defs.h"
+#include <stdlib.h>
+#include <GL/glut.h>
+#include <pthread.h>
+#include <time.h>
+#include <math.h>
+
+#define COORDS(x, y, width) ((y)*(width)+(x))
+
+typedef struct config_cpu {
+ u8 threads;
+ GLuint tex;
+ u32 *arr;
+ d64 (*zoom_func)(d64, d64);
+ u32 (*set_func)(d64, d64, u32);
+ u32 iterations;
+ u32 colorFrom;
+ u32 colorTo;
+ d64 to_x;
+ d64 to_y;
+ d64 speed;
+ u16 width;
+ u16 height;
+ u8 renderFPS;
+} CpuConfig;
+
+typedef struct t_args {
+ u8 tc;
+ u8 tid;
+ d64 x_min;
+ d64 x_max;
+ d64 y_min;
+ d64 y_max;
+ u32 (*sfunc) (d64, d64, u32);
+ u32 *arr;
+} ThreadArgs;
+
+CpuConfig *config_cpu;
+u32 rendercnt;
+float ft;
+
+d64 x_min, x_max, y_min, y_max;
+d64 x_min_s, x_max_s, y_min_s, y_max_s;
+
+void init_cpu(CpuConfig *config);
+void render_cpu(void);
+void idle_cpu(void);
+void calculate(d64 x_min, d64 y_min, d64 x_max, d64 y_max, u32 (*sfunc) (d64, d64, u32), u32 *arr);
+void calculate_t(void *args);
+
+#endif /* RENDER_CPU_H_ */
diff --git a/src/render_opencl.c b/src/render_opencl.c
new file mode 100644
index 0000000..9d650c2
--- /dev/null
+++ b/src/render_opencl.c
@@ -0,0 +1,297 @@
+/*
+ * render_opencl.c
+ *
+ * Created on: 26.01.2018
+ * Author: Superleo1810
+ */
+
+#include "render_opencl.h"
+
+void init_opencl(OpenCLConfig *config)
+{
+ printf("cl init\n");
+ x_min_s = -2.0;
+ x_max_s = 1.0;
+ y_min_s = -1.0;
+ y_max_s = 1.0;
+ x_min = x_min_s;
+ x_max = x_max_s;
+ y_min = y_min_s;
+ y_max = y_max_s;
+
+ config_opencl = config;
+ output = (cl_uint *) malloc((config_opencl->width) * (config_opencl->height) * sizeof(cl_uchar4));
+ context = NULL;
+
+ cl_platform_id platform_id;
+ cl_uint ret_num_devices;
+ cl_uint ret_num_platforms;
+ size_t device_list_size;
+ cl_program program;
+ size_t cl_src_sz;
+
+ clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
+ clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
+ context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
+ ret = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_list_size);
+ devices = (cl_device_id *)malloc(device_list_size);
+ num_devices = (cl_uint)(device_list_size/sizeof(cl_device_id));
+ clGetContextInfo(context, CL_CONTEXT_DEVICES, device_list_size, devices, NULL);
+
+ printf("lists done\n");
+
+ FILE *fp;
+ char *cl_src, *path, *flags = (char *)malloc(200 * sizeof(char));
+ flags[0] = '\0';
+ switch(config_opencl->fpu)
+ {
+ case OPENCL_FPU_32:
+ switch(config_opencl->set_func)
+ {
+ case SFUNC_JULIA:
+ path = "cl/julia32.cl";
+ break;
+ case SFUNC_MANDELBROT:
+ default:
+ path = "cl/mandelbrot32.cl";
+ break;
+ }
+ break;
+ case OPENCL_FPU_64:
+ switch(config_opencl->set_func)
+ {
+ case SFUNC_JULIA:
+ path = "cl/julia64.cl";
+ break;
+ case SFUNC_MANDELBROT:
+ default:
+ path = "cl/mandelbrot64.cl";
+ break;
+ }
+ int khrFP64 = 0;
+ int amdFP64 = 0;
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ char deviceExtensions[8192];
+ ret = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS,
+ sizeof(deviceExtensions), deviceExtensions, 0);
+ if (strstr(deviceExtensions, "cl_khr_fp64"))
+ {
+ khrFP64++;
+ }
+ else
+ {
+ if (strstr(deviceExtensions, "cl_amd_fp64"))
+ {
+ amdFP64++;
+ }
+ }
+ }
+ if (khrFP64 == num_devices)
+ {
+ flags = strcat(flags, "-D KHR_DP_EXTENSION ");
+ }
+ else if (amdFP64 == num_devices)
+ {
+ flags = strcat(flags, "");
+ }
+ break;
+ case OPENCL_FPU_128:
+ printf("128 bit precision not implemented yet\n");
+ break;
+ }
+
+ cl_src = (char *)malloc(MAX_SOURCE_SIZE * sizeof(char));
+ fp = fopen(path, "r");
+ cl_src_sz = fread(cl_src, 1, MAX_SOURCE_SIZE, fp);
+ fclose(fp);
+
+ printf("reading done\n");
+
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ cl_command_queue_properties prop = 0;
+ // if (sampleArgs->timing)
+ // {
+ // prop |= CL_QUEUE_PROFILING_ENABLE;
+ // }
+ commandQueue[i] = clCreateCommandQueue(context, devices[i], prop, &ret);
+ outputBuffer[i] = clCreateBuffer(context,
+ CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+ (sizeof(cl_uint) * (config_opencl->width)
+ * (config_opencl->height)) / num_devices, NULL, &ret);
+ }
+ printf("comand puffer\n");
+ if (config_opencl->fma)
+ {
+ flags = strcat(flags, "-D MUL_ADD=fma ");
+ }
+ else
+ {
+ flags = strcat(flags, "-D MUL_ADD=mad ");
+ }
+ printf("flags concat\n");
+ program = clCreateProgramWithSource(context, 1, (const char **)&cl_src, (const size_t *)&cl_src_sz, &ret);
+ ret = clBuildProgram(program, num_devices, devices, flags, NULL, NULL);
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ kernel_vector[i] = clCreateKernel(program, "calculate", &ret);
+ }
+
+ printf("cl init done\n");
+
+}
+
+void render_opencl(void)
+{
+ cl_event events[MAX_DEVICES];
+ cl_int eventStatus = CL_QUEUED;
+ size_t globalThreads[1];
+ size_t localThreads[1];
+ size_t kernelWorkGroupSize;
+ cl_kernel kernel;
+
+ globalThreads[0] = ((config_opencl->width) * (config_opencl->height))
+ / num_devices;
+ localThreads[0] = 256;
+
+ globalThreads[0] >>= 2;
+
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ kernel = kernel_vector[i];
+ ret = clGetKernelWorkGroupInfo(kernel, devices[i],
+ CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize,
+ 0);
+ if ((cl_uint) (localThreads[0]) > kernelWorkGroupSize)
+ {
+ localThreads[0] = kernelWorkGroupSize;
+ }
+
+ xpos = 0.0;
+ ypos = 0.0;
+ xsize = 0.1;
+ ysize = 0.1;
+
+ xstep = (xsize / (double) config_opencl->width);
+ ystep = (ysize / (double) config_opencl->height);
+ leftx = (xpos - xsize / 2.0);
+ topy =
+ (ypos + ysize / 2.0
+ - ((double) i * ysize) / (double) num_devices);
+
+ if (i == 0)
+ {
+ topy0 = topy;
+ }
+
+ printf("xsize: %f, ysize: %f, xstep: %f, ystep: %f, leftx: %f, topy: %f\n", xsize, ysize, xstep, ystep, leftx, topy);
+
+ ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &outputBuffer[i]);
+
+ cl_float leftxF = (float) leftx;
+ cl_float topyF = (float) topy;
+ cl_float xstepF = (float) xstep;
+ cl_float ystepF = (float) ystep;
+
+ switch (config_opencl->fpu)
+ {
+ case OPENCL_FPU_32: // lel
+ ret = clSetKernelArg(kernel, 1, sizeof(cl_float), (void *) &leftxF);
+ ret = clSetKernelArg(kernel, 2, sizeof(cl_float), (void *) &topyF);
+ ret = clSetKernelArg(kernel, 3, sizeof(cl_float), (void *) &xstepF);
+ ret = clSetKernelArg(kernel, 4, sizeof(cl_float), (void *) &ystepF);
+ break;
+ case OPENCL_FPU_64:
+ ret = clSetKernelArg(kernel, 1, sizeof(cl_double), (void *) &leftx);
+ ret = clSetKernelArg(kernel, 2, sizeof(cl_double), (void *) &topy);
+ ret = clSetKernelArg(kernel, 3, sizeof(cl_double), (void *) &xstep);
+ ret = clSetKernelArg(kernel, 4, sizeof(cl_double), (void *) &ystep);
+ break;
+ case OPENCL_FPU_128:
+ break;
+ }
+
+ ret = clSetKernelArg(kernel, 5, sizeof(cl_uint),
+ (void *) &config_opencl->iterations);
+ ret = clSetKernelArg(kernel, 6, sizeof(cl_int),
+ (void *) &config_opencl->width);
+ ret = clEnqueueNDRangeKernel(commandQueue[i], kernel, 1, NULL,
+ globalThreads, localThreads, 0, NULL, &events[i]);
+
+
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ ret = clFlush(commandQueue[i]);
+ }
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ ret = clWaitForEvents(1, &events[num_devices - i - 1]);
+ ret = clReleaseEvent(events[num_devices - i - 1]);
+ }
+
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ ret = clEnqueueReadBuffer(commandQueue[i], outputBuffer[i],
+ CL_FALSE, 0,
+ ((config_opencl->width) * (config_opencl->height)
+ * sizeof(u32)) / num_devices,
+ config_opencl->arr
+ + (((config_opencl->width) * (config_opencl->height) * i)
+ / num_devices), 0,
+ NULL, &events[i]);
+ }
+
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ ret = clFlush(commandQueue[i]);
+ }
+
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ ret = clWaitForEvents(1, &events[num_devices - i - 1]);
+ ret = clReleaseEvent(events[num_devices - i - 1]);
+ }
+ }
+
+ glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+ glBindTexture(GL_TEXTURE_2D, config_opencl->tex);
+ glEnable(GL_TEXTURE_2D);
+
+ glBegin(GL_QUADS);
+ glTexCoord2i(0, 0); glVertex2i(0, 0);
+ glTexCoord2i(0, 1); glVertex2i(0, config_opencl->height);
+ glTexCoord2i(1, 1); glVertex2i(config_opencl->width, config_opencl->height);
+ glTexCoord2i(1, 0); glVertex2i(config_opencl->width, 0);
+ glEnd();
+
+ glDisable(GL_TEXTURE_2D);
+ glBindTexture(GL_TEXTURE_2D, 0);
+ glutSwapBuffers();
+}
+
+void idle_opencl(void)
+{
+ static int t_old;
+ int t = 0, delta = 0;
+ do
+ {
+ t = glutGet(GLUT_ELAPSED_TIME);
+ delta = t - t_old;
+ } while (delta < 16); // TODO: Hardcoded FPS
+ t_old = t;
+
+ //glGenTextures(1, &tex);
+ glBindTexture(GL_TEXTURE_2D, config_opencl->tex);
+ //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, config_opencl->width,
+ config_opencl->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, config_opencl->arr);
+ glBindTexture(GL_TEXTURE_2D, 0);
+ cl_ft += (config_opencl->speed * (delta / 1000.0));
+ x_min = x_min_s + config_opencl->zoom_func(cl_ft, (d64) 2.0 + config_opencl->to_x);
+ y_min = y_min_s + config_opencl->zoom_func(cl_ft, (d64) 1.0 + config_opencl->to_y);
+ x_max = x_max_s - config_opencl->zoom_func(cl_ft, (d64) 1.0 - config_opencl->to_x);
+ y_max = y_max_s - config_opencl->zoom_func(cl_ft, (d64) 1.0 - config_opencl->to_y);
+ glutPostRedisplay();
+}
diff --git a/src/render_opencl.h b/src/render_opencl.h
new file mode 100644
index 0000000..b995c4d
--- /dev/null
+++ b/src/render_opencl.h
@@ -0,0 +1,75 @@
+/*
+ * render_opencl.h
+ *
+ * Created on: 25.01.2018
+ * Author: Superleo1810
+ */
+
+#ifndef RENDER_OPENCL_H_
+#define RENDER_OPENCL_H_
+
+#include "defs.h"
+#include <stdlib.h>
+#include <GL/glut.h>
+#include <CL/cl.h>
+#include "sets.h"
+#include <math.h>
+
+#define OPENCL_FPU_32 0
+#define OPENCL_FPU_64 1
+#define OPENCL_FPU_128 2
+
+#define MAX_SOURCE_SIZE 0xFFFF // 64 KiB
+#define MAX_DEVICES 4
+
+typedef struct config_opencl {
+ u8 fpu;
+ u8 fma;
+ GLuint tex;
+ d64 (*zoom_func)(d64, d64);
+ u8 set_func; // id, not pointer!
+ u32 *arr;
+ u32 iterations;
+ u32 colorFrom;
+ u32 colorTo;
+ d64 to_x;
+ d64 to_y;
+ d64 speed;
+ u16 width;
+ u16 height;
+ u8 renderFPS;
+} OpenCLConfig;
+
+OpenCLConfig *config_opencl;
+
+d64 x_min, x_max, y_min, y_max;
+d64 x_min_s, x_max_s, y_min_s, y_max_s;
+
+float cl_ft;
+cl_uint *output;
+cl_device_id device_id;
+cl_context context;
+cl_int ret;
+cl_kernel kernel_vector[MAX_DEVICES];
+cl_uint num_devices;
+cl_device_id *devices;
+cl_command_queue commandQueue[MAX_DEVICES];
+cl_mem outputBuffer[MAX_DEVICES];
+
+double xpos;
+double ypos;
+double xsize;
+double ysize;
+double xstep;
+double ystep;
+double leftx;
+double topy;
+double topy0;
+
+d64 zoom_func(d64 ft, d64 s);
+
+void init_opencl(OpenCLConfig *config);
+void render_opencl(void);
+void idle_opencl(void);
+
+#endif /* RENDER_OPENCL_H_ */
diff --git a/src/sets.c b/src/sets.c
index 2c01987..86c68f4 100644
--- a/src/sets.c
+++ b/src/sets.c
@@ -7,9 +7,9 @@
#include "sets.h"
-u32 mandelbrot_s(long double x, long double y, u32 iterations)
+u32 mandelbrot_s(d64 x, d64 y, u32 iterations)
{
- long double cx = x, cy = y, x2;
+ d64 cx = x, cy = y, x2;
u32 m = 0;
while(m <= iterations && (x*x)+(y*y) <= 4)
{
@@ -21,16 +21,16 @@ u32 mandelbrot_s(long double x, long double y, u32 iterations)
return m;
}
-u32 mandelbrot_r(long double x, long double y, u32 iterations)
+u32 mandelbrot_r(d64 x, d64 y, u32 iterations)
{
return _mandelbrot_r(x, y, 0.0, 0.0, 0, iterations, 4.0);
}
-u32 _mandelbrot_r(long double x, long double y, long double zx, long double zy, u32 n, u32 iterations, long double threshold)
+u32 _mandelbrot_r(d64 x, d64 y, d64 zx, d64 zy, u32 n, u32 iterations, d64 threshold)
{
if ((n < iterations) && ((zx * zx + zy * zy) < threshold)) {
- long double zx_new = (zx * zx - zy * zy + x);
- long double zy_new = (2 * zx * zy + y);
+ d64 zx_new = (zx * zx - zy * zy + x);
+ d64 zy_new = (2 * zx * zy + y);
if ((zx_new == zx) && (zy_new == zy)) {
return iterations;
}
@@ -39,7 +39,7 @@ u32 _mandelbrot_r(long double x, long double y, long double zx, long double zy,
return n;
}
-u32 julia(long double x, long double y, u32 iterations)
+u32 julia(d64 x, d64 y, u32 iterations)
{
// TODO: Julia-Menge
return 0;
diff --git a/src/sets.h b/src/sets.h
index 1ae237c..3dd8227 100644
--- a/src/sets.h
+++ b/src/sets.h
@@ -10,9 +10,17 @@
#include "defs.h"
-u32 mandelbrot_s(long double x, long double y, u32 iterations);
-u32 mandelbrot_r(long double x, long double y, u32 iterations);
-u32 _mandelbrot_r(long double x, long double y, long double zx, long double zy, u32 n, u32 iterations, long double threshold);
-u32 julia(long double x, long double y, u32 iterations);
+#define SFUNC_MANDELBROT_R 0
+#define SFUNC_MANDELBROT_S (~0)
+#define SFUNC_JULIA_R 1
+#define SFUNC_JULIA_S (~1)
+
+#define SFUNC_MANDELBROT SFUNC_MANDELBROT_R
+#define SFUNC_JULIA SFUNC_JULIA_R
+
+u32 mandelbrot_s(d64 x, d64 y, u32 iterations);
+u32 mandelbrot_r(d64 x, d64 y, u32 iterations);
+u32 _mandelbrot_r(d64 x, d64 y, d64 zx, d64 zy, u32 n, u32 iterations, d64 threshold);
+u32 julia(d64 x, d64 y, u32 iterations);
#endif /* SETS_H_ */