kde-extraapps/kdeplasma-addons/wallpapers/mandelbrot/render_impl.cpp
2015-01-15 17:07:43 +00:00

416 lines
16 KiB
C++

// Copyright 2008-2010 by Benoît Jacob <jacob.benoit.1@gmail.com>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License or (at your option) version 3 or any later version
// accepted by the membership of KDE e.V. (or its successor approved
// by the membership of KDE e.V.), which shall act as a proxy
// defined in Section 14 of version 3 of the license.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "mandelbrot.h"
#include "mix.h"
#include <Eigen/Array>
#include <limits>
#ifdef THIS_PATH_WITH_SSE2_EXPLICTLY_ENABLED
namespace with_SSE2_explicitly_enabled_if_x86 {
#else
namespace with_arch_defaults {
#endif
/* The number of iterations to do before testing for divergence. The higher, the faster. However,
* at each iteration we square z, and z may initially have modulus up to 2. So we're very quickly hitting the
* exponent limit of floating point numbers. Taking into account that we want this number to be a multiple of 4,
* as we are going to peel the inner loop by 4, the values below are the largest possible.
* As they are so small, we make them compile-time constants to allow to compiler to unroll if it decides to.
*/
template<typename T> struct iter_before_test { enum { ret = 4 }; };
template<> struct iter_before_test<double> { enum { ret = 8 }; };
template<typename Real>
struct mandelbrot_render_tile_impl
{
// number of reals in a SIMD packet.
// Examples:
// * no vectorization: then packet_size == 1 always
// * SSE or AltiVec: then packet_size is 4 if Real==float and is 2 if Real==double
enum { packet_size = Eigen::ei_packet_traits<Real>::size };
// with Eigen, if we use vectors of the right size, operations on them will be compiled
// as operations on SIMD packets
typedef Eigen::Matrix<Real, packet_size, 1> Packet;
typedef Eigen::Matrix<int, packet_size, 1> Packeti;
// normally we wouldn't need to do that, but since some compilers can miss optimizations,
// in the most critical code, let's play directly with SIMD registers. It's completely equivalent
// to Eigen vectors of the right size, just less subtle for the compiler to get right.
typedef typename Eigen::ei_packet_traits<Real>::type LowlevelPacket;
// how many iterations we do before testing for divergence. See comment above for struct iter_before_test.
// must be a multiple of 4 as we'll peel the inner loop by 4
enum { iter_before_test = iter_before_test<Real>::ret };
Real resolution;
int supersampling;
int max_iter;
float log_max_iter;
float tmin;
float log_of_2;
float log_of_2log2;
Real square_bailout_radius, log_log_square_bailout_radius;
Color3 rgb1, rgb2, rgb3, hsv1, hsv2, hsv3;
Mandelbrot *mandelbrot;
QImage *image;
const MandelbrotTile& tile;
bool found_exterior_point;
mandelbrot_render_tile_impl(Mandelbrot *m, const MandelbrotTile& t)
: mandelbrot(m), tile(t) { init(); }
void init();
void computePacket(int x, int y, Color3 *pixels);
};
template<typename Real>
void mandelbrot_render_tile_impl<Real>::init()
{
// remember if we already rendered some exterior point. will be useful for the check for fully interior tiles
found_exterior_point = false;
// the supersampling factor.
supersampling = mandelbrot->supersampling();
// the resolution, i.e. the distance in the complex plane between adjacent pixels.
resolution = Real(mandelbrot->resolution()) / supersampling;
/***************
Henceforth, we can completely forget about supersampling -- it's implicit.
Supersampling only means that 'image' is bigger and 'resolution' is smaller than they would otherwise be,
but from now on we don't need to care.
***************/
// how many iterations we do on each sample before declaring that it doesn't diverge
max_iter = mandelbrot->maxIter();
// precompute some constants that will be used in the coloring computations
log_max_iter = std::log(float(max_iter));
if(mandelbrot->min_iter_divergence() != 0 && mandelbrot->min_iter_divergence() != max_iter)
{
tmin = std::log(float(mandelbrot->min_iter_divergence())) / log_max_iter;
}
else tmin = 0.f;
square_bailout_radius = 20; // value found in papers on continuous escape time formulas, changing it can degrade the smoothness.
log_log_square_bailout_radius = std::log(std::log(square_bailout_radius));
log_of_2 = std::log(2.f);
log_of_2log2 = std::log(2.f*log_of_2);
mandelbrot->color1().getRgbF(&rgb1[0], &rgb1[1], &rgb1[2]);
mandelbrot->color1().getHsvF(&hsv1[0], &hsv1[1], &hsv1[2]);
mandelbrot->color2().getRgbF(&rgb2[0], &rgb2[1], &rgb2[2]);
mandelbrot->color2().getHsvF(&hsv2[0], &hsv2[1], &hsv2[2]);
mandelbrot->color3().getRgbF(&rgb3[0], &rgb3[1], &rgb3[2]);
mandelbrot->color3().getHsvF(&hsv3[0], &hsv3[1], &hsv3[2]);
}
template<typename Real>
void mandelbrot_render_tile_impl<Real>::computePacket(int x, int y, Color3 *pixels)
{
// for each pixel, we're going to do the iteration z := z^2 + c where z and c are complex numbers,
// starting with z = c = complex coord of the pixel. pzr and pzi denote the real and imaginary parts of z.
// pcr and pci denote the real and imaginary parts of c.
Packet pcr, pci, pzr, pzi, pzr_buf;
for(int i = 0; i < packet_size; i++) {
pzi[i] = pci[i] = tile.affix().y() + y * resolution;
pzr[i] = pcr[i] = tile.affix().x() + (x+i) * resolution;
}
/* first step: do iterations by batches of 'iter_before_test'. Testing only every 'iter_before_test'
* iterations allows to go faster.
*/
int j = 0;
Packet pzabs2;
Packeti pixel_iter = Packeti::Zero(), // number of iteration per pixel in the packet
pixel_diverge = Packeti::Zero(); // whether or not each pixel has already diverged
int count_not_yet_diverged = packet_size;
Packet pzr_previous, pzi_previous,
pzr_before_diverge = Packet::Zero(), pzi_before_diverge = Packet::Zero();
do
{
pzr_previous = pzr;
pzi_previous = pzi;
/* perform iter_before_test iterations */
for(int i = 0; i < iter_before_test/4; i++) // we peel the inner loop by 4
{
LowlevelPacket lpzr, lpzi;
for(int repeat = 0; repeat < 4; repeat++)
{
lpzr = Eigen::ei_pload(pzr.data());
lpzi = Eigen::ei_pload(pzi.data());
Eigen::ei_pstore(pzr.data(),
Eigen::ei_padd(
Eigen::ei_psub(
Eigen::ei_pmul(lpzr,lpzr),
Eigen::ei_pmul(lpzi,lpzi)
),
Eigen::ei_pload(pcr.data())
)
);
Eigen::ei_pstore(pzi.data(),
Eigen::ei_padd(
Eigen::ei_pmul(
Eigen::ei_padd(lpzr,lpzr),
lpzi
),
Eigen::ei_pload(pci.data())
)
);
}
}
/* test for divergence */
pzabs2 = pzr.cwise().square();
pzabs2 += pzi.cwise().square();
for(int i = 0; i < packet_size; i++) {
if(!(pixel_diverge[i])) {
if(pzabs2[i] > square_bailout_radius) {
pixel_diverge[i] = 1;
pzr_before_diverge[i] = pzr_previous[i];
pzi_before_diverge[i] = pzi_previous[i];
count_not_yet_diverged--;
}
else pixel_iter[i] += iter_before_test;
}
}
j += iter_before_test;
}
while(j < max_iter && count_not_yet_diverged);
/* Second step: we know the iteration count before divergence for each pixel but only up to precision
* 'iter_before_test'. We now want to get the exact iteration count before divergence,
* so we iterate again starting from where we were before divergence, and now we test at every iteration.
*/
j = 0;
pzr = pzr_before_diverge;
pzi = pzi_before_diverge;
pixel_diverge = Packeti::Zero();
count_not_yet_diverged = packet_size;
typedef Eigen::Matrix<float,packet_size,1> Packet_to_float;
Packet_to_float square_escape_modulus = Packet_to_float::Zero();
do
{
pzr_buf = pzr;
pzr = pzr.cwise().square();
pzr -= pzi.cwise().square();
pzr += pcr;
pzi = (2*pzr_buf).cwise()*pzi;
pzi += pci;
pzabs2 = pzr.cwise().square();
pzabs2 += pzi.cwise().square();
for(int i = 0; i < packet_size; i++) {
if(!(pixel_diverge[i])) {
if(pzabs2[i] > square_bailout_radius) {
pixel_diverge[i] = 1;
square_escape_modulus[i] = (float)pzabs2[i];
count_not_yet_diverged--;
}
else pixel_iter[i]++;
}
}
j++;
}
while(j < iter_before_test && count_not_yet_diverged);
if(count_not_yet_diverged < packet_size) found_exterior_point = true;
/* Now we know exactly the number of iterations before divergence, and the escape modulus. */
/* Third step: compute pixel colors. */
for(int i = 0; i < packet_size; i++)
{
Real log_log_escape_modulus = Real(0);
if(square_escape_modulus[i] > Real(1))
{
Real log_escape_modulus = std::log(square_escape_modulus[i]);
if(log_escape_modulus > Real(1))
log_log_escape_modulus = std::log(log_escape_modulus);
}
Real normalized_iter_count = pixel_iter[i] + (log_log_square_bailout_radius - log_log_escape_modulus) / log_of_2;
Real log_normalized_iter_count = (normalized_iter_count > Real(1)) ? std::log(normalized_iter_count) : Real(0);
Real t = log_normalized_iter_count / log_max_iter;
// Now, remember that we did a little statistical analysis on some samples
// to determine roughly what would be the smallest count of iterations before divergence.
// At the beginning of the present function, we used that to compute 'tmin'.
// Now we use it to make the gradient actually start at t=tmin. Lower values of t just give black.
// In other words, we are ensuring that no matter what the viewpoint, the darkest part of the image
// will always be black (at least if our statistical analysis went well).
// In other words, we are trying to get optimal contrast. This is important as otherwise there could be
// almost no contrast at all and an interesting viewpoint could give an almost blank image.
t = (t-tmin)/(1.f-tmin);
t = CLAMP(t, Real(0), Real(1));
float threshold1 = 0.09f;
float threshold2 = 0.3f;
if(t < threshold1) {
pixels[i] = (t/threshold1)*rgb3;
}
else if(t < threshold2) {
pixels[i] = mix(rgb2, hsv2, rgb3, hsv3, (t - threshold1) / (threshold2 - threshold1));
}
else {
pixels[i] = mix(rgb1, hsv1, rgb2, hsv2, (t - threshold2) / (1.f - threshold2));
}
}
}
/** This is the main rendering function. It renders only the current tile of the Mandelbrot wallpaper.
*
* The image parameter is the image to render the tile to. The size of this image determines how many pixels we render.
* So to get supersampled rendering, just pass a larger image here, and then scale it down to the real size using smooth
* scaling.
*
* This function only writes to 'image', it does not at all write to mandelbrot->image() directly. The mandelbrot parameter
* is only used to determine the parameters of the rendering such as viewpoint, colors, etc.
*/
template<typename Real> void mandelbrot_render_tile(
Mandelbrot *mandelbrot,
const MandelbrotTile& tile
)
{
// if we're compiling the path with SSE2 explicitly enabled, since it's only used on x86 (and not even on x86-64 since it has SSE2 by
// default), let's only compile the code if it's going to be used!
#if defined(HAVE_PATH_WITH_SSE2_EXPLICTLY_ENABLED) || !defined(THIS_PATH_WITH_SSE2_EXPLICTLY_ENABLED)
enum { packet_size = Eigen::ei_packet_traits<Real>::size };
Color3 dummy_buffer[packet_size];
mandelbrot_render_tile_impl<Real> renderer(mandelbrot, tile);
int supersampling = renderer.supersampling;
int supersampled_packet_size = supersampling * packet_size;
int tile_x = tile.destination().x();
int tile_y = tile.destination().y();
int tile_width = tile.destination().width();
int tile_height = tile.destination().height();
int supersampled_tile_width = supersampling * tile_width;
int supersampled_tile_height = supersampling * tile_height;
// first render a part of the border to check if the tile is probably entirely inside the interior of the Mandelbrot set
for(int y = 1; y < supersampled_tile_height-1; y+=4) // render every 4th pixel on the border
{
renderer.computePacket(0,y,dummy_buffer);
renderer.computePacket(supersampled_tile_width - packet_size, y, dummy_buffer);
// abort if required
if(mandelbrot->abortRenderingAsSoonAsPossible()) return;
}
for(int x = 0; x < supersampled_tile_width; x+= 4*packet_size)
{
renderer.computePacket(x,0,dummy_buffer);
renderer.computePacket(x,supersampled_tile_height-1,dummy_buffer);
// abort if required
if(mandelbrot->abortRenderingAsSoonAsPossible()) return;
}
// render the bottom-right packet: due to our rendering only every 4-th packet on the border, we could be
// missing this corner of the tile.
renderer.computePacket(supersampled_tile_width-packet_size,supersampled_tile_height-1,dummy_buffer);
// now, if the tile looks like it's entirely inside the interior, just assume that's the case,
// so fill it (not using a QPainter so as to avoid having to lock a mutex in case the image is being accessed
// by another QPainter in the GUI thread) and return.
if(!(renderer.found_exterior_point))
{
for(int y = 0; y < tile_height; y++)
{
for(int x = 0; x < tile_width; x++)
{
unsigned char *pixel
= const_cast<unsigned char*>(
static_cast<const QImage *>(mandelbrot->image())->scanLine(tile_y+y)
) + 4*(tile_x+x);
pixel[0] = mandelbrot->color1().blue();
pixel[1] = mandelbrot->color1().green();
pixel[2] = mandelbrot->color1().red();
pixel[3] = 255;
}
}
return;
}
// ok now do the actual rendering. not much point trying to reuse the part of the border we've already rendered,
// it's few pixels and it would take some nontrivial code.
qreal one_over_supersampling_squared = qreal(1) / (supersampling*supersampling);
for(int y = 0; y < tile_height; y++)
{
for(int x = 0; x < tile_width; x += packet_size)
{
Color3 supersampled_buffer[MAX_SUPERSAMPLING][packet_size * MAX_SUPERSAMPLING];
for(int y2 = 0; y2 < supersampling; y2++)
{
for(int x2 = 0; x2 < supersampled_packet_size; x2 += packet_size)
{
renderer.computePacket(supersampling*x+x2,supersampling*y+y2,&supersampled_buffer[y2][x2]);
// abort if required
if(mandelbrot->abortRenderingAsSoonAsPossible()) return;
}
}
int pixels_to_write = std::min(int(packet_size), tile_width-x);
for(int i = 0; i < pixels_to_write; i++)
{
Color3 color = Color3::Zero();
for(int y2 = 0; y2 < supersampling; y2++)
{
for(int x2 = 0; x2 < supersampling; x2++)
{
color += supersampled_buffer[y2][x2 + i*supersampling];
}
}
color *= one_over_supersampling_squared;
unsigned char *pixel
= const_cast<unsigned char*>(
static_cast<const QImage *>(mandelbrot->image())->scanLine(tile_y+y)
) + 4*(tile_x+x+i);
pixel[0] = qreal_to_uchar_color_channel(color[2]);
pixel[1] = qreal_to_uchar_color_channel(color[1]);
pixel[2] = qreal_to_uchar_color_channel(color[0]);
pixel[3] = 255;
}
}
}
#else
Q_UNUSED(mandelbrot);
Q_UNUSED(tile);
#endif
}
template void mandelbrot_render_tile<float>(
Mandelbrot *mandelbrot,
const MandelbrotTile& tile
);
template void mandelbrot_render_tile<double>(
Mandelbrot *mandelbrot,
const MandelbrotTile& tile
);
}