// kde-workspace/ksmserver/fadeeffect.cpp
//
// 574 lines
// 14 KiB
// C++

/*
* Copyright © 2008 Fredrik Höglund <fredrik@kde.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <QThread>
#include <QWidget>
#include <QPixmap>
#include <QTimer>
#include <QtGui/qx11info_x11.h>
#include <QDebug>
#include <solid/device.h>
#include <solid/processor.h>
#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <cstring>
#include <string>
#if defined(__INTEL_COMPILER)
# define HAVE_MMX
# define HAVE_SSE2
#elif defined(__GNUC__)
# if defined(__MMX__)
# define HAVE_MMX
# endif
# if defined(__SSE2__) && __GNUC__ > 3
# define HAVE_SSE2
# endif
#endif
#ifdef HAVE_MMX
# include <mmintrin.h>
#endif
#ifdef HAVE_SSE2
# include <emmintrin.h>
#endif
#include "fadeeffect.h"
#include "moc_fadeeffect.cpp"
#ifndef HAVE_SSE2
// Stand-in for _mm_malloc() used when the SSE2 header is not included.
// The alignment argument is accepted for signature compatibility but ignored;
// the memory comes straight from malloc().
static inline void *_mm_malloc(size_t size, int /* alignment */)
{
    void *const block = malloc(size);
    return block;
}
// Stand-in for _mm_free() matching the malloc()-based _mm_malloc() fallback:
// the block is simply handed back to free().
static inline void _mm_free(void *p)
{
    ::free(p);
}
#endif
// Integer approximation of (a * b) / 255 with rounding, computed without a
// division: bias the product by 0x80, fold the high byte back in, and shift.
static inline int multiply(int a, int b)
{
    const int biased = a * b + 0x80;
    const int folded = biased + (biased >> 8);
    return folded >> 8;
}
// Splits a 32-bit pixel into its 8-bit channels: bits 16-23 go to *r,
// bits 8-15 to *g and bits 0-7 to *b. The top byte is ignored.
static inline void load(const quint32 src, int *r, int *g, int *b)
{
    const quint32 channelMask = 0xff;
    *r = (src >> 16) & channelMask;
    *g = (src >> 8) & channelMask;
    *b = src & channelMask;
}
// Expands a 5/6/5 packed 16-bit pixel into three 8-bit channel values.
// The top bits of each field are replicated into the low bits, so a fully
// set field expands to 255 rather than 248/252.
static inline void load16(const quint16 src, int *r, int *g, int *b)
{
    const int px = src;
    *r = ((px >> 8) & 0x00f8) | ((px >> 13) & 0x0007);
    *g = ((px >> 3) & 0x00fc) | ((px >> 9) & 0x0003);
    *b = ((px << 3) & 0x00f8) | ((px >> 2) & 0x0007);
}
// Packs three 8-bit channel values into a 32-bit pixel (r in bits 16-23,
// g in bits 8-15, b in bits 0-7) with the top byte forced to 0xff.
static inline quint32 store(const int r, const int g, const int b)
{
    const quint32 opaque = 0xff000000;
    return opaque | (r << 16) | (g << 8) | b;
}
// Packs three 8-bit channel values into a 5/6/5 16-bit pixel by keeping the
// most significant 5 (red, blue) or 6 (green) bits of each channel.
static inline quint16 store16(const int r, const int g, const int b)
{
    const int redField   = (r << 8) & 0xf81f;
    const int blueField  = (b >> 3) & 0xf81f;
    const int greenField = (g << 3) & 0x07e0;
    return redField | blueField | greenField;
}
static void scanline_blend(const quint32 *over, const quint8 alpha, const quint32 *under,
quint32 *result, uint length)
{
for (uint i = 0; i < length; ++i)
{
int sr, sg, sb, dr, dg, db;
load(over[i], &sr, &sg, &sb);
load(under[i], &dr, &dg, &db);
dr = multiply((sr - dr), alpha) + dr;
dg = multiply((sg - dg), alpha) + dg;
db = multiply((sb - db), alpha) + db;
result[i] = store(dr, dg, db);
}
}
static void scanline_blend_16(const quint16 *over, const quint8 alpha, const quint16 *under,
quint16 *result, uint length)
{
for (uint i = 0; i < length; ++i)
{
int sr, sg, sb, dr, dg, db;
load16(over[i], &sr, &sg, &sb);
load16(under[i], &dr, &dg, &db);
dr = multiply((sr - dr), alpha) + dr;
dg = multiply((sg - dg), alpha) + dg;
db = multiply((sb - db), alpha) + db;
result[i] = store16(dr, dg, db);
}
}
// ----------------------------------------------------------------------------
#ifdef HAVE_MMX
// Per-lane approximation of (m1 * m2) / 255 with rounding, for four 16-bit
// lanes that each hold a value in the range 0..255.
//
// The intermediate values are unsigned and can reach 0xff7f, so the additions
// must not be signed-saturating: a signed saturating add clamps anything above
// 0x7fff and turns results that should be 128 into 127 (e.g. 128 * 255).
// The unsigned saturating add never actually saturates for in-range inputs.
static inline __m64 multiply(const __m64 m1, const __m64 m2)
{
    __m64 res = _mm_mullo_pi16(m1, m2);
    res = _mm_adds_pu16(res, _mm_set1_pi16 (0x0080));
    res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
    return _mm_srli_pi16(res, 8);
}
// Lane-wise sum of two vectors of four 16-bit values, using the signed
// saturating MMX add.
static inline __m64 add(const __m64 m1, const __m64 m2)
{
    const __m64 sum = _mm_adds_pi16(m1, m2);
    return sum;
}
// Widens the four bytes of a 32-bit pixel into four 16-bit lanes by
// interleaving them with the bytes of `zero`.
static inline __m64 load(const quint32 pixel, const __m64 zero)
{
    return _mm_unpacklo_pi8(_mm_cvtsi32_si64(pixel), zero);
}
// Narrows four 16-bit lanes back to bytes with unsigned saturation and
// returns the low 32 bits, i.e. the repacked pixel.
static inline quint32 store(const __m64 pixel, const __m64 zero)
{
    return _mm_cvtsi64_si32(_mm_packs_pu16(pixel, zero));
}
// MMX blend of one run of 32-bit pixels.
// Every byte of the result is multiply(over, a) + multiply(under, 255 - a),
// computed on all four bytes of a pixel at once. `length` is the number of
// pixels to process.
//
// The loop-invariant vectors are plain const locals: the `register` storage
// class specifier is deprecated since C++11 and no longer exists in C++17.
static void scanline_blend_mmx(const quint32 *over, const quint8 a, const quint32 *under,
                               quint32 *result, uint length)
{
    const __m64 zero = _mm_setzero_si64();
    const __m64 alpha = _mm_set1_pi16(quint16 (a));
    // For an 8-bit value, xor with 0xff is the same as 255 - a.
    const __m64 negalpha = _mm_xor_si64(alpha, _mm_set1_pi16 (0x00ff));

    for (uint i = 0; i < length; ++i)
    {
        const __m64 src = multiply(load(over[i], zero), alpha);
        const __m64 dst = multiply(load(under[i], zero), negalpha);
        result[i] = store(add(src, dst), zero);
    }

    // Leave the MMX state so that later floating point code works again.
    _mm_empty();
}
#endif // HAVE_MMX
// ----------------------------------------------------------------------------
#ifdef HAVE_SSE2
// Per-lane approximation of (m1 * m2) / 255 with rounding, for eight 16-bit
// lanes that each hold a value in the range 0..255.
//
// The intermediate values are unsigned and can reach 0xff7f, so the additions
// must not be signed-saturating: a signed saturating add clamps anything above
// 0x7fff and turns results that should be 128 into 127 (e.g. 128 * 255).
// The unsigned saturating add never actually saturates for in-range inputs.
static inline __m128i multiply(const __m128i m1, const __m128i m2)
{
    __m128i res = _mm_mullo_epi16(m1, m2);
    res = _mm_adds_epu16(res, _mm_set1_epi16 (0x0080));
    res = _mm_adds_epu16(res, _mm_srli_epi16 (res, 8));
    return _mm_srli_epi16(res, 8);
}
// Lane-wise sum of two vectors of eight 16-bit values, using the signed
// saturating SSE2 add.
static inline __m128i add(const __m128i m1, const __m128i m2)
{
    const __m128i sum = _mm_adds_epi16(m1, m2);
    return sum;
}
// Widens the low eight bytes of `m` into eight zero-extended 16-bit lanes.
static inline __m128i lower(__m128i m)
{
    const __m128i zero = _mm_setzero_si128 ();
    return _mm_unpacklo_epi8(m, zero);
}
// Widens the high eight bytes of `m` into eight zero-extended 16-bit lanes.
static inline __m128i upper(__m128i m)
{
    const __m128i zero = _mm_setzero_si128 ();
    return _mm_unpackhi_epi8(m, zero);
}
void scanline_blend_sse2(const __m128i *over, const quint8 a, const __m128i *under,
__m128i *result, uint length)
{
length = (length + 15) >> 4;
register const __m128i alpha = _mm_set1_epi16(__uint16_t (a));
register const __m128i negalpha = _mm_xor_si128(alpha, _mm_set1_epi16 (0x00ff));
for (uint i = 0; i < length; i++)
{
__m128i squad = _mm_load_si128(over + i);
__m128i dquad = _mm_load_si128(under + i);
__m128i src1 = lower(squad);
__m128i dst1 = lower(dquad);
__m128i src2 = upper(squad);
__m128i dst2 = upper(dquad);
squad = add(multiply(src1, alpha), multiply(dst1, negalpha));
dquad = add(multiply(src2, alpha), multiply(dst2, negalpha));
dquad = _mm_packus_epi16(squad, dquad);
_mm_store_si128(result + i, dquad);
}
}
#endif // HAVE_SSE2
// ----------------------------------------------------------------------------
// Worker thread that blends the original screenshot with a grayscale copy of
// it and writes the result into the XImage passed to setImage().
//
// setImage() and setAlpha() are not synchronized with run(); callers are
// expected to call them only while the thread is not running.
class BlendingThread : public QThread
{
public:
    // explicit: a QObject pointer must never convert silently to a thread.
    explicit BlendingThread(QObject *parent);
    ~BlendingThread();

    // Takes private copies of the image data (original and grayscale).
    // The image itself stays owned by the caller and receives the output.
    void setImage(XImage *image);

    // Weight of the original image for the next run(), expected in 0..255.
    void setAlpha(int alpha) { m_alpha = alpha; }

private:
    // In-place grayscale conversion of a 16 / 32 bit per pixel buffer.
    void toGray16(quint8 *data);
    void toGray32(quint8 *data);

    // One full blend pass; the variants differ only in the instructions used.
    void blend16();
    void blend32();
    void blend32_mmx();
    void blend32_sse2();

protected:
    // Thread entry point: runs one blend pass and returns.
    void run();

private:
    bool have_mmx;       // CPU reports MMX support
    bool have_sse2;      // CPU reports SSE2 support
    int m_alpha;         // blend weight used by run()
    XImage *m_image;     // destination image, not owned
    quint8 *m_original;  // copy of the original pixels, owned
    quint8 *m_final;     // grayscale copy of the pixels, owned
};
// Creates an idle blending thread and detects which SIMD code paths the CPU
// supports.
//
// Every member gets a defined value here, so the object is in a known state
// even before setImage()/setAlpha() are called (m_image and m_alpha used to
// be left uninitialized).
BlendingThread::BlendingThread(QObject *parent)
    : QThread(parent),
      // Err on the side of caution until Solid tells us otherwise.
      have_mmx(false),
      have_sse2(false),
      m_alpha(255),
      m_image(NULL),
      m_original(NULL),
      m_final(NULL)
{
    // Check if the CPU supports MMX and SSE2.
    // We only check the first CPU on an SMP system, and assume all CPUs support the same features.
    QList<Solid::Device> list = Solid::Device::listFromType(Solid::DeviceInterface::Processor, QString());

    // The list can be empty if e.g. there is no usable backend for Solid
    // (c.f. bug:163112); in that case the flags simply stay false.
    if (list.size() > 0)
    {
        Solid::Processor *processor = list[0].as<Solid::Processor>();
        // Guard against a device that does not provide the interface.
        if (processor)
        {
            Solid::Processor::InstructionSets features = processor->instructionSets();
            have_mmx = features & Solid::Processor::IntelMmx;
            have_sse2 = features & Solid::Processor::IntelSse2;
        }
    }
}
// Releases the two private pixel buffers allocated by setImage().
// Both pointers start out as NULL, so this also runs when setImage() was
// never called.
BlendingThread::~BlendingThread()
{
    quint8 *const grayCopy = m_final;
    quint8 *const originalCopy = m_original;
    _mm_free(grayCopy);
    _mm_free(originalCopy);
}
// Sets the image to blend into and prepares the two source buffers:
//   m_original - a copy of the image data (alpha forced to 0xff for depths
//                above 16)
//   m_final    - a grayscale copy of the image data
// The image stays owned by the caller; run() writes its output into
// image->data.
void BlendingThread::setImage(XImage *image)
{
    // Release the buffers of a previous call, so calling setImage() more
    // than once does not leak them. Both are NULL the first time.
    _mm_free(m_original);
    _mm_free(m_final);
    m_original = NULL;
    m_final = NULL;

    m_image = image;

    // Do the multiplication in size_t so it cannot overflow an int.
    const size_t size = size_t(m_image->bytes_per_line) * size_t(m_image->height);

    // We need the data to be aligned on a 128 bit (16 byte) boundary for SSE2
    m_original = (quint8*) _mm_malloc(size, 16);
    m_final = (quint8*) _mm_malloc(size, 16);

    memcpy((void*)m_original, (const void*)m_image->data, size);
    memcpy((void*)m_final, (const void*)m_image->data, size);

    if (m_image->depth > 16) {
        // Make sure that the alpha channel is initialized to 0xff
        for (int y = 0; y < m_image->height; y++) {
            quint32 *pixels = (quint32*)(m_original + (m_image->bytes_per_line * y));
            for (int x = 0; x < m_image->width; x++)
                pixels[x] |= 0xff000000;
        }
    }

    if (m_image->depth != 16)
        toGray32(m_final);
    else
        toGray16(m_final);
}
// Converts a buffer of 16-bit (5/6/5) pixels to grayscale in place.
// The buffer is walked using the dimensions and stride of m_image.
void BlendingThread::toGray16(quint8 *data)
{
    const int rows = m_image->height;
    const int cols = m_image->width;

    for (int row = 0; row < rows; ++row) {
        quint16 *line = (quint16*)(data + (m_image->bytes_per_line * row));
        for (int col = 0; col < cols; ++col) {
            int r, g, b;
            load16(line[col], &r, &g, &b);

            // Clearing the 3 least significant bits guarantees that red,
            // green and blue really end up with the same value once they
            // are packed into the 5/6/5 representation.
            const int luma = int(r * .299 + g * .587 + b * .114) & 0xf8;
            line[col] = store16(luma, luma, luma);
        }
    }
}
// Converts a buffer of 32-bit pixels to grayscale in place.
// The buffer is walked using the dimensions and stride of m_image.
void BlendingThread::toGray32(quint8 *data)
{
    const int rows = m_image->height;
    const int cols = m_image->width;

    for (int row = 0; row < rows; ++row) {
        quint32 *line = (quint32*)(data + (m_image->bytes_per_line * row));
        for (int col = 0; col < cols; ++col) {
            int r, g, b;
            load(line[col], &r, &g, &b);

            // Weighted sum of the channels, written to all three of them.
            const int luma = int(r * .299 + g * .587 + b * .114);
            line[col] = store(luma, luma, luma);
        }
    }
}
void BlendingThread::blend16()
{
for (int y = 0; y < m_image->height; y++)
{
uint start = m_image->bytes_per_line * y;
quint16 *over = (quint16*)(m_original + start);
quint16 *under = (quint16*)(m_final + start);
quint16 *result = (quint16*)(m_image->data + start);
scanline_blend_16(over, m_alpha, under, result, m_image->width);
}
}
void BlendingThread::blend32()
{
for (int y = 0; y < m_image->height; y++)
{
int start = m_image->bytes_per_line * y;
quint32 *over = (quint32*)(m_original + start);
quint32 *under = (quint32*)(m_final + start);
quint32 *result = (quint32*)(m_image->data + start);
scanline_blend(over, m_alpha, under, result, m_image->width);
}
}
// Blends the 32-bit original and grayscale buffers into the image data,
// one scanline at a time, using the MMX implementation.
// Does nothing when the file is built without HAVE_MMX.
void BlendingThread::blend32_mmx()
{
#ifdef HAVE_MMX
    const int rows = m_image->height;
    for (int row = 0; row < rows; ++row) {
        // Byte offset of this scanline, identical in all three buffers.
        const int offset = m_image->bytes_per_line * row;

        quint32 *top = (quint32*)(m_original + offset);
        quint32 *bottom = (quint32*)(m_final + offset);
        quint32 *out = (quint32*)(m_image->data + offset);

        scanline_blend_mmx(top, m_alpha, bottom, out, m_image->width);
    }
#endif
}
// Blends the 32-bit original and grayscale buffers into the image data using
// the SSE2 implementation. The whole image, row padding included, is handed
// over as a single run whose length is given in bytes.
// Does nothing when the file is built without HAVE_SSE2.
void BlendingThread::blend32_sse2()
{
#ifdef HAVE_SSE2
    const uint totalBytes = m_image->bytes_per_line * m_image->height;

    __m128i *top = (__m128i*)(m_original);
    __m128i *bottom = (__m128i*)(m_final);
    __m128i *out = (__m128i*)(m_image->data);

    scanline_blend_sse2(top, m_alpha, bottom, out, totalBytes);
#endif
}
// Thread entry point: performs one blend pass with the best implementation
// that is both compiled in and supported by the CPU.
// Order of preference for non-16-bit images: SSE2, then MMX, then plain C++.
void BlendingThread::run()
{
    if (m_image->depth == 16)
    {
        blend16();
        return;
    }

#ifdef HAVE_SSE2
    if (have_sse2)
    {
        blend32_sse2();
        return;
    }
#endif

#ifdef HAVE_MMX
    if (have_mmx)
    {
        blend32_mmx();
        return;
    }
#endif

    blend32();
}
// ----------------------------------------------------------------------------
// Sets up everything the effect needs: an XImage matching the pixmap's
// visual, depth and size, a graphics context for drawing into the pixmap,
// and the thread that does the blending.
FadeEffect::FadeEffect(QWidget *parent, QPixmap *pixmap)
    : LogoutEffect(parent, pixmap), blender(NULL)
{
    Display *dpy = parent->x11Info().display();
    Visual *visual = (Visual*)pixmap->x11Info().visual();

    // No data is attached yet (NULL); scanlines are padded to 32 bits.
    image = XCreateImage(dpy, visual, pixmap->depth(), ZPixmap, 0, NULL,
                         pixmap->width(), pixmap->height(), 32, 0);

    // Allocate the image data on 16 byte boundary for SSE2
    const int dataSize = image->bytes_per_line * image->height;
    image->data = (char*)_mm_malloc(dataSize, 16);

    gc = XCreateGC(dpy, pixmap->handle(), 0, NULL);
    blender = new BlendingThread(this);

    // Grabbing the screenshot starts at the top row.
    currentY = 0;
}
// Waits for a blend pass that may still be running, then releases the image
// and the graphics context.
FadeEffect::~FadeEffect()
{
    // The thread writes into image->data, so it must have finished first.
    blender->wait();

    // The pixel data came from _mm_malloc(), so it is released with
    // _mm_free() and detached before XDestroyImage() sees the image.
    char *const pixels = image->data;
    _mm_free(pixels);
    image->data = NULL;
    XDestroyImage(image);

    Display *dpy = QX11Info::display();
    XFreeGC(dpy, gc);
}
// Starts the effect: resets the fade state and begins grabbing the
// screenshot. The fade itself is started from grabImageSection() once the
// whole image has been grabbed.
void FadeEffect::start()
{
    alpha = 255;
    done = false;

    // Start by grabbing the screenshot
    grabImageSection();
}
// Grabs the next horizontal band of the root window into the image.
// The method re-schedules itself through a timer until the whole image has
// been grabbed; it then emits initialized() and starts the first blend pass.
void FadeEffect::grabImageSection()
{
    const int sectionHeight = 64;

    // The last band may be shorter than a full section.
    int rows = sectionHeight;
    if (currentY + sectionHeight > image->height)
        rows = image->height - currentY;

    XGetSubImage(QX11Info::display(), QX11Info::appRootWindow(), 0, currentY, image->width, rows,
                 AllPlanes, ZPixmap, image, 0, currentY);

    // Continue until we have the whole image
    currentY += sectionHeight;
    const bool complete = !(currentY < image->height);
    if (!complete)
    {
        QTimer::singleShot(1, this, SLOT(grabImageSection()));
        return;
    }

    // Let the owner know we're done.
    emit initialized();

    // Start the fade effect
    blender->setImage(image);
    blender->setAlpha(alpha);
    blender->start();

    time.start();
    QTimer::singleShot(10, this, SLOT(nextFrame()));
}
void FadeEffect::nextFrame()
{
const qreal runTime = 2000; // milliseconds
if (!blender->isFinished())
{
QTimer::singleShot(10, this, SLOT(nextFrame()));
return;
}
XPutImage(QX11Info::display(), pixmap->handle(), gc, image, 0, 0, 0, 0, image->width, image->height);
parent->update();
alpha = qRound(qMax(255. - (255. * (qreal(time.elapsed() / runTime))), 0.0));
if (!done)
{
blender->setAlpha(alpha);
blender->start();
// Make sure we don't send frames faster than the X server can process them
XSync(QX11Info::display(), False);
QTimer::singleShot(1, this, SLOT(nextFrame()));
}
if (alpha == 0)
done = true;
}