This commit is contained in:
Ivailo Monev 2016-08-10 03:22:04 +00:00
commit 113c57c95b
35 changed files with 11 additions and 7971 deletions

View file

@ -149,42 +149,6 @@ macro(KATIE_SETUP_OBJECT FORTARGET)
endif()
endmacro()
# Attach SIMD-specific compiler flags to sources, based on their file names.
#
# Usage: katie_setup_sources(<sources-variable> <source>...)
#
# The file name of every <source> is searched for each known instruction set
# name (3dnow, avx, mmx, sse, sse2, sse3, ssse3, iwmmxt, neon). When a name is
# found and the matching KATIE_<UPPERCASE NAME>_RESULT variable is true, the
# corresponding flag (-m<name>, -mfpu=neon or -mcpu=iwmmxt) is added to the
# COMPILE_FLAGS property of that source. The search is a plain substring
# match, so e.g. a file named *_sse2.* matches both "sse" and "sse2".
#
# Only the sources that ended up with at least one flag are appended to
# <sources-variable> in the caller's scope; all other sources are left out.
function(KATIE_SETUP_SOURCES SOURCESVAR)
    set(compilesources)
    foreach(source ${ARGN})
        get_filename_component(sourcename "${source}" NAME)
        set(compileflags)
        # TODO: sse4.1 and sse4.2 support, currently not needed
        foreach(flag 3dnow avx mmx sse sse2 sse3 ssse3 iwmmxt neon)
            string(REGEX MATCH "${flag}" flagmatch "${sourcename}")
            string(TOUPPER "${flag}" upperflag)

            # ARM-only instruction sets are ignored (with a warning) on other
            # architectures
            if("${flagmatch}" MATCHES "(iwmmxt|neon)" AND NOT "${KATIE_ARCHITECTURE}" STREQUAL "arm")
                set(flagmatch)
                katie_warning("The source file ${source} is ARM specific, make it conditional")
            endif()

            # "mmx" is a substring of "iwmmxt" - that is a false positive
            if("${flagmatch}" STREQUAL "mmx" AND "${sourcename}" MATCHES "iwmmxt")
                set(flagmatch)
            endif()

            # a flag is only added when the instruction set was detected
            if(flagmatch AND KATIE_${upperflag}_RESULT)
                if("${flagmatch}" STREQUAL "neon")
                    set(compileflags "${compileflags} -mfpu=neon")
                elseif("${flagmatch}" STREQUAL "iwmmxt")
                    set(compileflags "${compileflags} -mcpu=iwmmxt")
                else()
                    set(compileflags "${compileflags} -m${flag}")
                endif()
            endif()
        endforeach()

        if(compileflags)
            set_source_files_properties("${source}" PROPERTIES COMPILE_FLAGS "${compileflags}")
            list(APPEND compilesources "${source}")
        endif()
    endforeach()

    if(compilesources)
        set(${SOURCESVAR} ${${SOURCESVAR}} ${compilesources} PARENT_SCOPE)
    endif()
endfunction()
# a function to change full installation paths to relative so that CPack
# generators do not choke, still paths must contain a string of some sort - if
# they are null even quoting them will not help and CMake will complain that

View file

@ -1,415 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include "qsimd_p.h"
#include <QByteArray>
#include <stdio.h>
#if defined(Q_OS_WINCE)
#include <windows.h>
#endif
#if defined(Q_OS_WIN64)
#include <intrin.h>
#endif
#if defined(Q_OS_LINUX) && defined(__arm__)
#include "qcore_unix_p.h"
// the kernel header definitions for HWCAP_*
// (the ones we need/may need anyway)
// copied from <asm/hwcap.h> (ARM)
#define HWCAP_IWMMXT 512
#define HWCAP_CRUNCH 1024
#define HWCAP_THUMBEE 2048
#define HWCAP_NEON 4096
#define HWCAP_VFPv3 8192
#define HWCAP_VFPv3D16 16384
// copied from <linux/auxvec.h>
#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
#endif
QT_BEGIN_NAMESPACE
#if defined (Q_OS_NACL)
// Variant compiled when Q_OS_NACL is defined: nothing is probed and an empty
// feature mask is returned.
static inline uint detectProcessorFeatures()
{
return 0;
}
#elif defined (Q_OS_WINCE)
// Variant compiled when Q_OS_WINCE is defined: the feature mask is built by
// querying IsProcessorFeaturePresent(). ARM builds can only report IWMMXT,
// x86 builds can report MMX and MMX3DNOW (each only when the matching
// QT_HAVE_* macro is defined); every other build reports nothing.
static inline uint detectProcessorFeatures()
{
#if defined (ARM)
    if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX))
        return IWMMXT;
    return 0;
#elif defined(_X86_)
    uint detected = 0;
#if defined QT_HAVE_MMX
    if (IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE))
        detected |= MMX;
#endif
#if defined QT_HAVE_3DNOW
    if (IsProcessorFeaturePresent(PF_3DNOW_INSTRUCTIONS_AVAILABLE))
        detected |= MMX3DNOW;
#endif
    return detected;
#else
    return 0;
#endif
}
#elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON)
// ARM variant.
//
// On Linux, /proc/self/auxv is read as a sequence of (type, value) pairs of
// unsigned long; the value of the AT_HWCAP entry is tested against
// HWCAP_IWMMXT and HWCAP_NEON. When the file could be opened, whatever was
// found there (possibly nothing) is returned.
//
// Otherwise the result is decided at compile time: IWMMXT when QT_HAVE_IWMMXT
// is defined, else NEON when QT_ALWAYS_HAVE_NEON is defined, else 0.
static inline uint detectProcessorFeatures()
{
    uint features = 0;

#if defined(Q_OS_LINUX)
    int auxv = ::qt_safe_open("/proc/self/auxv", O_RDONLY);
    if (auxv != -1) {
        unsigned long vector[64];
        int nread;
        // keep reading chunks until a feature was found or the file ends
        while (features == 0) {
            nread = ::qt_safe_read(auxv, (char *)vector, sizeof vector);
            if (nread <= 0) {
                // EOF or error
                break;
            }

            const int max = nread / (sizeof vector[0]);
            // only look at pairs whose value half was actually read, so that
            // vector[i+1] never refers to data the read did not fill in
            for (int i = 0; i + 1 < max; i += 2) {
                if (vector[i] == AT_HWCAP) {
                    if (vector[i+1] & HWCAP_IWMMXT)
                        features |= IWMMXT;
                    if (vector[i+1] & HWCAP_NEON)
                        features |= NEON;
                    break;
                }
            }
        }

        ::qt_safe_close(auxv);
        return features;
    }
    // fall back if /proc/self/auxv could not be opened
#endif

#if defined(QT_HAVE_IWMMXT)
    // runtime detection only available when running as a privileged process
    features = IWMMXT;
#elif defined(QT_ALWAYS_HAVE_NEON)
    features = NEON;
#endif

    return features;
}
#elif defined(__i386__) || defined(_M_IX86)
// 32-bit x86 variant: probes the CPU with the CPUID instruction and translates
// the returned bits into the CPUFeatures mask.
//
// Three raw values are collected (all stay 0 when CPUID is unavailable or when
// neither the GNU nor the Windows code path is compiled):
//   result          - EDX after CPUID with EAX = 1
//   feature_result  - ECX after CPUID with EAX = 1
//   extended_result - EDX after CPUID with EAX = 0x80000001, queried only when
//                     CPUID with EAX = 0x80000000 returns a value above
//                     0x80000000 in EAX
static inline uint detectProcessorFeatures()
{
uint features = 0;
unsigned int extended_result = 0;
unsigned int feature_result = 0;
uint result = 0;
/* see p. 118 of amd64 instruction set manual Vol3 */
#if defined(Q_CC_GNU)
long cpuid_supported, tmp1;
// Flip bit 0x00200000 of the flags register and read the flags back: if the
// bit could not be changed, the XOR with the saved copy leaves 0.
asm ("pushf\n"
"pop %0\n"
"mov %0, %1\n"
"xor $0x00200000, %0\n"
"push %0\n"
"popf\n"
"pushf\n"
"pop %0\n"
"xor %1, %0\n" // %eax is now 0 if CPUID is not supported
: "=a" (cpuid_supported), "=r" (tmp1)
);
if (cpuid_supported) {
// Standard features. %ebx is swapped into a scratch register around CPUID
// and swapped back afterwards, so its original value is preserved.
asm ("xchg %%ebx, %2\n"
"cpuid\n"
"xchg %%ebx, %2\n"
: "=c" (feature_result), "=d" (result), "=&r" (tmp1)
: "a" (1));
// Extended features: the result is zeroed (label 2) unless the first CPUID
// call reports a highest extended function above 0x80000000 (label 1).
asm ("xchg %%ebx, %1\n"
"cpuid\n"
"cmp $0x80000000, %%eax\n"
"jnbe 1f\n"
"xor %0, %0\n"
"jmp 2f\n"
"1:\n"
"mov $0x80000001, %%eax\n"
"cpuid\n"
"2:\n"
"xchg %%ebx, %1\n"
: "=d" (extended_result), "=&r" (tmp1)
: "a" (0x80000000)
: "%ecx"
);
}
#elif defined (Q_OS_WIN)
// Same flag-bit test as the GNU path, followed by CPUID with EAX = 1; all
// four general purpose registers are saved and restored around it.
_asm {
push eax
push ebx
push ecx
push edx
pushfd
pop eax
mov ebx, eax
xor eax, 00200000h
push eax
popfd
pushfd
pop eax
mov edx, 0
xor eax, ebx
jz skip
mov eax, 1
cpuid
mov result, edx
mov feature_result, ecx
skip:
pop edx
pop ecx
pop ebx
pop eax
}
// Second pass for the extended feature bits (EAX = 0x80000000, then
// EAX = 0x80000001 when available).
_asm {
push eax
push ebx
push ecx
push edx
pushfd
pop eax
mov ebx, eax
xor eax, 00200000h
push eax
popfd
pushfd
pop eax
mov edx, 0
xor eax, ebx
jz skip2
mov eax, 80000000h
cpuid
cmp eax, 80000000h
jbe skip2
mov eax, 80000001h
cpuid
mov extended_result, edx
skip2:
pop edx
pop ecx
pop ebx
pop eax
}
#endif
// result now contains the standard feature bits
if (result & (1u << 15))
features |= CMOV;
if (result & (1u << 23))
features |= MMX;
if (extended_result & (1u << 22))
features |= MMXEXT;
if (extended_result & (1u << 31))
features |= MMX3DNOW;
if (extended_result & (1u << 30))
features |= MMX3DNOWEXT;
if (result & (1u << 25))
features |= SSE;
if (result & (1u << 26))
features |= SSE2;
// feature_result holds the bits for the newer instruction sets
if (feature_result & (1u))
features |= SSE3;
if (feature_result & (1u << 9))
features |= SSSE3;
if (feature_result & (1u << 19))
features |= SSE4_1;
if (feature_result & (1u << 20))
features |= SSE4_2;
if (feature_result & (1u << 28))
features |= AVX;
return features;
}
#elif defined(__x86_64) || defined(Q_OS_WIN64)
// 64-bit x86 variant: MMX, SSE, SSE2 and CMOV are reported unconditionally;
// SSE3, SSSE3, SSE4.1, SSE4.2 and AVX are taken from ECX after CPUID with
// EAX = 1. When neither the GNU nor the Win64 code path is compiled,
// feature_result stays 0 and only the unconditional set is returned.
static inline uint detectProcessorFeatures()
{
uint features = MMX|SSE|SSE2|CMOV;
uint feature_result = 0;
#if defined(Q_CC_GNU)
quint64 tmp;
// %rbx is swapped into a scratch register around CPUID and swapped back
// afterwards, so its original value is preserved.
// NOTE(review): EAX is declared as an input only although CPUID presumably
// overwrites it - confirm that the constraints are sufficient.
asm ("xchg %%rbx, %1\n"
"cpuid\n"
"xchg %%rbx, %1\n"
: "=c" (feature_result), "=&r" (tmp)
: "a" (1)
: "%edx"
);
#elif defined (Q_OS_WIN64)
{
int info[4];
__cpuid(info, 1);
// the third element is used as the ECX feature word
feature_result = info[2];
}
#endif
if (feature_result & (1u))
features |= SSE3;
if (feature_result & (1u << 9))
features |= SSSE3;
if (feature_result & (1u << 19))
features |= SSE4_1;
if (feature_result & (1u << 20))
features |= SSE4_2;
if (feature_result & (1u << 28))
features |= AVX;
return features;
}
#elif defined(__ia64__)
// Variant compiled when __ia64__ is defined: a fixed mask of MMX, SSE and SSE2
// is returned without probing anything.
static inline uint detectProcessorFeatures()
{
return MMX|SSE|SSE2;
}
#else
// Fallback for every platform not handled by an earlier branch: no optional
// CPU features are reported.
static inline uint detectProcessorFeatures()
{
return 0;
}
#endif
/*
* Use kdesdk/scripts/generate_string_table.pl to update the table below.
* Here's the data (don't forget the ONE leading space):
mmx
mmxext
mmx3dnow
mmx3dnowext
sse
sse2
cmov
iwmmxt
neon
sse3
ssse3
sse4.1
sse4.2
avx
*/
// begin generated
// Names of all known CPU features, stored back to back. Every name starts
// with a space and is NUL-terminated; an extra NUL ends the table.
static const char features_string[] =
    " mmx\0"
    " mmxext\0"
    " mmx3dnow\0"
    " mmx3dnowext\0"
    " sse\0"
    " sse2\0"
    " cmov\0"
    " iwmmxt\0"
    " neon\0"
    " sse3\0"
    " ssse3\0"
    " sse4.1\0"
    " sse4.2\0"
    " avx\0"
    "\0";

// Offset of every name inside features_string; entry i belongs to the feature
// with bit (1 << i). The list is terminated by -1.
static const int features_indices[] = {
    0, 5, 13, 23, 36, 41, 47, 53,
    61, 67, 73, 80, 88, 96, -1
};
// end generated

// Number of real entries in features_indices: the element count minus the
// trailing -1 terminator. The subtraction is done on the element count (not
// on the byte size) so the result does not depend on integer truncation.
const int features_count = int(sizeof features_indices / sizeof features_indices[0]) - 1;
// Returns the mask of CPU features that may be used, as CPUFeatures bits.
//
// The mask is computed on the first call and cached in a function-local
// static. It starts from detectProcessorFeatures() and is then reduced
// according to the QT_NO_CPU_FEATURE environment variable: the value "all"
// clears everything, otherwise every feature whose name (with a leading
// space) occurs in the space-prefixed value is cleared.
uint qDetectCPUFeatures()
{
    // -1 means "not computed yet"
    static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1);
    if (features != -1)
        return features;

    uint detected = detectProcessorFeatures();
    QByteArray disabled = qgetenv("QT_NO_CPU_FEATURE");
    if (disabled == "all") {
        detected = 0;
    } else if (!disabled.isEmpty()) {
        // the names in features_string all begin with a space
        disabled.prepend(' ');
        int index = 0;
        while (index < features_count) {
            const char *name = features_string + features_indices[index];
            if (disabled.contains(name))
                detected &= ~(1 << index);
            ++index;
        }
    }

    features = detected;
    return features;
}
void qDumpCPUFeatures()
{
uint features = qDetectCPUFeatures();
printf("Processor features: ");
for (int i = 0; i < features_count; ++i) {
if (features & (1 << i))
printf("%s", features_string + features_indices[i]);
}
puts("");
}
QT_END_NAMESPACE

View file

@ -1,237 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QSIMD_P_H
#define QSIMD_P_H
#include <qglobal.h>
QT_BEGIN_HEADER
#if defined(QT_NO_MAC_XARCH) || (defined(Q_OS_DARWIN) && (defined(__ppc__) || defined(__ppc64__)))
// Disable MMX and SSE on Mac/PPC builds, or if the compiler
// does not support -Xarch argument passing
#undef QT_HAVE_SSE
#undef QT_HAVE_SSE2
#undef QT_HAVE_SSE3
#undef QT_HAVE_SSSE3
#undef QT_HAVE_SSE4_1
#undef QT_HAVE_SSE4_2
#undef QT_HAVE_AVX
#undef QT_HAVE_3DNOW
#undef QT_HAVE_MMX
#endif
// SSE intrinsics
#if defined(QT_HAVE_SSE2) && (defined(__SSE2__) || defined(Q_CC_MSVC))
#if defined(QT_LINUXBASE)
/// this is an evil hack - the posix_memalign declaration in LSB
/// is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431
# define posix_memalign _lsb_hack_posix_memalign
# include <emmintrin.h>
# undef posix_memalign
#else
# ifdef Q_CC_MINGW
# include <windows.h>
# endif
# include <emmintrin.h>
#endif
// SSE3 intrinsics
#if defined(QT_HAVE_SSE3) && (defined(__SSE3__) || defined(Q_CC_MSVC))
#include <pmmintrin.h>
#endif
// SSSE3 intrinsics
#if defined(QT_HAVE_SSSE3) && (defined(__SSSE3__) || defined(Q_CC_MSVC))
#include <tmmintrin.h>
#endif
// SSE4.1 intrinsics
#if defined(QT_HAVE_SSE4_1) && (defined(__SSE4_1__) || defined(Q_CC_MSVC))
#include <smmintrin.h>
#endif
// SSE4.2 intrinsics
#if defined(QT_HAVE_SSE4_2) && (defined(__SSE4_2__) || defined(Q_CC_MSVC))
#include <nmmintrin.h>
// Define the _SIDD_* constants that some compilers' headers (e.g. llvm-gcc) do not provide
#ifndef _SIDD_UBYTE_OPS
#define _SIDD_UBYTE_OPS 0x00
#endif
#ifndef _SIDD_UWORD_OPS
#define _SIDD_UWORD_OPS 0x01
#endif
#ifndef _SIDD_SBYTE_OPS
#define _SIDD_SBYTE_OPS 0x02
#endif
#ifndef _SIDD_SWORD_OPS
#define _SIDD_SWORD_OPS 0x03
#endif
#ifndef _SIDD_CMP_EQUAL_ANY
#define _SIDD_CMP_EQUAL_ANY 0x00
#endif
#ifndef _SIDD_CMP_RANGES
#define _SIDD_CMP_RANGES 0x04
#endif
#ifndef _SIDD_CMP_EQUAL_EACH
#define _SIDD_CMP_EQUAL_EACH 0x08
#endif
#ifndef _SIDD_CMP_EQUAL_ORDERED
#define _SIDD_CMP_EQUAL_ORDERED 0x0c
#endif
#ifndef _SIDD_POSITIVE_POLARITY
#define _SIDD_POSITIVE_POLARITY 0x00
#endif
#ifndef _SIDD_NEGATIVE_POLARITY
#define _SIDD_NEGATIVE_POLARITY 0x10
#endif
#ifndef _SIDD_MASKED_POSITIVE_POLARITY
#define _SIDD_MASKED_POSITIVE_POLARITY 0x20
#endif
#ifndef _SIDD_MASKED_NEGATIVE_POLARITY
#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30
#endif
#ifndef _SIDD_LEAST_SIGNIFICANT
#define _SIDD_LEAST_SIGNIFICANT 0x00
#endif
#ifndef _SIDD_MOST_SIGNIFICANT
#define _SIDD_MOST_SIGNIFICANT 0x40
#endif
#ifndef _SIDD_BIT_MASK
#define _SIDD_BIT_MASK 0x00
#endif
#ifndef _SIDD_UNIT_MASK
#define _SIDD_UNIT_MASK 0x40
#endif
#endif
// AVX intrinsics
#if defined(QT_HAVE_AVX) && (defined(__AVX__) || defined(Q_CC_MSVC))
#include <immintrin.h>
#endif
#if !defined(QT_BOOTSTRAPPED) && (!defined(Q_CC_MSVC) || (defined(_M_X64) || _M_IX86_FP == 2))
#define QT_ALWAYS_HAVE_SSE2
#endif
#endif // defined(QT_HAVE_SSE2) && (defined(__SSE2__) || defined(Q_CC_MSVC))
// NEON intrinsics
#if defined __ARM_NEON__
#define QT_ALWAYS_HAVE_NEON
#include <arm_neon.h>
#endif
// IWMMXT intrinsics
#if defined(QT_HAVE_IWMMXT)
#include <mmintrin.h>
#if defined(Q_OS_WINCE)
# include "qplatformdefs.h"
#endif
#endif
#if defined(QT_HAVE_IWMMXT)
#if !defined(__IWMMXT__) && !defined(Q_OS_WINCE)
# include <xmmintrin.h>
#elif defined(Q_OS_WINCE_STD) && defined(_X86_)
# pragma warning(disable: 4391)
# include <xmmintrin.h>
#endif
#endif
// 3D now intrinsics
#if defined(QT_HAVE_3DNOW) && (defined(__3dNOW__) || defined(Q_CC_MSVC))
#include <mm3dnow.h>
#endif
QT_BEGIN_NAMESPACE
// Bit flags for the optional CPU instruction set extensions reported by
// qDetectCPUFeatures(); each feature occupies exactly one bit.
enum CPUFeatures {
    None        = 0,
    MMX         = 1 << 0,
    MMXEXT      = 1 << 1,
    MMX3DNOW    = 1 << 2,
    MMX3DNOWEXT = 1 << 3,
    SSE         = 1 << 4,
    SSE2        = 1 << 5,
    CMOV        = 1 << 6,
    IWMMXT      = 1 << 7,
    NEON        = 1 << 8,
    SSE3        = 1 << 9,
    SSSE3       = 1 << 10,
    SSE4_1      = 1 << 11,
    SSE4_2      = 1 << 12,
    AVX         = 1 << 13
};
Q_CORE_EXPORT uint qDetectCPUFeatures();
// Expands to a for-loop header only; the statement following the macro becomes
// the loop body. The loop advances i (which the caller must have initialised)
// while i < min(length, k), where k = (4 - ((ptr >> 2) & 0x3)) & 0x3, i.e. a
// value between 0 and 3 derived from bits 2-3 of the address in ptr.
// NOTE(review): k looks like the number of 4-byte elements needed to reach the
// next 16-byte boundary, which assumes ptr is 4-byte aligned and i counts
// 4-byte elements - confirm against the callers.
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)
QT_END_NAMESPACE
QT_END_HEADER
#endif // QSIMD_P_H

View file

@ -46,7 +46,6 @@
#include <qtextcodec.h>
#endif
#include <qutfcodec_p.h>
#include "qsimd_p.h"
#include <qdatastream.h>
#include <qlist.h>
#include "qlocale.h"
@ -3535,61 +3534,6 @@ bool QString::endsWith(const QChar &c, Qt::CaseSensitivity cs) const
Use toLocal8Bit() instead.
*/
#if defined(QT_ALWAYS_HAVE_SSE2)
// Takes a vector of eight 16-bit characters and returns it with every
// character above 0xff (i.e. outside Latin 1) replaced by '?'. Characters
// up to 0xff are returned unchanged.
//
// Three implementations are selected at compile time:
//  - SSE4.2: a range comparison producing a per-element mask, then a blend
//  - SSE4.1: a signed comparison on offset values, then a blend
//  - plain SSE2: the same comparison, merged with and/andnot/or
static inline __m128i mergeQuestionMarks(__m128i chunk)
{
const __m128i questionMark = _mm_set1_epi16('?');
# ifdef __SSE4_2__
// compare the unsigned shorts for the range 0x0100-0xFFFF
// note on the use of _mm_cmpestrm:
// The MSDN documentation online (http://technet.microsoft.com/en-us/library/bb514080.aspx)
// says for range search the following:
// For each character c in a, determine whether b0 <= c <= b1 or b2 <= c <= b3
//
// However, all examples on the Internet, including from Intel
// (see http://software.intel.com/en-us/articles/xml-parsing-accelerator-with-intel-streaming-simd-extensions-4-intel-sse4/)
// put the range to be searched first
//
// Disassembly and instruction-level debugging with GCC and ICC show
// that they are doing the right thing. Inverting the arguments in the
// instruction does cause a bunch of test failures.
const int mode = _SIDD_UWORD_OPS | _SIDD_CMP_RANGES | _SIDD_UNIT_MASK;
// the two 16-bit bounds of the range, 0x0100 and 0xffff, packed in one int
const __m128i rangeMatch = _mm_cvtsi32_si128(0xffff0100);
const __m128i offLimitMask = _mm_cmpestrm(rangeMatch, 2, chunk, 8, mode);
// replace the non-Latin 1 characters in the chunk with question marks
chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
# else
// SSE has no compare instruction for unsigned comparison.
// The values must be shifted by 0x8000 before they can be compared
const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));
const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
# ifdef __SSE4_1__
// replace the non-Latin 1 characters in the chunk with question marks
chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
# else
// offLimitQuestionMark contains '?' for each 16 bits that were off-limit,
// the 16 bits that were correct contain zeros
const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
// correctBytes contains the characters that were within the limit,
// the 16 bits that were off-limit contain zeros
const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);
// merge offLimitQuestionMark and correctBytes to have the result
chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);
# endif
# endif
return chunk;
}
#endif
static QByteArray toLatin1_helper(const QChar *data, int length)
{
QByteArray ba;
@ -3597,51 +3541,6 @@ static QByteArray toLatin1_helper(const QChar *data, int length)
ba.resize(length);
const ushort *src = reinterpret_cast<const ushort *>(data);
uchar *dst = (uchar*) ba.data();
#if defined(QT_ALWAYS_HAVE_SSE2)
if (length >= 16) {
const int chunkCount = length >> 4; // divided by 16
for (int i = 0; i < chunkCount; ++i) {
__m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load
chunk1 = mergeQuestionMarks(chunk1);
src += 8;
__m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load
chunk2 = mergeQuestionMarks(chunk2);
src += 8;
// pack the two vector to 16 x 8bits elements
const __m128i result = _mm_packus_epi16(chunk1, chunk2);
_mm_storeu_si128((__m128i*)dst, result); // store
dst += 16;
}
length = length % 16;
}
#elif defined(QT_ALWAYS_HAVE_NEON)
// Refer to the documentation of the SSE2 implementation;
// this uses exactly the same method as for SSE, except:
// 1) neon has unsigned comparison
// 2) packing is done to 64 bits (8 x 8bits component).
if (length >= 16) {
const int chunkCount = length >> 3; // divided by 8
const uint16x8_t questionMark = vdupq_n_u16('?'); // set
const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
for (int i = 0; i < chunkCount; ++i) {
uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
src += 8;
const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
vst1_u8(dst, result); // store
dst += 8;
}
length = length % 8;
}
#endif
while (length--) {
*dst++ = (*src>0xff) ? '?' : (uchar) *src;
++src;
@ -3783,31 +3682,6 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
d->data = d->array;
d->array[size] = '\0';
ushort *dst = d->data;
/* SIMD:
* Unpacking with SSE has been shown to improve performance on recent CPUs
* The same method gives no improvement with NEON.
*/
#if defined(QT_ALWAYS_HAVE_SSE2)
if (size >= 16) {
int chunkCount = size >> 4; // divided by 16
const __m128i nullMask = _mm_set1_epi32(0);
for (int i = 0; i < chunkCount; ++i) {
const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load
str += 16;
// unpack the first 8 bytes, padding with zeros
const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
_mm_storeu_si128((__m128i*)dst, firstHalf); // store
dst += 8;
// unpack the last 8 bytes, padding with zeros
const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
_mm_storeu_si128((__m128i*)dst, secondHalf); // store
dst += 8;
}
size = size % 16;
}
#endif
while (size--)
*dst++ = (uchar)*str++;
}

View file

@ -37,7 +37,6 @@ set(CORE_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsharedpointer.h
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsharedpointer_impl.h
${CMAKE_CURRENT_SOURCE_DIR}/tools/qset.h
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsimd_p.h
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsize.h
${CMAKE_CURRENT_SOURCE_DIR}/tools/qstack.h
${CMAKE_CURRENT_SOURCE_DIR}/tools/qstring.h
@ -80,7 +79,6 @@ set(CORE_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/tools/qregexp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tools/qshareddata.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsharedpointer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsimd.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsize.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tools/qstring.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tools/qstringbuilder.cpp

View file

@ -61,13 +61,6 @@ set(GUI_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/image/qgifhandler.cpp
)
# SIMD-specific image converters: each file is handed to katie_setup_sources()
# together with the name of the GUI_SOURCES list, so that the instruction set
# named in the file name (neon, sse2, ssse3) can be handled per source file.
katie_setup_sources(
GUI_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/image/qimage_neon.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image/qimage_sse2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image/qimage_ssse3.cpp
)
if(WITH_PNG)
set(GUI_HEADERS
${GUI_HEADERS}

View file

@ -57,7 +57,6 @@
#include <qdrawhelper_p.h>
#include <qmemrotate_p.h>
#include <qpixmapdata_p.h>
#include <qsimd_p.h>
#include <qhash.h>
@ -3374,35 +3373,6 @@ static InPlace_Image_Converter inplace_converter_map[QImage::NImageFormats][QIma
} // Format_ARGB4444_Premultiplied
};
void qInitImageConversions()
{
const uint features = qDetectCPUFeatures();
Q_UNUSED(features);
#ifdef QT_HAVE_SSE2
if (features & SSE2) {
extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags);
inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_sse2;
}
#endif
#ifdef QT_HAVE_SSSE3
if (features & SSSE3) {
extern void convert_RGB888_to_RGB32_ssse3(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags);
converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_ssse3;
converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_ssse3;
converter_map[QImage::Format_RGB888][QImage::Format_ARGB32_Premultiplied] = convert_RGB888_to_RGB32_ssse3;
}
#endif
#ifdef QT_HAVE_NEON
if (features & NEON) {
extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags);
converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon;
converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_neon;
converter_map[QImage::Format_RGB888][QImage::Format_ARGB32_Premultiplied] = convert_RGB888_to_RGB32_neon;
}
#endif
}
void qGamma_correct_back_to_linear_cs(QImage *image)
{
extern uchar qt_pow_rgb_gamma[256];

View file

@ -1,116 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <qimage.h>
#include <qimage_p.h>
#include <qsimd_p.h>
#ifdef QT_HAVE_NEON
QT_BEGIN_NAMESPACE
// Converts len pixels from packed 3-byte RGB (src) to 32-bit pixels (dst).
//
// The work is split in three phases:
//  1. at most one pixel is converted with qRgb() so that dst moves to the
//     position the vector loop expects,
//  2. while at least 8 pixels remain, blocks of 8 pixels are converted with
//     NEON load/store instructions,
//  3. the remaining pixels are converted one by one with qRgb().
// Nothing is done when len is 0.
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_neon(quint32 *dst, const uchar *src, int len)
{
if (!len)
return;
const quint32 *const end = dst + len;
// align dst on 64 bits
// (bit 2 of the address decides whether one leading pixel is needed)
const int offsetToAlignOn8Bytes = (reinterpret_cast<quintptr>(dst) >> 2) & 0x1;
for (int i = 0; i < offsetToAlignOn8Bytes; ++i) {
*dst++ = qRgb(src[0], src[1], src[2]);
src += 3;
}
if ((len - offsetToAlignOn8Bytes) >= 8) {
// the loop below always handles 8 pixels, so it must stop 7 pixels early
const quint32 *const simdEnd = end - 7;
// 0xff in every lane, pinned to register d3 so that the store instructions
// below can use it as the fourth channel next to the three loaded ones
register uint8x8_t fullVector asm ("d3") = vdup_n_u8(0xff);
do {
#if Q_BYTE_ORDER == Q_BIG_ENDIAN
// load three channel registers (d4-d6) and store them behind d3;
// both pointers are advanced by the instructions themselves ("!")
asm volatile (
"vld3.8 { d4, d5, d6 }, [%[SRC]] !\n\t"
"vst4.8 { d3, d4, d5, d6 }, [%[DST],:64] !\n\t"
: [DST]"+r" (dst), [SRC]"+r" (src)
: "w"(fullVector)
: "memory", "d4", "d5", "d6"
);
#else
// load three channel registers (d0-d2), swap the first and the third and
// store them in front of d3; both pointers are advanced by the
// instructions themselves ("!")
asm volatile (
"vld3.8 { d0, d1, d2 }, [%[SRC]] !\n\t"
"vswp d0, d2\n\t"
"vst4.8 { d0, d1, d2, d3 }, [%[DST],:64] !\n\t"
: [DST]"+r" (dst), [SRC]"+r" (src)
: "w"(fullVector)
: "memory", "d0", "d1", "d2"
);
#endif
} while (dst < simdEnd);
}
// scalar tail for whatever the vector loop left over
while (dst != end) {
*dst++ = qRgb(src[0], src[1], src[2]);
src += 3;
}
}
// Converts a whole RGB888 image into a 32-bit image of the same size, one
// scan line at a time, using qt_convert_rgb888_to_rgb32_neon() for each line.
// The destination must be RGB32, ARGB32 or ARGB32_Premultiplied; formats and
// dimensions are only checked with Q_ASSERT. The conversion flags are ignored.
void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
{
    Q_ASSERT(src->format == QImage::Format_RGB888);
    Q_ASSERT(dest->format == QImage::Format_RGB32 || dest->format == QImage::Format_ARGB32 || dest->format == QImage::Format_ARGB32_Premultiplied);
    Q_ASSERT(src->width == dest->width);
    Q_ASSERT(src->height == dest->height);

    const uchar *srcLine = (uchar *) src->data;
    uchar *destLine = (uchar *) dest->data;

    // both images may have padded scan lines, hence the byte-wise stepping
    for (int remaining = src->height; remaining > 0; --remaining) {
        qt_convert_rgb888_to_rgb32_neon((quint32 *) destLine, srcLine, src->width);
        srcLine += src->bytes_per_line;
        destLine += dest->bytes_per_line;
    }
}
QT_END_NAMESPACE
#endif // QT_HAVE_NEON

View file

@ -107,7 +107,6 @@ struct Q_GUI_EXPORT QImageData { // internal image data
QPaintEngine *paintEngine;
};
void qInitImageConversions();
Q_GUI_EXPORT void qGamma_correct_back_to_linear_cs(QImage *image);
inline int qt_depthForFormat(QImage::Format format)

View file

@ -1,111 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include "qimage.h"
#include <qimage_p.h>
#include <qsimd_p.h>
#include <qdrawhelper_p.h>
#include <qdrawingprimitive_sse2_p.h>
#ifdef QT_HAVE_SSE2
QT_BEGIN_NAMESPACE
// Converts an ARGB32 image to ARGB32_Premultiplied in place.
//
// Every scan line is processed in vectors of four pixels, followed by a scalar
// loop for the up to three pixels left over; the padding at the end of each
// line is skipped. The format field of data is updated at the end and the
// function always returns true. The conversion flags are ignored.
bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags)
{
Q_ASSERT(data->format == QImage::Format_ARGB32);
// extra pixels on each line
const int spare = data->width & 3;
// width in pixels of the pad at the end of each line
const int pad = (data->bytes_per_line >> 2) - data->width;
// number of complete four-pixel vectors on each line
const int iter = data->width >> 2;
int height = data->height;
const __m128i alphaMask = _mm_set1_epi32(0xff000000);
const __m128i nullVector = _mm_setzero_si128();
const __m128i half = _mm_set1_epi16(0x80);
const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
__m128i *d = reinterpret_cast<__m128i*>(data->data);
while (height--) {
const __m128i *end = d + iter;
for (; d != end; ++d) {
const __m128i srcVector = _mm_loadu_si128(d);
const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask);
if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) {
// opaque, data is unchanged
} else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) == 0xffff) {
// fully transparent
_mm_storeu_si128(d, nullVector);
} else {
// mixed alpha values: place each pixel's alpha in both 16-bit halves of
// its 32-bit lane
__m128i alphaChannel = _mm_srli_epi32(srcVector, 24);
alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16));
__m128i result;
// NOTE(review): BYTE_MUL_SSE2 is defined outside this file; presumably it
// multiplies every colour byte by the alpha - confirm
BYTE_MUL_SSE2(result, srcVector, alphaChannel, colorMask, half);
// keep the computed colour bytes but restore the original alpha byte
result = _mm_or_si128(_mm_andnot_si128(alphaMask, result), srcVectorAlpha);
_mm_storeu_si128(d, result);
}
}
// scalar loop for the pixels that did not fill a whole vector
QRgb *p = reinterpret_cast<QRgb*>(d);
QRgb *pe = p+spare;
for (; p != pe; ++p) {
if (*p < 0x00ffffff)
*p = 0;
else if (*p < 0xff000000)
*p = PREMUL(*p);
}
// skip the padding to reach the start of the next line
d = reinterpret_cast<__m128i*>(p+pad);
}
data->format = QImage::Format_ARGB32_Premultiplied;
return true;
}
QT_END_NAMESPACE
#endif // QT_HAVE_SSE2

View file

@ -1,151 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <qimage.h>
#include <qimage_p.h>
#include <qsimd_p.h>
#ifdef QT_HAVE_SSSE3
QT_BEGIN_NAMESPACE
// Convert a scanline of RGB888 (src) to RGB32 (dst), forcing alpha to 0xff.
// src must be at least len * 3 bytes
// dst must be at least len * 4 bytes
// A len of zero or less is a no-op.
// NOTE(review): the alignment prologue and the aligned stores below assume
// dst is at least 4-byte aligned - confirm against callers.
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len)
{
    // Nothing to convert; also protects the "dst != end" epilogue below from
    // running away when a negative length is passed in.
    if (len <= 0)
        return;

    quint32 *const end = dst + len;

    // Prologue, align dst to 16 bytes. The alignment is done on dst because it has 4 store()
    // for each 3 load() of src.
    const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;
    const int prologLength = qMin(len, offsetToAlignOn16Bytes);

    for (int i = 0; i < prologLength; ++i) {
        *dst++ = qRgb(src[0], src[1], src[2]);
        src += 3;
    }

    // Mask the 4 first colors of the RGB888 vector
    const __m128i shuffleMask = _mm_set_epi8(char(0xff), 9, 10, 11, char(0xff), 6, 7, 8, char(0xff), 3, 4, 5, char(0xff), 0, 1, 2);

    // Mask the 4 last colors of a RGB888 vector with an offset of 1 (so the last 3 bytes are RGB)
    const __m128i shuffleMaskEnd = _mm_set_epi8(char(0xff), 13, 14, 15, char(0xff), 10, 11, 12, char(0xff), 7, 8, 9, char(0xff), 4, 5, 6);

    // Mask to have alpha = 0xff
    const __m128i alphaMask = _mm_set1_epi32(0xff000000);

    // The source is only ever read, so keep it const instead of casting the
    // qualifier away.
    const __m128i *inVectorPtr = reinterpret_cast<const __m128i *>(src);
    __m128i *dstVectorPtr = reinterpret_cast<__m128i *>(dst);

    const int simdRoundCount = (len - prologLength) / 16; // one iteration in the loop converts 16 pixels
    for (int i = 0; i < simdRoundCount; ++i) {
        /*
         RGB888 has 5 pixels per vector, + 1 byte from the next pixel. The idea here is
         to load vectors of RGB888 and use palignr to select a vector out of two vectors.

         After 3 loads of RGB888 and 3 stores of RGB32, we have 4 pixels left in the last
         vector of RGB888, we can mask it directly to get a last store or RGB32. After that,
         the first next byte is a R, and we can loop for the next 16 pixels.

         The conversion itself is done with a byte permutation (pshufb).
         */
        __m128i firstSrcVector = _mm_lddqu_si128(inVectorPtr);
        __m128i outputVector = _mm_shuffle_epi8(firstSrcVector, shuffleMask);
        _mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask));
        ++inVectorPtr;
        ++dstVectorPtr;

        // There are 4 unused bytes left in srcVector, we need to load the next 16 bytes
        // and load the next input with palignr
        __m128i secondSrcVector = _mm_lddqu_si128(inVectorPtr);
        __m128i srcVector = _mm_alignr_epi8(secondSrcVector, firstSrcVector, 12);
        outputVector = _mm_shuffle_epi8(srcVector, shuffleMask);
        _mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask));
        ++inVectorPtr;
        ++dstVectorPtr;
        firstSrcVector = secondSrcVector;

        // We now have 8 unused bytes left in firstSrcVector
        secondSrcVector = _mm_lddqu_si128(inVectorPtr);
        srcVector = _mm_alignr_epi8(secondSrcVector, firstSrcVector, 8);
        outputVector = _mm_shuffle_epi8(srcVector, shuffleMask);
        _mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask));
        ++inVectorPtr;
        ++dstVectorPtr;

        // There are now 12 unused bytes in secondSrcVector (the vector loaded last).
        // We can mask them directly, almost there.
        outputVector = _mm_shuffle_epi8(secondSrcVector, shuffleMaskEnd);
        _mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask));
        ++dstVectorPtr;
    }
    src = reinterpret_cast<const uchar *>(inVectorPtr);
    dst = reinterpret_cast<quint32 *>(dstVectorPtr);

    // Epilogue: convert whatever did not fill a whole 16-pixel round.
    while (dst != end) {
        *dst++ = qRgb(src[0], src[1], src[2]);
        src += 3;
    }
}
/*
    Converts a whole RGB888 image into a 32-bit destination image by running
    the SSSE3 scanline converter on every row. Source and destination must
    have identical dimensions; each side is advanced by its own
    bytes_per_line. The conversion flags are ignored.
*/
void convert_RGB888_to_RGB32_ssse3(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
{
    Q_ASSERT(src->format == QImage::Format_RGB888);
    Q_ASSERT(dest->format == QImage::Format_RGB32 || dest->format == QImage::Format_ARGB32 || dest->format == QImage::Format_ARGB32_Premultiplied);
    Q_ASSERT(src->width == dest->width);
    Q_ASSERT(src->height == dest->height);

    // walk both images as raw bytes, one scanline per iteration
    const uchar *srcLine = reinterpret_cast<const uchar *>(src->data);
    uchar *destLine = reinterpret_cast<uchar *>(dest->data);

    for (int row = 0; row < src->height; ++row) {
        qt_convert_rgb888_to_rgb32_ssse3(reinterpret_cast<quint32 *>(destLine), srcLine, src->width);
        srcLine += src->bytes_per_line;
        destLine += dest->bytes_per_line;
    }
}
QT_END_NAMESPACE
#endif // QT_HAVE_SSSE3

View file

@ -45,7 +45,6 @@
#include <qvariant.h>
#include <qvector.h>
#include <qbuffer.h>
#include <qsimd_p.h>
#include <stdio.h> // jpeglib needs this to be pre-included
#include <setjmp.h>
@ -779,22 +778,6 @@ bool QJpegHandlerPrivate::read(QImage *image)
// Constructs the handler with its private implementation and, depending on
// what was compiled in and what the CPU reports at run time, replaces the
// RGB888 -> RGB32 scanline converter pointer with a SIMD implementation.
QJpegHandler::QJpegHandler()
: d(new QJpegHandlerPrivate(this))
{
const uint features = qDetectCPUFeatures();
// keeps the compiler quiet when neither SIMD branch below is compiled in
Q_UNUSED(features);
#if defined(QT_HAVE_NEON)
// from qimage_neon.cpp
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_neon(quint32 *dst, const uchar *src, int len);
if (features & NEON)
rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_neon;
#endif // QT_HAVE_NEON
#if defined(QT_HAVE_SSSE3)
// from qimage_ssse3.cpp
// (assigned after the NEON branch, so it wins if both were ever enabled)
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len);
if (features & SSSE3)
rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_ssse3;
#endif // QT_HAVE_SSSE3
}
QJpegHandler::~QJpegHandler()

View file

@ -40,22 +40,18 @@
****************************************************************************/
#include "qpixmap.h"
#include <qfont_p.h>
#include "qfont_p.h"
#include "qpixmap_raster_p.h"
#include "qnativeimage_p.h"
#include "qimage_p.h"
#include "qpaintengine.h"
#include "qbitmap.h"
#include "qimage.h"
#include <QBuffer>
#include <QImageReader>
#include <qimage_p.h>
#include <qsimd_p.h>
#include <qwidget_p.h>
#include <qdrawhelper_p.h>
#include "qbuffer.h"
#include "qimagereader.h"
#include "qimage_p.h"
#include "qwidget_p.h"
#include "qdrawhelper_p.h"
QT_BEGIN_NAMESPACE
@ -163,7 +159,6 @@ void QRasterPixmapData::fill(const QColor &color)
if (alpha != 255) {
if (!image.hasAlphaChannel()) {
QImage::Format toFormat;
#if !(defined(QT_HAVE_NEON) || defined(QT_ALWAYS_HAVE_SSE2))
if (image.format() == QImage::Format_RGB16)
toFormat = QImage::Format_ARGB8565_Premultiplied;
else if (image.format() == QImage::Format_RGB666)
@ -173,7 +168,6 @@ void QRasterPixmapData::fill(const QColor &color)
else if (image.format() == QImage::Format_RGB444)
toFormat = QImage::Format_ARGB4444_Premultiplied;
else
#endif
toFormat = QImage::Format_ARGB32_Premultiplied;
if (!image.isNull() && qt_depthForFormat(image.format()) == qt_depthForFormat(toFormat)) {
@ -364,7 +358,6 @@ void QRasterPixmapData::createPixmapForImage(QImage &sourceImage, Qt::ImageConve
QImage::Format opaqueFormat = QNativeImage::systemFormat();
QImage::Format alphaFormat = QImage::Format_ARGB32_Premultiplied;
#if !defined(QT_HAVE_NEON) && !defined(QT_ALWAYS_HAVE_SSE2)
switch (opaqueFormat) {
case QImage::Format_RGB16:
alphaFormat = QImage::Format_ARGB8565_Premultiplied;
@ -372,7 +365,6 @@ void QRasterPixmapData::createPixmapForImage(QImage &sourceImage, Qt::ImageConve
default: // We don't care about the others...
break;
}
#endif
if (!sourceImage.hasAlphaChannel()) {
format = opaqueFormat;

View file

@ -824,7 +824,6 @@ QApplication::QApplication(Display *dpy, int &argc, char **argv,
#endif // Q_WS_X11
extern void qInitDrawhelperAsm();
extern void qInitImageConversions();
extern int qRegisterGuiVariant();
extern int qUnregisterGuiVariant();
#ifndef QT_NO_STATEMACHINE
@ -881,8 +880,6 @@ void QApplicationPrivate::initialize()
// Set up which span functions should be used in raster engine...
qInitDrawhelperAsm();
// and QImage conversion functions
qInitImageConversions();
#ifndef QT_NO_WHEELEVENT
QApplicationPrivate::wheel_scroll_lines = 3;

View file

@ -41,7 +41,6 @@ set(GUI_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/painting/qtextureglyphcache_p.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qtransform.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qwindowsurface_p.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qwmatrix.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qpaintengine_raster_p.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_p.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qblendfunctions_p.h
@ -56,14 +55,6 @@ set(GUI_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/painting/qgraphicssystemfactory_p.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qgraphicssystemplugin_p.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qwindowsurface_raster_p.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_x86_p.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_mmx_p.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_sse_p.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawingprimitive_sse2_p.h
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_neon_p.h
# XXX: obsolete?
${CMAKE_CURRENT_SOURCE_DIR}/painting/qrgb.h
)
@ -113,20 +104,6 @@ set(GUI_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/painting/qwindowsurface_raster.cpp
)
# SIMD specific draw helpers. katie_setup_sources() matches each file name
# against the known instruction set names (3dnow, avx, mmx, sse, sse2, sse3,
# ssse3, iwmmxt, neon), sets the corresponding COMPILE_FLAGS on the file when
# the KATIE_<FLAG>_RESULT check is true and appends to GUI_SOURCES only those
# files that ended up with flags, the rest are not built at all.
katie_setup_sources(
GUI_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_mmx.cpp
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_mmx3dnow.cpp
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_sse3dnow.cpp
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_sse.cpp
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_sse2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_ssse3.cpp
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_iwmmxt.cpp
# TODO: link to pixman for arm-neon?
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_neon.cpp
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_neon_asm.S
)
if(WITH_X11 AND X11_FOUND)
set(GUI_HEADERS
${GUI_HEADERS}

View file

@ -42,11 +42,6 @@
#include <qdrawhelper_p.h>
#include <qpaintengine_raster_p.h>
#include <qpainter_p.h>
#include <qdrawhelper_x86_p.h>
#ifdef QT_HAVE_ARM_SIMD
#include <qdrawhelper_arm_simd_p.h>
#endif
#include <qdrawhelper_neon_p.h>
#include <qmath_p.h>
#include <qmath.h>
@ -630,76 +625,6 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, i
return (((tlrb + trrb + blrb + brrb) >> 8) & 0x00ff00ff) | ((tlag + trag + blag + brag) & 0xff00ff00);
}
#if defined(QT_ALWAYS_HAVE_SSE2)
// SSE2 bilinear interpolation of four output pixels at once.
//
// tl, tr, bl, br   vectors holding the top-left, top-right, bottom-left and
//                  bottom-right source pixel of each of the four outputs
// distx, disty     per 16-bit lane interpolation distances; callers in this
//                  file derive them with ">> 12" from a 16-bit fraction, i.e.
//                  presumably in the range 0..15
// colorMask        0x00ff00ff in every 32-bit lane, selects the red/blue bytes
// v_256            256 in every 16-bit lane
// b                destination for the four resulting pixels, written with an
//                  unaligned store
//
// The four weights (idxidy, dxidy, idxdy, dxdy) always add up to 256, so after
// weighting the alpha/green and red/blue halves separately the sum is brought
// back to 8 bits per channel by keeping the high byte of each 16-bit lane.
#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b) \
{ \
const __m128i dxdy = _mm_mullo_epi16 (distx, disty); \
const __m128i distx_ = _mm_slli_epi16(distx, 4); \
const __m128i disty_ = _mm_slli_epi16(disty, 4); \
const __m128i idxidy = _mm_add_epi16(dxdy, _mm_sub_epi16(v_256, _mm_add_epi16(distx_, disty_))); \
const __m128i dxidy = _mm_sub_epi16(distx_, dxdy); \
const __m128i idxdy = _mm_sub_epi16(disty_, dxdy); \
\
__m128i tlAG = _mm_srli_epi16(tl, 8); \
__m128i tlRB = _mm_and_si128(tl, colorMask); \
__m128i trAG = _mm_srli_epi16(tr, 8); \
__m128i trRB = _mm_and_si128(tr, colorMask); \
__m128i blAG = _mm_srli_epi16(bl, 8); \
__m128i blRB = _mm_and_si128(bl, colorMask); \
__m128i brAG = _mm_srli_epi16(br, 8); \
__m128i brRB = _mm_and_si128(br, colorMask); \
\
tlAG = _mm_mullo_epi16(tlAG, idxidy); \
tlRB = _mm_mullo_epi16(tlRB, idxidy); \
trAG = _mm_mullo_epi16(trAG, dxidy); \
trRB = _mm_mullo_epi16(trRB, dxidy); \
blAG = _mm_mullo_epi16(blAG, idxdy); \
blRB = _mm_mullo_epi16(blRB, idxdy); \
brAG = _mm_mullo_epi16(brAG, dxdy); \
brRB = _mm_mullo_epi16(brRB, dxdy); \
\
/* Add the values, and shift to only keep 8 significant bits per colors */ \
__m128i rAG =_mm_add_epi16(_mm_add_epi16(tlAG, trAG), _mm_add_epi16(blAG, brAG)); \
__m128i rRB =_mm_add_epi16(_mm_add_epi16(tlRB, trRB), _mm_add_epi16(blRB, brRB)); \
rAG = _mm_andnot_si128(colorMask, rAG); \
rRB = _mm_srli_epi16(rRB, 8); \
_mm_storeu_si128((__m128i*)(b), _mm_or_si128(rAG, rRB)); \
}
#endif
#if defined(QT_ALWAYS_HAVE_NEON)
// NEON bilinear interpolation of four output pixels at once.
//
// tl, tr, bl, br   int16x8_t views of the top-left, top-right, bottom-left and
//                  bottom-right source pixel of each of the four outputs
// distx, disty     per 16-bit lane interpolation distances; callers in this
//                  file derive them with ">> 12" from a 16-bit fraction, i.e.
//                  presumably in the range 0..15
// disty_           disty already shifted left by 4; unlike distx_ it is not
//                  computed here and must be supplied by the caller
// colorMask        0x00ff in every 16-bit lane, selects the red/blue bytes
// invColorMask     bitwise complement of colorMask
// v_256            256 in every 16-bit lane
// b                destination for the four resulting pixels
//
// The four weights (idxidy, dxidy, idxdy, dxdy) always add up to 256. The
// weighted sums are built with multiply-accumulate and reduced to 8 bits per
// channel by keeping the high byte of each 16-bit lane.
#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b) \
{ \
const int16x8_t dxdy = vmulq_s16(distx, disty); \
const int16x8_t distx_ = vshlq_n_s16(distx, 4); \
const int16x8_t idxidy = vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \
const int16x8_t dxidy = vsubq_s16(distx_, dxdy); \
const int16x8_t idxdy = vsubq_s16(disty_, dxdy); \
\
int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \
int16x8_t tlRB = vandq_s16(tl, colorMask); \
int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \
int16x8_t trRB = vandq_s16(tr, colorMask); \
int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \
int16x8_t blRB = vandq_s16(bl, colorMask); \
int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \
int16x8_t brRB = vandq_s16(br, colorMask); \
\
int16x8_t rAG = vmulq_s16(tlAG, idxidy); \
int16x8_t rRB = vmulq_s16(tlRB, idxidy); \
rAG = vmlaq_s16(rAG, trAG, dxidy); \
rRB = vmlaq_s16(rRB, trRB, dxidy); \
rAG = vmlaq_s16(rAG, blAG, idxdy); \
rRB = vmlaq_s16(rRB, blRB, idxdy); \
rAG = vmlaq_s16(rAG, brAG, dxdy); \
rRB = vmlaq_s16(rRB, brRB, dxdy); \
\
rAG = vandq_s16(invColorMask, rAG); \
rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \
vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \
}
#endif
template<TextureBlendType blendType>
Q_STATIC_TEMPLATE_FUNCTION inline void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2)
{
@ -806,70 +731,6 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *
}
}
if (blendType != BlendTransformedBilinearTiled &&
(format == QImage::Format_ARGB32_Premultiplied || format == QImage::Format_RGB32)) {
#if defined(QT_ALWAYS_HAVE_SSE2)
const __m128i disty_ = _mm_set1_epi16(disty);
const __m128i idisty_ = _mm_set1_epi16(idisty);
const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
lim -= 3;
for (; f < lim; x += 4, f += 4) {
// Load 4 pixels from s1, and split the alpha-green and red-blue component
__m128i top = _mm_loadu_si128((__m128i*)((const uint *)(s1)+x));
__m128i topAG = _mm_srli_epi16(top, 8);
__m128i topRB = _mm_and_si128(top, colorMask);
// Multiplies each colour component by idisty
topAG = _mm_mullo_epi16 (topAG, idisty_);
topRB = _mm_mullo_epi16 (topRB, idisty_);
// Same for the s2 vector
__m128i bottom = _mm_loadu_si128((__m128i*)((const uint *)(s2)+x));
__m128i bottomAG = _mm_srli_epi16(bottom, 8);
__m128i bottomRB = _mm_and_si128(bottom, colorMask);
bottomAG = _mm_mullo_epi16 (bottomAG, disty_);
bottomRB = _mm_mullo_epi16 (bottomRB, disty_);
// Add the values, and shift to only keep 8 significant bits per colors
__m128i rAG =_mm_add_epi16(topAG, bottomAG);
rAG = _mm_srli_epi16(rAG, 8);
_mm_storeu_si128((__m128i*)(&intermediate_buffer[1][f]), rAG);
__m128i rRB =_mm_add_epi16(topRB, bottomRB);
rRB = _mm_srli_epi16(rRB, 8);
_mm_storeu_si128((__m128i*)(&intermediate_buffer[0][f]), rRB);
}
#elif defined(QT_ALWAYS_HAVE_NEON)
const int16x8_t disty_ = vdupq_n_s16(disty);
const int16x8_t idisty_ = vdupq_n_s16(idisty);
const int16x8_t colorMask = vdupq_n_s16(0x00ff);
lim -= 3;
for (; f < lim; x += 4, f += 4) {
// Load 4 pixels from s1, and split the alpha-green and red-blue component
int16x8_t top = vld1q_s16((int16_t*)((const uint *)(s1)+x));
int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), 8));
int16x8_t topRB = vandq_s16(top, colorMask);
// Multiplies each colour component by idisty
topAG = vmulq_s16(topAG, idisty_);
topRB = vmulq_s16(topRB, idisty_);
// Same for the s2 vector
int16x8_t bottom = vld1q_s16((int16_t*)((const uint *)(s2)+x));
int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), 8));
int16x8_t bottomRB = vandq_s16(bottom, colorMask);
bottomAG = vmulq_s16(bottomAG, disty_);
bottomRB = vmulq_s16(bottomRB, disty_);
// Add the values, and shift to only keep 8 significant bits per colors
int16x8_t rAG = vaddq_s16(topAG, bottomAG);
rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8));
vst1q_s16((int16_t*)(&intermediate_buffer[1][f]), rAG);
int16x8_t rRB = vaddq_s16(topRB, bottomRB);
rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8));
vst1q_s16((int16_t*)(&intermediate_buffer[0][f]), rRB);
}
#endif
}
for (; f < count; f++) { // Same as above but without sse2
if (blendType == BlendTransformedBilinearTiled) {
if (x >= image_width) x -= image_width;
@ -936,123 +797,6 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *
const uchar *s2 = data->texture.scanLine(y2);
int disty = (fy & 0x0000ffff) >> 12;
if (blendType != BlendTransformedBilinearTiled &&
(format == QImage::Format_ARGB32_Premultiplied || format == QImage::Format_RGB32)) {
#define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \
while (b < end) { \
int x1 = (fx >> 16); \
int x2; \
fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); \
if (x1 != x2) \
break; \
uint tl = fetch(s1, x1, data->texture.colorTable); \
uint tr = fetch(s1, x2, data->texture.colorTable); \
uint bl = fetch(s2, x1, data->texture.colorTable); \
uint br = fetch(s2, x2, data->texture.colorTable); \
int distx = (fx & 0x0000ffff) >> 12; \
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \
fx += fdx; \
++b; \
} \
uint *boundedEnd; \
if (fdx > 0) \
boundedEnd = qMin(end, buffer + uint((image_x2 - (fx >> 16)) / data->m11)); \
else \
boundedEnd = qMin(end, buffer + uint((image_x1 - (fx >> 16)) / data->m11)); \
boundedEnd -= 3;
#if defined(QT_ALWAYS_HAVE_SSE2)
BILINEAR_DOWNSCALE_BOUNDS_PROLOG
const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
const __m128i v_256 = _mm_set1_epi16(256);
const __m128i v_disty = _mm_set1_epi16(disty);
__m128i v_fdx = _mm_set1_epi32(fdx*4);
ptrdiff_t secondLine = reinterpret_cast<const uint *>(s2) - reinterpret_cast<const uint *>(s1);
union Vect_buffer { __m128i vect; quint32 i[4]; };
Vect_buffer v_fx;
for (int i = 0; i < 4; i++) {
v_fx.i[i] = fx;
fx += fdx;
}
while (b < boundedEnd) {
Vect_buffer tl, tr, bl, br;
for (int i = 0; i < 4; i++) {
int x1 = v_fx.i[i] >> 16;
const uint *addr_tl = reinterpret_cast<const uint *>(s1) + x1;
const uint *addr_tr = addr_tl + 1;
tl.i[i] = *addr_tl;
tr.i[i] = *addr_tr;
bl.i[i] = *(addr_tl+secondLine);
br.i[i] = *(addr_tr+secondLine);
}
__m128i v_distx = _mm_srli_epi16(v_fx.vect, 12);
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
interpolate_4_pixels_16_sse2(tl.vect, tr.vect, bl.vect, br.vect, v_distx, v_disty, colorMask, v_256, b);
b+=4;
v_fx.vect = _mm_add_epi32(v_fx.vect, v_fdx);
}
fx = v_fx.i[0];
#elif defined(QT_ALWAYS_HAVE_NEON)
BILINEAR_DOWNSCALE_BOUNDS_PROLOG
const int16x8_t colorMask = vdupq_n_s16(0x00ff);
const int16x8_t invColorMask = vmvnq_s16(colorMask);
const int16x8_t v_256 = vdupq_n_s16(256);
const int16x8_t v_disty = vdupq_n_s16(disty);
const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
int32x4_t v_fdx = vdupq_n_s32(fdx*4);
ptrdiff_t secondLine = reinterpret_cast<const uint *>(s2) - reinterpret_cast<const uint *>(s1);
union Vect_buffer { int32x4_t vect; quint32 i[4]; };
Vect_buffer v_fx;
for (int i = 0; i < 4; i++) {
v_fx.i[i] = fx;
fx += fdx;
}
const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
while (b < boundedEnd) {
Vect_buffer tl, tr, bl, br;
Vect_buffer v_fx_shifted;
v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16);
int32x4_t v_distx = vshrq_n_s32(vandq_s32(v_fx.vect, v_ffff_mask), 12);
for (int i = 0; i < 4; i++) {
int x1 = v_fx_shifted.i[i];
const uint *addr_tl = reinterpret_cast<const uint *>(s1) + x1;
const uint *addr_tr = addr_tl + 1;
tl.i[i] = *addr_tl;
tr.i[i] = *addr_tr;
bl.i[i] = *(addr_tl+secondLine);
br.i[i] = *(addr_tr+secondLine);
}
v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
interpolate_4_pixels_16_neon(vreinterpretq_s16_s32(tl.vect), vreinterpretq_s16_s32(tr.vect), vreinterpretq_s16_s32(bl.vect), vreinterpretq_s16_s32(br.vect), vreinterpretq_s16_s32(v_distx), v_disty, v_disty_, colorMask, invColorMask, v_256, b);
b+=4;
v_fx.vect = vaddq_s32(v_fx.vect, v_fdx);
}
fx = v_fx.i[0];
#endif
}
while (b < end) {
int x1 = (fx >> 16);
int x2;
@ -1493,32 +1237,11 @@ static const uint * QT_FASTCALL qt_fetch_conical_gradient(uint *buffer, const Op
return b;
}
#if defined(Q_CC_RVCT)
// Force ARM code generation for comp_func_* -methods
# pragma push
# pragma arm
# if defined(QT_HAVE_ARMV6)
static __forceinline void preload(const uint *start)
{
asm( "pld [start]" );
}
static const uint L2CacheLineLength = 32;
static const uint L2CacheLineLengthInInts = L2CacheLineLength/sizeof(uint);
# define PRELOAD_INIT(x) preload(x);
# define PRELOAD_INIT2(x,y) PRELOAD_INIT(x) PRELOAD_INIT(y)
# define PRELOAD_COND(x) if (((uint)&x[i])%L2CacheLineLength == 0) preload(&x[i] + L2CacheLineLengthInInts);
// Two consecutive preloads stall, so space them out a bit by using different modulus.
# define PRELOAD_COND2(x,y) if (((uint)&x[i])%L2CacheLineLength == 0) preload(&x[i] + L2CacheLineLengthInInts); \
if (((uint)&y[i])%L2CacheLineLength == 16) preload(&y[i] + L2CacheLineLengthInInts);
# endif // QT_HAVE_ARMV6
#endif // Q_CC_RVCT
#if !defined(Q_CC_RVCT) || !defined(QT_HAVE_ARMV6)
# define PRELOAD_INIT(x)
# define PRELOAD_INIT2(x,y)
# define PRELOAD_COND(x)
# define PRELOAD_COND2(x,y)
#endif
// TODO: get rid of those
#define PRELOAD_INIT(x)
#define PRELOAD_INIT2(x,y)
#define PRELOAD_COND(x)
#define PRELOAD_COND2(x,y)
/* The constant alpha factor describes an alpha factor that gets applied
to the result of the composition operation combining it with the destination.
@ -6918,218 +6641,6 @@ void qInitDrawhelperAsm()
qt_memfill32 = qt_memfill_template<quint32, quint32>;
qt_memfill16 = qt_memfill_quint16; //qt_memfill_template<quint16, quint16>;
CompositionFunction *functionForModeAsm = 0;
CompositionFunctionSolid *functionForModeSolidAsm = 0;
const uint features = qDetectCPUFeatures();
if (false) {
#ifdef QT_HAVE_SSE2
} else if (features & SSE2) {
qt_memfill32 = qt_memfill32_sse2;
qt_memfill16 = qt_memfill16_sse2;
qDrawHelper[QImage::Format_RGB32].bitmapBlit = qt_bitmapblit32_sse2;
qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2;
qDrawHelper[QImage::Format_ARGB32_Premultiplied].bitmapBlit = qt_bitmapblit32_sse2;
qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse2;
#endif
#ifdef QT_HAVE_SSE
} else if (features & SSE) {
// qt_memfill32 = qt_memfill32_sse;
qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse;
#ifdef QT_HAVE_3DNOW
if (features & MMX3DNOW) {
qt_memfill32 = qt_memfill32_sse3dnow;
qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse3dnow;
}
#endif
#endif // SSE
}
#ifdef QT_HAVE_MMX
if (features & MMX) {
functionForModeAsm = qt_functionForMode_MMX;
functionForModeSolidAsm = qt_functionForModeSolid_MMX;
qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_mmx;
#ifdef QT_HAVE_3DNOW
if (features & MMX3DNOW) {
functionForModeAsm = qt_functionForMode_MMX3DNOW;
functionForModeSolidAsm = qt_functionForModeSolid_MMX3DNOW;
qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_mmx3dnow;
}
#endif // 3DNOW
extern void qt_blend_rgb32_on_rgb32_mmx(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
extern void qt_blend_argb32_on_argb32_mmx(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mmx;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mmx;
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mmx;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mmx;
}
#endif // MMX
#ifdef QT_HAVE_SSE
if (features & SSE) {
extern void qt_blend_rgb32_on_rgb32_sse(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
extern void qt_blend_argb32_on_argb32_sse(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse;
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse;
}
#endif // SSE
#ifdef QT_HAVE_SSE2
if (features & SSE2) {
extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
extern void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
extern const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data,
int y, int x, int length);
qt_fetch_radial_gradient = qt_fetch_radial_gradient_sse2;
}
#ifdef QT_HAVE_SSSE3
if (features & SSSE3) {
extern void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
}
#endif // SSSE3
#endif // SSE2
#ifdef QT_HAVE_SSE
if (features & SSE) {
functionForModeAsm = qt_functionForMode_SSE;
functionForModeSolidAsm = qt_functionForModeSolid_SSE;
qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_sse;
#ifdef QT_HAVE_3DNOW
if (features & MMX3DNOW) {
functionForModeAsm = qt_functionForMode_SSE3DNOW;
functionForModeSolidAsm = qt_functionForModeSolid_SSE3DNOW;
qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_sse3dnow;
}
#endif // 3DNOW
#ifdef QT_HAVE_SSE2
if (features & SSE2) {
extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels,
const uint *srcPixels,
int length,
uint const_alpha);
extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha);
extern void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha);
extern void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha);
functionForModeAsm[0] = comp_func_SourceOver_sse2;
functionForModeAsm[QPainter::CompositionMode_Source] = comp_func_Source_sse2;
functionForModeAsm[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2;
functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2;
}
#endif
}
#elif defined(QT_HAVE_SSE2)
// this is the special case when SSE2 is usable but MMX/SSE is not usable (e.g.: Windows x64 + visual studio)
if (features & SSE2) {
functionForModeAsm = qt_functionForMode_onlySSE2;
functionForModeSolidAsm = qt_functionForModeSolid_onlySSE2;
}
#endif
#ifdef QT_HAVE_IWMMXT
if (features & IWMMXT) {
functionForModeAsm = qt_functionForMode_IWMMXT;
functionForModeSolidAsm = qt_functionForModeSolid_IWMMXT;
qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_iwmmxt;
}
#endif // IWMMXT
#if defined(QT_HAVE_ARM_SIMD)
qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_arm_simd;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_arm_simd;
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_arm_simd;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_arm_simd;
#elif defined(QT_HAVE_NEON)
if (features & NEON) {
qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon;
qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_neon;
qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon;
qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon;
qTransformFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_rgb16_neon;
qTransformFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_transform_image_rgb16_on_rgb16_neon;
qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon;
functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon;
functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon;
functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_neon;
destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon;
destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon;
qMemRotateFunctions[QImage::Format_RGB16][0] = qt_memrotate90_16_neon;
qMemRotateFunctions[QImage::Format_RGB16][2] = qt_memrotate270_16_neon;
qt_memfill32 = qt_memfill32_neon;
extern const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data,
int y, int x, int length);
qt_fetch_radial_gradient = qt_fetch_radial_gradient_neon;
}
#endif
if (functionForModeSolidAsm) {
const int destinationMode = QPainter::CompositionMode_Destination;
functionForModeSolidAsm[destinationMode] = functionForModeSolid_C[destinationMode];
// use the default qdrawhelper implementation for the
// extended composition modes
for (int mode = 12; mode < 24; ++mode)
functionForModeSolidAsm[mode] = functionForModeSolid_C[mode];
functionForModeSolid = functionForModeSolidAsm;
}
if (functionForModeAsm)
functionForMode = functionForModeAsm;
qt_build_pow_tables();
}

View file

@ -1,115 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include "qdrawhelper_arm_simd_p.h"
#include <qpaintengine_raster_p.h>
#include <qblendfunctions_p.h>
#ifdef QT_HAVE_ARM_SIMD
// TODO: add GNU assembler instructions and support for other platforms.
// Default to C code for now
// Source-over blend of a premultiplied ARGB32 source rectangle onto an ARGB32
// destination rectangle (plain C fallback, no ARM assembler yet).
//   destPixels / dbpl : first destination row and destination stride in bytes
//   srcPixels  / sbpl : first source row and source stride in bytes
//   w, h              : rectangle size in pixels
//   const_alpha       : extra opacity; 256 selects the unscaled path, 0 is a no-op
void qt_blend_argb32_on_argb32_arm_simd(uchar *destPixels, int dbpl,
                                        const uchar *srcPixels, int sbpl,
                                        int w, int h,
                                        int const_alpha)
{
    // Nothing is drawn for a zero constant alpha.
    if (const_alpha == 0)
        return;

    uchar *dstRow = destPixels;
    const uchar *srcRow = srcPixels;

    if (const_alpha == 256) {
        for (int row = 0; row < h; ++row, dstRow += dbpl, srcRow += sbpl) {
            uint *out = (uint *) dstRow;
            const uint *in = (const uint *) srcRow;
            for (int col = 0; col < w; ++col) {
                const uint pixel = in[col];
                if (pixel == 0)
                    continue;               // fully transparent: keep destination
                if (pixel >= 0xff000000)
                    out[col] = pixel;       // fully opaque: plain copy
                else
                    out[col] = pixel + BYTE_MUL(out[col], qAlpha(~pixel));
            }
        }
        return;
    }

    // Map the 0..256 opacity onto the 0..255 range expected by BYTE_MUL.
    const int scaledAlpha = (const_alpha * 255) >> 8;
    for (int row = 0; row < h; ++row, dstRow += dbpl, srcRow += sbpl) {
        uint *out = (uint *) dstRow;
        const uint *in = (const uint *) srcRow;
        for (int col = 0; col < w; ++col) {
            const uint pixel = BYTE_MUL(in[col], scaledAlpha);
            out[col] = pixel + BYTE_MUL(out[col], qAlpha(~pixel));
        }
    }
}
// Copies an RGB32 source rectangle onto an RGB32 destination rectangle.
// Any const_alpha other than 256 is forwarded to the ARGB32 blend routine;
// otherwise each row is copied verbatim (qt_memconvert for rows of up to
// 64 pixels, memcpy for wider rows).
void qt_blend_rgb32_on_rgb32_arm_simd(uchar *destPixels, int dbpl,
                                      const uchar *srcPixels, int sbpl,
                                      int w, int h,
                                      int const_alpha)
{
    if (const_alpha != 256) {
        qt_blend_argb32_on_argb32_arm_simd(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
        return;
    }

    uint *dstRow = (uint *) destPixels;
    const uint *srcRow = (const uint *) srcPixels;

    for (int row = 0; row < h; ++row) {
        if (w <= 64)
            qt_memconvert(dstRow, srcRow, w);
        else
            memcpy(dstRow, srcRow, w * 4);   // 4 bytes per 32-bit pixel

        // Strides are expressed in bytes, so step through uchar pointers.
        dstRow = (quint32 *)(((uchar *) dstRow) + dbpl);
        srcRow = (const quint32 *)(((const uchar *) srcRow) + sbpl);
    }
}
#endif // QT_HAVE_ARM_SIMD

View file

@ -1,76 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QDRAWHELPER_ARM_SIMD_P_H
#define QDRAWHELPER_ARM_SIMD_P_H
//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists purely as an
// implementation detail. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//
#include <qdrawhelper_p.h>
QT_BEGIN_NAMESPACE
#if defined(QT_HAVE_ARM_SIMD)
// Blend routines for ARM targets, only declared when QT_HAVE_ARM_SIMD is set.
//
// Common parameters:
//   destPixels, dbpl - first destination row and destination stride in bytes
//   srcPixels, sbpl  - first source row and source stride in bytes
//   w, h             - size of the rectangle in pixels
//   const_alpha      - additional opacity; 256 means "no extra scaling"

// Source-over blend of premultiplied ARGB32 pixels onto ARGB32 pixels.
void qt_blend_argb32_on_argb32_arm_simd(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
// RGB32 onto RGB32; a row-by-row copy when const_alpha is 256, otherwise
// handled by the ARGB32 routine above.
void qt_blend_rgb32_on_rgb32_arm_simd(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
#endif // QT_HAVE_ARM_SIMD
QT_END_NAMESPACE
#endif // QDRAWHELPER_ARM_SIMD_P_H

View file

@ -1,151 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifdef QT_HAVE_IWMMXT
#include <mmintrin.h>
#if defined(Q_OS_WINCE)
# include "qplatformdefs.h"
#endif
#if !defined(__IWMMXT__) && !defined(Q_OS_WINCE)
# include <xmmintrin.h>
#elif defined(Q_OS_WINCE_STD) && defined(_X86_)
# pragma warning(disable: 4391)
# include <xmmintrin.h>
#endif
#include <qdrawhelper_sse_p.h>
QT_BEGIN_NAMESPACE
// Fallback for toolchains whose headers do not provide _MM_SHUFFLE: packs four
// 2-bit lane selectors into the 8-bit immediate taken by the shuffle intrinsics
// (fp3 ends up in the two highest bits, fp0 in the two lowest).
#ifndef _MM_SHUFFLE
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
#endif
// iwMMXt flavour of the MMX intrinsic helpers: overrides the lane broadcasts
// with the 16-bit shuffle instruction and needs no state clean-up in end().
struct QIWMMXTIntrinsics : public QMMXCommonIntrinsics
{
    // Replicates 16-bit lane 3 of x into all four lanes.
    static inline m64 alpha(m64 x)
    {
        return _mm_shuffle_pi16(x, _MM_SHUFFLE(3, 3, 3, 3));
    }

    // Widens the bytes of x to 16-bit lanes (interleaving with mmx_0x0000),
    // then replicates lane 0 into all four lanes.
    static inline m64 _load_alpha(uint x, const m64 &mmx_0x0000)
    {
        return _mm_shuffle_pi16(_mm_unpacklo_pi8(_mm_cvtsi32_si64(x), mmx_0x0000),
                                _MM_SHUFFLE(0, 0, 0, 0));
    }

    // Intentionally empty: nothing to reset after a run of operations.
    static inline void end()
    {
    }
};
// Solid-colour composition functions instantiated for iwMMXt. The entry order
// mirrors qt_functionForMode_IWMMXT below, i.e. one slot per composition mode.
// Slots holding 0 have no iwMMXt implementation: slot 4 (where the non-solid
// table has comp_func_Destination) and the twelve "svg 1.2" slots.
// NOTE(review): the 0 slots presumably get replaced with the generic C
// implementations during draw-helper initialisation - confirm against the
// code that selects this table.
CompositionFunctionSolid qt_functionForModeSolid_IWMMXT[numCompositionFunctions] = {
comp_func_solid_SourceOver<QIWMMXTIntrinsics>,
comp_func_solid_DestinationOver<QIWMMXTIntrinsics>,
comp_func_solid_Clear<QIWMMXTIntrinsics>,
comp_func_solid_Source<QIWMMXTIntrinsics>,
0,
comp_func_solid_SourceIn<QIWMMXTIntrinsics>,
comp_func_solid_DestinationIn<QIWMMXTIntrinsics>,
comp_func_solid_SourceOut<QIWMMXTIntrinsics>,
comp_func_solid_DestinationOut<QIWMMXTIntrinsics>,
comp_func_solid_SourceAtop<QIWMMXTIntrinsics>,
comp_func_solid_DestinationAtop<QIWMMXTIntrinsics>,
comp_func_solid_XOR<QIWMMXTIntrinsics>,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // svg 1.2 modes
rasterop_solid_SourceOrDestination<QIWMMXTIntrinsics>,
rasterop_solid_SourceAndDestination<QIWMMXTIntrinsics>,
rasterop_solid_SourceXorDestination<QIWMMXTIntrinsics>,
rasterop_solid_NotSourceAndNotDestination<QIWMMXTIntrinsics>,
rasterop_solid_NotSourceOrNotDestination<QIWMMXTIntrinsics>,
rasterop_solid_NotSourceXorDestination<QIWMMXTIntrinsics>,
rasterop_solid_NotSource<QIWMMXTIntrinsics>,
rasterop_solid_NotSourceAndDestination<QIWMMXTIntrinsics>,
rasterop_solid_SourceAndNotDestination<QIWMMXTIntrinsics>
};
// Span composition functions for iwMMXt, one slot per composition mode.
// Only the first twelve modes (except Destination) have iwMMXt template
// instantiations; the remaining slots use the generic implementations.
//
// The array bound is spelled out, like in the solid table above, so the
// compiler rejects a table with more entries than numCompositionFunctions.
CompositionFunction qt_functionForMode_IWMMXT[numCompositionFunctions] = {
    // modes with an iwMMXt implementation (Destination is the generic one)
    comp_func_SourceOver<QIWMMXTIntrinsics>,
    comp_func_DestinationOver<QIWMMXTIntrinsics>,
    comp_func_Clear<QIWMMXTIntrinsics>,
    comp_func_Source<QIWMMXTIntrinsics>,
    comp_func_Destination,
    comp_func_SourceIn<QIWMMXTIntrinsics>,
    comp_func_DestinationIn<QIWMMXTIntrinsics>,
    comp_func_SourceOut<QIWMMXTIntrinsics>,
    comp_func_DestinationOut<QIWMMXTIntrinsics>,
    comp_func_SourceAtop<QIWMMXTIntrinsics>,
    comp_func_DestinationAtop<QIWMMXTIntrinsics>,
    comp_func_XOR<QIWMMXTIntrinsics>,
    // extended blend modes: generic implementations
    comp_func_Plus,
    comp_func_Multiply,
    comp_func_Screen,
    comp_func_Overlay,
    comp_func_Darken,
    comp_func_Lighten,
    comp_func_ColorDodge,
    comp_func_ColorBurn,
    comp_func_HardLight,
    comp_func_SoftLight,
    comp_func_Difference,
    comp_func_Exclusion,
    // raster operations: generic implementations
    rasterop_SourceOrDestination,
    rasterop_SourceAndDestination,
    rasterop_SourceXorDestination,
    rasterop_NotSourceAndNotDestination,
    rasterop_NotSourceOrNotDestination,
    rasterop_NotSourceXorDestination,
    rasterop_NotSource,
    rasterop_NotSourceAndDestination,
    rasterop_SourceAndNotDestination
};
void qt_blend_color_argb_iwmmxt(int count, const QSpan *spans, void *userData)
{
qt_blend_color_argb_x86<QIWMMXTIntrinsics>(count, spans, userData,
(CompositionFunctionSolid*)qt_functionForModeSolid_IWMMXT);
}
QT_END_NAMESPACE
#endif // QT_HAVE_IWMMXT

View file

@ -1,159 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <qdrawhelper_p.h>
#if defined(QT_HAVE_MMX)
#include <qdrawhelper_mmx_p.h>
QT_BEGIN_NAMESPACE
// Solid-colour composition functions instantiated for plain MMX. The entry
// order mirrors qt_functionForMode_MMX below, i.e. one slot per composition
// mode. Slots holding 0 have no MMX implementation: slot 4 (where the
// non-solid table has comp_func_Destination) and the twelve "svg 1.2" slots.
// NOTE(review): the 0 slots presumably get replaced with the generic C
// implementations during draw-helper initialisation - confirm against the
// code that selects this table.
CompositionFunctionSolid qt_functionForModeSolid_MMX[numCompositionFunctions] = {
comp_func_solid_SourceOver<QMMXIntrinsics>,
comp_func_solid_DestinationOver<QMMXIntrinsics>,
comp_func_solid_Clear<QMMXIntrinsics>,
comp_func_solid_Source<QMMXIntrinsics>,
0,
comp_func_solid_SourceIn<QMMXIntrinsics>,
comp_func_solid_DestinationIn<QMMXIntrinsics>,
comp_func_solid_SourceOut<QMMXIntrinsics>,
comp_func_solid_DestinationOut<QMMXIntrinsics>,
comp_func_solid_SourceAtop<QMMXIntrinsics>,
comp_func_solid_DestinationAtop<QMMXIntrinsics>,
comp_func_solid_XOR<QMMXIntrinsics>,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // svg 1.2 modes
rasterop_solid_SourceOrDestination<QMMXIntrinsics>,
rasterop_solid_SourceAndDestination<QMMXIntrinsics>,
rasterop_solid_SourceXorDestination<QMMXIntrinsics>,
rasterop_solid_NotSourceAndNotDestination<QMMXIntrinsics>,
rasterop_solid_NotSourceOrNotDestination<QMMXIntrinsics>,
rasterop_solid_NotSourceXorDestination<QMMXIntrinsics>,
rasterop_solid_NotSource<QMMXIntrinsics>,
rasterop_solid_NotSourceAndDestination<QMMXIntrinsics>,
rasterop_solid_SourceAndNotDestination<QMMXIntrinsics>
};
// Span composition functions for plain MMX, one slot per composition mode.
// The first twelve modes (except Destination) use QMMXIntrinsics template
// instantiations; the extended blend modes and the raster operations use the
// generic, non-MMX functions.
CompositionFunction qt_functionForMode_MMX[numCompositionFunctions] = {
comp_func_SourceOver<QMMXIntrinsics>,
comp_func_DestinationOver<QMMXIntrinsics>,
comp_func_Clear<QMMXIntrinsics>,
comp_func_Source<QMMXIntrinsics>,
comp_func_Destination,
comp_func_SourceIn<QMMXIntrinsics>,
comp_func_DestinationIn<QMMXIntrinsics>,
comp_func_SourceOut<QMMXIntrinsics>,
comp_func_DestinationOut<QMMXIntrinsics>,
comp_func_SourceAtop<QMMXIntrinsics>,
comp_func_DestinationAtop<QMMXIntrinsics>,
comp_func_XOR<QMMXIntrinsics>,
comp_func_Plus,
comp_func_Multiply,
comp_func_Screen,
comp_func_Overlay,
comp_func_Darken,
comp_func_Lighten,
comp_func_ColorDodge,
comp_func_ColorBurn,
comp_func_HardLight,
comp_func_SoftLight,
comp_func_Difference,
comp_func_Exclusion,
rasterop_SourceOrDestination,
rasterop_SourceAndDestination,
rasterop_SourceXorDestination,
rasterop_NotSourceAndNotDestination,
rasterop_NotSourceOrNotDestination,
rasterop_NotSourceXorDestination,
rasterop_NotSource,
rasterop_NotSourceAndDestination,
rasterop_SourceAndNotDestination
};
void qt_blend_color_argb_mmx(int count, const QSpan *spans, void *userData)
{
qt_blend_color_argb_x86<QMMXIntrinsics>(count, spans, userData,
(CompositionFunctionSolid*)qt_functionForModeSolid_MMX);
}
// Source-over blend of a premultiplied ARGB32 rectangle onto an ARGB32
// rectangle using the MMX SourceOver span function, one row at a time.
//   destPixels / dbpl : first destination row and destination stride in bytes
//   srcPixels  / sbpl : first source row and source stride in bytes
//   w, h              : rectangle size in pixels
//   const_alpha       : extra opacity in the 0..256 range
void qt_blend_argb32_on_argb32_mmx(uchar *destPixels, int dbpl,
                                   const uchar *srcPixels, int sbpl,
                                   int w, int h,
                                   int const_alpha)
{
    // A zero opacity draws nothing. Without this guard "const_alpha - 1"
    // wraps around to 0xffffffff and the span function would blend with a
    // garbage coverage value instead of leaving the destination untouched.
    if (const_alpha <= 0)
        return;

    const uint *src = (const uint *) srcPixels;
    uint *dst = (uint *) destPixels;

    // The span functions take a 0..255 coverage; 256 maps to 255 (opaque).
    const uint ca = const_alpha - 1;

    for (int y = 0; y < h; ++y) {
        comp_func_SourceOver<QMMXIntrinsics>(dst, src, w, ca);
        // Strides are in bytes, so advance through uchar pointers.
        dst = (quint32 *)(((uchar *) dst) + dbpl);
        src = (const quint32 *)(((const uchar *) src) + sbpl);
    }
}
// Blends an RGB32 rectangle onto an RGB32 rectangle using the MMX Source span
// function (a copy at full opacity, an interpolation between source and
// destination otherwise), one row at a time.
//   destPixels / dbpl : first destination row and destination stride in bytes
//   srcPixels  / sbpl : first source row and source stride in bytes
//   w, h              : rectangle size in pixels
//   const_alpha       : extra opacity in the 0..256 range
void qt_blend_rgb32_on_rgb32_mmx(uchar *destPixels, int dbpl,
                                 const uchar *srcPixels, int sbpl,
                                 int w, int h,
                                 int const_alpha)
{
    // A zero opacity leaves the destination as it is. Without this guard
    // "const_alpha - 1" wraps around to 0xffffffff and the span function
    // would interpolate with a garbage coverage value.
    if (const_alpha <= 0)
        return;

    const uint *src = (const uint *) srcPixels;
    uint *dst = (uint *) destPixels;

    // The span functions take a 0..255 coverage; 256 maps to 255 (opaque).
    const uint ca = const_alpha - 1;

    for (int y = 0; y < h; ++y) {
        comp_func_Source<QMMXIntrinsics>(dst, src, w, ca);
        // Strides are in bytes, so advance through uchar pointers.
        dst = (quint32 *)(((uchar *) dst) + dbpl);
        src = (const quint32 *)(((const uchar *) src) + sbpl);
    }
}
QT_END_NAMESPACE
#endif // QT_HAVE_MMX

View file

@ -1,130 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <qdrawhelper_x86_p.h>
#ifdef QT_HAVE_3DNOW
#include <qdrawhelper_mmx_p.h>
#include <mm3dnow.h>
QT_BEGIN_NAMESPACE
// MMX intrinsic helpers for CPUs with 3DNow!: everything is inherited from
// QMMXCommonIntrinsics, only the end-of-sequence hook differs.
struct QMMX3DNOWIntrinsics : public QMMXCommonIntrinsics
{
// Called by the composition functions once they are done with MMX
// registers; issues the 3DNow! FEMMS instruction through _m_femms().
static inline void end() {
_m_femms();
}
};
// Solid-colour composition functions instantiated for MMX with 3DNow!. The
// entry order mirrors qt_functionForMode_MMX3DNOW below, i.e. one slot per
// composition mode. Slots holding 0 have no implementation here: slot 4
// (where the non-solid table has comp_func_Destination) and the twelve
// "svg 1.2" slots.
// NOTE(review): the 0 slots presumably get replaced with the generic C
// implementations during draw-helper initialisation - confirm against the
// code that selects this table.
CompositionFunctionSolid qt_functionForModeSolid_MMX3DNOW[numCompositionFunctions] = {
comp_func_solid_SourceOver<QMMX3DNOWIntrinsics>,
comp_func_solid_DestinationOver<QMMX3DNOWIntrinsics>,
comp_func_solid_Clear<QMMX3DNOWIntrinsics>,
comp_func_solid_Source<QMMX3DNOWIntrinsics>,
0,
comp_func_solid_SourceIn<QMMX3DNOWIntrinsics>,
comp_func_solid_DestinationIn<QMMX3DNOWIntrinsics>,
comp_func_solid_SourceOut<QMMX3DNOWIntrinsics>,
comp_func_solid_DestinationOut<QMMX3DNOWIntrinsics>,
comp_func_solid_SourceAtop<QMMX3DNOWIntrinsics>,
comp_func_solid_DestinationAtop<QMMX3DNOWIntrinsics>,
comp_func_solid_XOR<QMMX3DNOWIntrinsics>,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // svg 1.2 modes
rasterop_solid_SourceOrDestination<QMMX3DNOWIntrinsics>,
rasterop_solid_SourceAndDestination<QMMX3DNOWIntrinsics>,
rasterop_solid_SourceXorDestination<QMMX3DNOWIntrinsics>,
rasterop_solid_NotSourceAndNotDestination<QMMX3DNOWIntrinsics>,
rasterop_solid_NotSourceOrNotDestination<QMMX3DNOWIntrinsics>,
rasterop_solid_NotSourceXorDestination<QMMX3DNOWIntrinsics>,
rasterop_solid_NotSource<QMMX3DNOWIntrinsics>,
rasterop_solid_NotSourceAndDestination<QMMX3DNOWIntrinsics>,
rasterop_solid_SourceAndNotDestination<QMMX3DNOWIntrinsics>
};
// Span composition functions for MMX with 3DNow!, one slot per composition
// mode. The first twelve modes (except Destination) use QMMX3DNOWIntrinsics
// template instantiations; the extended blend modes and the raster operations
// use the generic functions.
CompositionFunction qt_functionForMode_MMX3DNOW[numCompositionFunctions] = {
comp_func_SourceOver<QMMX3DNOWIntrinsics>,
comp_func_DestinationOver<QMMX3DNOWIntrinsics>,
comp_func_Clear<QMMX3DNOWIntrinsics>,
comp_func_Source<QMMX3DNOWIntrinsics>,
comp_func_Destination,
comp_func_SourceIn<QMMX3DNOWIntrinsics>,
comp_func_DestinationIn<QMMX3DNOWIntrinsics>,
comp_func_SourceOut<QMMX3DNOWIntrinsics>,
comp_func_DestinationOut<QMMX3DNOWIntrinsics>,
comp_func_SourceAtop<QMMX3DNOWIntrinsics>,
comp_func_DestinationAtop<QMMX3DNOWIntrinsics>,
comp_func_XOR<QMMX3DNOWIntrinsics>,
comp_func_Plus,
comp_func_Multiply,
comp_func_Screen,
comp_func_Overlay,
comp_func_Darken,
comp_func_Lighten,
comp_func_ColorDodge,
comp_func_ColorBurn,
comp_func_HardLight,
comp_func_SoftLight,
comp_func_Difference,
comp_func_Exclusion,
rasterop_SourceOrDestination,
rasterop_SourceAndDestination,
rasterop_SourceXorDestination,
rasterop_NotSourceAndNotDestination,
rasterop_NotSourceOrNotDestination,
rasterop_NotSourceXorDestination,
rasterop_NotSource,
rasterop_NotSourceAndDestination,
rasterop_SourceAndNotDestination
};
void qt_blend_color_argb_mmx3dnow(int count, const QSpan *spans, void *userData)
{
qt_blend_color_argb_x86<QMMX3DNOWIntrinsics>(count, spans, userData,
(CompositionFunctionSolid*)qt_functionForModeSolid_MMX3DNOW);
}
QT_END_NAMESPACE
#endif // QT_HAVE_3DNOW

View file

@ -1,892 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QDRAWHELPER_MMX_P_H
#define QDRAWHELPER_MMX_P_H
//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists purely as an
// implementation detail. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//
#include <qdrawhelper_p.h>
#include <qdrawhelper_x86_p.h>
#include <qpaintengine_raster_p.h>
#ifdef QT_HAVE_MMX
#include <mmintrin.h>
#endif
// Statement macros that declare the local constants the helper macros further
// down (negate, byte_mul, load, ...) refer to by name. A function places
// "C_FF; C_80; C_00;" (or the subset it needs) before using those helpers.
//   C_FF -> mmx_0x00ff : 0x00ff in every 16-bit lane
//   C_80 -> mmx_0x0080 : 0x0080 in every 16-bit lane
//   C_00 -> mmx_0x0000 : all bits zero
#define C_FF const m64 mmx_0x00ff = _mm_set1_pi16(0xff)
#define C_80 const m64 mmx_0x0080 = _mm_set1_pi16(0x80)
#define C_00 const m64 mmx_0x0000 = _mm_setzero_si64()
#ifdef Q_CC_MSVC
# pragma warning(disable: 4799) // No EMMS at end of function
#endif
// Short alias for the 64-bit MMX register type used throughout this header.
typedef __m64 m64;
QT_BEGIN_NAMESPACE
// Building blocks shared by all MMX-style back ends. A pixel is handled as
// four 16-bit lanes (one per 8-bit channel, see _load/_store). Helpers whose
// name starts with an underscore take the constants they need explicitly.
struct QMMXCommonIntrinsics
{
    // Replicates 16-bit lane 3 of x into all four lanes.
    static inline m64 alpha(m64 x)
    {
        const m64 upperLanes = _mm_unpackhi_pi16(x, x);
        return _mm_unpackhi_pi16(upperLanes, upperLanes);
    }

    // XORs x with the given mask.
    static inline m64 _negate(const m64 &x, const m64 &mmx_0x00ff)
    {
        return _mm_xor_si64(x, mmx_0x00ff);
    }

    // Lane-wise unsigned saturating 16-bit addition.
    static inline m64 add(const m64 &a, const m64 &b)
    {
        return _mm_adds_pu16(a, b);
    }

    // Per lane: t = a * b + bias; result = (t + (t >> 8)) >> 8.
    static inline m64 _byte_mul(const m64 &a, const m64 &b,
                                const m64 &mmx_0x0080)
    {
        const m64 biased = _mm_adds_pu16(_mm_mullo_pi16(a, b), mmx_0x0080);
        const m64 folded = _mm_adds_pu16(biased, _mm_srli_pi16(biased, 8));
        return _mm_srli_pi16(folded, 8);
    }

    // Per lane: (x * a + y * b) >> 8.
    static inline m64 interpolate_pixel_256(const m64 &x, const m64 &a,
                                            const m64 &y, const m64 &b)
    {
        return _mm_srli_pi16(_mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b)), 8);
    }

    // Per lane: t = x * a + y * b + bias; result = (t + (t >> 8)) >> 8.
    static inline m64 _interpolate_pixel_255(const m64 &x, const m64 &a,
                                             const m64 &y, const m64 &b,
                                             const m64 &mmx_0x0080)
    {
        const m64 weighted = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b));
        const m64 biased = _mm_adds_pu16(weighted, mmx_0x0080);
        return _mm_srli_pi16(_mm_adds_pu16(biased, _mm_srli_pi16(biased, 8)), 8);
    }

    // Multiplies every lane of x by the broadcast of its lane 3.
    static inline m64 _premul(m64 x, const m64 &mmx_0x0080)
    {
        return _byte_mul(x, alpha(x), mmx_0x0080);
    }

    // Widens the four bytes of x to four 16-bit lanes.
    static inline m64 _load(uint x, const m64 &mmx_0x0000)
    {
        return _mm_unpacklo_pi8(_mm_cvtsi32_si64(x), mmx_0x0000);
    }

    // Copies x into both 16-bit halves of a 32-bit word and broadcasts that
    // word to both halves of the register.
    static inline m64 _load_alpha(uint x, const m64 &)
    {
        return _mm_set1_pi32(x | (x << 16));
    }

    // Packs the four 16-bit lanes back into bytes (unsigned saturation) and
    // returns them as one 32-bit value.
    static inline uint _store(const m64 &x, const m64 &mmx_0x0000)
    {
        return _mm_cvtsi64_si32(_mm_packs_pu16(x, mmx_0x0000));
    }
};
// Convenience macros: they rewrite e.g. MM::negate(x) into
// MM::_negate(x, mmx_0x00ff), appending the constant the underscore-prefixed
// helper needs. The constants are picked up by name from the calling scope,
// so the caller must have declared them first with C_FF / C_80 / C_00.
#define negate(x) _negate(x, mmx_0x00ff)
#define byte_mul(a, b) _byte_mul(a, b, mmx_0x0080)
#define interpolate_pixel_255(x, a, y, b) _interpolate_pixel_255(x, a, y, b, mmx_0x0080)
#define premul(x) _premul(x, mmx_0x0080)
#define load(x) _load(x, mmx_0x0000)
#define load_alpha(x) _load_alpha(x, mmx_0x0000)
#define store(x) _store(x, mmx_0x0000)
/*
result = 0
d = d * cia
*/
// Shared body of the two Clear composition functions. Must be expanded inside
// a template that has a type parameter named MM.
//   const_alpha == 255 : zero-fills "length" pixels of dest
//   otherwise          : scales every dest pixel by the negated constant alpha
//                        and then calls MM::end()
#define comp_func_Clear_impl(dest, length, const_alpha)\
{\
if (const_alpha == 255) {\
qt_memfill(static_cast<quint32*>(dest), quint32(0), length);\
} else {\
C_FF; C_80; C_00;\
m64 ia = MM::negate(MM::load_alpha(const_alpha));\
for (int i = 0; i < length; ++i) {\
dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), ia));\
}\
MM::end();\
}\
}
// Clear for a solid colour: the colour argument is unused, the work is done
// by comp_func_Clear_impl on "length" pixels of dest.
template <class MM>
static void QT_FASTCALL comp_func_solid_Clear(uint *dest, int length, uint, uint const_alpha)
{
comp_func_Clear_impl(dest, length, const_alpha);
}
// Clear for a source span: the source pointer is unused, the work is done by
// comp_func_Clear_impl on "length" pixels of dest.
template <class MM>
static void QT_FASTCALL comp_func_Clear(uint *dest, const uint *, int length, uint const_alpha)
{
comp_func_Clear_impl(dest, length, const_alpha);
}
/*
result = s
dest = s * ca + d * cia
*/
// Source with a solid colour: dest = src * ca + dest * (1 - ca).
// At full coverage (const_alpha == 255) this is a plain fill.
template <class MM>
static void QT_FASTCALL comp_func_solid_Source(uint *dest, int length, uint src, uint const_alpha)
{
    if (const_alpha == 255) {
        qt_memfill(static_cast<quint32*>(dest), quint32(src), length);
        return;
    }

    C_FF; C_80; C_00;
    const m64 coverage = MM::load_alpha(const_alpha);
    const m64 inverseCoverage = MM::negate(coverage);
    // The source contribution is the same for every pixel; compute it once.
    const m64 scaledSource = MM::byte_mul(MM::load(src), coverage);
    for (int idx = 0; idx < length; ++idx) {
        const m64 scaledDest = MM::byte_mul(MM::load(dest[idx]), inverseCoverage);
        dest[idx] = MM::store(MM::add(scaledSource, scaledDest));
    }
    MM::end();
}
// Source with a source span: dest = src * ca + dest * (1 - ca).
// At full coverage (const_alpha == 255) this is a plain memcpy.
// MM::end() is called on both paths.
template <class MM>
static void QT_FASTCALL comp_func_Source(uint *dest, const uint *src, int length, uint const_alpha)
{
    if (const_alpha != 255) {
        C_FF; C_80; C_00;
        const m64 coverage = MM::load_alpha(const_alpha);
        const m64 inverseCoverage = MM::negate(coverage);
        for (int idx = 0; idx < length; ++idx) {
            dest[idx] = MM::store(MM::interpolate_pixel_255(MM::load(src[idx]), coverage,
                                                            MM::load(dest[idx]), inverseCoverage));
        }
    } else {
        ::memcpy(dest, src, length * sizeof(uint));
    }
    MM::end();
}
/*
result = s + d * sia
dest = (s + d * sia) * ca + d * cia
= s * ca + d * (sia * ca + cia)
= s * ca + d * (1 - sa*ca)
*/
// SourceOver with a solid colour: dest = src * ca + dest * (1 - sa * ca).
// When both the coverage and the colour's alpha are 255 this is a plain fill.
template <class MM>
static void QT_FASTCALL comp_func_solid_SourceOver(uint *dest, int length, uint src, uint const_alpha)
{
    const bool fullyOpaque = (const_alpha & qAlpha(src)) == 255;
    if (fullyOpaque) {
        qt_memfill(static_cast<quint32*>(dest), quint32(src), length);
        return;
    }

    C_FF; C_80; C_00;
    m64 color = MM::load(src);
    if (const_alpha != 255)
        color = MM::byte_mul(color, MM::load_alpha(const_alpha));

    // The inverse alpha of the (scaled) colour is the same for every pixel.
    const m64 inverseAlpha = MM::negate(MM::alpha(color));
    for (int idx = 0; idx < length; ++idx) {
        const m64 scaledDest = MM::byte_mul(MM::load(dest[idx]), inverseAlpha);
        dest[idx] = MM::store(MM::add(color, scaledDest));
    }
    MM::end();
}
// SourceOver with a source span: dest = src * ca + dest * (1 - sa * ca).
// Source pixels whose alpha byte is zero are skipped; at full coverage, source
// pixels whose alpha byte is 0xff are copied without blending.
template <class MM>
static void QT_FASTCALL comp_func_SourceOver(uint *dest, const uint *src, int length, uint const_alpha)
{
    C_FF; C_80; C_00;
    if (const_alpha != 255) {
        const m64 coverage = MM::load_alpha(const_alpha);
        for (int idx = 0; idx < length; ++idx) {
            if ((src[idx] & 0xff000000) != 0) {
                const m64 scaledSource = MM::byte_mul(MM::load(src[idx]), coverage);
                const m64 inverseAlpha = MM::negate(MM::alpha(scaledSource));
                dest[idx] = MM::store(MM::add(scaledSource,
                                              MM::byte_mul(MM::load(dest[idx]), inverseAlpha)));
            }
        }
    } else {
        for (int idx = 0; idx < length; ++idx) {
            const uint alphaBits = src[idx] & 0xff000000;
            if (alphaBits == 0xff000000) {
                dest[idx] = src[idx];       // opaque: straight copy
            } else if (alphaBits != 0) {
                const m64 source = MM::load(src[idx]);
                const m64 inverseAlpha = MM::negate(MM::alpha(source));
                dest[idx] = MM::store(MM::add(source,
                                              MM::byte_mul(MM::load(dest[idx]), inverseAlpha)));
            }
        }
    }
    MM::end();
}
/*
result = d + s * dia
dest = (d + s * dia) * ca + d * cia
= d + s * dia * ca
*/
// DestinationOver with a solid colour: dest = dest + src * (1 - da) * ca.
template <class MM>
static void QT_FASTCALL comp_func_solid_DestinationOver(uint *dest, int length, uint src, uint const_alpha)
{
    C_FF; C_80; C_00;
    m64 color = MM::load(src);
    if (const_alpha != 255) {
        // Fold the coverage into the colour once, outside the loop.
        color = MM::byte_mul(color, MM::load_alpha(const_alpha));
    }
    for (int idx = 0; idx < length; ++idx) {
        const m64 destination = MM::load(dest[idx]);
        const m64 inverseDestAlpha = MM::negate(MM::alpha(destination));
        dest[idx] = MM::store(MM::add(destination, MM::byte_mul(color, inverseDestAlpha)));
    }
    MM::end();
}
// DestinationOver with a source span: dest = dest + src * (1 - da) * ca.
template <class MM>
static void QT_FASTCALL comp_func_DestinationOver(uint *dest, const uint *src, int length, uint const_alpha)
{
    C_FF; C_80; C_00;
    if (const_alpha != 255) {
        const m64 coverage = MM::load_alpha(const_alpha);
        for (int idx = 0; idx < length; ++idx) {
            const m64 destination = MM::load(dest[idx]);
            // (1 - da) scaled by the coverage
            const m64 weight = MM::byte_mul(MM::negate(MM::alpha(destination)), coverage);
            dest[idx] = MM::store(MM::add(destination, MM::byte_mul(MM::load(src[idx]), weight)));
        }
    } else {
        for (int idx = 0; idx < length; ++idx) {
            const m64 destination = MM::load(dest[idx]);
            const m64 weight = MM::negate(MM::alpha(destination));
            dest[idx] = MM::store(MM::add(destination, MM::byte_mul(MM::load(src[idx]), weight)));
        }
    }
    MM::end();
}
/*
result = s * da
dest = s * da * ca + d * cia
*/
// SourceIn with a solid colour: dest = src * da * ca + dest * (1 - ca).
template <class MM>
static void QT_FASTCALL comp_func_solid_SourceIn(uint *dest, int length, uint src, uint const_alpha)
{
    C_80; C_00;
    m64 color = MM::load(src);
    if (const_alpha != 255) {
        C_FF;   // only the partial-coverage path needs the 0x00ff mask
        const m64 coverage = MM::load_alpha(const_alpha);
        color = MM::byte_mul(color, coverage);
        const m64 inverseCoverage = MM::negate(coverage);
        for (int idx = 0; idx < length; ++idx) {
            const m64 destination = MM::load(dest[idx]);
            dest[idx] = MM::store(MM::interpolate_pixel_255(color, MM::alpha(destination),
                                                            destination, inverseCoverage));
        }
    } else {
        for (int idx = 0; idx < length; ++idx) {
            const m64 destAlpha = MM::alpha(MM::load(dest[idx]));
            dest[idx] = MM::store(MM::byte_mul(color, destAlpha));
        }
    }
    MM::end();
}
template <class MM>
static void QT_FASTCALL comp_func_SourceIn(uint *dest, const uint *src, int length, uint const_alpha)
{
    C_FF; C_80; C_00;
    if (const_alpha != 255) {
        // dest = s * (da * ca) + d * cia
        m64 constAlpha = MM::load_alpha(const_alpha);
        m64 invConstAlpha = MM::negate(constAlpha);
        for (int px = 0; px < length; ++px) {
            m64 dst = MM::load(dest[px]);
            m64 srcWeight = MM::byte_mul(MM::alpha(dst), constAlpha);
            dest[px] = MM::store(MM::interpolate_pixel_255(MM::load(src[px]), srcWeight,
                                                           dst, invConstAlpha));
        }
    } else {
        // dest = s * da
        for (int px = 0; px < length; ++px) {
            m64 dstAlpha = MM::alpha(MM::load(dest[px]));
            dest[px] = MM::store(MM::byte_mul(MM::load(src[px]), dstAlpha));
        }
    }
    MM::end();
}
/*
result = d * sa
dest = d * sa * ca + d * cia
= d * (sa * ca + cia)
*/
template <class MM>
static void QT_FASTCALL comp_func_solid_DestinationIn(uint *dest, int length, uint src, uint const_alpha)
{
    C_80; C_00;

    // Multiplier applied to every destination pixel; starts as the source alpha.
    m64 factor = MM::alpha(MM::load(src));
    if (const_alpha != 255) {
        C_FF;
        // factor = sa * ca + cia
        m64 constAlpha = MM::load_alpha(const_alpha);
        m64 invConstAlpha = MM::negate(constAlpha);
        m64 scaled = MM::byte_mul(factor, constAlpha);
        factor = MM::add(scaled, invConstAlpha);
    }

    for (int px = 0; px < length; ++px)
        dest[px] = MM::store(MM::byte_mul(MM::load(dest[px]), factor));
    MM::end();
}
template <class MM>
static void QT_FASTCALL comp_func_DestinationIn(uint *dest, const uint *src, int length, uint const_alpha)
{
    C_FF; C_80; C_00;
    if (const_alpha != 255) {
        m64 constAlpha = MM::load_alpha(const_alpha);
        m64 invConstAlpha = MM::negate(constAlpha);
        for (int px = 0; px < length; ++px) {
            m64 dst = MM::load(dest[px]);
            // factor = sa * ca + cia
            m64 factor = MM::alpha(MM::load(src[px]));
            factor = MM::byte_mul(factor, constAlpha);
            factor = MM::add(factor, invConstAlpha);
            dest[px] = MM::store(MM::byte_mul(dst, factor));
        }
    } else {
        // dest = d * sa
        for (int px = 0; px < length; ++px) {
            m64 srcAlpha = MM::alpha(MM::load(src[px]));
            dest[px] = MM::store(MM::byte_mul(MM::load(dest[px]), srcAlpha));
        }
    }
    MM::end();
}
/*
result = s * dia
dest = s * dia * ca + d * cia
*/
template <class MM>
static void QT_FASTCALL comp_func_solid_SourceOut(uint *dest, int length, uint src, uint const_alpha)
{
    C_FF; C_80; C_00;
    m64 colour = MM::load(src);
    if (const_alpha != 255) {
        // dest = (s * ca) * dia + d * cia
        m64 constAlpha = MM::load_alpha(const_alpha);
        m64 invConstAlpha = MM::negate(constAlpha);
        colour = MM::byte_mul(colour, constAlpha);
        for (int px = 0; px < length; ++px) {
            m64 dst = MM::load(dest[px]);
            dest[px] = MM::store(MM::interpolate_pixel_255(colour, MM::negate(MM::alpha(dst)),
                                                           dst, invConstAlpha));
        }
    } else {
        // dest = s * dia
        for (int px = 0; px < length; ++px) {
            m64 invDstAlpha = MM::negate(MM::alpha(MM::load(dest[px])));
            dest[px] = MM::store(MM::byte_mul(colour, invDstAlpha));
        }
    }
    MM::end();
}
template <class MM>
static void QT_FASTCALL comp_func_SourceOut(uint *dest, const uint *src, int length, uint const_alpha)
{
    C_FF; C_80; C_00;
    if (const_alpha != 255) {
        // dest = s * (dia * ca) + d * cia
        m64 constAlpha = MM::load_alpha(const_alpha);
        m64 invConstAlpha = MM::negate(constAlpha);
        for (int px = 0; px < length; ++px) {
            m64 dst = MM::load(dest[px]);
            m64 srcWeight = MM::byte_mul(MM::negate(MM::alpha(dst)), constAlpha);
            dest[px] = MM::store(MM::interpolate_pixel_255(MM::load(src[px]), srcWeight,
                                                           dst, invConstAlpha));
        }
    } else {
        // dest = s * dia
        for (int px = 0; px < length; ++px) {
            m64 invDstAlpha = MM::negate(MM::alpha(MM::load(dest[px])));
            dest[px] = MM::store(MM::byte_mul(MM::load(src[px]), invDstAlpha));
        }
    }
    MM::end();
}
/*
result = d * sia
dest = d * sia * ca + d * cia
= d * (sia * ca + cia)
*/
template <class MM>
static void QT_FASTCALL comp_func_solid_DestinationOut(uint *dest, int length, uint src, uint const_alpha)
{
    C_FF; C_80; C_00;

    // Multiplier applied to every destination pixel; starts as the inverse source alpha.
    m64 factor = MM::negate(MM::alpha(MM::load(src)));
    if (const_alpha != 255) {
        // factor = sia * ca + cia
        m64 constAlpha = MM::load_alpha(const_alpha);
        m64 scaled = MM::byte_mul(factor, constAlpha);
        factor = MM::add(scaled, MM::negate(constAlpha));
    }

    for (int px = 0; px < length; ++px)
        dest[px] = MM::store(MM::byte_mul(MM::load(dest[px]), factor));
    MM::end();
}
template <class MM>
static void QT_FASTCALL comp_func_DestinationOut(uint *dest, const uint *src, int length, uint const_alpha)
{
    C_FF; C_80; C_00;
    if (const_alpha != 255) {
        m64 constAlpha = MM::load_alpha(const_alpha);
        m64 invConstAlpha = MM::negate(constAlpha);
        for (int px = 0; px < length; ++px) {
            m64 dst = MM::load(dest[px]);
            // factor = sia * ca + cia
            m64 factor = MM::negate(MM::alpha(MM::load(src[px])));
            factor = MM::byte_mul(factor, constAlpha);
            factor = MM::add(factor, invConstAlpha);
            dest[px] = MM::store(MM::byte_mul(dst, factor));
        }
    } else {
        // dest = d * sia
        for (int px = 0; px < length; ++px) {
            m64 invSrcAlpha = MM::negate(MM::alpha(MM::load(src[px])));
            dest[px] = MM::store(MM::byte_mul(MM::load(dest[px]), invSrcAlpha));
        }
    }
    MM::end();
}
/*
result = s*da + d*sia
dest = s*da*ca + d*sia*ca + d *cia
= s*ca * da + d * (sia*ca + cia)
= s*ca * da + d * (1 - sa*ca)
*/
template <class MM>
static void QT_FASTCALL comp_func_solid_SourceAtop(uint *dest, int length, uint src, uint const_alpha)
{
    C_FF; C_80; C_00;

    // Pre-scale the solid colour by the constant alpha when it is not full.
    m64 colour = MM::load(src);
    if (const_alpha != 255)
        colour = MM::byte_mul(colour, MM::load_alpha(const_alpha));

    // The destination weight is loop-invariant: 1 - alpha(colour).
    m64 dstWeight = MM::negate(MM::alpha(colour));

    // dest = colour * da + d * dstWeight
    for (int px = 0; px < length; ++px) {
        m64 dst = MM::load(dest[px]);
        dest[px] = MM::store(MM::interpolate_pixel_255(colour, MM::alpha(dst), dst, dstWeight));
    }
    MM::end();
}
template <class MM>
static void QT_FASTCALL comp_func_SourceAtop(uint *dest, const uint *src, int length, uint const_alpha)
{
    C_FF; C_80; C_00;
    if (const_alpha != 255) {
        // Same blend as below, with each source pixel first scaled by ca.
        m64 constAlpha = MM::load_alpha(const_alpha);
        for (int px = 0; px < length; ++px) {
            m64 srcPx = MM::byte_mul(MM::load(src[px]), constAlpha);
            m64 dst = MM::load(dest[px]);
            m64 dstWeight = MM::negate(MM::alpha(srcPx));
            dest[px] = MM::store(MM::interpolate_pixel_255(srcPx, MM::alpha(dst), dst, dstWeight));
        }
    } else {
        // dest = s * da + d * sia
        for (int px = 0; px < length; ++px) {
            m64 srcPx = MM::load(src[px]);
            m64 dst = MM::load(dest[px]);
            m64 dstWeight = MM::negate(MM::alpha(srcPx));
            dest[px] = MM::store(MM::interpolate_pixel_255(srcPx, MM::alpha(dst), dst, dstWeight));
        }
    }
    MM::end();
}
/*
result = d*sa + s*dia
dest = d*sa*ca + s*dia*ca + d *cia
= s*ca * dia + d * (sa*ca + cia)
*/
template <class MM>
static void QT_FASTCALL comp_func_solid_DestinationAtop(uint *dest, int length, uint src, uint const_alpha)
{
    C_FF; C_80; C_00;
    m64 colour = MM::load(src);
    m64 dstWeight = MM::alpha(colour);
    if (const_alpha != 255) {
        // colour = s * ca, dstWeight = sa * ca + cia
        m64 constAlpha = MM::load_alpha(const_alpha);
        colour = MM::byte_mul(colour, constAlpha);
        m64 scaledAlpha = MM::alpha(colour);
        dstWeight = MM::add(scaledAlpha, MM::negate(constAlpha));
    }

    // dest = colour * dia + d * dstWeight
    for (int px = 0; px < length; ++px) {
        m64 dst = MM::load(dest[px]);
        m64 invDstAlpha = MM::negate(MM::alpha(dst));
        dest[px] = MM::store(MM::interpolate_pixel_255(colour, invDstAlpha, dst, dstWeight));
    }
    MM::end();
}
template <class MM>
static void QT_FASTCALL comp_func_DestinationAtop(uint *dest, const uint *src, int length, uint const_alpha)
{
    C_FF; C_80; C_00;
    if (const_alpha != 255) {
        // dest = (s * ca) * dia + d * (sa * ca + cia)
        m64 constAlpha = MM::load_alpha(const_alpha);
        for (int px = 0; px < length; ++px) {
            m64 srcPx = MM::byte_mul(MM::load(src[px]), constAlpha);
            m64 dst = MM::load(dest[px]);
            m64 dstWeight = MM::alpha(srcPx);
            dstWeight = MM::add(dstWeight, MM::negate(constAlpha));
            m64 invDstAlpha = MM::negate(MM::alpha(dst));
            dest[px] = MM::store(MM::interpolate_pixel_255(srcPx, invDstAlpha, dst, dstWeight));
        }
    } else {
        // dest = d * sa + s * dia
        for (int px = 0; px < length; ++px) {
            m64 srcPx = MM::load(src[px]);
            m64 dst = MM::load(dest[px]);
            m64 invDstAlpha = MM::negate(MM::alpha(dst));
            dest[px] = MM::store(MM::interpolate_pixel_255(dst, MM::alpha(srcPx), srcPx, invDstAlpha));
        }
    }
    MM::end();
}
/*
result = d*sia + s*dia
dest = d*sia*ca + s*dia*ca + d *cia
= s*ca * dia + d * (sia*ca + cia)
= s*ca * dia + d * (1 - sa*ca)
*/
template <class MM>
static void QT_FASTCALL comp_func_solid_XOR(uint *dest, int length, uint src, uint const_alpha)
{
    C_FF; C_80; C_00;

    // Pre-scale the solid colour by the constant alpha when it is not full.
    m64 colour = MM::load(src);
    if (const_alpha != 255)
        colour = MM::byte_mul(colour, MM::load_alpha(const_alpha));

    // The destination weight is loop-invariant: 1 - alpha(colour).
    m64 dstWeight = MM::negate(MM::alpha(colour));

    // dest = colour * dia + d * dstWeight
    for (int px = 0; px < length; ++px) {
        m64 dst = MM::load(dest[px]);
        m64 invDstAlpha = MM::negate(MM::alpha(dst));
        dest[px] = MM::store(MM::interpolate_pixel_255(colour, invDstAlpha, dst, dstWeight));
    }
    MM::end();
}
template <class MM>
static void QT_FASTCALL comp_func_XOR(uint *dest, const uint *src, int length, uint const_alpha)
{
    C_FF; C_80; C_00;
    if (const_alpha != 255) {
        // Same blend as below, with each source pixel first scaled by ca.
        m64 constAlpha = MM::load_alpha(const_alpha);
        for (int px = 0; px < length; ++px) {
            m64 srcPx = MM::byte_mul(MM::load(src[px]), constAlpha);
            m64 dst = MM::load(dest[px]);
            m64 invDstAlpha = MM::negate(MM::alpha(dst));
            m64 invSrcAlpha = MM::negate(MM::alpha(srcPx));
            dest[px] = MM::store(MM::interpolate_pixel_255(srcPx, invDstAlpha, dst, invSrcAlpha));
        }
    } else {
        // dest = s * dia + d * sia
        for (int px = 0; px < length; ++px) {
            m64 srcPx = MM::load(src[px]);
            m64 dst = MM::load(dest[px]);
            m64 invDstAlpha = MM::negate(MM::alpha(dst));
            m64 invSrcAlpha = MM::negate(MM::alpha(srcPx));
            dest[px] = MM::store(MM::interpolate_pixel_255(srcPx, invDstAlpha, dst, invSrcAlpha));
        }
    }
    MM::end();
}
/*
  Raster operation: dest[i] |= color for `length` pixels.

  const_alpha is ignored. The bulk of the span is processed two pixels at a
  time through 64-bit MMX ORs; a leading pixel is peeled off when dest is not
  8-byte aligned and a trailing pixel is handled separately when the remaining
  count is odd.
*/
template <class MM>
static void QT_FASTCALL rasterop_solid_SourceOrDestination(uint *dest,
                                                           int length,
                                                           uint color,
                                                           uint const_alpha)
{
    Q_UNUSED(const_alpha);

    // Empty span: return before touching memory. Without this guard a
    // misaligned dest would have one pixel written by the alignment step,
    // length would become negative, and the odd-tail step would then write
    // in front of the span. No __m64 operation has run at this point.
    if (length <= 0)
        return;

    // Peel one pixel so that the 64-bit accesses below start 8-byte aligned.
    if ((quintptr)(dest) & 0x7) {
        *dest++ |= color;
        --length;
    }

    const int length64 = length / 2;
    if (length64) {
        __m64 *dst64 = reinterpret_cast<__m64*>(dest);
        const __m64 color64 = _mm_set_pi32(color, color);

        // Duff's device: the switch jumps into the 4x unrolled loop body so
        // that the first pass handles the (length64 % 4) leftover words.
        int n = (length64 + 3) / 4;
        switch (length64 & 0x3) {
        case 0: do { *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
        case 3:      *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
        case 2:      *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
        case 1:      *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
        } while (--n > 0);
        }
    }

    // Odd pixel left over after the 64-bit pass.
    if (length & 0x1) {
        dest[length - 1] |= color;
    }

    MM::end();
}
/*
  Raster operation: dest[i] &= (color | 0xff000000) for `length` pixels.

  const_alpha is ignored. The top byte of color is forced to 0xff so the AND
  leaves the top byte of each destination pixel unchanged. The bulk of the
  span is processed two pixels at a time through 64-bit MMX ANDs.
*/
template <class MM>
static void QT_FASTCALL rasterop_solid_SourceAndDestination(uint *dest,
                                                            int length,
                                                            uint color,
                                                            uint const_alpha)
{
    Q_UNUSED(const_alpha);

    // Empty span: return before touching memory. Without this guard a
    // misaligned dest would have one pixel written by the alignment step,
    // length would become negative, and the odd-tail step would then write
    // in front of the span. No __m64 operation has run at this point.
    if (length <= 0)
        return;

    color |= 0xff000000;

    // Peel one pixel so that the 64-bit accesses below start 8-byte aligned.
    if ((quintptr)(dest) & 0x7) { // align
        *dest++ &= color;
        --length;
    }

    const int length64 = length / 2;
    if (length64) {
        __m64 *dst64 = reinterpret_cast<__m64*>(dest);
        const __m64 color64 = _mm_set_pi32(color, color);

        // Duff's device: the switch jumps into the 4x unrolled loop body so
        // that the first pass handles the (length64 % 4) leftover words.
        int n = (length64 + 3) / 4;
        switch (length64 & 0x3) {
        case 0: do { *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
        case 3:      *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
        case 2:      *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
        case 1:      *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
        } while (--n > 0);
        }
    }

    // Odd pixel left over after the 64-bit pass.
    if (length & 0x1) {
        dest[length - 1] &= color;
    }

    MM::end();
}
/*
  Raster operation: dest[i] ^= (color & 0x00ffffff) for `length` pixels.

  const_alpha is ignored. The top byte of color is cleared so the XOR leaves
  the top byte of each destination pixel unchanged. The bulk of the span is
  processed two pixels at a time through 64-bit MMX XORs.
*/
template <class MM>
static void QT_FASTCALL rasterop_solid_SourceXorDestination(uint *dest,
                                                            int length,
                                                            uint color,
                                                            uint const_alpha)
{
    Q_UNUSED(const_alpha);

    // Empty span: return before touching memory. Without this guard a
    // misaligned dest would have one pixel written by the alignment step,
    // length would become negative, and the odd-tail step would then write
    // in front of the span. No __m64 operation has run at this point.
    if (length <= 0)
        return;

    color &= 0x00ffffff;

    // Peel one pixel so that the 64-bit accesses below start 8-byte aligned.
    if ((quintptr)(dest) & 0x7) {
        *dest++ ^= color;
        --length;
    }

    const int length64 = length / 2;
    if (length64) {
        __m64 *dst64 = reinterpret_cast<__m64*>(dest);
        const __m64 color64 = _mm_set_pi32(color, color);

        // Duff's device: the switch jumps into the 4x unrolled loop body so
        // that the first pass handles the (length64 % 4) leftover words.
        int n = (length64 + 3) / 4;
        switch (length64 & 0x3) {
        case 0: do { *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
        case 3:      *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
        case 2:      *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
        case 1:      *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
        } while (--n > 0);
        }
    }

    // Odd pixel left over after the 64-bit pass.
    if (length & 0x1) {
        dest[length - 1] ^= color;
    }

    MM::end();
}
/*
  Raster operation: dest[i] = (color & ~dest[i]) | 0xff000000 for `length`
  pixels.

  const_alpha is ignored. The bulk of the span is processed two pixels at a
  time: _mm_andnot_si64(dst, color) computes (~dst) & color and the result is
  ORed with 0xff000000 in both 32-bit halves.
*/
template <class MM>
static void QT_FASTCALL rasterop_solid_SourceAndNotDestination(uint *dest,
                                                               int length,
                                                               uint color,
                                                               uint const_alpha)
{
    Q_UNUSED(const_alpha);

    // Empty span: return before touching memory. Without this guard a
    // misaligned dest would have one pixel written by the alignment step,
    // length would become negative, and the odd-tail step would then write
    // in front of the span. No __m64 operation has run at this point.
    if (length <= 0)
        return;

    // Peel one pixel so that the 64-bit accesses below start 8-byte aligned.
    if ((quintptr)(dest) & 0x7) {
        *dest = (color & ~(*dest)) | 0xff000000;
        ++dest;
        --length;
    }

    const int length64 = length / 2;
    if (length64) {
        __m64 *dst64 = reinterpret_cast<__m64*>(dest);
        const __m64 color64 = _mm_set_pi32(color, color);
        const m64 mmx_0xff000000 = _mm_set1_pi32(0xff000000);
        __m64 tmp1, tmp2, tmp3, tmp4;

        // Duff's device: the switch jumps into the 4x unrolled loop body so
        // that the first pass handles the (length64 % 4) leftover words.
        int n = (length64 + 3) / 4;
        switch (length64 & 0x3) {
        case 0: do { tmp1 = _mm_andnot_si64(*dst64, color64);
                     *dst64++ = _mm_or_si64(tmp1, mmx_0xff000000);
        case 3:      tmp2 = _mm_andnot_si64(*dst64, color64);
                     *dst64++ = _mm_or_si64(tmp2, mmx_0xff000000);
        case 2:      tmp3 = _mm_andnot_si64(*dst64, color64);
                     *dst64++ = _mm_or_si64(tmp3, mmx_0xff000000);
        case 1:      tmp4 = _mm_andnot_si64(*dst64, color64);
                     *dst64++ = _mm_or_si64(tmp4, mmx_0xff000000);
        } while (--n > 0);
        }
    }

    // Odd pixel left over after the 64-bit pass.
    if (length & 0x1) {
        dest[length - 1] = (color & ~(dest[length - 1])) | 0xff000000;
    }

    MM::end();
}
template <class MM>
static void QT_FASTCALL rasterop_solid_NotSourceAndNotDestination(uint *dest,
                                                                  int length,
                                                                  uint color,
                                                                  uint const_alpha)
{
    // (~color) & (~dest) is SourceAndNotDestination applied to the inverted colour.
    const uint invertedColor = ~color;
    rasterop_solid_SourceAndNotDestination<MM>(dest, length, invertedColor, const_alpha);
}
template <class MM>
static void QT_FASTCALL rasterop_solid_NotSourceOrNotDestination(uint *dest,
                                                                 int length,
                                                                 uint color,
                                                                 uint const_alpha)
{
    Q_UNUSED(const_alpha);

    // Inverted colour with the top byte forced to 0xff; plain scalar loop.
    const uint invertedColor = ~color | 0xff000000;
    for (; length--; ++dest)
        *dest = invertedColor | ~(*dest);
}
template <class MM>
static void QT_FASTCALL rasterop_solid_NotSourceXorDestination(uint *dest,
                                                               int length,
                                                               uint color,
                                                               uint const_alpha)
{
    // (~color) ^ dest is SourceXorDestination applied to the inverted colour.
    const uint invertedColor = ~color;
    rasterop_solid_SourceXorDestination<MM>(dest, length, invertedColor, const_alpha);
}
template <class MM>
static void QT_FASTCALL rasterop_solid_NotSource(uint *dest, int length,
                                                 uint color, uint const_alpha)
{
    Q_UNUSED(const_alpha);

    // Fill the span with the inverted colour, top byte forced to 0xff.
    const uint fillValue = ~color | 0xff000000;
    qt_memfill((quint32*)dest, fillValue, length);
}
template <class MM>
static void QT_FASTCALL rasterop_solid_NotSourceAndDestination(uint *dest,
                                                               int length,
                                                               uint color,
                                                               uint const_alpha)
{
    // (~color) & dest is SourceAndDestination applied to the inverted colour.
    const uint invertedColor = ~color;
    rasterop_solid_SourceAndDestination<MM>(dest, length, invertedColor, const_alpha);
}
template <class MM>
static inline void qt_blend_color_argb_x86(int count, const QSpan *spans,
void *userData,
CompositionFunctionSolid *solidFunc)
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
if (data->rasterBuffer->compositionMode == QPainter::CompositionMode_Source
|| (data->rasterBuffer->compositionMode == QPainter::CompositionMode_SourceOver
&& qAlpha(data->solid.color) == 255)) {
// inline for performance
C_FF; C_80; C_00;
while (count--) {
uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
if (spans->coverage == 255) {
qt_memfill(static_cast<quint32*>(target), quint32(data->solid.color), spans->len);
} else {
// dest = s * ca + d * (1 - sa*ca) --> dest = s * ca + d * (1-ca)
m64 ca = MM::load_alpha(spans->coverage);
m64 s = MM::byte_mul(MM::load(data->solid.color), ca);
m64 ica = MM::negate(ca);
for (int i = 0; i < spans->len; ++i)
target[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(target[i]), ica)));
}
++spans;
}
MM::end();
return;
}
CompositionFunctionSolid func = solidFunc[data->rasterBuffer->compositionMode];
while (count--) {
uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
func(target, spans->len, data->solid.color, spans->coverage);
++spans;
}
}
#ifdef QT_HAVE_MMX
// Intrinsics policy for plain MMX builds. All operations are inherited from
// QMMXCommonIntrinsics; this struct only supplies end(), which the
// compositing functions in this header call as MM::end() when they finish.
struct QMMXIntrinsics : public QMMXCommonIntrinsics
{
// Calls _mm_empty() (the EMMS instruction) to leave MMX state. The call is
// compiled out on Windows CE unless _X86_ is defined.
static inline void end() {
#if !defined(Q_OS_WINCE) || defined(_X86_)
_mm_empty();
#endif
}
};
#endif // QT_HAVE_MMX
QT_END_NAMESPACE
#endif // QDRAWHELPER_MMX_P_H

File diff suppressed because it is too large Load diff

View file

@ -1,297 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
/* Prevent the stack from becoming executable for no reason... */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
.text
.fpu neon
.arch armv7a
.altmacro
/* void blend_8_pixels_argb32_on_rgb16_neon(quint16 *dst, const quint32 *src, int const_alpha)

   Blends 8 32-bit source pixels onto 8 r5g6b5 destination pixels in place.
   Register use as seen below: r0 = dst (read, then written), r1 = src,
   r2 = const_alpha, where the value 256 skips the const_alpha scaling. */
.func blend_8_pixels_argb32_on_rgb16_neon
.global blend_8_pixels_argb32_on_rgb16_neon
/* For ELF format also set function visibility to hidden */
#ifdef __ELF__
.hidden blend_8_pixels_argb32_on_rgb16_neon
.type blend_8_pixels_argb32_on_rgb16_neon, %function
#endif
blend_8_pixels_argb32_on_rgb16_neon:
/* de-interleave the 8 source pixels into four byte planes d0..d3; the code
   below uses d3 as alpha, d2 as red and {d0, d1} as blue and green */
vld4.8 { d0, d1, d2, d3 }, [r1]
/* load the 8 destination r5g6b5 pixels into q2 = {d4, d5} */
vld1.16 { d4, d5 }, [r0]
cmp r2, #256
beq .blend_32_inner
vdup.8 d6, r2
/* multiply by const_alpha */
vmull.u8 q8, d6, d0
vmull.u8 q9, d6, d1
vmull.u8 q10, d6, d2
vmull.u8 q11, d6, d3
/* keep the high byte of each 16-bit product (divide by 256) */
vshrn.u16 d0, q8, #8
vshrn.u16 d1, q9, #8
vshrn.u16 d2, q10, #8
vshrn.u16 d3, q11, #8
.blend_32_inner:
/* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
and put data into d6 - red, d7 - green, d30 - blue */
vshrn.u16 d6, q2, #8
vshrn.u16 d7, q2, #3
vsli.u16 q2, q2, #5
vsri.u8 d6, d6, #5
/* d3 = 255 - source alpha, interleaved with the format conversion */
vmvn.8 d3, d3
vsri.u8 d7, d7, #6
vshrn.u16 d30, q2, #2
pld [r0, #128]
/* now do alpha blending, storing results in 8-bit planar format
into d16 - red, d19 - green, d18 - blue */
/* destination channels times inverse source alpha, as 16-bit products */
vmull.u8 q10, d3, d6
vmull.u8 q11, d3, d7
vmull.u8 q12, d3, d30
/* rounded narrowing of the products back to 8 bits:
   (x + ((x + 128) >> 8) + 128) >> 8 */
vrshr.u16 q13, q10, #8
vrshr.u16 q3, q11, #8
vrshr.u16 q15, q12, #8
vraddhn.u16 d20, q10, q13
vraddhn.u16 d23, q11, q3
vraddhn.u16 d22, q12, q15
/* saturating add of the source channels: d16 = d2 + d20 (red),
   q9 = {d18, d19} = {d0, d1} + {d22, d23} (blue, green) */
vqadd.u8 d16, d2, d20
vqadd.u8 q9, q0, q11
/* convert the result to r5g6b5 and store it into {d28, d29} */
vshll.u8 q14, d16, #8
vshll.u8 q8, d19, #8
vshll.u8 q9, d18, #8
vsri.u16 q14, q8, #5
vsri.u16 q14, q9, #11
vst1.16 { d28, d29 }, [r0]
bx lr
.endfunc
/* void blend_8_pixels_rgb16_on_rgb16_neon(quint16 *dst, const quint16 *src, int const_alpha)

   Blends 8 r5g6b5 source pixels onto 8 r5g6b5 destination pixels in place:
   each channel becomes (src * const_alpha + dst * (256 - const_alpha)) >> 8,
   with the two products narrowed separately before the add.
   Register use as seen below: r0 = dst (read, then written), r1 = src,
   r2 = const_alpha.

   NOTE(review): vdup.8 replicates only the low byte of r2 / r3, so
   const_alpha values of 0 or 256 would load a zero weight for both terms.
   Presumably callers handle those two cases before calling; confirm. */
.func blend_8_pixels_rgb16_on_rgb16_neon
.global blend_8_pixels_rgb16_on_rgb16_neon
/* For ELF format also set function visibility to hidden */
#ifdef __ELF__
.hidden blend_8_pixels_rgb16_on_rgb16_neon
.type blend_8_pixels_rgb16_on_rgb16_neon, %function
#endif
blend_8_pixels_rgb16_on_rgb16_neon:
/* q0 = 8 destination pixels, q1 = 8 source pixels */
vld1.16 { d0, d1 }, [r0]
vld1.16 { d2, d3 }, [r1]
/* r3 = 256 - const_alpha; d4 = const_alpha bytes, d5 = inverse bytes */
rsb r3, r2, #256
vdup.8 d4, r2
vdup.8 d5, r3
/* convert 8 r5g6b5 pixel data from {d0, d1} to planar 8-bit format
and put data into d6 - red, d7 - green, d30 - blue */
vshrn.u16 d6, q0, #8
vshrn.u16 d7, q0, #3
vsli.u16 q0, q0, #5
vsri.u8 d6, d6, #5
vsri.u8 d7, d7, #6
vshrn.u16 d30, q0, #2
/* same from {d2, d3} into {d26, d27, d28} */
vshrn.u16 d26, q1, #8
vshrn.u16 d27, q1, #3
vsli.u16 q1, q1, #5
vsri.u8 d26, d26, #5
vsri.u8 d27, d27, #6
vshrn.u16 d28, q1, #2
/* multiply dst by inv const_alpha */
vmull.u8 q10, d5, d6
vmull.u8 q11, d5, d7
vmull.u8 q12, d5, d30
vshrn.u16 d6, q10, #8
vshrn.u16 d7, q11, #8
vshrn.u16 d30, q12, #8
/* multiply src by const_alpha */
vmull.u8 q10, d4, d26
vmull.u8 q11, d4, d27
vmull.u8 q12, d4, d28
vshrn.u16 d26, q10, #8
vshrn.u16 d27, q11, #8
vshrn.u16 d28, q12, #8
/* preload dst + 128 */
pld [r0, #128]
/* add components, storing results in 8-bit planar format
into d16 - red, d19 - green, d18 - blue */
vadd.u8 d16, d26, d6
vadd.u8 d19, d27, d7
vadd.u8 d18, d28, d30
/* convert the result to r5g6b5 and store it into {d28, d29} */
vshll.u8 q14, d16, #8
vshll.u8 q8, d19, #8
vshll.u8 q9, d18, #8
vsri.u16 q14, q8, #5
vsri.u16 q14, q9, #11
vst1.16 { d28, d29 }, [r0]
bx lr
.endfunc
/* void qt_rotate90_16_neon(quint16 *dst, const quint16 *src, int sstride, int dstride, int count)

   Transposes a strip of 16-bit pixels: each source row contributes 8 pixels
   (one q register) and the results are written to 8 destination rows.
   Register use as seen below: r0 = dst, r1 = src, r2 = sstride and
   r3 = dstride (both added directly to byte addresses), count is read from
   the stack. The main loop consumes 4 source rows per iteration, the tail
   loop 2 source rows per iteration.

   NOTE(review): the tail loop always consumes two source rows, so an odd
   count would read and write one row more than requested. Presumably
   callers only pass even counts; confirm. */
.func qt_rotate90_16_neon
.global qt_rotate90_16_neon
/* For ELF format also set function visibility to hidden */
#ifdef __ELF__
.hidden qt_rotate90_16_neon
.type qt_rotate90_16_neon, %function
#endif
qt_rotate90_16_neon:
push { r4-r11, lr }
/* nine registers were pushed, so the fifth argument (count) is at sp + 9*4 */
ldr r5, [sp, #(9*4)]
/* The preloads are the key to getting good performance */
pld [r1]
/* r4 = count / 4: number of main-loop iterations */
mov r4, r5, asr #2
/* r0, r6..r11 and r3 become the write pointers of 8 consecutive
   destination rows, dstride apart */
add r6, r0, r3
add r7, r6, r3
add r8, r7, r3
add r9, r8, r3
pld [r1, r2]
add r10, r9, r3
add r11, r10, r3
add r3, r3, r11
/* r5 = count % 4: source rows left for the tail loop */
and r5, r5, #3
pld [r1, r2, lsl #1]
cmp r4, #0
beq .rotate90_16_tail
.rotate90_16_loop:
/* load 4 source rows of 8 pixels each, advancing src by sstride */
vld1.16 { q8 }, [r1], r2
pld [r1, r2, lsl #1]
vld1.16 { q9 }, [r1], r2
vld1.16 { q10 }, [r1], r2
vld1.16 { q11 }, [r1], r2
pld [r1]
/* Could have used four quad-word zips instead,
but those take three cycles as opposed to one. */
vzip.16 d16, d20
vzip.16 d17, d21
vzip.16 d18, d22
pld [r1, r2]
vzip.16 d19, d23
vzip.16 d16, d18
vzip.16 d17, d19
pld [r1, r2, lsl #1]
vzip.16 d20, d22
vzip.16 d21, d23
/* store 4 pixels to each of the 8 destination rows; the pointers
   post-increment by the 8 bytes written */
vst1.16 { d23 }, [r0]!
vst1.16 { d21 }, [r6]!
vst1.16 { d19 }, [r7]!
vst1.16 { d17 }, [r8]!
vst1.16 { d22 }, [r9]!
vst1.16 { d20 }, [r10]!
vst1.16 { d18 }, [r11]!
vst1.16 { d16 }, [r3]!
sub r4, r4, #1
cmp r4, #0
bne .rotate90_16_loop
b .rotate90_16_tail
.rotate90_16_tail_loop:
/* tail: 2 source rows per pass, 2 pixels (one 32-bit lane) per
   destination row */
sub r5, r5, #2
vld1.16 { q8 }, [r1], r2
vld1.16 { q9 }, [r1], r2
vzip.16 d16, d18
vzip.16 d17, d19
vst1.32 { d19[1] }, [r0]!
vst1.32 { d19[0] }, [r6]!
vst1.32 { d17[1] }, [r7]!
vst1.32 { d17[0] }, [r8]!
vst1.32 { d18[1] }, [r9]!
vst1.32 { d18[0] }, [r10]!
vst1.32 { d16[1] }, [r11]!
vst1.32 { d16[0] }, [r3]!
.rotate90_16_tail:
/* loop while source rows remain, then restore registers and return */
cmp r5, #0
bgt .rotate90_16_tail_loop
pop { r4-r11, pc }
.endfunc

View file

@ -1,146 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QDRAWHELPER_NEON_P_H
#define QDRAWHELPER_NEON_P_H
//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists purely as an
// implementation detail. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//
#include <qdrawhelper_p.h>
QT_BEGIN_NAMESPACE
#ifdef QT_HAVE_NEON
// Prototypes of the NEON draw helpers. Only declarations live in this header;
// everything below is compiled only when QT_HAVE_NEON is defined.

// Image blend routines.
// NOTE(review): the names read as <source format>_on_<destination format>,
// and dbpl / sbpl are presumably the destination / source bytes per line;
// confirm against the definitions.
void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
void qt_blend_rgb32_on_rgb32_neon(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
void qt_blend_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
// Scanline variant: takes pixel pointers and a length instead of a rectangle.
void qt_blend_argb32_on_argb32_scanline_neon(uint *dest,
const uint *src,
int length,
uint const_alpha);
void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
void qt_blend_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
// Alpha-map blit into a raster buffer, with an optional clip argument.
void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer,
int x, int y, quint32 color,
const uchar *bitmap,
int mapWidth, int mapHeight, int mapStride,
const QClipData *clip);
// Scaled image blends: take source and target rectangles plus a clip rect.
void qt_scale_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl, int sh,
const QRectF &targetRect,
const QRectF &sourceRect,
const QRect &clip,
int const_alpha);
void qt_scale_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl, int sh,
const QRectF &targetRect,
const QRectF &sourceRect,
const QRect &clip,
int const_alpha);
// Transformed image blends: as above, plus a QTransform for the target rect.
void qt_transform_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
const QRectF &targetRect,
const QRectF &sourceRect,
const QRect &clip,
const QTransform &targetRectTransform,
int const_alpha);
void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
const QRectF &targetRect,
const QRectF &sourceRect,
const QRect &clip,
const QTransform &targetRectTransform,
int const_alpha);
// Memory fill and 16-bit rotation helpers.
void qt_memfill32_neon(quint32 *dest, quint32 value, int count);
void qt_memrotate90_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl);
void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl);
// RGB16 destination fetch / store through a uint buffer.
uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer,
QRasterBuffer *rasterBuffer,
int x, int y, int length);
void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer,
int x, int y, const uint *buffer, int length);
// Composition functions with NEON implementations.
void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_Plus_neon(uint *dst, const uint *src, int length, uint const_alpha);
#endif // QT_HAVE_NEON
QT_END_NAMESPACE
#endif // QDRAWHELPER_NEON_P_H

View file

@ -62,7 +62,6 @@
#define QT_FT_END_HEADER
#endif
#include "qrasterdefs_p.h"
#include <qsimd_p.h>
#include <qmath_p.h>
QT_BEGIN_NAMESPACE

View file

@ -1,172 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <qdrawhelper_p.h>
#ifdef QT_HAVE_SSE
#include <qdrawhelper_sse_p.h>
QT_BEGIN_NAMESPACE
// Solid-colour composition functions for the SSE code path, one slot per
// composition mode (numCompositionFunctions slots). Slots holding 0 have no
// implementation in this table.
// NOTE(review): qt_blend_color_argb_x86 calls the selected slot without a
// null check, so the 0 slots are presumably replaced elsewhere before the
// table is used; confirm.
CompositionFunctionSolid qt_functionForModeSolid_SSE[numCompositionFunctions] = {
comp_func_solid_SourceOver<QSSEIntrinsics>,
comp_func_solid_DestinationOver<QSSEIntrinsics>,
comp_func_solid_Clear<QSSEIntrinsics>,
comp_func_solid_Source<QSSEIntrinsics>,
// same position as comp_func_Destination in qt_functionForMode_SSE
0,
comp_func_solid_SourceIn<QSSEIntrinsics>,
comp_func_solid_DestinationIn<QSSEIntrinsics>,
comp_func_solid_SourceOut<QSSEIntrinsics>,
comp_func_solid_DestinationOut<QSSEIntrinsics>,
comp_func_solid_SourceAtop<QSSEIntrinsics>,
comp_func_solid_DestinationAtop<QSSEIntrinsics>,
comp_func_solid_XOR<QSSEIntrinsics>,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // svg 1.2 modes
// the raster operations are instantiated with QMMXIntrinsics, not
// QSSEIntrinsics
rasterop_solid_SourceOrDestination<QMMXIntrinsics>,
rasterop_solid_SourceAndDestination<QMMXIntrinsics>,
rasterop_solid_SourceXorDestination<QMMXIntrinsics>,
rasterop_solid_NotSourceAndNotDestination<QMMXIntrinsics>,
rasterop_solid_NotSourceOrNotDestination<QMMXIntrinsics>,
rasterop_solid_NotSourceXorDestination<QMMXIntrinsics>,
rasterop_solid_NotSource<QMMXIntrinsics>,
rasterop_solid_NotSourceAndDestination<QMMXIntrinsics>,
rasterop_solid_SourceAndNotDestination<QMMXIntrinsics>
};
// Per-pixel (source span) composition functions for the SSE code path, one
// slot per composition mode. Entries with a <QSSEIntrinsics> argument are the
// templated implementations; the remaining entries name non-templated
// functions declared outside this file.
CompositionFunction qt_functionForMode_SSE[numCompositionFunctions] = {
comp_func_SourceOver<QSSEIntrinsics>,
comp_func_DestinationOver<QSSEIntrinsics>,
comp_func_Clear<QSSEIntrinsics>,
comp_func_Source<QSSEIntrinsics>,
comp_func_Destination,
comp_func_SourceIn<QSSEIntrinsics>,
comp_func_DestinationIn<QSSEIntrinsics>,
comp_func_SourceOut<QSSEIntrinsics>,
comp_func_DestinationOut<QSSEIntrinsics>,
comp_func_SourceAtop<QSSEIntrinsics>,
comp_func_DestinationAtop<QSSEIntrinsics>,
comp_func_XOR<QSSEIntrinsics>,
// twelve slots at the positions marked "svg 1.2 modes" in the solid table
comp_func_Plus,
comp_func_Multiply,
comp_func_Screen,
comp_func_Overlay,
comp_func_Darken,
comp_func_Lighten,
comp_func_ColorDodge,
comp_func_ColorBurn,
comp_func_HardLight,
comp_func_SoftLight,
comp_func_Difference,
comp_func_Exclusion,
// raster operations
rasterop_SourceOrDestination,
rasterop_SourceAndDestination,
rasterop_SourceXorDestination,
rasterop_NotSourceAndNotDestination,
rasterop_NotSourceOrNotDestination,
rasterop_NotSourceXorDestination,
rasterop_NotSource,
rasterop_NotSourceAndDestination,
rasterop_SourceAndNotDestination
};
void qt_blend_color_argb_sse(int count, const QSpan *spans, void *userData)
{
qt_blend_color_argb_x86<QSSEIntrinsics>(count, spans, userData,
(CompositionFunctionSolid*)qt_functionForModeSolid_SSE);
}
// SSE entry point for 32-bit fills: forwards to the shared template
// instantiated with the SSE intrinsics policy.
void qt_memfill32_sse(quint32 *dest, quint32 value, int count)
{
    qt_memfill32_sse_template<QSSEIntrinsics>(dest, value, count);
}
// SSE entry point for the 16-bit bitmap blit: forwards every argument to the
// shared template instantiated with the SSE intrinsics policy.
void qt_bitmapblit16_sse(QRasterBuffer *rasterBuffer, int x, int y,
                         quint32 color,
                         const uchar *src,
                         int width, int height, int stride)
{
    qt_bitmapblit16_sse_template<QSSEIntrinsics>(rasterBuffer, x, y,
                                                 color, src,
                                                 width, height, stride);
}
// Blends a w x h block of 32-bit source pixels onto 32-bit destination pixels
// by running comp_func_SourceOver on each row.
//
// destPixels / srcPixels point at the first row of each image; dbpl / sbpl
// are the byte offsets added to move to the next row. const_alpha is reduced
// by one before it is handed to the composition function, which treats 255
// as full alpha (so 256 here presumably means fully opaque; confirm).
void qt_blend_argb32_on_argb32_sse(uchar *destPixels, int dbpl,
                                   const uchar *srcPixels, int sbpl,
                                   int w, int h,
                                   int const_alpha)
{
    // A zero const_alpha would make `const_alpha - 1` wrap around to
    // UINT_MAX and feed a garbage alpha to the composition function. A
    // blend with zero alpha is expected to leave the destination unchanged,
    // so there is nothing to do.
    if (const_alpha <= 0)
        return;

    const uint *src = (const uint *) srcPixels;
    uint *dst = (uint *) destPixels;
    uint ca = const_alpha - 1;
    for (int y=0; y<h; ++y) {
        comp_func_SourceOver<QSSEIntrinsics>(dst, src, w, ca);
        // Row pitches are in bytes, so step through uchar pointers.
        dst = (quint32 *)(((uchar *) dst) + dbpl);
        src = (const quint32 *)(((const uchar *) src) + sbpl);
    }
}
// Blends a w x h block of 32-bit source pixels onto 32-bit destination pixels
// by running comp_func_Source on each row.
//
// destPixels / srcPixels point at the first row of each image; dbpl / sbpl
// are the byte offsets added to move to the next row. const_alpha is reduced
// by one before it is handed to the composition function (so 256 here
// presumably means fully opaque; confirm).
void qt_blend_rgb32_on_rgb32_sse(uchar *destPixels, int dbpl,
                                 const uchar *srcPixels, int sbpl,
                                 int w, int h,
                                 int const_alpha)
{
    // A zero const_alpha would make `const_alpha - 1` wrap around to
    // UINT_MAX and feed a garbage alpha to the composition function. A
    // blend with zero alpha is expected to leave the destination unchanged,
    // so there is nothing to do.
    if (const_alpha <= 0)
        return;

    const uint *src = (const uint *) srcPixels;
    uint *dst = (uint *) destPixels;
    uint ca = const_alpha - 1;
    for (int y=0; y<h; ++y) {
        comp_func_Source<QSSEIntrinsics>(dst, src, w, ca);
        // Row pitches are in bytes, so step through uchar pointers.
        dst = (quint32 *)(((uchar *) dst) + dbpl);
        src = (const quint32 *)(((const uchar *) src) + sbpl);
    }
}
QT_END_NAMESPACE
#endif // QT_HAVE_SSE

View file

@ -1,547 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <qdrawhelper_x86_p.h>
#ifdef QT_HAVE_SSE2
#include <qdrawingprimitive_sse2_p.h>
#include <qpaintengine_raster_p.h>
QT_BEGIN_NAMESPACE
/*
    SSE2 source-over blend of a w x h block of ARGB32 pixels.

    dbpl / sbpl are the byte strides of destination / source scanlines.
    const_alpha == 256 selects the plain source-over macro, const_alpha == 0
    does nothing, and any other value is rescaled to the 0..255 range and
    applied as a constant alpha on top of the per-pixel alpha.
*/
void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
                                    const uchar *srcPixels, int sbpl,
                                    int w, int h,
                                    int const_alpha)
{
    if (const_alpha == 0)
        return;

    const quint32 *src = (const quint32 *) srcPixels;
    quint32 *dst = (quint32 *) destPixels;

    // constants shared by both blending variants
    const __m128i nullVector = _mm_set1_epi32(0);
    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i one = _mm_set1_epi16(0xff);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

    if (const_alpha == 256) {
        const __m128i alphaMask = _mm_set1_epi32(0xff000000);
        for (int y = 0; y < h; ++y) {
            BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, w, nullVector, half, one, colorMask, alphaMask);
            dst = (quint32 *)(((uchar *) dst) + dbpl);
            src = (const quint32 *)(((const uchar *) src) + sbpl);
        }
        return;
    }

    // dest = (s + d * sia) * ca + d * cia
    //      = s * ca + d * (sia * ca + cia)
    //      = s * ca + d * (1 - sa*ca)
    //
    // NOTE(review): the parameter itself is rescaled rather than a renamed
    // local, in case the blend macro refers to const_alpha by name - confirm
    // against the macro definition before changing this.
    const_alpha = (const_alpha * 255) >> 8;
    const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
    for (int y = 0; y < h; ++y) {
        BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
        dst = (quint32 *)(((uchar *) dst) + dbpl);
        src = (const quint32 *)(((const uchar *) src) + sbpl);
    }
}
// qblendfunctions.cpp
void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl,
                             const uchar *srcPixels, int sbpl,
                             int w, int h,
                             int const_alpha);

/*
    SSE2 blend of a w x h block of RGB32 pixels with a constant alpha.

    dbpl / sbpl are the byte strides of destination / source scanlines.
    const_alpha == 256 is delegated to the generic qt_blend_rgb32_on_rgb32,
    const_alpha == 0 does nothing, and any other value is rescaled to 0..255
    and used to interpolate every pixel:
        dst = src * alpha + dst * (255 - alpha)

    Every pixel is interpolated, including groups of four all-zero source
    pixels. Those must not be skipped: with a constant alpha an all-zero
    source still darkens the destination, and the scalar prologue and
    epilogue below interpolate such pixels too, so skipping them only in the
    vector loop would make the result depend on where a pixel happens to
    fall relative to the 16-byte alignment of dst.
*/
void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
                                  const uchar *srcPixels, int sbpl,
                                  int w, int h,
                                  int const_alpha)
{
    if (const_alpha == 256) {
        qt_blend_rgb32_on_rgb32(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
        return;
    }
    if (const_alpha == 0)
        return;

    const quint32 *src = (const quint32 *) srcPixels;
    quint32 *dst = (quint32 *) destPixels;

    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
    const_alpha = (const_alpha * 255) >> 8;
    int one_minus_const_alpha = 255 - const_alpha;
    const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
    const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha);

    for (int y = 0; y < h; ++y) {
        int x = 0;
        // First, align dest to 16 bytes:
        ALIGNMENT_PROLOGUE_16BYTES(dst, x, w) {
            dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha);
        }
        // Then four pixels at a time; src may be unaligned, dst is aligned.
        for (; x < w-3; x += 4) {
            __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
            __m128i result;
            INTERPOLATE_PIXEL_255_SSE2(result, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half);
            _mm_store_si128((__m128i *)&dst[x], result);
        }
        // Finally the (at most three) pixels left over.
        for (; x<w; ++x) {
            dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha);
        }
        dst = (quint32 *)(((uchar *) dst) + dbpl);
        src = (const quint32 *)(((const uchar *) src) + sbpl);
    }
}
/*
    SSE2 source-over composition of `length` pixels from srcPixels onto
    destPixels. const_alpha must be below 256 (asserted); 255 selects the
    plain source-over macro, anything else the constant-alpha variant.
*/
void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha)
{
    Q_ASSERT(const_alpha < 256);

    quint32 *dst = (quint32 *) destPixels;
    const quint32 *src = (const quint32 *) srcPixels;

    // constants needed by both macros
    const __m128i nullVector = _mm_set1_epi32(0);
    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i one = _mm_set1_epi16(0xff);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

    if (const_alpha != 255) {
        const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
        BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector);
        return;
    }

    const __m128i alphaMask = _mm_set1_epi32(0xff000000);
    BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask);
}
/*
    SSE2 "Plus" composition of `length` pixels: per byte saturating
    src + dst. When const_alpha is not 255 the sum is additionally
    interpolated with the old destination using const_alpha.

    Both variants share the same three phases: a scalar prologue until dst
    is 16-byte aligned, a four-pixels-per-step vector loop, and a scalar
    epilogue for the remaining pixels.
*/
void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha)
{
    int x = 0;

    if (const_alpha != 255) {
        const int one_minus_const_alpha = 255 - const_alpha;
        const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
        const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha);
        const __m128i half = _mm_set1_epi16(0x80);
        const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

        // prologue: scalar pixels until dst is aligned
        ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) {
            dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
        }

        // vector body: saturating add, then interpolate with the old dst
        while (x < length - 3) {
            const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
            __m128i result = _mm_adds_epu8(srcVector, dstVector);
            INTERPOLATE_PIXEL_255_SSE2(result, result, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
            _mm_store_si128((__m128i *)&dst[x], result);
            x += 4;
        }

        // epilogue: whatever is left
        while (x < length) {
            dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
            ++x;
        }
        return;
    }

    // prologue: scalar pixels until dst is aligned
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) {
        dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
    }

    // vector body: plain saturating add
    while (x < length - 3) {
        const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
        const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
        _mm_store_si128((__m128i *)&dst[x], _mm_adds_epu8(srcVector, dstVector));
        x += 4;
    }

    // epilogue: whatever is left
    while (x < length) {
        dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
        ++x;
    }
}
/*
    SSE2 "Source" composition of `length` pixels. With const_alpha == 255
    the source is simply copied over the destination; otherwise every pixel
    becomes src * const_alpha + dst * (255 - const_alpha).
*/
void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha)
{
    if (const_alpha == 255) {
        ::memcpy(dst, src, length * sizeof(uint));
        return;
    }

    const int ialpha = 255 - const_alpha;
    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
    const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
    const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha);

    int x = 0;

    // scalar pixels until dst sits on a 16-byte boundary
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) {
        dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
    }

    // four pixels per step; src may be unaligned, dst is aligned
    while (x < length - 3) {
        const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
        __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
        INTERPOLATE_PIXEL_255_SSE2(dstVector, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
        _mm_store_si128((__m128i *)&dst[x], dstVector);
        x += 4;
    }

    // remaining pixels
    while (x < length) {
        dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
        ++x;
    }
}
/*
    Fills `count` 32-bit words at `dest` with `value`.

    Fewer than seven words are written with plain stores (nothing happens
    for count <= 0). Longer runs are written in three phases: single words
    until dest reaches a 16-byte boundary, 128-bit streaming stores for the
    bulk, and single words for the up to three words left over.
*/
void qt_memfill32_sse2(quint32 *dest, quint32 value, int count)
{
    if (count < 7) {
        for (int i = 0; i < count; ++i)
            dest[i] = value;
        return;
    }

    // Phase 1: dest is 4, 8 or 12 bytes past a 16-byte boundary, so 3, 2 or
    // 1 words are missing to reach the next one. Any other offset is left
    // alone, exactly as a pointer that already is aligned.
    const int offset = (quintptr)(dest) & 0xf;
    if (offset == 4 || offset == 8 || offset == 12) {
        for (int lead = (16 - offset) / 4; lead > 0; --lead) {
            *dest++ = value;
            --count;
        }
    }

    // Phase 2: at least four words remain here (count was >= 7 and at most
    // three were consumed above), so there is always one vector to store.
    const int vectors = count / 4;
    __m128i *dst128 = reinterpret_cast<__m128i*>(dest);
    const __m128i value128 = _mm_set_epi32(value, value, value, value);
    for (int i = 0; i < vectors; ++i)
        _mm_stream_si128(dst128++, value128);

    // Phase 3: the count % 4 trailing words.
    for (int i = count - (count & 0x3); i < count; ++i)
        dest[i] = value;
}
/*
    SSE2 source-over composition of one solid colour onto `length`
    destination pixels.

    If both const_alpha and the colour's alpha are 255 the destination is
    simply filled with the colour. Otherwise the colour is scaled by
    const_alpha (unless that is 255) and every pixel becomes
        color + dst * (alpha of ~color)
*/
void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha)
{
    if ((const_alpha & qAlpha(color)) == 255) {
        qt_memfill32_sse2(destPixels, color, length);
        return;
    }

    if (const_alpha != 255)
        color = BYTE_MUL(color, const_alpha);

    const quint32 minusAlphaOfColor = qAlpha(~color);
    quint32 *dst = (quint32 *) destPixels;

    const __m128i colorVector = _mm_set1_epi32(color);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor);

    int x = 0;

    // scalar pixels until dst sits on a 16-byte boundary
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) {
        destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
    }

    // four pixels per step on the aligned destination
    while (x < length-3) {
        __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
        BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half);
        dstVector = _mm_add_epi8(colorVector, dstVector);
        _mm_store_si128((__m128i *)&dst[x], dstVector);
        x += 4;
    }

    // remaining pixels
    while (x < length) {
        destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
        ++x;
    }
}
// Table of solid-colour composition functions with numCompositionFunctions
// slots. Only the first slot (SourceOver) names an SSE2 implementation; every
// other slot names a function without the _sse2 suffix.
// NOTE(review): the slot order presumably has to match the composition mode
// enumeration used to index this table - confirm before reordering.
CompositionFunctionSolid qt_functionForModeSolid_onlySSE2[numCompositionFunctions] = {
comp_func_solid_SourceOver_sse2,
comp_func_solid_DestinationOver,
comp_func_solid_Clear,
comp_func_solid_Source,
comp_func_solid_Destination,
comp_func_solid_SourceIn,
comp_func_solid_DestinationIn,
comp_func_solid_SourceOut,
comp_func_solid_DestinationOut,
comp_func_solid_SourceAtop,
comp_func_solid_DestinationAtop,
comp_func_solid_XOR,
comp_func_solid_Plus,
comp_func_solid_Multiply,
comp_func_solid_Screen,
comp_func_solid_Overlay,
comp_func_solid_Darken,
comp_func_solid_Lighten,
comp_func_solid_ColorDodge,
comp_func_solid_ColorBurn,
comp_func_solid_HardLight,
comp_func_solid_SoftLight,
comp_func_solid_Difference,
comp_func_solid_Exclusion,
rasterop_solid_SourceOrDestination,
rasterop_solid_SourceAndDestination,
rasterop_solid_SourceXorDestination,
rasterop_solid_NotSourceAndNotDestination,
rasterop_solid_NotSourceOrNotDestination,
rasterop_solid_NotSourceXorDestination,
rasterop_solid_NotSource,
rasterop_solid_NotSourceAndDestination,
rasterop_solid_SourceAndNotDestination
};
// Table of composition functions with numCompositionFunctions slots. The
// SourceOver, Source and Plus slots name the SSE2 implementations; every
// other slot names a function without the _sse2 suffix.
// NOTE(review): the slot order presumably has to match the composition mode
// enumeration used to index this table - confirm before reordering.
CompositionFunction qt_functionForMode_onlySSE2[numCompositionFunctions] = {
comp_func_SourceOver_sse2,
comp_func_DestinationOver,
comp_func_Clear,
comp_func_Source_sse2,
comp_func_Destination,
comp_func_SourceIn,
comp_func_DestinationIn,
comp_func_SourceOut,
comp_func_DestinationOut,
comp_func_SourceAtop,
comp_func_DestinationAtop,
comp_func_XOR,
comp_func_Plus_sse2,
comp_func_Multiply,
comp_func_Screen,
comp_func_Overlay,
comp_func_Darken,
comp_func_Lighten,
comp_func_ColorDodge,
comp_func_ColorBurn,
comp_func_HardLight,
comp_func_SoftLight,
comp_func_Difference,
comp_func_Exclusion,
rasterop_SourceOrDestination,
rasterop_SourceAndDestination,
rasterop_SourceXorDestination,
rasterop_NotSourceAndNotDestination,
rasterop_NotSourceOrNotDestination,
rasterop_NotSourceXorDestination,
rasterop_NotSource,
rasterop_NotSourceAndDestination,
rasterop_SourceAndNotDestination
};
/*
    Fills `count` 16-bit words at `dest` with `value`.

    One or two words are written directly (nothing happens for count <= 0).
    Longer runs first write a single word if dest sits two bytes past a
    4-byte boundary, then hand pairs of words to qt_memfill32_sse2 as one
    32-bit value, and finally write the odd trailing word, if any.
*/
void qt_memfill16_sse2(quint16 *dest, quint16 value, int count)
{
    if (count < 3) {
        switch (count) {
        case 2: *dest++ = value; // fall through
        case 1: *dest = value;
        }
        return;
    }

    // bring dest onto a 4-byte boundary so it can be treated as quint32*
    const int align = (quintptr)(dest) & 0x3;
    if (align == 2) {
        *dest++ = value;
        --count;
    }

    // Widen before shifting: a quint16 is promoted to (signed) int, and
    // shifting a value >= 0x8000 left by 16 would overflow that int.
    const quint32 value32 = (quint32(value) << 16) | value;
    qt_memfill32_sse2(reinterpret_cast<quint32*>(dest), value32, count / 2);

    if (count & 0x1)
        dest[count - 1] = value;
}
/*
    Paints `color` into a 32-bit raster buffer wherever the 1-bit-per-pixel
    bitmap `src` has a bit set, starting at (x, y).

    Each source byte covers eight pixels, most significant bit first.
    `stride` is the distance in bytes between bitmap rows. The bits of a
    byte are expanded into a byte mask whose top bit is set for every pixel
    to paint, and _mm_maskmoveu_si128 then writes only those pixels.
    For width <= 4 only the upper four bits of the first byte of each row
    are looked at.
*/
void qt_bitmapblit32_sse2(QRasterBuffer *rasterBuffer, int x, int y,
                          quint32 color,
                          const uchar *src, int width, int height, int stride)
{
    quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
    const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint32);
    const __m128i c128 = _mm_set1_epi32(color);

    // pixels 0..3 of a byte: isolate bit 0x80 >> n, then add enough to
    // carry it into the top bit of every byte of lane n
    const __m128i maskmask1 = _mm_set_epi32(0x10101010, 0x20202020,
                                            0x40404040, 0x80808080);
    const __m128i maskadd1 = _mm_set_epi32(0x70707070, 0x60606060,
                                           0x40404040, 0x00000000);

    if (width <= 4) {
        for (; height--; dest += destStride, src += stride) {
            const quint8 bits = *src;
            if (!bits)
                continue;
            const __m128i picked = _mm_and_si128(_mm_set1_epi8(bits), maskmask1);
            _mm_maskmoveu_si128(c128, _mm_add_epi8(picked, maskadd1), (char*)(dest));
        }
        return;
    }

    // pixels 4..7 of a byte, same scheme as above
    const __m128i maskmask2 = _mm_set_epi32(0x01010101, 0x02020202,
                                            0x04040404, 0x08080808);
    const __m128i maskadd2 = _mm_set_epi32(0x7f7f7f7f, 0x7e7e7e7e,
                                           0x7c7c7c7c, 0x78787878);

    for (; height--; dest += destStride, src += stride) {
        for (int col = 0; col < width; col += 8) {
            const quint8 bits = src[col >> 3];
            if (!bits)
                continue;
            const __m128i spread = _mm_set1_epi8(bits);

            const __m128i firstFour = _mm_add_epi8(_mm_and_si128(spread, maskmask1), maskadd1);
            _mm_maskmoveu_si128(c128, firstFour, (char*)(dest + col));

            const __m128i lastFour = _mm_add_epi8(_mm_and_si128(spread, maskmask2), maskadd2);
            _mm_maskmoveu_si128(c128, lastFour, (char*)(dest + col + 4));
        }
    }
}
/*
    Paints `color`, converted to 16 bits with qt_colorConvert, into a 16-bit
    raster buffer wherever the 1-bit-per-pixel bitmap `src` has a bit set,
    starting at (x, y).

    Each source byte covers eight pixels, most significant bit first, which
    is exactly one 128-bit vector of 16-bit pixels. `stride` is the distance
    in bytes between bitmap rows.
*/
void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y,
                          quint32 color,
                          const uchar *src, int width, int height, int stride)
{
    const quint16 c = qt_colorConvert<quint16, quint32>(color, 0);
    quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
    const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16);
    const __m128i c128 = _mm_set1_epi16(c);
#if defined(Q_CC_MSVC)
#  pragma warning(disable: 4309) // truncation of constant value
#endif
    // lane n isolates bit 0x80 >> n; the addend then carries that bit into
    // the top bit of both bytes of the lane
    const __m128i maskmask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808,
                                           0x1010, 0x2020, 0x4040, 0x8080);
    const __m128i maskadd = _mm_set_epi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878,
                                          0x7070, 0x6060, 0x4040, 0x0000);

    for (; height--; dest += destStride, src += stride) {
        for (int col = 0; col < width; col += 8) {
            const quint8 bits = src[col >> 3];
            if (!bits)
                continue;
            const __m128i picked = _mm_and_si128(_mm_set1_epi8(bits), maskmask);
            _mm_maskmoveu_si128(c128, _mm_add_epi8(picked, maskadd), (char*)(dest + col));
        }
    }
}
/*
    Thin SSE2 wrapper exposing a fixed set of four-lane vector operations
    under compiler-neutral names. It is passed as the template argument of
    QRadialFetchSimd further down in this file.

    Int32x4 holds four 32-bit integers and Float32x4 four floats; the
    Vect_buffer_* unions give per-lane access to such a vector.
*/
class QSimdSse2
{
public:
    typedef __m128i Int32x4;
    typedef __m128 Float32x4;

    union Vect_buffer_i { Int32x4 v; int i[4]; };
    union Vect_buffer_f { Float32x4 v; float f[4]; };

    // broadcast one scalar to all four lanes (a double is narrowed to float)
    static inline Float32x4 v_dup(float x) { return _mm_set1_ps(x); }
    static inline Float32x4 v_dup(double x) { return _mm_set1_ps(x); }
    static inline Int32x4 v_dup(int x) { return _mm_set1_epi32(x); }
    static inline Int32x4 v_dup(uint x) { return _mm_set1_epi32(x); }

    // lane-wise arithmetic
    static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return _mm_add_ps(a, b); }
    static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return _mm_add_epi32(a, b); }

    static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return _mm_max_ps(a, b); }
    static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return _mm_min_ps(a, b); }
    // signed minimum over the eight 16-bit halves, not over 32-bit lanes
    static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return _mm_min_epi16(a, b); }

    static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return _mm_and_si128(a, b); }

    static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return _mm_sub_ps(a, b); }
    static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return _mm_sub_epi32(a, b); }

    static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return _mm_mul_ps(a, b); }

    static inline Float32x4 v_sqrt(Float32x4 x) { return _mm_sqrt_ps(x); }

    // float -> int conversion with truncation
    static inline Int32x4 v_toInt(Float32x4 x) { return _mm_cvttps_epi32(x); }

    // Lane-wise a >= b; a lane is all ones where the comparison holds and
    // all zeroes otherwise. _mm_cmpge_ps is used so that equal lanes compare
    // true, as the name promises (_mm_cmpgt_ps would reject them).
    //
    // pre-VS 2008 doesn't have cast intrinsics, whereas 2008 and later requires it
    // (same deal with gcc prior to 4.0)
#if (defined(Q_CC_MSVC) && _MSC_VER < 1500) || (defined(Q_CC_GNU) && __GNUC__ < 4)
    static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b)
    {
        union Convert { Int32x4 vi; Float32x4 vf; } convert;
        convert.vf = _mm_cmpge_ps(a, b);
        return convert.vi;
    }
#else
    static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b) { return _mm_castps_si128(_mm_cmpge_ps(a, b)); }
#endif
};
// SSE2 radial gradient fetch: forwards all arguments to the generic radial
// gradient template, parameterised with the SIMD fetcher built on QSimdSse2,
// and returns whatever that template returns.
const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data,
                                                       int y, int x, int length)
{
    typedef QRadialFetchSimd<QSimdSse2> Sse2RadialFetch;
    return qt_fetch_radial_gradient_template<Sse2RadialFetch>(buffer, op, data, y, x, length);
}
QT_END_NAMESPACE
#endif // QT_HAVE_SSE2

View file

@ -1,145 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <qdrawhelper_x86_p.h>
#if defined(QT_HAVE_3DNOW) && defined(QT_HAVE_SSE)
#include <qdrawhelper_sse_p.h>
#include <mm3dnow.h>
QT_BEGIN_NAMESPACE
// Intrinsics policy for CPUs offering both SSE and 3DNow!: everything is
// inherited from QSSEIntrinsics except end(), which issues _m_femms().
struct QSSE3DNOWIntrinsics : public QSSEIntrinsics
{
    static inline void end()
    {
        _m_femms();
    }
};
// Table of solid-colour composition functions for the SSE + 3DNow! code
// path, with numCompositionFunctions slots. The slot after Source and the
// twelve "svg 1.2 modes" slots hold 0, i.e. no function.
// NOTE(review): users of this table presumably have to cope with the null
// slots, and the slot order presumably has to match the composition mode
// enumeration used to index it - confirm both.
CompositionFunctionSolid qt_functionForModeSolid_SSE3DNOW[numCompositionFunctions] = {
comp_func_solid_SourceOver<QSSE3DNOWIntrinsics>,
comp_func_solid_DestinationOver<QSSE3DNOWIntrinsics>,
comp_func_solid_Clear<QSSE3DNOWIntrinsics>,
comp_func_solid_Source<QSSE3DNOWIntrinsics>,
0,
comp_func_solid_SourceIn<QSSE3DNOWIntrinsics>,
comp_func_solid_DestinationIn<QSSE3DNOWIntrinsics>,
comp_func_solid_SourceOut<QSSE3DNOWIntrinsics>,
comp_func_solid_DestinationOut<QSSE3DNOWIntrinsics>,
comp_func_solid_SourceAtop<QSSE3DNOWIntrinsics>,
comp_func_solid_DestinationAtop<QSSE3DNOWIntrinsics>,
comp_func_solid_XOR<QSSE3DNOWIntrinsics>,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // svg 1.2 modes
rasterop_solid_SourceOrDestination<QSSE3DNOWIntrinsics>,
rasterop_solid_SourceAndDestination<QSSE3DNOWIntrinsics>,
rasterop_solid_SourceXorDestination<QSSE3DNOWIntrinsics>,
rasterop_solid_NotSourceAndNotDestination<QSSE3DNOWIntrinsics>,
rasterop_solid_NotSourceOrNotDestination<QSSE3DNOWIntrinsics>,
rasterop_solid_NotSourceXorDestination<QSSE3DNOWIntrinsics>,
rasterop_solid_NotSource<QSSE3DNOWIntrinsics>,
rasterop_solid_NotSourceAndDestination<QSSE3DNOWIntrinsics>,
rasterop_solid_SourceAndNotDestination<QSSE3DNOWIntrinsics>
};
// Table of composition functions for the SSE + 3DNow! code path, with
// numCompositionFunctions slots. The first twelve slots except Destination
// are template instantiations for QSSE3DNOWIntrinsics; Destination and all
// later slots name non-template functions.
// NOTE(review): the slot order presumably has to match the composition mode
// enumeration used to index this table - confirm before reordering.
CompositionFunction qt_functionForMode_SSE3DNOW[numCompositionFunctions] = {
comp_func_SourceOver<QSSE3DNOWIntrinsics>,
comp_func_DestinationOver<QSSE3DNOWIntrinsics>,
comp_func_Clear<QSSE3DNOWIntrinsics>,
comp_func_Source<QSSE3DNOWIntrinsics>,
comp_func_Destination,
comp_func_SourceIn<QSSE3DNOWIntrinsics>,
comp_func_DestinationIn<QSSE3DNOWIntrinsics>,
comp_func_SourceOut<QSSE3DNOWIntrinsics>,
comp_func_DestinationOut<QSSE3DNOWIntrinsics>,
comp_func_SourceAtop<QSSE3DNOWIntrinsics>,
comp_func_DestinationAtop<QSSE3DNOWIntrinsics>,
comp_func_XOR<QSSE3DNOWIntrinsics>,
comp_func_Plus,
comp_func_Multiply,
comp_func_Screen,
comp_func_Overlay,
comp_func_Darken,
comp_func_Lighten,
comp_func_ColorDodge,
comp_func_ColorBurn,
comp_func_HardLight,
comp_func_SoftLight,
comp_func_Difference,
comp_func_Exclusion,
rasterop_SourceOrDestination,
rasterop_SourceAndDestination,
rasterop_SourceXorDestination,
rasterop_NotSourceAndNotDestination,
rasterop_NotSourceOrNotDestination,
rasterop_NotSourceXorDestination,
rasterop_NotSource,
rasterop_NotSourceAndDestination,
rasterop_SourceAndNotDestination
};
void qt_blend_color_argb_sse3dnow(int count, const QSpan *spans, void *userData)
{
qt_blend_color_argb_x86<QSSE3DNOWIntrinsics>(count, spans, userData,
(CompositionFunctionSolid*)qt_functionForModeSolid_SSE3DNOW);
}
// Stores `value` into `count` consecutive 32-bit words starting at `dest`
// by instantiating the shared SSE fill template with QSSE3DNOWIntrinsics.
void qt_memfill32_sse3dnow(quint32 *dest, quint32 value, int count)
{
    qt_memfill32_sse_template<QSSE3DNOWIntrinsics>(dest, value, count);
}
// 16-bit bitmap blit of the SSE + 3DNow! code path: a thin wrapper that
// passes every argument through to
// qt_bitmapblit16_sse_template<QSSE3DNOWIntrinsics>.
void qt_bitmapblit16_sse3dnow(QRasterBuffer *rasterBuffer, int x, int y,
                              quint32 color,
                              const uchar *src,
                              int width, int height, int stride)
{
    qt_bitmapblit16_sse_template<QSSE3DNOWIntrinsics>(rasterBuffer, x, y, color,
                                                      src, width, height, stride);
}
QT_END_NAMESPACE
#endif // QT_HAVE_3DNOW && QT_HAVE_SSE

View file

@ -1,182 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QDRAWHELPER_SSE_P_H
#define QDRAWHELPER_SSE_P_H
//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists purely as an
// implementation detail. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//
#include <qdrawhelper_mmx_p.h>
#ifdef QT_HAVE_SSE
#ifdef QT_LINUXBASE
// this is an evil hack - the posix_memalign declaration in LSB
// is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431
# define posix_memalign _lsb_hack_posix_memalign
# include <xmmintrin.h>
# undef posix_memalign
#else
# include <xmmintrin.h>
#endif
QT_BEGIN_NAMESPACE
// Fallback for toolchains whose <xmmintrin.h> does not define _MM_SHUFFLE:
// packs four selectors into one shuffle immediate, two bits apart, with fp3
// in the highest and fp0 in the lowest position.
#ifndef _MM_SHUFFLE
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
#endif
// Intrinsics policy for the SSE code path. It inherits everything from
// QMMXIntrinsics and provides alpha helpers built on _mm_shuffle_pi16.
struct QSSEIntrinsics : public QMMXIntrinsics
{
    // Copies the highest of the four 16-bit words of x into all four words.
    static inline m64 alpha(m64 x)
    {
        return _mm_shuffle_pi16 (x, _MM_SHUFFLE(3, 3, 3, 3));
    }

    // Widens the four bytes of x to 16-bit words by interleaving them with
    // mmx_0x0000, then copies the lowest word into all four words.
    static inline m64 _load_alpha(uint x, const m64 &mmx_0x0000)
    {
        const m64 widened = _mm_unpacklo_pi8(_mm_cvtsi32_si64(x), mmx_0x0000);
        return _mm_shuffle_pi16 (widened, _MM_SHUFFLE(0, 0, 0, 0));
    }
};
/*
    Fills `count` 32-bit words at `dest` with `value`.

    Fewer than seven words are written with plain stores (nothing happens
    for count <= 0). Longer runs are written two words at a time with 64-bit
    streaming stores, followed by one plain store if count is odd, and
    finished with MM::end().
*/
template <class MM>
inline void qt_memfill32_sse_template(quint32 *dest, quint32 value, int count)
{
    if (count < 7) {
        for (int i = 0; i < count; ++i)
            dest[i] = value;
        return;
    }

    __m64 *dst64 = reinterpret_cast<__m64*>(dest);
    const __m64 value64 = _mm_set_pi32(value, value);

    // count >= 7 here, so there are always at least three pairs to store
    const int pairs = count / 2;
    for (int i = 0; i < pairs; ++i)
        _mm_stream_pi(dst64++, value64);

    // odd trailing word
    if (count & 0x1)
        dest[count - 1] = value;

    MM::end();
}
/*
    Paints `color`, converted to 16 bits with qt_colorConvert, into a 16-bit
    raster buffer wherever the 1-bit-per-pixel bitmap `src` has a bit set,
    starting at (x, y); MM::end() is called once all rows are done.

    Each source byte covers eight pixels, most significant bit first, handled
    as two groups of four 16-bit pixels. `stride` is the distance in bytes
    between bitmap rows. The bits are expanded into a byte mask whose top
    bit is set for every pixel to paint, and _mm_maskmove_si64 then writes
    only those pixels. For width <= 4 only the upper four bits of the first
    byte of each row are looked at.
*/
template <class MM>
inline void qt_bitmapblit16_sse_template(QRasterBuffer *rasterBuffer,
                                         int x, int y,
                                         quint32 color,
                                         const uchar *src,
                                         int width, int height, int stride)
{
    const quint16 c = qt_colorConvert<quint16, quint32>(color, 0);
    quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
    const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16);
    const __m64 c64 = _mm_set1_pi16(c);
#ifdef Q_CC_MSVC
#  pragma warning(disable: 4309) // truncation of constant value
#endif
    // pixels 0..3 of a byte: isolate bit 0x80 >> n, then add enough to
    // carry it into the top bit of both bytes of lane n
    const __m64 maskmask1 = _mm_set_pi16(0x1010, 0x2020, 0x4040, 0x8080);
    const __m64 maskadd1 = _mm_set_pi16(0x7070, 0x6060, 0x4040, 0x0000);

    if (width <= 4) {
        for (; height--; dest += destStride, src += stride) {
            const quint8 bits = *src;
            if (!bits)
                continue;
            const __m64 picked = _m_pand(_mm_set1_pi8(bits), maskmask1);
            _mm_maskmove_si64(c64, _mm_add_pi16(picked, maskadd1), (char*)(dest));
        }
    } else {
        // pixels 4..7 of a byte, same scheme as above
        const __m64 maskmask2 = _mm_set_pi16(0x0101, 0x0202, 0x0404, 0x0808);
        const __m64 maskadd2 = _mm_set_pi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878);

        for (; height--; dest += destStride, src += stride) {
            for (int col = 0; col < width; col += 8) {
                const quint8 bits = src[col >> 3];
                if (!bits)
                    continue;
                const __m64 spread = _mm_set1_pi8(bits);

                const __m64 firstFour = _mm_add_pi16(_m_pand(spread, maskmask1), maskadd1);
                _mm_maskmove_si64(c64, firstFour, (char*)(dest + col));

                const __m64 lastFour = _mm_add_pi16(_m_pand(spread, maskmask2), maskadd2);
                _mm_maskmove_si64(c64, lastFour, (char*)(dest + col + 4));
            }
        }
    }

    MM::end();
}
QT_END_NAMESPACE
#endif // QT_HAVE_SSE
#endif // QDRAWHELPER_SSE_P_H

View file

@ -1,185 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <qdrawhelper_x86_p.h>
#ifdef QT_HAVE_SSSE3
#include <qdrawingprimitive_sse2_p.h>
QT_BEGIN_NAMESPACE
// Source-over blend of a single pixel: a zero source leaves dst alone, a
// source with an alpha byte of 0xff replaces dst, and anything else yields
// src + dst * (alpha of ~src).
inline static void blend_pixel(quint32 &dst, const quint32 src)
{
    if (src == 0)
        return;

    if (src >= 0xff000000) {
        dst = src;
        return;
    }

    dst = src + BYTE_MUL(dst, qAlpha(~src));
}
/* The palignr instruction takes its shift as an immediate operand, so we have
to generate the code for each of the different shifts (4, 8, 12). Checking the
alignment inside the loop is unfortunately way too slow.

BLENDING_LOOP(palignrOffset, length) expands to a loop handling four pixels
per iteration. Besides its two arguments it uses these names from the scope
it is expanded in: x, src, dst, minusOffsetToAlignSrcOn16Bytes,
srcVectorPrevLoaded, alphaMask, nullVector, alphaShuffleMask, one, colorMask
and half.

Each iteration does an aligned load of the next 16 bytes of src and combines
them with the previously loaded 16 bytes through _mm_alignr_epi8 to obtain
the four source pixels that belong to dst[x..x+3]. If all four alpha bytes
equal alphaMask the source pixels are stored as they are; otherwise, unless
all four alpha bytes are zero, dst becomes src + dst * (one - alpha). The
block loaded last is then kept in srcVectorPrevLoaded for the next iteration.
*/
#define BLENDING_LOOP(palignrOffset, length)\
for (; x-minusOffsetToAlignSrcOn16Bytes < length-7; x += 4) { \
const __m128i srcVectorLastLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\
const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \
const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
_mm_store_si128((__m128i *)&dst[x], srcVector); \
} else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
__m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \
alphaChannel = _mm_sub_epi16(one, alphaChannel); \
const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
__m128i destMultipliedByOneMinusAlpha; \
BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
_mm_store_si128((__m128i *)&dst[x], result); \
} \
srcVectorPrevLoaded = srcVectorLastLoaded;\
}
// BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask)
//
// Blends `length` ARGB32 pixels of src over dst using the per-pixel source alpha
// only (no constant alpha is involved in this macro).
// nullVector, half, one, colorMask and alphaMask are constant across the whole
// image/texture, and should be defined as:
//const __m128i nullVector = _mm_set1_epi32(0);
//const __m128i half = _mm_set1_epi16(0x80);
//const __m128i one = _mm_set1_epi16(0xff);
//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
//const __m128i alphaMask = _mm_set1_epi32(0xff000000);
//
// The computation being done is:
// result = s + d * (1-alpha)
// with shortcuts if fully opaque or fully transparent.
//
// Caveats:
//  - The expansion declares a local `x`; arguments must not refer to a
//    variable of that name.
//  - For an unaligned source the work is delegated to BLENDING_LOOP, which
//    refers to src, dst, nullVector, half, one, colorMask and alphaMask by
//    these literal names. Pass variables that carry exactly these names.
//  - The unaligned path starts with an aligned load at
//    &src[x - minusOffsetToAlignSrcOn16Bytes], i.e. up to three pixels before
//    src[x] inside the same 16-byte line.
#define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
    int x = 0; \
    \
    /* First, get dst aligned. */ \
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \
        blend_pixel((dst)[x], (src)[x]); \
    } \
    \
    /* Index (0..3) of src[x] inside its 16-byte line; 0 means src is aligned too. */ \
    const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&((src)[x])) >> 2) & 0x3;\
    /* pshufb mask turning every pixel 0xAARRGGBB into 0x00AA00AA (0xff selectors yield zero bytes). */ \
    const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\
    \
    if (!minusOffsetToAlignSrcOn16Bytes) {\
        /* src is aligned, usual algorithm but with aligned operations.\
           See the SSE2 version for more documentation on the algorithm itself. */\
        for (; x < (length) - 3; x += 4) { \
            const __m128i srcVector = _mm_load_si128((__m128i *)&(src)[x]); \
            const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
            if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
                /* all four pixels are opaque: plain copy */ \
                _mm_store_si128((__m128i *)&(dst)[x], srcVector); \
            } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
                /* at least one pixel has a non-zero alpha */ \
                __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \
                alphaChannel = _mm_sub_epi16(one, alphaChannel); \
                const __m128i dstVector = _mm_load_si128((__m128i *)&(dst)[x]); \
                __m128i destMultipliedByOneMinusAlpha; \
                BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
                const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
                _mm_store_si128((__m128i *)&(dst)[x], result); \
            } \
        } /* end for() */\
    } else if (((length) - x) >= 8) {\
        /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\
        __m128i srcVectorPrevLoaded = _mm_load_si128((__m128i *)&(src)[x - minusOffsetToAlignSrcOn16Bytes]);\
        const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\
        \
        /* palignr needs an immediate shift, so dispatch to one loop per byte offset. */ \
        switch (palignrOffset) {\
        case 4:\
            BLENDING_LOOP(4, (length))\
            break;\
        case 8:\
            BLENDING_LOOP(8, (length))\
            break;\
        case 12:\
            BLENDING_LOOP(12, (length))\
            break;\
        }\
    }\
    /* Scalar epilogue for whatever the vector loops left over. */ \
    for (; x < (length); ++x) \
        blend_pixel((dst)[x], (src)[x]); \
}
// Source-over blend of a w x h block of ARGB32 pixels from srcPixels onto
// destPixels. After every row, dbpl bytes are added to the destination pointer
// and sbpl bytes to the source pointer.
//
// const_alpha selects the code path:
//   256   -> per-pixel alpha only, SSSE3 row blender;
//   0     -> nothing is drawn;
//   other -> rescaled with (const_alpha * 255) >> 8 and applied through the
//            SSE2 constant-alpha row blender.
void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
                                     const uchar *srcPixels, int sbpl,
                                     int w, int h,
                                     int const_alpha)
{
    if (const_alpha == 0)
        return;

    // The row-blending macros refer to several of these locals by name
    // (src, dst, const_alpha and the vector constants), so the names matter.
    const quint32 *src = reinterpret_cast<const quint32 *>(srcPixels);
    quint32 *dst = reinterpret_cast<quint32 *>(destPixels);

    // Constants shared by both paths.
    const __m128i nullVector = _mm_setzero_si128();
    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i one = _mm_set1_epi16(0xff);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

    if (const_alpha == 256) {
        const __m128i alphaMask = _mm_set1_epi32(0xff000000);
        for (int y = 0; y < h; ++y) {
            BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask);
            dst = reinterpret_cast<quint32 *>(reinterpret_cast<uchar *>(dst) + dbpl);
            src = reinterpret_cast<const quint32 *>(reinterpret_cast<const uchar *>(src) + sbpl);
        }
        return;
    }

    // dest = (s + d * sia) * ca + d * cia
    //      = s * ca + d * (sia * ca + cia)
    //      = s * ca + d * (1 - sa*ca)
    const_alpha = (const_alpha * 255) >> 8;
    const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
    for (int y = 0; y < h; ++y) {
        BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
        dst = reinterpret_cast<quint32 *>(reinterpret_cast<uchar *>(dst) + dbpl);
        src = reinterpret_cast<const quint32 *>(reinterpret_cast<const uchar *>(src) + sbpl);
    }
}
QT_END_NAMESPACE
#endif // QT_HAVE_SSSE3

View file

@ -1,141 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QDRAWHELPER_X86_P_H
#define QDRAWHELPER_X86_P_H
//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists purely as an
// implementation detail. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//
#include <qdrawhelper_p.h>
QT_BEGIN_NAMESPACE
// Every entry below is a declaration only; the definitions are not part of
// this header. Each group is visible only when the matching QT_HAVE_* macro
// is defined.

// MMX: composition function tables and the solid-colour ARGB span blender.
#ifdef QT_HAVE_MMX
extern CompositionFunction qt_functionForMode_MMX[];
extern CompositionFunctionSolid qt_functionForModeSolid_MMX[];
void qt_blend_color_argb_mmx(int count, const QSpan *spans, void *userData);
#endif
// Note: the *_mmxext helpers are guarded by QT_HAVE_SSE, not by QT_HAVE_MMX.
#ifdef QT_HAVE_SSE
void qt_memfill32_mmxext(quint32 *dest, quint32 value, int count);
void qt_bitmapblit16_mmxext(QRasterBuffer *rasterBuffer, int x, int y,
quint32 color, const uchar *src,
int width, int height, int stride);
#endif
// 3DNow!: split into an MMX+3DNow! group and an SSE+3DNow! group.
#ifdef QT_HAVE_3DNOW
// Declared when MMX is available or when SSE is not available.
#if defined(QT_HAVE_MMX) || !defined(QT_HAVE_SSE)
extern CompositionFunction qt_functionForMode_MMX3DNOW[];
extern CompositionFunctionSolid qt_functionForModeSolid_MMX3DNOW[];
void qt_blend_color_argb_mmx3dnow(int count, const QSpan *spans,
void *userData);
#endif // QT_HAVE_MMX || !QT_HAVE_SSE
#ifdef QT_HAVE_SSE
extern CompositionFunction qt_functionForMode_SSE3DNOW[];
extern CompositionFunctionSolid qt_functionForModeSolid_SSE3DNOW[];
void qt_memfill32_sse3dnow(quint32 *dest, quint32 value, int count);
void qt_bitmapblit16_sse3dnow(QRasterBuffer *rasterBuffer, int x, int y,
quint32 color,
const uchar *src, int width, int height,
int stride);
void qt_blend_color_argb_sse3dnow(int count, const QSpan *spans,
void *userData);
#endif // QT_HAVE_SSE
#endif // QT_HAVE_3DNOW
// SSE: 32-bit fill, 16-bit bitmap blit, span blender and composition tables.
#ifdef QT_HAVE_SSE
void qt_memfill32_sse(quint32 *dest, quint32 value, int count);
void qt_bitmapblit16_sse(QRasterBuffer *rasterBuffer, int x, int y,
quint32 color,
const uchar *src, int width, int height, int stride);
void qt_blend_color_argb_sse(int count, const QSpan *spans, void *userData);
extern CompositionFunction qt_functionForMode_SSE[];
extern CompositionFunctionSolid qt_functionForModeSolid_SSE[];
#endif // QT_HAVE_SSE
// SSE2: 16/32-bit fills and bitmap blits, image-on-image blenders and
// composition tables.
#ifdef QT_HAVE_SSE2
void qt_memfill32_sse2(quint32 *dest, quint32 value, int count);
void qt_memfill16_sse2(quint16 *dest, quint16 value, int count);
void qt_bitmapblit32_sse2(QRasterBuffer *rasterBuffer, int x, int y,
quint32 color,
const uchar *src, int width, int height, int stride);
void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y,
quint32 color,
const uchar *src, int width, int height, int stride);
void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
extern CompositionFunction qt_functionForMode_onlySSE2[];
extern CompositionFunctionSolid qt_functionForModeSolid_onlySSE2[];
#endif // QT_HAVE_SSE2
// iWMMXt: span blender and composition tables.
#ifdef QT_HAVE_IWMMXT
void qt_blend_color_argb_iwmmxt(int count, const QSpan *spans, void *userData);
extern CompositionFunction qt_functionForMode_IWMMXT[];
extern CompositionFunctionSolid qt_functionForModeSolid_IWMMXT[];
#endif // QT_HAVE_IWMMXT
// NOTE(review): presumably the number of entries in each qt_functionForMode_*
// and qt_functionForModeSolid_* table declared above - confirm against the
// table definitions.
static const int numCompositionFunctions = 33;
QT_END_NAMESPACE
#endif // QDRAWHELPER_X86_P_H

View file

@ -1,241 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QDRAWINGPRIMITIVE_SSE2_P_H
#define QDRAWINGPRIMITIVE_SSE2_P_H
#include <qsimd_p.h>
#ifdef QT_HAVE_SSE2
//
// W A R N I N G
// -------------
//
// This file is not part of the Qt API. It exists purely as an
// implementation detail. This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//
QT_BEGIN_NAMESPACE
/*
 * BYTE_MUL_SSE2(result, pixelVector, alphaChannel, colorMask, half)
 *
 * Multiplies the four 8-bit components of every 32-bit pixel in pixelVector by
 * alphaChannel / 255 (approximated with shifts, see step 3) and stores the
 * repacked pixels in result.
 *
 * - Each 32-bit component of alphaChannel must be in the form 0x00AA00AA.
 * - colorMask must have 0x00ff00ff on each 32-bit component.
 * - half must have the value 128 (0x80) on each 16-bit component, since it is
 *   added with a 16-bit lane addition.
 *
 * result is only assigned by the last statement, so it may be the same
 * variable as pixelVector. The other arguments are expanded more than once:
 * pass plain variables, not expressions with side effects.
 */
#define BYTE_MUL_SSE2(result, pixelVector, alphaChannel, colorMask, half) \
{ \
/* 1. separate the colors in 2 vectors so each color is on 16 bits \
(in order to be multiplied by the alpha): \
each 32 bits of pixelVectorAG are in the form 0x00AA00GG \
each 32 bits of pixelVectorRB are in the form 0x00RR00BB */\
__m128i pixelVectorAG = _mm_srli_epi16(pixelVector, 8); \
__m128i pixelVectorRB = _mm_and_si128(pixelVector, colorMask); \
\
/* 2. multiply the vectors by the alpha channel */\
pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); \
pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); \
\
/* 3. divide by 255, that's the tricky part. \
we do it like for BYTE_MUL(), with bit shift: X/255 ~= (X + X/256 + rounding)/256 */ \
/** so first (X + X/256 + rounding) */\
pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); \
pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); \
pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); \
pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); \
\
/** second divide by 256 */\
pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); \
/** for AG, we could >> 8 to divide followed by << 8 to put the \
bytes in the correct position. By masking instead, we execute \
only one instruction */\
pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); \
\
/* 4. combine the 2 pairs of colors */ \
result = _mm_or_si128(pixelVectorAG, pixelVectorRB); \
}
/*
* Each 32bits components of alphaChannel must be in the form 0x00AA00AA
* oneMinusAlphaChannel must be 255 - alpha for each 32 bits component
* colorMask must have 0x00ff00ff on each 32 bits component
* half must have the value 128 (0x80) for each 32 bits compnent
*/
#define INTERPOLATE_PIXEL_255_SSE2(result, srcVector, dstVector, alphaChannel, oneMinusAlphaChannel, colorMask, half) { \
/* interpolate AG */\
__m128i srcVectorAG = _mm_srli_epi16(srcVector, 8); \
__m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); \
__m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, alphaChannel); \
__m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusAlphaChannel); \
__m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); \
finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); \
finalAG = _mm_add_epi16(finalAG, half); \
finalAG = _mm_andnot_si128(colorMask, finalAG); \
\
/* interpolate RB */\
__m128i srcVectorRB = _mm_and_si128(srcVector, colorMask); \
__m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); \
__m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, alphaChannel); \
__m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusAlphaChannel); \
__m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); \
finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); \
finalRB = _mm_add_epi16(finalRB, half); \
finalRB = _mm_srli_epi16(finalRB, 8); \
\
/* combine */\
result = _mm_or_si128(finalAG, finalRB); \
}
// BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask)
//
// Blends `length` ARGB32 pixels of src over dst using the per-pixel source alpha
// only (no constant alpha is involved in this macro).
// nullVector, half, one, colorMask and alphaMask are constant across the whole
// image/texture, and should be defined as:
//const __m128i nullVector = _mm_set1_epi32(0);
//const __m128i half = _mm_set1_epi16(0x80);
//const __m128i one = _mm_set1_epi16(0xff);
//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
//const __m128i alphaMask = _mm_set1_epi32(0xff000000);
//
// The computation being done is:
// result = s + d * (1-alpha)
// with shortcuts if fully opaque or fully transparent.
//
// The expansion declares the locals `x` and `s`; arguments must not refer to
// variables with these names. dst, src and length are parenthesized in the
// expansion, so expressions may be passed for them.
#define BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
    int x = 0; \
    \
    /* First, get dst aligned. */ \
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \
        uint s = (src)[x]; \
        if (s >= 0xff000000) \
            (dst)[x] = s; \
        else if (s != 0) \
            (dst)[x] = s + BYTE_MUL((dst)[x], qAlpha(~s)); \
    } \
    \
    /* Main loop, four pixels at a time: unaligned loads from src, aligned access to dst. */ \
    for (; x < (length) - 3; x += 4) { \
        const __m128i srcVector = _mm_loadu_si128((__m128i *)&(src)[x]); \
        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
            /* all opaque */ \
            _mm_store_si128((__m128i *)&(dst)[x], srcVector); \
        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
            /* not fully transparent */ \
            /* extract the alpha channel on 2 x 16 bits */ \
            /* so we have room for the multiplication */ \
            /* each 32 bits will be in the form 0x00AA00AA */ \
            /* with A being the 1 - alpha */ \
            __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
            alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
            \
            const __m128i dstVector = _mm_load_si128((__m128i *)&(dst)[x]); \
            __m128i destMultipliedByOneMinusAlpha; \
            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
            \
            /* result = s + d * (1-alpha) */\
            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
            _mm_store_si128((__m128i *)&(dst)[x], result); \
        } \
    } \
    /* Scalar epilogue for the last (length - x) pixels. */ \
    for (; x < (length); ++x) { \
        uint s = (src)[x]; \
        if (s >= 0xff000000) \
            (dst)[x] = s; \
        else if (s != 0) \
            (dst)[x] = s + BYTE_MUL((dst)[x], qAlpha(~s)); \
    } \
}
// BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector)
//
// Basically blend src over dst with the const alpha defined as constAlphaVector.
// nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as:
//const __m128i nullVector = _mm_set1_epi32(0);
//const __m128i half = _mm_set1_epi16(0x80);
//const __m128i one = _mm_set1_epi16(0xff);
//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
//
// The computation being done is:
// dest = (s + d * sia) * ca + d * cia
//      = s * ca + d * (sia * ca + cia)
//      = s * ca + d * (1 - sa*ca)
//
// NOTE: the scalar prologue and epilogue use a variable named `const_alpha`
// that is NOT a macro parameter; it has to exist in the expanding scope.
// Presumably it must hold the scalar value that was broadcast into
// constAlphaVector (e.g. _mm_set1_epi16(const_alpha)) - confirm at call sites.
//
// The expansion declares the locals `x` and `s`; arguments must not refer to
// variables with these names. dst, src and length are parenthesized in the
// expansion, so expressions may be passed for them.
#define BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector) \
{ \
    int x = 0; \
    \
    /* Scalar prologue (aligns dst, see ALIGNMENT_PROLOGUE_16BYTES). */ \
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \
        quint32 s = (src)[x]; \
        if (s != 0) { \
            s = BYTE_MUL(s, const_alpha); \
            (dst)[x] = s + BYTE_MUL((dst)[x], qAlpha(~s)); \
        } \
    } \
    \
    /* Main loop, four pixels at a time: unaligned loads from src, aligned access to dst. */ \
    for (; x < (length) - 3; x += 4) { \
        __m128i srcVector = _mm_loadu_si128((__m128i *)&(src)[x]); \
        /* skip the block when all four source pixels are 0 */ \
        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { \
            /* s = s * ca */ \
            BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half); \
            \
            /* 255 - alpha of the scaled source, as 0x00AA00AA on each 32 bits */ \
            __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
            alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
            \
            const __m128i dstVector = _mm_load_si128((__m128i *)&(dst)[x]); \
            __m128i destMultipliedByOneMinusAlpha; \
            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
            \
            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
            _mm_store_si128((__m128i *)&(dst)[x], result); \
        } \
    } \
    /* Scalar epilogue for the last (length - x) pixels. */ \
    for (; x < (length); ++x) { \
        quint32 s = (src)[x]; \
        if (s != 0) { \
            s = BYTE_MUL(s, const_alpha); \
            (dst)[x] = s + BYTE_MUL((dst)[x], qAlpha(~s)); \
        } \
    } \
}
QT_END_NAMESPACE
#endif // QT_HAVE_SSE2
#endif // QDRAWINGPRIMITIVE_SSE2_P_H

View file

@ -1,57 +0,0 @@
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QWMATRIX_H
#define QWMATRIX_H
#include <QtGui/qmatrix.h>
QT_BEGIN_HEADER
QT_BEGIN_NAMESPACE
QT_END_NAMESPACE
QT_END_HEADER
#endif // QWMATRIX_H

File diff suppressed because it is too large Load diff