mirror of
https://bitbucket.org/smil3y/katie.git
synced 2025-02-24 19:02:59 +00:00
Merge branch 'nosimd' of https://github.com/fluxer/katie
This commit is contained in:
commit
113c57c95b
35 changed files with 11 additions and 7971 deletions
|
@ -149,42 +149,6 @@ macro(KATIE_SETUP_OBJECT FORTARGET)
|
|||
endif()
|
||||
endmacro()
|
||||
|
||||
# Selects the SIMD-specific sources that can be built and appends them to the
# list variable named by SOURCESVAR in the caller's scope.
#
#   SOURCESVAR - name of the list variable to extend (via PARENT_SCOPE)
#   ARGN       - candidate source files
#
# The file name of every candidate is searched for each known instruction set
# name.  The search is a plain substring match, so one file can pick up
# several flags.  A flag is only applied when the corresponding
# KATIE_<SET>_RESULT variable is true.  Candidates that end up with no flags
# at all are neither configured nor added to SOURCESVAR.
function(KATIE_SETUP_SOURCES SOURCESVAR)
    set(acceptedsources)

    foreach(source ${ARGN})
        get_filename_component(sourcename ${source} NAME)
        set(simdflags)

        # TODO: sse4.1 and sse4.2 support, currently not needed
        foreach(flag 3dnow avx mmx sse sse2 sse3 ssse3 iwmmxt neon)
            string(TOUPPER "${flag}" upperflag)
            string(REGEX MATCH "${flag}" flagmatch ${sourcename})

            if("${flagmatch}" MATCHES "(iwmmxt|neon)" AND NOT "${KATIE_ARCHITECTURE}" STREQUAL "arm")
                # ARM-only instruction set requested on a non-ARM build: ignore it
                unset(flagmatch)
                katie_warning("The source file ${source} is ARM specifiec, make it conditional")
            elseif("${flagmatch}" STREQUAL "mmx" AND "${sourcename}" MATCHES "iwmmxt")
                # "mmx" is a substring of "iwmmxt" - false positive
                unset(flagmatch)
            endif()

            # translate the matched instruction set into its compiler option
            set(simdoption)
            if("${flagmatch}" STREQUAL "neon")
                set(simdoption "-mfpu=neon")
            elseif("${flagmatch}" STREQUAL "iwmmxt")
                set(simdoption "-mcpu=iwmmxt")
            elseif(flagmatch)
                set(simdoption "-m${flag}")
            endif()

            # only use the option when the instruction set check succeeded
            if(simdoption AND KATIE_${upperflag}_RESULT)
                set(simdflags "${simdflags} ${simdoption}")
            endif()
        endforeach()

        if(simdflags)
            set_source_files_properties(${source} PROPERTIES COMPILE_FLAGS "${simdflags}")
            list(APPEND acceptedsources ${source})
        endif()
    endforeach()

    if(acceptedsources)
        set(${SOURCESVAR} ${${SOURCESVAR}} ${acceptedsources} PARENT_SCOPE)
    endif()
endfunction()
|
||||
|
||||
# a function to change full installation paths to relative so that CPack
|
||||
# generators do not choke, still paths must contain a string of some sort - if
|
||||
# they are null even quoting them will not help and CMake will complain that
|
||||
|
|
|
@ -1,415 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include "qsimd_p.h"
|
||||
#include <QByteArray>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(Q_OS_WINCE)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#if defined(Q_OS_WIN64)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(Q_OS_LINUX) && defined(__arm__)
|
||||
#include "qcore_unix_p.h"
|
||||
|
||||
// the kernel header definitions for HWCAP_*
|
||||
// (the ones we need/may need anyway)
|
||||
|
||||
// copied from <asm/hwcap.h> (ARM)
|
||||
#define HWCAP_IWMMXT 512
|
||||
#define HWCAP_CRUNCH 1024
|
||||
#define HWCAP_THUMBEE 2048
|
||||
#define HWCAP_NEON 4096
|
||||
#define HWCAP_VFPv3 8192
|
||||
#define HWCAP_VFPv3D16 16384
|
||||
|
||||
// copied from <linux/auxvec.h>
|
||||
#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
|
||||
|
||||
#endif
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
#if defined (Q_OS_NACL)
|
||||
// Q_OS_NACL variant: performs no probing and always reports an empty
// feature set (0).
static inline uint detectProcessorFeatures()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#elif defined (Q_OS_WINCE)
|
||||
// Q_OS_WINCE variant: asks the OS through IsProcessorFeaturePresent().
//  - ARM builds report IWMMXT when the OS says it is present, otherwise 0.
//  - x86 builds report MMX and/or MMX3DNOW, each only when the matching
//    QT_HAVE_* macro is defined and the OS reports the feature.
//  - Any other architecture reports 0.
static inline uint detectProcessorFeatures()
{
#if defined (ARM)
    if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX))
        return IWMMXT;
    return 0;
#elif defined(_X86_)
    uint detected = 0;
#  if defined QT_HAVE_MMX
    if (IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE))
        detected |= MMX;
#  endif
#  if defined QT_HAVE_3DNOW
    if (IsProcessorFeaturePresent(PF_3DNOW_INSTRUCTIONS_AVAILABLE))
        detected |= MMX3DNOW;
#  endif
    return detected;
#else
    return 0;
#endif
}
|
||||
|
||||
#elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON)
|
||||
// ARM variant.
// On Linux the auxiliary vector in /proc/self/auxv is read in chunks and
// scanned as (type, value) pairs; the AT_HWCAP value supplies the IWMMXT and
// NEON bits.  Reading continues until a feature has been found or the file
// is exhausted.  If the file cannot be opened (or on other systems) the
// result comes from the compile-time macros instead.
static inline uint detectProcessorFeatures()
{
    uint detected = 0;

#if defined(Q_OS_LINUX)
    const int fd = ::qt_safe_open("/proc/self/auxv", O_RDONLY);
    if (fd != -1) {
        unsigned long entries[64];
        do {
            const int bytesRead = ::qt_safe_read(fd, (char *)entries, sizeof entries);
            if (bytesRead <= 0)
                break; // EOF or error

            const int entryCount = bytesRead / (sizeof entries[0]);
            for (int idx = 0; idx < entryCount; idx += 2) {
                if (entries[idx] != AT_HWCAP)
                    continue;
                const unsigned long hwcap = entries[idx + 1];
                if (hwcap & HWCAP_IWMMXT)
                    detected |= IWMMXT;
                if (hwcap & HWCAP_NEON)
                    detected |= NEON;
                break; // only the first AT_HWCAP entry of a chunk is examined
            }
        } while (detected == 0);

        ::qt_safe_close(fd);
        return detected;
    }
    // fall back if /proc/self/auxv wasn't found
#endif

#if defined(QT_HAVE_IWMMXT)
    // runtime detection only available when running as a privileged process
    detected = IWMMXT;
#elif defined(QT_ALWAYS_HAVE_NEON)
    detected = NEON;
#endif

    return detected;
}
|
||||
|
||||
#elif defined(__i386__) || defined(_M_IX86)
|
||||
// 32-bit x86 variant.
// First checks that CPUID is usable by trying to flip bit 0x00200000 of
// EFLAGS, then queries:
//   - leaf 1:          EDX -> result, ECX -> feature_result
//   - leaf 0x80000001: EDX -> extended_result (only when leaf 0x80000000
//                      reports a maximum extended leaf above 0x80000000)
// All three words stay 0 when CPUID is not available.  The collected bits
// are then translated into CPUFeatures flags.
static inline uint detectProcessorFeatures()
{
    unsigned int extended_result = 0;
    unsigned int feature_result = 0;
    uint result = 0;
    /* see p. 118 of amd64 instruction set manual Vol3 */
#if defined(Q_CC_GNU)
    long cpuid_supported, tmp1;
    asm ("pushf\n"
         "pop %0\n"
         "mov %0, %1\n"
         "xor $0x00200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "pop %0\n"
         "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
         : "=a" (cpuid_supported), "=r" (tmp1)
         );
    if (cpuid_supported) {
        // EBX is swapped with a scratch register around cpuid so that its
        // original value is back in place when the asm statement ends
        asm ("xchg %%ebx, %2\n"
             "cpuid\n"
             "xchg %%ebx, %2\n"
            : "=c" (feature_result), "=d" (result), "=&r" (tmp1)
            : "a" (1));

        asm ("xchg %%ebx, %1\n"
             "cpuid\n"
             "cmp $0x80000000, %%eax\n"
             "jnbe 1f\n"
             "xor %0, %0\n"
             "jmp 2f\n"
             "1:\n"
             "mov $0x80000001, %%eax\n"
             "cpuid\n"
             "2:\n"
             "xchg %%ebx, %1\n"
            : "=d" (extended_result), "=&r" (tmp1)
            : "a" (0x80000000)
            : "%ecx"
            );
    }

#elif defined (Q_OS_WIN)
    _asm {
        push eax
        push ebx
        push ecx
        push edx
        pushfd
        pop eax
        mov ebx, eax
        xor eax, 00200000h
        push eax
        popfd
        pushfd
        pop eax
        mov edx, 0
        xor eax, ebx
        jz skip

        mov eax, 1
        cpuid
        mov result, edx
        mov feature_result, ecx
    skip:
        pop edx
        pop ecx
        pop ebx
        pop eax
    }

    _asm {
        push eax
        push ebx
        push ecx
        push edx
        pushfd
        pop eax
        mov ebx, eax
        xor eax, 00200000h
        push eax
        popfd
        pushfd
        pop eax
        mov edx, 0
        xor eax, ebx
        jz skip2

        mov eax, 80000000h
        cpuid
        cmp eax, 80000000h
        jbe skip2
        mov eax, 80000001h
        cpuid
        mov extended_result, edx
    skip2:
        pop edx
        pop ecx
        pop ebx
        pop eax
    }
#endif

    // which bit of which CPUID word maps to which CPUFeatures flag
    struct CpuidBit {
        const unsigned int *word;
        unsigned int mask;
        uint feature;
    };
    const CpuidBit cpuidBits[] = {
        { &result,          1u << 15, CMOV },
        { &result,          1u << 23, MMX },
        { &extended_result, 1u << 22, MMXEXT },
        { &extended_result, 1u << 31, MMX3DNOW },
        { &extended_result, 1u << 30, MMX3DNOWEXT },
        { &result,          1u << 25, SSE },
        { &result,          1u << 26, SSE2 },
        { &feature_result,  1u,       SSE3 },
        { &feature_result,  1u << 9,  SSSE3 },
        { &feature_result,  1u << 19, SSE4_1 },
        { &feature_result,  1u << 20, SSE4_2 },
        { &feature_result,  1u << 28, AVX }
    };

    uint features = 0;
    const int bitCount = int(sizeof cpuidBits / sizeof cpuidBits[0]);
    for (int n = 0; n < bitCount; ++n) {
        if (*cpuidBits[n].word & cpuidBits[n].mask)
            features |= cpuidBits[n].feature;
    }

    return features;
}
|
||||
|
||||
#elif defined(__x86_64) || defined(Q_OS_WIN64)
|
||||
// x86-64 variant.
// MMX, SSE, SSE2 and CMOV are reported unconditionally; the remaining
// flags are derived from ECX of CPUID leaf 1.
static inline uint detectProcessorFeatures()
{
    uint feature_result = 0;

#if defined(Q_CC_GNU)
    quint64 tmp;
    // RBX is swapped with a scratch register around cpuid so that its
    // original value is back in place when the asm statement ends
    asm ("xchg %%rbx, %1\n"
         "cpuid\n"
         "xchg %%rbx, %1\n"
        : "=c" (feature_result), "=&r" (tmp)
        : "a" (1)
        : "%edx"
        );
#elif defined (Q_OS_WIN64)
    {
        int info[4];
        __cpuid(info, 1);
        feature_result = info[2];
    }
#endif

    // leaf 1 ECX bit -> CPUFeatures flag
    struct EcxBit {
        uint mask;
        uint feature;
    };
    static const EcxBit ecxBits[] = {
        { 1u,       SSE3 },
        { 1u << 9,  SSSE3 },
        { 1u << 19, SSE4_1 },
        { 1u << 20, SSE4_2 },
        { 1u << 28, AVX }
    };

    uint features = MMX|SSE|SSE2|CMOV;
    const int bitCount = int(sizeof ecxBits / sizeof ecxBits[0]);
    for (int n = 0; n < bitCount; ++n) {
        if (feature_result & ecxBits[n].mask)
            features |= ecxBits[n].feature;
    }

    return features;
}
|
||||
|
||||
#elif defined(__ia64__)
|
||||
// __ia64__ variant: performs no probing and reports the fixed set
// MMX | SSE | SSE2.
static inline uint detectProcessorFeatures()
|
||||
{
|
||||
return MMX|SSE|SSE2;
|
||||
}
|
||||
|
||||
#else
|
||||
// Fallback for every platform not handled by the preceding #if/#elif
// branches: reports an empty feature set (0).
static inline uint detectProcessorFeatures()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Use kdesdk/scripts/generate_string_table.pl to update the table below.
|
||||
* Here's the data (don't forget the ONE leading space):
|
||||
mmx
|
||||
mmxext
|
||||
mmx3dnow
|
||||
mmx3dnowext
|
||||
sse
|
||||
sse2
|
||||
cmov
|
||||
iwmmxt
|
||||
neon
|
||||
sse3
|
||||
ssse3
|
||||
sse4.1
|
||||
sse4.2
|
||||
avx
|
||||
*/
|
||||
|
||||
// begin generated
|
||||
static const char features_string[] =
|
||||
" mmx\0"
|
||||
" mmxext\0"
|
||||
" mmx3dnow\0"
|
||||
" mmx3dnowext\0"
|
||||
" sse\0"
|
||||
" sse2\0"
|
||||
" cmov\0"
|
||||
" iwmmxt\0"
|
||||
" neon\0"
|
||||
" sse3\0"
|
||||
" ssse3\0"
|
||||
" sse4.1\0"
|
||||
" sse4.2\0"
|
||||
" avx\0"
|
||||
"\0";
|
||||
|
||||
static const int features_indices[] = {
|
||||
0, 5, 13, 23, 36, 41, 47, 53,
|
||||
61, 67, 73, 80, 88, 96, -1
|
||||
};
|
||||
// end generated
|
||||
|
||||
const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]);
|
||||
|
||||
// Returns the CPUFeatures bit mask for the running processor.
//
// The mask is computed on the first call and kept in a function-local
// static; later calls return the stored value.
//
// The QT_NO_CPU_FEATURE environment variable can mask features out:
//   - "all" clears every feature;
//   - otherwise each entry of features_string (entries carry a leading
//     space, which is why a space is prepended to the value) that occurs
//     as a substring of the value has its bit cleared.
uint qDetectCPUFeatures()
{
    static QBasicAtomicInt cachedFeatures = Q_BASIC_ATOMIC_INITIALIZER(-1);

    if (cachedFeatures == -1) {
        uint detected = detectProcessorFeatures();

        QByteArray disabled = qgetenv("QT_NO_CPU_FEATURE");
        if (disabled == "all") {
            detected = 0;
        } else if (!disabled.isEmpty()) {
            disabled.prepend(' ');
            int bit = 0;
            while (bit < features_count) {
                const char *featureName = features_string + features_indices[bit];
                if (disabled.contains(featureName))
                    detected &= ~(1 << bit);
                ++bit;
            }
        }

        cachedFeatures = detected;
    }

    return cachedFeatures;
}
|
||||
|
||||
void qDumpCPUFeatures()
|
||||
{
|
||||
uint features = qDetectCPUFeatures();
|
||||
printf("Processor features: ");
|
||||
for (int i = 0; i < features_count; ++i) {
|
||||
if (features & (1 << i))
|
||||
printf("%s", features_string + features_indices[i]);
|
||||
}
|
||||
puts("");
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
|
@ -1,237 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef QSIMD_P_H
|
||||
#define QSIMD_P_H
|
||||
|
||||
#include <qglobal.h>
|
||||
|
||||
|
||||
QT_BEGIN_HEADER
|
||||
|
||||
|
||||
#if defined(QT_NO_MAC_XARCH) || (defined(Q_OS_DARWIN) && (defined(__ppc__) || defined(__ppc64__)))
|
||||
// Disable MMX and SSE on Mac/PPC builds, or if the compiler
|
||||
// does not support -Xarch argument passing
|
||||
#undef QT_HAVE_SSE
|
||||
#undef QT_HAVE_SSE2
|
||||
#undef QT_HAVE_SSE3
|
||||
#undef QT_HAVE_SSSE3
|
||||
#undef QT_HAVE_SSE4_1
|
||||
#undef QT_HAVE_SSE4_2
|
||||
#undef QT_HAVE_AVX
|
||||
#undef QT_HAVE_3DNOW
|
||||
#undef QT_HAVE_MMX
|
||||
#endif
|
||||
|
||||
// SSE intrinsics
|
||||
#if defined(QT_HAVE_SSE2) && (defined(__SSE2__) || defined(Q_CC_MSVC))
|
||||
#if defined(QT_LINUXBASE)
|
||||
/// this is an evil hack - the posix_memalign declaration in LSB
|
||||
/// is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431
|
||||
# define posix_memalign _lsb_hack_posix_memalign
|
||||
# include <emmintrin.h>
|
||||
# undef posix_memalign
|
||||
#else
|
||||
# ifdef Q_CC_MINGW
|
||||
# include <windows.h>
|
||||
# endif
|
||||
# include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
// SSE3 intrinsics
|
||||
#if defined(QT_HAVE_SSE3) && (defined(__SSE3__) || defined(Q_CC_MSVC))
|
||||
#include <pmmintrin.h>
|
||||
#endif
|
||||
|
||||
// SSSE3 intrinsics
|
||||
#if defined(QT_HAVE_SSSE3) && (defined(__SSSE3__) || defined(Q_CC_MSVC))
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
// SSE4.1 intrinsics
|
||||
#if defined(QT_HAVE_SSE4_1) && (defined(__SSE4_1__) || defined(Q_CC_MSVC))
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
// SSE4.2 intrinsics
|
||||
#if defined(QT_HAVE_SSE4_2) && (defined(__SSE4_2__) || defined(Q_CC_MSVC))
|
||||
#include <nmmintrin.h>
|
||||
|
||||
// Add missing intrinsics in some compilers (e.g. llvm-gcc)
|
||||
#ifndef _SIDD_UBYTE_OPS
|
||||
#define _SIDD_UBYTE_OPS 0x00
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_UWORD_OPS
|
||||
#define _SIDD_UWORD_OPS 0x01
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_SBYTE_OPS
|
||||
#define _SIDD_SBYTE_OPS 0x02
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_SWORD_OPS
|
||||
#define _SIDD_SWORD_OPS 0x03
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_CMP_EQUAL_ANY
|
||||
#define _SIDD_CMP_EQUAL_ANY 0x00
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_CMP_RANGES
|
||||
#define _SIDD_CMP_RANGES 0x04
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_CMP_EQUAL_EACH
|
||||
#define _SIDD_CMP_EQUAL_EACH 0x08
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_CMP_EQUAL_ORDERED
|
||||
#define _SIDD_CMP_EQUAL_ORDERED 0x0c
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_POSITIVE_POLARITY
|
||||
#define _SIDD_POSITIVE_POLARITY 0x00
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_NEGATIVE_POLARITY
|
||||
#define _SIDD_NEGATIVE_POLARITY 0x10
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_MASKED_POSITIVE_POLARITY
|
||||
#define _SIDD_MASKED_POSITIVE_POLARITY 0x20
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_MASKED_NEGATIVE_POLARITY
|
||||
#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_LEAST_SIGNIFICANT
|
||||
#define _SIDD_LEAST_SIGNIFICANT 0x00
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_MOST_SIGNIFICANT
|
||||
#define _SIDD_MOST_SIGNIFICANT 0x40
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_BIT_MASK
|
||||
#define _SIDD_BIT_MASK 0x00
|
||||
#endif
|
||||
|
||||
#ifndef _SIDD_UNIT_MASK
|
||||
#define _SIDD_UNIT_MASK 0x40
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// AVX intrinsics
|
||||
#if defined(QT_HAVE_AVX) && (defined(__AVX__) || defined(Q_CC_MSVC))
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(QT_BOOTSTRAPPED) && (!defined(Q_CC_MSVC) || (defined(_M_X64) || _M_IX86_FP == 2))
|
||||
#define QT_ALWAYS_HAVE_SSE2
|
||||
#endif
|
||||
#endif // defined(QT_HAVE_SSE2) && (defined(__SSE2__) || defined(Q_CC_MSVC))
|
||||
|
||||
// NEON intrinsics
|
||||
#if defined __ARM_NEON__
|
||||
#define QT_ALWAYS_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
|
||||
// IWMMXT intrinsics
|
||||
#if defined(QT_HAVE_IWMMXT)
|
||||
#include <mmintrin.h>
|
||||
#if defined(Q_OS_WINCE)
|
||||
# include "qplatformdefs.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(QT_HAVE_IWMMXT)
|
||||
#if !defined(__IWMMXT__) && !defined(Q_OS_WINCE)
|
||||
# include <xmmintrin.h>
|
||||
#elif defined(Q_OS_WINCE_STD) && defined(_X86_)
|
||||
# pragma warning(disable: 4391)
|
||||
# include <xmmintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// 3D now intrinsics
|
||||
#if defined(QT_HAVE_3DNOW) && (defined(__3dNOW__) || defined(Q_CC_MSVC))
|
||||
#include <mm3dnow.h>
|
||||
#endif
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
|
||||
// Bit flags describing the SIMD / instruction-set features of the CPU.
// Values are single bits so that they can be OR-ed into one mask.
enum CPUFeatures {
    None        = 0,
    MMX         = 1 << 0,
    MMXEXT      = 1 << 1,
    MMX3DNOW    = 1 << 2,
    MMX3DNOWEXT = 1 << 3,
    SSE         = 1 << 4,
    SSE2        = 1 << 5,
    CMOV        = 1 << 6,
    IWMMXT      = 1 << 7,
    NEON        = 1 << 8,
    SSE3        = 1 << 9,
    SSSE3       = 1 << 10,
    SSE4_1      = 1 << 11,
    SSE4_2      = 1 << 12,
    AVX         = 1 << 13
};
|
||||
|
||||
Q_CORE_EXPORT uint qDetectCPUFeatures();
|
||||
|
||||
|
||||
// Expands to a `for` loop header (the caller supplies the body) that
// advances `i` while it is below both `length` and
// (4 - ((address of ptr >> 2) & 3)) & 3, i.e. at most 3 iterations.
// That bound is the number of 4-byte steps from `ptr` up to the next
// 16-byte boundary.
// NOTE(review): this presumably expects `ptr` to address 4-byte elements
// and `i` to be an int the caller has already initialised - confirm
// against the call sites.
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
|
||||
for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
QT_END_HEADER
|
||||
|
||||
#endif // QSIMD_P_H
|
|
@ -46,7 +46,6 @@
|
|||
#include <qtextcodec.h>
|
||||
#endif
|
||||
#include <qutfcodec_p.h>
|
||||
#include "qsimd_p.h"
|
||||
#include <qdatastream.h>
|
||||
#include <qlist.h>
|
||||
#include "qlocale.h"
|
||||
|
@ -3535,61 +3534,6 @@ bool QString::endsWith(const QChar &c, Qt::CaseSensitivity cs) const
|
|||
Use toLocal8Bit() instead.
|
||||
*/
|
||||
|
||||
#if defined(QT_ALWAYS_HAVE_SSE2)
|
||||
// Takes eight 16-bit code units and returns them with every unit above
// 0xFF (i.e. not representable in Latin-1) replaced by '?'.
static inline __m128i mergeQuestionMarks(__m128i chunk)
{
    const __m128i replacement = _mm_set1_epi16('?');

    // Step 1: build a mask that is all-ones in every 16-bit lane holding a
    // value in the range 0x0100-0xFFFF.
#if defined(__SSE4_2__)
    // Note on the operand order of _mm_cmpestrm: the MSDN documentation
    // (http://technet.microsoft.com/en-us/library/bb514080.aspx) describes
    // the range search as "for each character c in a, determine whether
    // b0 <= c <= b1 or b2 <= c <= b3".  However, all examples, including
    // Intel's
    // (http://software.intel.com/en-us/articles/xml-parsing-accelerator-with-intel-streaming-simd-extensions-4-intel-sse4/),
    // pass the range to be searched first.  Disassembly and
    // instruction-level debugging with GCC and ICC show that this is the
    // right thing; inverting the arguments causes test failures.
    const int rangeMode = _SIDD_UWORD_OPS | _SIDD_CMP_RANGES | _SIDD_UNIT_MASK;
    const __m128i nonLatin1Range = _mm_cvtsi32_si128(0xffff0100);
    const __m128i nonLatin1 = _mm_cmpestrm(nonLatin1Range, 2, chunk, 8, rangeMode);
#else
    // SSE has no unsigned 16-bit comparison, so both operands are shifted
    // by 0x8000 and compared as signed values instead.
    const __m128i signBias = _mm_set1_epi16(short(0x8000));
    const __m128i biasedLimit = _mm_set1_epi16(short(0xff + 0x8000));
    const __m128i nonLatin1 = _mm_cmpgt_epi16(_mm_add_epi16(chunk, signBias), biasedLimit);
#endif

    // Step 2: put '?' into the masked lanes and keep the others.
#if defined(__SSE4_2__) || defined(__SSE4_1__)
    return _mm_blendv_epi8(chunk, replacement, nonLatin1);
#else
    // lanes within the limit keep their value, the others become zero
    const __m128i keptLanes = _mm_andnot_si128(nonLatin1, chunk);
    // '?' in the off-limit lanes, zero elsewhere
    const __m128i replacedLanes = _mm_and_si128(nonLatin1, replacement);
    return _mm_or_si128(keptLanes, replacedLanes);
#endif
}
|
||||
#endif
|
||||
|
||||
static QByteArray toLatin1_helper(const QChar *data, int length)
|
||||
{
|
||||
QByteArray ba;
|
||||
|
@ -3597,51 +3541,6 @@ static QByteArray toLatin1_helper(const QChar *data, int length)
|
|||
ba.resize(length);
|
||||
const ushort *src = reinterpret_cast<const ushort *>(data);
|
||||
uchar *dst = (uchar*) ba.data();
|
||||
#if defined(QT_ALWAYS_HAVE_SSE2)
|
||||
if (length >= 16) {
|
||||
const int chunkCount = length >> 4; // divided by 16
|
||||
|
||||
for (int i = 0; i < chunkCount; ++i) {
|
||||
__m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load
|
||||
chunk1 = mergeQuestionMarks(chunk1);
|
||||
src += 8;
|
||||
|
||||
__m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load
|
||||
chunk2 = mergeQuestionMarks(chunk2);
|
||||
src += 8;
|
||||
|
||||
// pack the two vector to 16 x 8bits elements
|
||||
const __m128i result = _mm_packus_epi16(chunk1, chunk2);
|
||||
|
||||
_mm_storeu_si128((__m128i*)dst, result); // store
|
||||
dst += 16;
|
||||
}
|
||||
length = length % 16;
|
||||
}
|
||||
#elif defined(QT_ALWAYS_HAVE_NEON)
|
||||
// Refer to the documentation of the SSE2 implementation
|
||||
// this uses exactly the same method as for SSE except:
|
||||
// 1) neon has unsigned comparison
|
||||
// 2) packing is done to 64 bits (8 x 8bits component).
|
||||
if (length >= 16) {
|
||||
const int chunkCount = length >> 3; // divided by 8
|
||||
const uint16x8_t questionMark = vdupq_n_u16('?'); // set
|
||||
const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
|
||||
for (int i = 0; i < chunkCount; ++i) {
|
||||
uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
|
||||
src += 8;
|
||||
|
||||
const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
|
||||
const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
|
||||
const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
|
||||
chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
|
||||
const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
|
||||
vst1_u8(dst, result); // store
|
||||
dst += 8;
|
||||
}
|
||||
length = length % 8;
|
||||
}
|
||||
#endif
|
||||
while (length--) {
|
||||
*dst++ = (*src>0xff) ? '?' : (uchar) *src;
|
||||
++src;
|
||||
|
@ -3783,31 +3682,6 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
|
|||
d->data = d->array;
|
||||
d->array[size] = '\0';
|
||||
ushort *dst = d->data;
|
||||
/* SIMD:
|
||||
* Unpacking with SSE has been shown to improve performance on recent CPUs
|
||||
* The same method gives no improvement with NEON.
|
||||
*/
|
||||
#if defined(QT_ALWAYS_HAVE_SSE2)
|
||||
if (size >= 16) {
|
||||
int chunkCount = size >> 4; // divided by 16
|
||||
const __m128i nullMask = _mm_set1_epi32(0);
|
||||
for (int i = 0; i < chunkCount; ++i) {
|
||||
const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load
|
||||
str += 16;
|
||||
|
||||
// unpack the first 8 bytes, padding with zeros
|
||||
const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
|
||||
_mm_storeu_si128((__m128i*)dst, firstHalf); // store
|
||||
dst += 8;
|
||||
|
||||
// unpack the last 8 bytes, padding with zeros
|
||||
const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
|
||||
_mm_storeu_si128((__m128i*)dst, secondHalf); // store
|
||||
dst += 8;
|
||||
}
|
||||
size = size % 16;
|
||||
}
|
||||
#endif
|
||||
while (size--)
|
||||
*dst++ = (uchar)*str++;
|
||||
}
|
||||
|
|
|
@ -37,7 +37,6 @@ set(CORE_HEADERS
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsharedpointer.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsharedpointer_impl.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qset.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsimd_p.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsize.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qstack.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qstring.h
|
||||
|
@ -80,7 +79,6 @@ set(CORE_SOURCES
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qregexp.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qshareddata.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsharedpointer.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsimd.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qsize.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qstring.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tools/qstringbuilder.cpp
|
||||
|
|
|
@ -61,13 +61,6 @@ set(GUI_SOURCES
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/image/qgifhandler.cpp
|
||||
)
|
||||
|
||||
# Offer the instruction-set specific image converters to the build.
# katie_setup_sources() receives the name of the list to extend (GUI_SOURCES)
# followed by the candidate files.
katie_setup_sources(
|
||||
GUI_SOURCES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image/qimage_neon.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image/qimage_sse2.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/image/qimage_ssse3.cpp
|
||||
)
|
||||
|
||||
if(WITH_PNG)
|
||||
set(GUI_HEADERS
|
||||
${GUI_HEADERS}
|
||||
|
|
|
@ -57,7 +57,6 @@
|
|||
#include <qdrawhelper_p.h>
|
||||
#include <qmemrotate_p.h>
|
||||
#include <qpixmapdata_p.h>
|
||||
#include <qsimd_p.h>
|
||||
|
||||
#include <qhash.h>
|
||||
|
||||
|
@ -3374,35 +3373,6 @@ static InPlace_Image_Converter inplace_converter_map[QImage::NImageFormats][QIma
|
|||
} // Format_ARGB4444_Premultiplied
|
||||
};
|
||||
|
||||
void qInitImageConversions()
|
||||
{
|
||||
const uint features = qDetectCPUFeatures();
|
||||
Q_UNUSED(features);
|
||||
|
||||
#ifdef QT_HAVE_SSE2
|
||||
if (features & SSE2) {
|
||||
extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags);
|
||||
inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_sse2;
|
||||
}
|
||||
#endif
|
||||
#ifdef QT_HAVE_SSSE3
|
||||
if (features & SSSE3) {
|
||||
extern void convert_RGB888_to_RGB32_ssse3(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags);
|
||||
converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_ssse3;
|
||||
converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_ssse3;
|
||||
converter_map[QImage::Format_RGB888][QImage::Format_ARGB32_Premultiplied] = convert_RGB888_to_RGB32_ssse3;
|
||||
}
|
||||
#endif
|
||||
#ifdef QT_HAVE_NEON
|
||||
if (features & NEON) {
|
||||
extern void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags);
|
||||
converter_map[QImage::Format_RGB888][QImage::Format_RGB32] = convert_RGB888_to_RGB32_neon;
|
||||
converter_map[QImage::Format_RGB888][QImage::Format_ARGB32] = convert_RGB888_to_RGB32_neon;
|
||||
converter_map[QImage::Format_RGB888][QImage::Format_ARGB32_Premultiplied] = convert_RGB888_to_RGB32_neon;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void qGamma_correct_back_to_linear_cs(QImage *image)
|
||||
{
|
||||
extern uchar qt_pow_rgb_gamma[256];
|
||||
|
|
|
@ -1,116 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include <qimage.h>
|
||||
#include <qimage_p.h>
|
||||
#include <qsimd_p.h>
|
||||
|
||||
#ifdef QT_HAVE_NEON
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_neon(quint32 *dst, const uchar *src, int len)
|
||||
{
|
||||
if (!len)
|
||||
return;
|
||||
|
||||
const quint32 *const end = dst + len;
|
||||
|
||||
// align dst on 64 bits
|
||||
const int offsetToAlignOn8Bytes = (reinterpret_cast<quintptr>(dst) >> 2) & 0x1;
|
||||
for (int i = 0; i < offsetToAlignOn8Bytes; ++i) {
|
||||
*dst++ = qRgb(src[0], src[1], src[2]);
|
||||
src += 3;
|
||||
}
|
||||
|
||||
if ((len - offsetToAlignOn8Bytes) >= 8) {
|
||||
const quint32 *const simdEnd = end - 7;
|
||||
register uint8x8_t fullVector asm ("d3") = vdup_n_u8(0xff);
|
||||
do {
|
||||
#if Q_BYTE_ORDER == Q_BIG_ENDIAN
|
||||
asm volatile (
|
||||
"vld3.8 { d4, d5, d6 }, [%[SRC]] !\n\t"
|
||||
"vst4.8 { d3, d4, d5, d6 }, [%[DST],:64] !\n\t"
|
||||
: [DST]"+r" (dst), [SRC]"+r" (src)
|
||||
: "w"(fullVector)
|
||||
: "memory", "d4", "d5", "d6"
|
||||
);
|
||||
#else
|
||||
asm volatile (
|
||||
"vld3.8 { d0, d1, d2 }, [%[SRC]] !\n\t"
|
||||
"vswp d0, d2\n\t"
|
||||
"vst4.8 { d0, d1, d2, d3 }, [%[DST],:64] !\n\t"
|
||||
: [DST]"+r" (dst), [SRC]"+r" (src)
|
||||
: "w"(fullVector)
|
||||
: "memory", "d0", "d1", "d2"
|
||||
);
|
||||
#endif
|
||||
} while (dst < simdEnd);
|
||||
}
|
||||
|
||||
while (dst != end) {
|
||||
*dst++ = qRgb(src[0], src[1], src[2]);
|
||||
src += 3;
|
||||
}
|
||||
}
|
||||
|
||||
void convert_RGB888_to_RGB32_neon(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
|
||||
{
|
||||
Q_ASSERT(src->format == QImage::Format_RGB888);
|
||||
Q_ASSERT(dest->format == QImage::Format_RGB32 || dest->format == QImage::Format_ARGB32 || dest->format == QImage::Format_ARGB32_Premultiplied);
|
||||
Q_ASSERT(src->width == dest->width);
|
||||
Q_ASSERT(src->height == dest->height);
|
||||
|
||||
const uchar *src_data = (uchar *) src->data;
|
||||
quint32 *dest_data = (quint32 *) dest->data;
|
||||
|
||||
for (int i = 0; i < src->height; ++i) {
|
||||
qt_convert_rgb888_to_rgb32_neon(dest_data, src_data, src->width);
|
||||
src_data += src->bytes_per_line;
|
||||
dest_data = (quint32 *)((uchar*)dest_data + dest->bytes_per_line);
|
||||
}
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_HAVE_NEON
|
||||
|
||||
|
|
@ -107,7 +107,6 @@ struct Q_GUI_EXPORT QImageData { // internal image data
|
|||
QPaintEngine *paintEngine;
|
||||
};
|
||||
|
||||
void qInitImageConversions();
|
||||
Q_GUI_EXPORT void qGamma_correct_back_to_linear_cs(QImage *image);
|
||||
|
||||
inline int qt_depthForFormat(QImage::Format format)
|
||||
|
|
|
@ -1,111 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include "qimage.h"
|
||||
#include <qimage_p.h>
|
||||
#include <qsimd_p.h>
|
||||
#include <qdrawhelper_p.h>
|
||||
#include <qdrawingprimitive_sse2_p.h>
|
||||
|
||||
#ifdef QT_HAVE_SSE2
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags)
|
||||
{
|
||||
Q_ASSERT(data->format == QImage::Format_ARGB32);
|
||||
|
||||
// extra pixels on each line
|
||||
const int spare = data->width & 3;
|
||||
// width in pixels of the pad at the end of each line
|
||||
const int pad = (data->bytes_per_line >> 2) - data->width;
|
||||
const int iter = data->width >> 2;
|
||||
int height = data->height;
|
||||
|
||||
const __m128i alphaMask = _mm_set1_epi32(0xff000000);
|
||||
const __m128i nullVector = _mm_setzero_si128();
|
||||
const __m128i half = _mm_set1_epi16(0x80);
|
||||
const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
|
||||
|
||||
__m128i *d = reinterpret_cast<__m128i*>(data->data);
|
||||
while (height--) {
|
||||
const __m128i *end = d + iter;
|
||||
|
||||
for (; d != end; ++d) {
|
||||
const __m128i srcVector = _mm_loadu_si128(d);
|
||||
const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask);
|
||||
if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) {
|
||||
// opaque, data is unchanged
|
||||
} else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) == 0xffff) {
|
||||
// fully transparent
|
||||
_mm_storeu_si128(d, nullVector);
|
||||
} else {
|
||||
__m128i alphaChannel = _mm_srli_epi32(srcVector, 24);
|
||||
alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16));
|
||||
|
||||
__m128i result;
|
||||
BYTE_MUL_SSE2(result, srcVector, alphaChannel, colorMask, half);
|
||||
result = _mm_or_si128(_mm_andnot_si128(alphaMask, result), srcVectorAlpha);
|
||||
_mm_storeu_si128(d, result);
|
||||
}
|
||||
}
|
||||
|
||||
QRgb *p = reinterpret_cast<QRgb*>(d);
|
||||
QRgb *pe = p+spare;
|
||||
for (; p != pe; ++p) {
|
||||
if (*p < 0x00ffffff)
|
||||
*p = 0;
|
||||
else if (*p < 0xff000000)
|
||||
*p = PREMUL(*p);
|
||||
}
|
||||
|
||||
d = reinterpret_cast<__m128i*>(p+pad);
|
||||
}
|
||||
|
||||
data->format = QImage::Format_ARGB32_Premultiplied;
|
||||
return true;
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_HAVE_SSE2
|
||||
|
||||
|
|
@ -1,151 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include <qimage.h>
|
||||
#include <qimage_p.h>
|
||||
#include <qsimd_p.h>
|
||||
|
||||
#ifdef QT_HAVE_SSSE3
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
// Convert a scanline of RGB888 (src) to RGB32 (dst)
|
||||
// src must be at least len * 3 bytes
|
||||
// dst must be at least len * 4 bytes
|
||||
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len)
|
||||
{
|
||||
quint32 *const end = dst + len;
|
||||
|
||||
// Prologue, align dst to 16 bytes. The alignment is done on dst because it has 4 store()
|
||||
// for each 3 load() of src.
|
||||
const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;
|
||||
const int prologLength = qMin(len, offsetToAlignOn16Bytes);
|
||||
|
||||
for (int i = 0; i < prologLength; ++i) {
|
||||
*dst++ = qRgb(src[0], src[1], src[2]);
|
||||
src += 3;
|
||||
}
|
||||
|
||||
// Mask the 4 first colors of the RGB888 vector
|
||||
const __m128i shuffleMask = _mm_set_epi8(char(0xff), 9, 10, 11, char(0xff), 6, 7, 8, char(0xff), 3, 4, 5, char(0xff), 0, 1, 2);
|
||||
|
||||
// Mask the 4 last colors of a RGB888 vector with an offset of 1 (so the last 3 bytes are RGB)
|
||||
const __m128i shuffleMaskEnd = _mm_set_epi8(char(0xff), 13, 14, 15, char(0xff), 10, 11, 12, char(0xff), 7, 8, 9, char(0xff), 4, 5, 6);
|
||||
|
||||
// Mask to have alpha = 0xff
|
||||
const __m128i alphaMask = _mm_set1_epi32(0xff000000);
|
||||
|
||||
__m128i *inVectorPtr = (__m128i *)src;
|
||||
__m128i *dstVectorPtr = (__m128i *)dst;
|
||||
|
||||
const int simdRoundCount = (len - prologLength) / 16; // one iteration in the loop converts 16 pixels
|
||||
for (int i = 0; i < simdRoundCount; ++i) {
|
||||
/*
|
||||
RGB888 has 5 pixels per vector, + 1 byte from the next pixel. The idea here is
|
||||
to load vectors of RGB888 and use palignr to select a vector out of two vectors.
|
||||
|
||||
After 3 loads of RGB888 and 3 stores of RGB32, we have 4 pixels left in the last
|
||||
vector of RGB888, we can mask it directly to get a last store of RGB32. After that,
|
||||
the first next byte is a R, and we can loop for the next 16 pixels.
|
||||
|
||||
The conversion itself is done with a byte permutation (pshufb).
|
||||
*/
|
||||
__m128i firstSrcVector = _mm_lddqu_si128(inVectorPtr);
|
||||
__m128i outputVector = _mm_shuffle_epi8(firstSrcVector, shuffleMask);
|
||||
_mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask));
|
||||
++inVectorPtr;
|
||||
++dstVectorPtr;
|
||||
|
||||
// There are 4 unused bytes left in firstSrcVector, we need to load the next 16 bytes
|
||||
// and load the next input with palignr
|
||||
__m128i secondSrcVector = _mm_lddqu_si128(inVectorPtr);
|
||||
__m128i srcVector = _mm_alignr_epi8(secondSrcVector, firstSrcVector, 12);
|
||||
outputVector = _mm_shuffle_epi8(srcVector, shuffleMask);
|
||||
_mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask));
|
||||
++inVectorPtr;
|
||||
++dstVectorPtr;
|
||||
firstSrcVector = secondSrcVector;
|
||||
|
||||
// We now have 8 unused bytes left in firstSrcVector
|
||||
secondSrcVector = _mm_lddqu_si128(inVectorPtr);
|
||||
srcVector = _mm_alignr_epi8(secondSrcVector, firstSrcVector, 8);
|
||||
outputVector = _mm_shuffle_epi8(srcVector, shuffleMask);
|
||||
_mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask));
|
||||
++inVectorPtr;
|
||||
++dstVectorPtr;
|
||||
|
||||
// There are now 12 unused bytes in secondSrcVector.
|
||||
// We can mask them directly, almost there.
|
||||
outputVector = _mm_shuffle_epi8(secondSrcVector, shuffleMaskEnd);
|
||||
_mm_store_si128(dstVectorPtr, _mm_or_si128(outputVector, alphaMask));
|
||||
++dstVectorPtr;
|
||||
}
|
||||
src = (uchar *)inVectorPtr;
|
||||
dst = (quint32 *)dstVectorPtr;
|
||||
|
||||
while (dst != end) {
|
||||
*dst++ = qRgb(src[0], src[1], src[2]);
|
||||
src += 3;
|
||||
}
|
||||
}
|
||||
|
||||
void convert_RGB888_to_RGB32_ssse3(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
|
||||
{
|
||||
Q_ASSERT(src->format == QImage::Format_RGB888);
|
||||
Q_ASSERT(dest->format == QImage::Format_RGB32 || dest->format == QImage::Format_ARGB32 || dest->format == QImage::Format_ARGB32_Premultiplied);
|
||||
Q_ASSERT(src->width == dest->width);
|
||||
Q_ASSERT(src->height == dest->height);
|
||||
|
||||
const uchar *src_data = (uchar *) src->data;
|
||||
quint32 *dest_data = (quint32 *) dest->data;
|
||||
|
||||
for (int i = 0; i < src->height; ++i) {
|
||||
qt_convert_rgb888_to_rgb32_ssse3(dest_data, src_data, src->width);
|
||||
src_data += src->bytes_per_line;
|
||||
dest_data = (quint32 *)((uchar*)dest_data + dest->bytes_per_line);
|
||||
}
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_HAVE_SSSE3
|
||||
|
||||
|
|
@ -45,7 +45,6 @@
|
|||
#include <qvariant.h>
|
||||
#include <qvector.h>
|
||||
#include <qbuffer.h>
|
||||
#include <qsimd_p.h>
|
||||
|
||||
#include <stdio.h> // jpeglib needs this to be pre-included
|
||||
#include <setjmp.h>
|
||||
|
@ -779,22 +778,6 @@ bool QJpegHandlerPrivate::read(QImage *image)
|
|||
QJpegHandler::QJpegHandler()
|
||||
: d(new QJpegHandlerPrivate(this))
|
||||
{
|
||||
const uint features = qDetectCPUFeatures();
|
||||
Q_UNUSED(features);
|
||||
#if defined(QT_HAVE_NEON)
|
||||
// from qimage_neon.cpp
|
||||
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_neon(quint32 *dst, const uchar *src, int len);
|
||||
|
||||
if (features & NEON)
|
||||
rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_neon;
|
||||
#endif // QT_HAVE_NEON
|
||||
#if defined(QT_HAVE_SSSE3)
|
||||
// from qimage_ssse3.cpp
|
||||
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len);
|
||||
|
||||
if (features & SSSE3)
|
||||
rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_ssse3;
|
||||
#endif // QT_HAVE_SSSE3
|
||||
}
|
||||
|
||||
QJpegHandler::~QJpegHandler()
|
||||
|
|
|
@ -40,22 +40,18 @@
|
|||
****************************************************************************/
|
||||
|
||||
#include "qpixmap.h"
|
||||
|
||||
#include <qfont_p.h>
|
||||
|
||||
#include "qfont_p.h"
|
||||
#include "qpixmap_raster_p.h"
|
||||
#include "qnativeimage_p.h"
|
||||
#include "qimage_p.h"
|
||||
#include "qpaintengine.h"
|
||||
|
||||
#include "qbitmap.h"
|
||||
#include "qimage.h"
|
||||
#include <QBuffer>
|
||||
#include <QImageReader>
|
||||
#include <qimage_p.h>
|
||||
#include <qsimd_p.h>
|
||||
#include <qwidget_p.h>
|
||||
#include <qdrawhelper_p.h>
|
||||
#include "qbuffer.h"
|
||||
#include "qimagereader.h"
|
||||
#include "qimage_p.h"
|
||||
#include "qwidget_p.h"
|
||||
#include "qdrawhelper_p.h"
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
|
@ -163,7 +159,6 @@ void QRasterPixmapData::fill(const QColor &color)
|
|||
if (alpha != 255) {
|
||||
if (!image.hasAlphaChannel()) {
|
||||
QImage::Format toFormat;
|
||||
#if !(defined(QT_HAVE_NEON) || defined(QT_ALWAYS_HAVE_SSE2))
|
||||
if (image.format() == QImage::Format_RGB16)
|
||||
toFormat = QImage::Format_ARGB8565_Premultiplied;
|
||||
else if (image.format() == QImage::Format_RGB666)
|
||||
|
@ -173,7 +168,6 @@ void QRasterPixmapData::fill(const QColor &color)
|
|||
else if (image.format() == QImage::Format_RGB444)
|
||||
toFormat = QImage::Format_ARGB4444_Premultiplied;
|
||||
else
|
||||
#endif
|
||||
toFormat = QImage::Format_ARGB32_Premultiplied;
|
||||
|
||||
if (!image.isNull() && qt_depthForFormat(image.format()) == qt_depthForFormat(toFormat)) {
|
||||
|
@ -364,7 +358,6 @@ void QRasterPixmapData::createPixmapForImage(QImage &sourceImage, Qt::ImageConve
|
|||
QImage::Format opaqueFormat = QNativeImage::systemFormat();
|
||||
QImage::Format alphaFormat = QImage::Format_ARGB32_Premultiplied;
|
||||
|
||||
#if !defined(QT_HAVE_NEON) && !defined(QT_ALWAYS_HAVE_SSE2)
|
||||
switch (opaqueFormat) {
|
||||
case QImage::Format_RGB16:
|
||||
alphaFormat = QImage::Format_ARGB8565_Premultiplied;
|
||||
|
@ -372,7 +365,6 @@ void QRasterPixmapData::createPixmapForImage(QImage &sourceImage, Qt::ImageConve
|
|||
default: // We don't care about the others...
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!sourceImage.hasAlphaChannel()) {
|
||||
format = opaqueFormat;
|
||||
|
|
|
@ -824,7 +824,6 @@ QApplication::QApplication(Display *dpy, int &argc, char **argv,
|
|||
#endif // Q_WS_X11
|
||||
|
||||
extern void qInitDrawhelperAsm();
|
||||
extern void qInitImageConversions();
|
||||
extern int qRegisterGuiVariant();
|
||||
extern int qUnregisterGuiVariant();
|
||||
#ifndef QT_NO_STATEMACHINE
|
||||
|
@ -881,8 +880,6 @@ void QApplicationPrivate::initialize()
|
|||
|
||||
// Set up which span functions should be used in raster engine...
|
||||
qInitDrawhelperAsm();
|
||||
// and QImage conversion functions
|
||||
qInitImageConversions();
|
||||
|
||||
#ifndef QT_NO_WHEELEVENT
|
||||
QApplicationPrivate::wheel_scroll_lines = 3;
|
||||
|
|
|
@ -41,7 +41,6 @@ set(GUI_HEADERS
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qtextureglyphcache_p.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qtransform.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qwindowsurface_p.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qwmatrix.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qpaintengine_raster_p.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_p.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qblendfunctions_p.h
|
||||
|
@ -56,14 +55,6 @@ set(GUI_HEADERS
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qgraphicssystemfactory_p.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qgraphicssystemplugin_p.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qwindowsurface_raster_p.h
|
||||
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_x86_p.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_mmx_p.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_sse_p.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawingprimitive_sse2_p.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_neon_p.h
|
||||
|
||||
# XXX: obsolete?
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qrgb.h
|
||||
)
|
||||
|
||||
|
@ -113,20 +104,6 @@ set(GUI_SOURCES
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qwindowsurface_raster.cpp
|
||||
)
|
||||
|
||||
katie_setup_sources(
|
||||
GUI_SOURCES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_mmx.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_mmx3dnow.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_sse3dnow.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_sse.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_sse2.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_ssse3.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_iwmmxt.cpp
|
||||
# TODO: link to pixman for arm-neon?
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_neon.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/painting/qdrawhelper_neon_asm.S
|
||||
)
|
||||
|
||||
if(WITH_X11 AND X11_FOUND)
|
||||
set(GUI_HEADERS
|
||||
${GUI_HEADERS}
|
||||
|
|
|
@ -42,11 +42,6 @@
|
|||
#include <qdrawhelper_p.h>
|
||||
#include <qpaintengine_raster_p.h>
|
||||
#include <qpainter_p.h>
|
||||
#include <qdrawhelper_x86_p.h>
|
||||
#ifdef QT_HAVE_ARM_SIMD
|
||||
#include <qdrawhelper_arm_simd_p.h>
|
||||
#endif
|
||||
#include <qdrawhelper_neon_p.h>
|
||||
#include <qmath_p.h>
|
||||
#include <qmath.h>
|
||||
|
||||
|
@ -630,76 +625,6 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, i
|
|||
return (((tlrb + trrb + blrb + brrb) >> 8) & 0x00ff00ff) | ((tlag + trag + blag + brag) & 0xff00ff00);
|
||||
}
|
||||
|
||||
#if defined(QT_ALWAYS_HAVE_SSE2)
|
||||
#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b) \
|
||||
{ \
|
||||
const __m128i dxdy = _mm_mullo_epi16 (distx, disty); \
|
||||
const __m128i distx_ = _mm_slli_epi16(distx, 4); \
|
||||
const __m128i disty_ = _mm_slli_epi16(disty, 4); \
|
||||
const __m128i idxidy = _mm_add_epi16(dxdy, _mm_sub_epi16(v_256, _mm_add_epi16(distx_, disty_))); \
|
||||
const __m128i dxidy = _mm_sub_epi16(distx_, dxdy); \
|
||||
const __m128i idxdy = _mm_sub_epi16(disty_, dxdy); \
|
||||
\
|
||||
__m128i tlAG = _mm_srli_epi16(tl, 8); \
|
||||
__m128i tlRB = _mm_and_si128(tl, colorMask); \
|
||||
__m128i trAG = _mm_srli_epi16(tr, 8); \
|
||||
__m128i trRB = _mm_and_si128(tr, colorMask); \
|
||||
__m128i blAG = _mm_srli_epi16(bl, 8); \
|
||||
__m128i blRB = _mm_and_si128(bl, colorMask); \
|
||||
__m128i brAG = _mm_srli_epi16(br, 8); \
|
||||
__m128i brRB = _mm_and_si128(br, colorMask); \
|
||||
\
|
||||
tlAG = _mm_mullo_epi16(tlAG, idxidy); \
|
||||
tlRB = _mm_mullo_epi16(tlRB, idxidy); \
|
||||
trAG = _mm_mullo_epi16(trAG, dxidy); \
|
||||
trRB = _mm_mullo_epi16(trRB, dxidy); \
|
||||
blAG = _mm_mullo_epi16(blAG, idxdy); \
|
||||
blRB = _mm_mullo_epi16(blRB, idxdy); \
|
||||
brAG = _mm_mullo_epi16(brAG, dxdy); \
|
||||
brRB = _mm_mullo_epi16(brRB, dxdy); \
|
||||
\
|
||||
/* Add the values, and shift to only keep 8 significant bits per colors */ \
|
||||
__m128i rAG =_mm_add_epi16(_mm_add_epi16(tlAG, trAG), _mm_add_epi16(blAG, brAG)); \
|
||||
__m128i rRB =_mm_add_epi16(_mm_add_epi16(tlRB, trRB), _mm_add_epi16(blRB, brRB)); \
|
||||
rAG = _mm_andnot_si128(colorMask, rAG); \
|
||||
rRB = _mm_srli_epi16(rRB, 8); \
|
||||
_mm_storeu_si128((__m128i*)(b), _mm_or_si128(rAG, rRB)); \
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(QT_ALWAYS_HAVE_NEON)
|
||||
#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b) \
|
||||
{ \
|
||||
const int16x8_t dxdy = vmulq_s16(distx, disty); \
|
||||
const int16x8_t distx_ = vshlq_n_s16(distx, 4); \
|
||||
const int16x8_t idxidy = vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \
|
||||
const int16x8_t dxidy = vsubq_s16(distx_, dxdy); \
|
||||
const int16x8_t idxdy = vsubq_s16(disty_, dxdy); \
|
||||
\
|
||||
int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \
|
||||
int16x8_t tlRB = vandq_s16(tl, colorMask); \
|
||||
int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \
|
||||
int16x8_t trRB = vandq_s16(tr, colorMask); \
|
||||
int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \
|
||||
int16x8_t blRB = vandq_s16(bl, colorMask); \
|
||||
int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \
|
||||
int16x8_t brRB = vandq_s16(br, colorMask); \
|
||||
\
|
||||
int16x8_t rAG = vmulq_s16(tlAG, idxidy); \
|
||||
int16x8_t rRB = vmulq_s16(tlRB, idxidy); \
|
||||
rAG = vmlaq_s16(rAG, trAG, dxidy); \
|
||||
rRB = vmlaq_s16(rRB, trRB, dxidy); \
|
||||
rAG = vmlaq_s16(rAG, blAG, idxdy); \
|
||||
rRB = vmlaq_s16(rRB, blRB, idxdy); \
|
||||
rAG = vmlaq_s16(rAG, brAG, dxdy); \
|
||||
rRB = vmlaq_s16(rRB, brRB, dxdy); \
|
||||
\
|
||||
rAG = vandq_s16(invColorMask, rAG); \
|
||||
rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \
|
||||
vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \
|
||||
}
|
||||
#endif
|
||||
|
||||
template<TextureBlendType blendType>
|
||||
Q_STATIC_TEMPLATE_FUNCTION inline void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2)
|
||||
{
|
||||
|
@ -806,70 +731,6 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *
|
|||
}
|
||||
}
|
||||
|
||||
if (blendType != BlendTransformedBilinearTiled &&
|
||||
(format == QImage::Format_ARGB32_Premultiplied || format == QImage::Format_RGB32)) {
|
||||
#if defined(QT_ALWAYS_HAVE_SSE2)
|
||||
const __m128i disty_ = _mm_set1_epi16(disty);
|
||||
const __m128i idisty_ = _mm_set1_epi16(idisty);
|
||||
const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
|
||||
|
||||
lim -= 3;
|
||||
for (; f < lim; x += 4, f += 4) {
|
||||
// Load 4 pixels from s1, and split the alpha-green and red-blue component
|
||||
__m128i top = _mm_loadu_si128((__m128i*)((const uint *)(s1)+x));
|
||||
__m128i topAG = _mm_srli_epi16(top, 8);
|
||||
__m128i topRB = _mm_and_si128(top, colorMask);
|
||||
// Multiplies each colour component by idisty
|
||||
topAG = _mm_mullo_epi16 (topAG, idisty_);
|
||||
topRB = _mm_mullo_epi16 (topRB, idisty_);
|
||||
|
||||
// Same for the s2 vector
|
||||
__m128i bottom = _mm_loadu_si128((__m128i*)((const uint *)(s2)+x));
|
||||
__m128i bottomAG = _mm_srli_epi16(bottom, 8);
|
||||
__m128i bottomRB = _mm_and_si128(bottom, colorMask);
|
||||
bottomAG = _mm_mullo_epi16 (bottomAG, disty_);
|
||||
bottomRB = _mm_mullo_epi16 (bottomRB, disty_);
|
||||
|
||||
// Add the values, and shift to keep only 8 significant bits per color
|
||||
__m128i rAG =_mm_add_epi16(topAG, bottomAG);
|
||||
rAG = _mm_srli_epi16(rAG, 8);
|
||||
_mm_storeu_si128((__m128i*)(&intermediate_buffer[1][f]), rAG);
|
||||
__m128i rRB =_mm_add_epi16(topRB, bottomRB);
|
||||
rRB = _mm_srli_epi16(rRB, 8);
|
||||
_mm_storeu_si128((__m128i*)(&intermediate_buffer[0][f]), rRB);
|
||||
}
|
||||
#elif defined(QT_ALWAYS_HAVE_NEON)
|
||||
const int16x8_t disty_ = vdupq_n_s16(disty);
|
||||
const int16x8_t idisty_ = vdupq_n_s16(idisty);
|
||||
const int16x8_t colorMask = vdupq_n_s16(0x00ff);
|
||||
|
||||
lim -= 3;
|
||||
for (; f < lim; x += 4, f += 4) {
|
||||
// Load 4 pixels from s1, and split the alpha-green and red-blue component
|
||||
int16x8_t top = vld1q_s16((int16_t*)((const uint *)(s1)+x));
|
||||
int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), 8));
|
||||
int16x8_t topRB = vandq_s16(top, colorMask);
|
||||
// Multiplies each colour component by idisty
|
||||
topAG = vmulq_s16(topAG, idisty_);
|
||||
topRB = vmulq_s16(topRB, idisty_);
|
||||
|
||||
// Same for the s2 vector
|
||||
int16x8_t bottom = vld1q_s16((int16_t*)((const uint *)(s2)+x));
|
||||
int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), 8));
|
||||
int16x8_t bottomRB = vandq_s16(bottom, colorMask);
|
||||
bottomAG = vmulq_s16(bottomAG, disty_);
|
||||
bottomRB = vmulq_s16(bottomRB, disty_);
|
||||
|
||||
// Add the values, and shift to keep only 8 significant bits per color
|
||||
int16x8_t rAG = vaddq_s16(topAG, bottomAG);
|
||||
rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8));
|
||||
vst1q_s16((int16_t*)(&intermediate_buffer[1][f]), rAG);
|
||||
int16x8_t rRB = vaddq_s16(topRB, bottomRB);
|
||||
rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8));
|
||||
vst1q_s16((int16_t*)(&intermediate_buffer[0][f]), rRB);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
for (; f < count; f++) { // Same as above but without sse2
|
||||
if (blendType == BlendTransformedBilinearTiled) {
|
||||
if (x >= image_width) x -= image_width;
|
||||
|
@ -936,123 +797,6 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *
|
|||
const uchar *s2 = data->texture.scanLine(y2);
|
||||
int disty = (fy & 0x0000ffff) >> 12;
|
||||
|
||||
if (blendType != BlendTransformedBilinearTiled &&
|
||||
(format == QImage::Format_ARGB32_Premultiplied || format == QImage::Format_RGB32)) {
|
||||
|
||||
#define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \
|
||||
while (b < end) { \
|
||||
int x1 = (fx >> 16); \
|
||||
int x2; \
|
||||
fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); \
|
||||
if (x1 != x2) \
|
||||
break; \
|
||||
uint tl = fetch(s1, x1, data->texture.colorTable); \
|
||||
uint tr = fetch(s1, x2, data->texture.colorTable); \
|
||||
uint bl = fetch(s2, x1, data->texture.colorTable); \
|
||||
uint br = fetch(s2, x2, data->texture.colorTable); \
|
||||
int distx = (fx & 0x0000ffff) >> 12; \
|
||||
*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \
|
||||
fx += fdx; \
|
||||
++b; \
|
||||
} \
|
||||
uint *boundedEnd; \
|
||||
if (fdx > 0) \
|
||||
boundedEnd = qMin(end, buffer + uint((image_x2 - (fx >> 16)) / data->m11)); \
|
||||
else \
|
||||
boundedEnd = qMin(end, buffer + uint((image_x1 - (fx >> 16)) / data->m11)); \
|
||||
boundedEnd -= 3;
|
||||
|
||||
#if defined(QT_ALWAYS_HAVE_SSE2)
|
||||
BILINEAR_DOWNSCALE_BOUNDS_PROLOG
|
||||
|
||||
const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
|
||||
const __m128i v_256 = _mm_set1_epi16(256);
|
||||
const __m128i v_disty = _mm_set1_epi16(disty);
|
||||
__m128i v_fdx = _mm_set1_epi32(fdx*4);
|
||||
|
||||
ptrdiff_t secondLine = reinterpret_cast<const uint *>(s2) - reinterpret_cast<const uint *>(s1);
|
||||
|
||||
union Vect_buffer { __m128i vect; quint32 i[4]; };
|
||||
Vect_buffer v_fx;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
v_fx.i[i] = fx;
|
||||
fx += fdx;
|
||||
}
|
||||
|
||||
while (b < boundedEnd) {
|
||||
|
||||
Vect_buffer tl, tr, bl, br;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
int x1 = v_fx.i[i] >> 16;
|
||||
const uint *addr_tl = reinterpret_cast<const uint *>(s1) + x1;
|
||||
const uint *addr_tr = addr_tl + 1;
|
||||
tl.i[i] = *addr_tl;
|
||||
tr.i[i] = *addr_tr;
|
||||
bl.i[i] = *(addr_tl+secondLine);
|
||||
br.i[i] = *(addr_tr+secondLine);
|
||||
}
|
||||
__m128i v_distx = _mm_srli_epi16(v_fx.vect, 12);
|
||||
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
||||
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
|
||||
|
||||
interpolate_4_pixels_16_sse2(tl.vect, tr.vect, bl.vect, br.vect, v_distx, v_disty, colorMask, v_256, b);
|
||||
b+=4;
|
||||
v_fx.vect = _mm_add_epi32(v_fx.vect, v_fdx);
|
||||
}
|
||||
fx = v_fx.i[0];
|
||||
#elif defined(QT_ALWAYS_HAVE_NEON)
|
||||
BILINEAR_DOWNSCALE_BOUNDS_PROLOG
|
||||
|
||||
const int16x8_t colorMask = vdupq_n_s16(0x00ff);
|
||||
const int16x8_t invColorMask = vmvnq_s16(colorMask);
|
||||
const int16x8_t v_256 = vdupq_n_s16(256);
|
||||
const int16x8_t v_disty = vdupq_n_s16(disty);
|
||||
const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
|
||||
int32x4_t v_fdx = vdupq_n_s32(fdx*4);
|
||||
|
||||
ptrdiff_t secondLine = reinterpret_cast<const uint *>(s2) - reinterpret_cast<const uint *>(s1);
|
||||
|
||||
union Vect_buffer { int32x4_t vect; quint32 i[4]; };
|
||||
Vect_buffer v_fx;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
v_fx.i[i] = fx;
|
||||
fx += fdx;
|
||||
}
|
||||
|
||||
const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
|
||||
|
||||
while (b < boundedEnd) {
|
||||
|
||||
Vect_buffer tl, tr, bl, br;
|
||||
|
||||
Vect_buffer v_fx_shifted;
|
||||
v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16);
|
||||
|
||||
int32x4_t v_distx = vshrq_n_s32(vandq_s32(v_fx.vect, v_ffff_mask), 12);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
int x1 = v_fx_shifted.i[i];
|
||||
const uint *addr_tl = reinterpret_cast<const uint *>(s1) + x1;
|
||||
const uint *addr_tr = addr_tl + 1;
|
||||
tl.i[i] = *addr_tl;
|
||||
tr.i[i] = *addr_tr;
|
||||
bl.i[i] = *(addr_tl+secondLine);
|
||||
br.i[i] = *(addr_tr+secondLine);
|
||||
}
|
||||
|
||||
v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
|
||||
|
||||
interpolate_4_pixels_16_neon(vreinterpretq_s16_s32(tl.vect), vreinterpretq_s16_s32(tr.vect), vreinterpretq_s16_s32(bl.vect), vreinterpretq_s16_s32(br.vect), vreinterpretq_s16_s32(v_distx), v_disty, v_disty_, colorMask, invColorMask, v_256, b);
|
||||
b+=4;
|
||||
v_fx.vect = vaddq_s32(v_fx.vect, v_fdx);
|
||||
}
|
||||
fx = v_fx.i[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
while (b < end) {
|
||||
int x1 = (fx >> 16);
|
||||
int x2;
|
||||
|
@ -1493,32 +1237,11 @@ static const uint * QT_FASTCALL qt_fetch_conical_gradient(uint *buffer, const Op
|
|||
return b;
|
||||
}
|
||||
|
||||
#if defined(Q_CC_RVCT)
|
||||
// Force ARM code generation for comp_func_* -methods
|
||||
# pragma push
|
||||
# pragma arm
|
||||
# if defined(QT_HAVE_ARMV6)
|
||||
// Issues a single ARM "pld" (preload) instruction for the address held in
// `start`. Only compiled when both Q_CC_RVCT and QT_HAVE_ARMV6 are defined.
// NOTE(review): the asm string names the C parameter `start` directly; this
// presumably relies on the RVCT inline assembler resolving C identifiers --
// confirm before porting to another compiler.
static __forceinline void preload(const uint *start)
|
||||
{
|
||||
asm( "pld [start]" );
|
||||
}
|
||||
static const uint L2CacheLineLength = 32;
|
||||
static const uint L2CacheLineLengthInInts = L2CacheLineLength/sizeof(uint);
|
||||
# define PRELOAD_INIT(x) preload(x);
|
||||
# define PRELOAD_INIT2(x,y) PRELOAD_INIT(x) PRELOAD_INIT(y)
|
||||
# define PRELOAD_COND(x) if (((uint)&x[i])%L2CacheLineLength == 0) preload(&x[i] + L2CacheLineLengthInInts);
|
||||
// Two consecutive preloads stall, so space them out a bit by using different modulus.
|
||||
# define PRELOAD_COND2(x,y) if (((uint)&x[i])%L2CacheLineLength == 0) preload(&x[i] + L2CacheLineLengthInInts); \
|
||||
if (((uint)&y[i])%L2CacheLineLength == 16) preload(&y[i] + L2CacheLineLengthInInts);
|
||||
# endif // QT_HAVE_ARMV6
|
||||
#endif // Q_CC_RVCT
|
||||
|
||||
#if !defined(Q_CC_RVCT) || !defined(QT_HAVE_ARMV6)
|
||||
# define PRELOAD_INIT(x)
|
||||
# define PRELOAD_INIT2(x,y)
|
||||
# define PRELOAD_COND(x)
|
||||
# define PRELOAD_COND2(x,y)
|
||||
#endif
|
||||
// TODO: remove these no-op PRELOAD_* macros
|
||||
#define PRELOAD_INIT(x)
|
||||
#define PRELOAD_INIT2(x,y)
|
||||
#define PRELOAD_COND(x)
|
||||
#define PRELOAD_COND2(x,y)
|
||||
|
||||
/* The constant alpha factor describes an alpha factor that gets applied
|
||||
to the result of the composition operation combining it with the destination.
|
||||
|
@ -6918,218 +6641,6 @@ void qInitDrawhelperAsm()
|
|||
qt_memfill32 = qt_memfill_template<quint32, quint32>;
|
||||
qt_memfill16 = qt_memfill_quint16; //qt_memfill_template<quint16, quint16>;
|
||||
|
||||
CompositionFunction *functionForModeAsm = 0;
|
||||
CompositionFunctionSolid *functionForModeSolidAsm = 0;
|
||||
|
||||
const uint features = qDetectCPUFeatures();
|
||||
if (false) {
|
||||
#ifdef QT_HAVE_SSE2
|
||||
} else if (features & SSE2) {
|
||||
qt_memfill32 = qt_memfill32_sse2;
|
||||
qt_memfill16 = qt_memfill16_sse2;
|
||||
qDrawHelper[QImage::Format_RGB32].bitmapBlit = qt_bitmapblit32_sse2;
|
||||
qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2;
|
||||
qDrawHelper[QImage::Format_ARGB32_Premultiplied].bitmapBlit = qt_bitmapblit32_sse2;
|
||||
qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse2;
|
||||
#endif
|
||||
#ifdef QT_HAVE_SSE
|
||||
} else if (features & SSE) {
|
||||
// qt_memfill32 = qt_memfill32_sse;
|
||||
qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse;
|
||||
#ifdef QT_HAVE_3DNOW
|
||||
if (features & MMX3DNOW) {
|
||||
qt_memfill32 = qt_memfill32_sse3dnow;
|
||||
qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse3dnow;
|
||||
}
|
||||
#endif
|
||||
#endif // SSE
|
||||
}
|
||||
#ifdef QT_HAVE_MMX
|
||||
if (features & MMX) {
|
||||
functionForModeAsm = qt_functionForMode_MMX;
|
||||
|
||||
functionForModeSolidAsm = qt_functionForModeSolid_MMX;
|
||||
qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_mmx;
|
||||
#ifdef QT_HAVE_3DNOW
|
||||
if (features & MMX3DNOW) {
|
||||
functionForModeAsm = qt_functionForMode_MMX3DNOW;
|
||||
functionForModeSolidAsm = qt_functionForModeSolid_MMX3DNOW;
|
||||
qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_mmx3dnow;
|
||||
}
|
||||
#endif // 3DNOW
|
||||
|
||||
extern void qt_blend_rgb32_on_rgb32_mmx(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
extern void qt_blend_argb32_on_argb32_mmx(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
|
||||
qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mmx;
|
||||
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mmx;
|
||||
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mmx;
|
||||
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mmx;
|
||||
|
||||
}
|
||||
#endif // MMX
|
||||
|
||||
#ifdef QT_HAVE_SSE
|
||||
if (features & SSE) {
|
||||
extern void qt_blend_rgb32_on_rgb32_sse(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
extern void qt_blend_argb32_on_argb32_sse(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
|
||||
qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse;
|
||||
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse;
|
||||
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse;
|
||||
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse;
|
||||
}
|
||||
#endif // SSE
|
||||
|
||||
#ifdef QT_HAVE_SSE2
|
||||
if (features & SSE2) {
|
||||
extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
extern void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
|
||||
qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
|
||||
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
|
||||
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
|
||||
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
|
||||
|
||||
extern const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data,
|
||||
int y, int x, int length);
|
||||
|
||||
qt_fetch_radial_gradient = qt_fetch_radial_gradient_sse2;
|
||||
}
|
||||
|
||||
#ifdef QT_HAVE_SSSE3
|
||||
if (features & SSSE3) {
|
||||
extern void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
|
||||
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
|
||||
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
|
||||
}
|
||||
#endif // SSSE3
|
||||
|
||||
#endif // SSE2
|
||||
|
||||
#ifdef QT_HAVE_SSE
|
||||
if (features & SSE) {
|
||||
functionForModeAsm = qt_functionForMode_SSE;
|
||||
functionForModeSolidAsm = qt_functionForModeSolid_SSE;
|
||||
qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_sse;
|
||||
#ifdef QT_HAVE_3DNOW
|
||||
if (features & MMX3DNOW) {
|
||||
functionForModeAsm = qt_functionForMode_SSE3DNOW;
|
||||
functionForModeSolidAsm = qt_functionForModeSolid_SSE3DNOW;
|
||||
qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_sse3dnow;
|
||||
}
|
||||
#endif // 3DNOW
|
||||
|
||||
|
||||
#ifdef QT_HAVE_SSE2
|
||||
if (features & SSE2) {
|
||||
extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels,
|
||||
const uint *srcPixels,
|
||||
int length,
|
||||
uint const_alpha);
|
||||
extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha);
|
||||
extern void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha);
|
||||
extern void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha);
|
||||
|
||||
functionForModeAsm[0] = comp_func_SourceOver_sse2;
|
||||
functionForModeAsm[QPainter::CompositionMode_Source] = comp_func_Source_sse2;
|
||||
functionForModeAsm[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2;
|
||||
functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#elif defined(QT_HAVE_SSE2)
|
||||
// this is the special case when SSE2 is usable but MMX/SSE is not usable (e.g.: Windows x64 + visual studio)
|
||||
if (features & SSE2) {
|
||||
functionForModeAsm = qt_functionForMode_onlySSE2;
|
||||
functionForModeSolidAsm = qt_functionForModeSolid_onlySSE2;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef QT_HAVE_IWMMXT
|
||||
if (features & IWMMXT) {
|
||||
functionForModeAsm = qt_functionForMode_IWMMXT;
|
||||
functionForModeSolidAsm = qt_functionForModeSolid_IWMMXT;
|
||||
qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_iwmmxt;
|
||||
}
|
||||
#endif // IWMMXT
|
||||
|
||||
#if defined(QT_HAVE_ARM_SIMD)
|
||||
qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_arm_simd;
|
||||
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_arm_simd;
|
||||
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_arm_simd;
|
||||
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_arm_simd;
|
||||
#elif defined(QT_HAVE_NEON)
|
||||
if (features & NEON) {
|
||||
qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
|
||||
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
|
||||
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
|
||||
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
|
||||
qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
|
||||
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon;
|
||||
qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_neon;
|
||||
|
||||
qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon;
|
||||
qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon;
|
||||
|
||||
qTransformFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_rgb16_neon;
|
||||
qTransformFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_transform_image_rgb16_on_rgb16_neon;
|
||||
|
||||
qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon;
|
||||
|
||||
functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon;
|
||||
functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon;
|
||||
functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_neon;
|
||||
destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon;
|
||||
destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon;
|
||||
|
||||
qMemRotateFunctions[QImage::Format_RGB16][0] = qt_memrotate90_16_neon;
|
||||
qMemRotateFunctions[QImage::Format_RGB16][2] = qt_memrotate270_16_neon;
|
||||
qt_memfill32 = qt_memfill32_neon;
|
||||
|
||||
extern const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data,
|
||||
int y, int x, int length);
|
||||
|
||||
qt_fetch_radial_gradient = qt_fetch_radial_gradient_neon;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (functionForModeSolidAsm) {
|
||||
const int destinationMode = QPainter::CompositionMode_Destination;
|
||||
functionForModeSolidAsm[destinationMode] = functionForModeSolid_C[destinationMode];
|
||||
|
||||
// use the default qdrawhelper implementation for the
|
||||
// extended composition modes
|
||||
for (int mode = 12; mode < 24; ++mode)
|
||||
functionForModeSolidAsm[mode] = functionForModeSolid_C[mode];
|
||||
|
||||
functionForModeSolid = functionForModeSolidAsm;
|
||||
}
|
||||
if (functionForModeAsm)
|
||||
functionForMode = functionForModeAsm;
|
||||
|
||||
qt_build_pow_tables();
|
||||
}
|
||||
|
||||
|
|
|
@ -1,115 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include "qdrawhelper_arm_simd_p.h"
|
||||
|
||||
#include <qpaintengine_raster_p.h>
|
||||
#include <qblendfunctions_p.h>
|
||||
|
||||
#ifdef QT_HAVE_ARM_SIMD
|
||||
|
||||
|
||||
|
||||
// TODO: add GNU assembler instructions and support for other platforms.
|
||||
// Default to C code for now
|
||||
|
||||
// Blends a w x h block of 32-bit pixels from srcPixels onto destPixels.
//
//   dbpl / sbpl  - byte distance between consecutive rows of the destination
//                  and source buffers respectively.
//   const_alpha  - 256 selects the per-pixel path with no extra scaling,
//                  0 leaves the destination untouched, any other value
//                  scales every source pixel before blending.
//
// Plain C++ fallback; no ARM SIMD instructions are used here.
void qt_blend_argb32_on_argb32_arm_simd(uchar *destPixels, int dbpl,
                                        const uchar *srcPixels, int sbpl,
                                        int w, int h,
                                        int const_alpha)
{
    // Nothing to draw when the constant alpha is zero.
    if (const_alpha == 0)
        return;

    uchar *dstRow = destPixels;
    const uchar *srcRow = srcPixels;

    if (const_alpha == 256) {
        for (int row = 0; row < h; ++row, dstRow += dbpl, srcRow += sbpl) {
            uint *out = (uint *) dstRow;
            const uint *in = (const uint *) srcRow;
            for (int col = 0; col < w; ++col) {
                const uint pixel = in[col];
                if (pixel >= 0xff000000) {
                    // Top byte is 0xff: the source pixel replaces the destination.
                    out[col] = pixel;
                } else if (pixel != 0) {
                    // Destination is scaled by the alpha of the inverted source
                    // pixel (presumably 255 - source alpha) and the source added.
                    out[col] = pixel + BYTE_MUL(out[col], qAlpha(~pixel));
                }
                // pixel == 0: destination is left as it is.
            }
        }
        return;
    }

    // Rescale the constant alpha by 255/256 before it is applied per pixel.
    const_alpha = (const_alpha * 255) >> 8;
    for (int row = 0; row < h; ++row, dstRow += dbpl, srcRow += sbpl) {
        uint *out = (uint *) dstRow;
        const uint *in = (const uint *) srcRow;
        for (int col = 0; col < w; ++col) {
            const uint scaled = BYTE_MUL(in[col], const_alpha);
            out[col] = scaled + BYTE_MUL(out[col], qAlpha(~scaled));
        }
    }
}
|
||||
|
||||
// Draws a w x h block of 32-bit pixels from srcPixels onto destPixels.
//
// When const_alpha is anything other than 256 the work is handed to
// qt_blend_argb32_on_argb32_arm_simd(). Otherwise each source row is copied
// verbatim into the destination: qt_memconvert() for rows of at most 64
// pixels, memcpy() for wider rows. dbpl / sbpl are the byte distances between
// consecutive destination / source rows.
void qt_blend_rgb32_on_rgb32_arm_simd(uchar *destPixels, int dbpl,
                                      const uchar *srcPixels, int sbpl,
                                      int w, int h,
                                      int const_alpha)
{
    if (const_alpha != 256) {
        qt_blend_argb32_on_argb32_arm_simd(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
        return;
    }

    uchar *dstRow = destPixels;
    const uchar *srcRow = srcPixels;

    if (w > 64) {
        // Four bytes per pixel.
        const int rowBytes = w * 4;
        for (int row = 0; row < h; ++row, dstRow += dbpl, srcRow += sbpl)
            memcpy(dstRow, srcRow, rowBytes);
        return;
    }

    for (int row = 0; row < h; ++row, dstRow += dbpl, srcRow += sbpl)
        qt_memconvert((uint *) dstRow, (const uint *) srcRow, w);
}
|
||||
|
||||
|
||||
#endif // QT_HAVE_ARM_SIMD
|
|
@ -1,76 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef QDRAWHELPER_ARM_SIMD_P_H
|
||||
#define QDRAWHELPER_ARM_SIMD_P_H
|
||||
|
||||
//
|
||||
// W A R N I N G
|
||||
// -------------
|
||||
//
|
||||
// This file is not part of the Qt API. It exists purely as an
|
||||
// implementation detail. This header file may change from version to
|
||||
// version without notice, or even be removed.
|
||||
//
|
||||
// We mean it.
|
||||
//
|
||||
|
||||
#include <qdrawhelper_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
#if defined(QT_HAVE_ARM_SIMD)
|
||||
|
||||
// Blends a w x h block of 32-bit pixels from srcPixels onto destPixels.
// dbpl and sbpl are the byte strides between rows of the destination and
// source. const_alpha == 256 blends each source pixel as-is, 0 draws
// nothing, and any other value scales the source pixels before blending.
void qt_blend_argb32_on_argb32_arm_simd(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
|
||||
// Draws a w x h block of 32-bit pixels from srcPixels onto destPixels.
// With const_alpha == 256 the source rows are copied unchanged; for any
// other value the call is forwarded to qt_blend_argb32_on_argb32_arm_simd().
// dbpl and sbpl are the byte strides between rows.
void qt_blend_rgb32_on_rgb32_arm_simd(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
|
||||
#endif // QT_HAVE_ARM_SIMD
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QDRAWHELPER_ARM_SIMD_P_H
|
|
@ -1,151 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#ifdef QT_HAVE_IWMMXT
|
||||
|
||||
#include <mmintrin.h>
|
||||
#if defined(Q_OS_WINCE)
|
||||
# include "qplatformdefs.h"
|
||||
#endif
|
||||
#if !defined(__IWMMXT__) && !defined(Q_OS_WINCE)
|
||||
# include <xmmintrin.h>
|
||||
#elif defined(Q_OS_WINCE_STD) && defined(_X86_)
|
||||
# pragma warning(disable: 4391)
|
||||
# include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
#include <qdrawhelper_sse_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
#ifndef _MM_SHUFFLE
|
||||
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
|
||||
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
|
||||
#endif
|
||||
|
||||
struct QIWMMXTIntrinsics : public QMMXCommonIntrinsics
|
||||
{
|
||||
static inline m64 alpha(m64 x) {
|
||||
return _mm_shuffle_pi16 (x, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
static inline m64 _load_alpha(uint x, const m64 &mmx_0x0000) {
|
||||
m64 t = _mm_unpacklo_pi8(_mm_cvtsi32_si64(x), mmx_0x0000);
|
||||
return _mm_shuffle_pi16(t, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
}
|
||||
|
||||
static inline void end() {
|
||||
}
|
||||
};
|
||||
|
||||
// Solid-colour composition functions instantiated for iWMMXt, indexed by
// composition mode. Null slots have no iWMMXt implementation in this table;
// qInitDrawhelperAsm() copies the generic C entries for the Destination mode
// and for modes 12..23 into the selected table before installing it.
CompositionFunctionSolid qt_functionForModeSolid_IWMMXT[numCompositionFunctions] = {
    // slots 0..3
    comp_func_solid_SourceOver<QIWMMXTIntrinsics>,
    comp_func_solid_DestinationOver<QIWMMXTIntrinsics>,
    comp_func_solid_Clear<QIWMMXTIntrinsics>,
    comp_func_solid_Source<QIWMMXTIntrinsics>,

    // slot 4: no iWMMXt version (presumably the Destination mode)
    0,

    // slots 5..11
    comp_func_solid_SourceIn<QIWMMXTIntrinsics>,
    comp_func_solid_DestinationIn<QIWMMXTIntrinsics>,
    comp_func_solid_SourceOut<QIWMMXTIntrinsics>,
    comp_func_solid_DestinationOut<QIWMMXTIntrinsics>,
    comp_func_solid_SourceAtop<QIWMMXTIntrinsics>,
    comp_func_solid_DestinationAtop<QIWMMXTIntrinsics>,
    comp_func_solid_XOR<QIWMMXTIntrinsics>,

    // slots 12..23: svg 1.2 modes, no iWMMXt versions
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, 0, 0, 0,

    // slots 24..32: raster operations
    rasterop_solid_SourceOrDestination<QIWMMXTIntrinsics>,
    rasterop_solid_SourceAndDestination<QIWMMXTIntrinsics>,
    rasterop_solid_SourceXorDestination<QIWMMXTIntrinsics>,
    rasterop_solid_NotSourceAndNotDestination<QIWMMXTIntrinsics>,
    rasterop_solid_NotSourceOrNotDestination<QIWMMXTIntrinsics>,
    rasterop_solid_NotSourceXorDestination<QIWMMXTIntrinsics>,
    rasterop_solid_NotSource<QIWMMXTIntrinsics>,
    rasterop_solid_NotSourceAndDestination<QIWMMXTIntrinsics>,
    rasterop_solid_SourceAndNotDestination<QIWMMXTIntrinsics>
};
|
||||
|
||||
// Per-span composition functions for iWMMXt, indexed by composition mode.
// Slots 0..11 use the iWMMXt template instantiations where one exists
// (slot 4, comp_func_Destination, is the generic function); slots 12..23
// (the svg 1.2 blend modes) and 24..32 (raster operations) reuse the generic
// functions.
//
// The array bound is spelled out as numCompositionFunctions, matching
// qt_functionForModeSolid_IWMMXT, so the table always has one slot per
// composition mode regardless of how many initialisers are listed here.
CompositionFunction qt_functionForMode_IWMMXT[numCompositionFunctions] = {
    // slots 0..11
    comp_func_SourceOver<QIWMMXTIntrinsics>,
    comp_func_DestinationOver<QIWMMXTIntrinsics>,
    comp_func_Clear<QIWMMXTIntrinsics>,
    comp_func_Source<QIWMMXTIntrinsics>,
    comp_func_Destination,
    comp_func_SourceIn<QIWMMXTIntrinsics>,
    comp_func_DestinationIn<QIWMMXTIntrinsics>,
    comp_func_SourceOut<QIWMMXTIntrinsics>,
    comp_func_DestinationOut<QIWMMXTIntrinsics>,
    comp_func_SourceAtop<QIWMMXTIntrinsics>,
    comp_func_DestinationAtop<QIWMMXTIntrinsics>,
    comp_func_XOR<QIWMMXTIntrinsics>,

    // slots 12..23: generic implementations
    comp_func_Plus,
    comp_func_Multiply,
    comp_func_Screen,
    comp_func_Overlay,
    comp_func_Darken,
    comp_func_Lighten,
    comp_func_ColorDodge,
    comp_func_ColorBurn,
    comp_func_HardLight,
    comp_func_SoftLight,
    comp_func_Difference,
    comp_func_Exclusion,

    // slots 24..32: generic raster operations
    rasterop_SourceOrDestination,
    rasterop_SourceAndDestination,
    rasterop_SourceXorDestination,
    rasterop_NotSourceAndNotDestination,
    rasterop_NotSourceOrNotDestination,
    rasterop_NotSourceXorDestination,
    rasterop_NotSource,
    rasterop_NotSourceAndDestination,
    rasterop_SourceAndNotDestination
};
|
||||
|
||||
void qt_blend_color_argb_iwmmxt(int count, const QSpan *spans, void *userData)
|
||||
{
|
||||
qt_blend_color_argb_x86<QIWMMXTIntrinsics>(count, spans, userData,
|
||||
(CompositionFunctionSolid*)qt_functionForModeSolid_IWMMXT);
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_HAVE_IWMMXT
|
||||
|
||||
|
||||
|
|
@ -1,159 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include <qdrawhelper_p.h>
|
||||
|
||||
#if defined(QT_HAVE_MMX)
|
||||
|
||||
#include <qdrawhelper_mmx_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
// Solid-colour composition functions instantiated for QMMXIntrinsics.
// Entries are positional; a 0 marks a slot with no MMX routine in this table.
// NOTE(review): slot order presumably follows the composition-mode numbering
// used to index the table -- confirm against the mode enum.
CompositionFunctionSolid qt_functionForModeSolid_MMX[numCompositionFunctions] = {
    // slots 0 - 11: alpha compositing operators
    comp_func_solid_SourceOver<QMMXIntrinsics>,
    comp_func_solid_DestinationOver<QMMXIntrinsics>,
    comp_func_solid_Clear<QMMXIntrinsics>,
    comp_func_solid_Source<QMMXIntrinsics>,
    0,
    comp_func_solid_SourceIn<QMMXIntrinsics>,
    comp_func_solid_DestinationIn<QMMXIntrinsics>,
    comp_func_solid_SourceOut<QMMXIntrinsics>,
    comp_func_solid_DestinationOut<QMMXIntrinsics>,
    comp_func_solid_SourceAtop<QMMXIntrinsics>,
    comp_func_solid_DestinationAtop<QMMXIntrinsics>,
    comp_func_solid_XOR<QMMXIntrinsics>,

    // slots 12 - 23: svg 1.2 modes
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, 0, 0, 0,

    // slots 24 - 32: raster operations
    rasterop_solid_SourceOrDestination<QMMXIntrinsics>,
    rasterop_solid_SourceAndDestination<QMMXIntrinsics>,
    rasterop_solid_SourceXorDestination<QMMXIntrinsics>,
    rasterop_solid_NotSourceAndNotDestination<QMMXIntrinsics>,
    rasterop_solid_NotSourceOrNotDestination<QMMXIntrinsics>,
    rasterop_solid_NotSourceXorDestination<QMMXIntrinsics>,
    rasterop_solid_NotSource<QMMXIntrinsics>,
    rasterop_solid_NotSourceAndDestination<QMMXIntrinsics>,
    rasterop_solid_SourceAndNotDestination<QMMXIntrinsics>
};
|
||||
|
||||
// Span composition functions for the MMX path. Templated entries are
// instantiated for QMMXIntrinsics; the remaining entries are the
// non-templated routines declared elsewhere.
CompositionFunction qt_functionForMode_MMX[numCompositionFunctions] = {
    // slots 0 - 11: alpha compositing operators
    comp_func_SourceOver<QMMXIntrinsics>,
    comp_func_DestinationOver<QMMXIntrinsics>,
    comp_func_Clear<QMMXIntrinsics>,
    comp_func_Source<QMMXIntrinsics>,
    comp_func_Destination,
    comp_func_SourceIn<QMMXIntrinsics>,
    comp_func_DestinationIn<QMMXIntrinsics>,
    comp_func_SourceOut<QMMXIntrinsics>,
    comp_func_DestinationOut<QMMXIntrinsics>,
    comp_func_SourceAtop<QMMXIntrinsics>,
    comp_func_DestinationAtop<QMMXIntrinsics>,
    comp_func_XOR<QMMXIntrinsics>,

    // slots 12 - 23: blend modes (no MMX specialisation)
    comp_func_Plus,
    comp_func_Multiply,
    comp_func_Screen,
    comp_func_Overlay,
    comp_func_Darken,
    comp_func_Lighten,
    comp_func_ColorDodge,
    comp_func_ColorBurn,
    comp_func_HardLight,
    comp_func_SoftLight,
    comp_func_Difference,
    comp_func_Exclusion,

    // slots 24 - 32: raster operations (no MMX specialisation)
    rasterop_SourceOrDestination,
    rasterop_SourceAndDestination,
    rasterop_SourceXorDestination,
    rasterop_NotSourceAndNotDestination,
    rasterop_NotSourceOrNotDestination,
    rasterop_NotSourceXorDestination,
    rasterop_NotSource,
    rasterop_NotSourceAndDestination,
    rasterop_SourceAndNotDestination
};
|
||||
|
||||
void qt_blend_color_argb_mmx(int count, const QSpan *spans, void *userData)
|
||||
{
|
||||
qt_blend_color_argb_x86<QMMXIntrinsics>(count, spans, userData,
|
||||
(CompositionFunctionSolid*)qt_functionForModeSolid_MMX);
|
||||
}
|
||||
|
||||
|
||||
// Blends a w x h block of 32-bit source pixels onto the destination with the
// MMX SourceOver routine, one scanline at a time.
//
// destPixels/srcPixels point at the first pixel of each block; dbpl/sbpl are
// the byte strides used to step to the next scanline.
// const_alpha is converted to the range expected by comp_func_SourceOver by
// subtracting one.
// NOTE(review): callers presumably pass const_alpha in 0..256 -- confirm.
void qt_blend_argb32_on_argb32_mmx(uchar *destPixels, int dbpl,
                                   const uchar *srcPixels, int sbpl,
                                   int w, int h,
                                   int const_alpha)
{
    // const_alpha - 1 is stored in an unsigned value below, so 0 would wrap to
    // UINT_MAX and be fed to the blend as a bogus coverage. A zero opacity
    // SourceOver leaves the destination untouched, so there is nothing to do.
    if (const_alpha <= 0)
        return;

    const uint *src = (const uint *) srcPixels;
    uint *dst = (uint *) destPixels;

    uint ca = const_alpha - 1;

    for (int y=0; y<h; ++y) {
        comp_func_SourceOver<QMMXIntrinsics>(dst, src, w, ca);
        // Strides are in bytes, so advance through uchar pointers.
        dst = (quint32 *)(((uchar *) dst) + dbpl);
        src = (const quint32 *)(((const uchar *) src) + sbpl);
    }
}
|
||||
|
||||
// Blends a w x h block of 32-bit source pixels onto the destination with the
// MMX Source routine (dest = s * ca + d * cia), one scanline at a time.
//
// destPixels/srcPixels point at the first pixel of each block; dbpl/sbpl are
// the byte strides used to step to the next scanline.
// const_alpha is converted to the range expected by comp_func_Source by
// subtracting one.
// NOTE(review): callers presumably pass const_alpha in 0..256 -- confirm.
void qt_blend_rgb32_on_rgb32_mmx(uchar *destPixels, int dbpl,
                                 const uchar *srcPixels, int sbpl,
                                 int w, int h,
                                 int const_alpha)
{
    // const_alpha - 1 is stored in an unsigned value below, so 0 would wrap to
    // UINT_MAX and be fed to the blend as a bogus coverage. With zero coverage
    // the Source formula reduces to the unchanged destination, so return early.
    if (const_alpha <= 0)
        return;

    const uint *src = (const uint *) srcPixels;
    uint *dst = (uint *) destPixels;

    uint ca = const_alpha - 1;

    for (int y=0; y<h; ++y) {
        comp_func_Source<QMMXIntrinsics>(dst, src, w, ca);
        // Strides are in bytes, so advance through uchar pointers.
        dst = (quint32 *)(((uchar *) dst) + dbpl);
        src = (const quint32 *)(((const uchar *) src) + sbpl);
    }
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_HAVE_MMX
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,130 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include <qdrawhelper_x86_p.h>
|
||||
|
||||
#ifdef QT_HAVE_3DNOW
|
||||
|
||||
#include <qdrawhelper_mmx_p.h>
|
||||
#include <mm3dnow.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
struct QMMX3DNOWIntrinsics : public QMMXCommonIntrinsics
|
||||
{
|
||||
static inline void end() {
|
||||
_m_femms();
|
||||
}
|
||||
};
|
||||
|
||||
// Solid-colour composition functions instantiated for QMMX3DNOWIntrinsics.
// Entries are positional; a 0 marks a slot with no routine in this table.
// NOTE(review): slot order presumably follows the composition-mode numbering
// used to index the table -- confirm against the mode enum.
CompositionFunctionSolid qt_functionForModeSolid_MMX3DNOW[numCompositionFunctions] = {
    // slots 0 - 11: alpha compositing operators
    comp_func_solid_SourceOver<QMMX3DNOWIntrinsics>,
    comp_func_solid_DestinationOver<QMMX3DNOWIntrinsics>,
    comp_func_solid_Clear<QMMX3DNOWIntrinsics>,
    comp_func_solid_Source<QMMX3DNOWIntrinsics>,
    0,
    comp_func_solid_SourceIn<QMMX3DNOWIntrinsics>,
    comp_func_solid_DestinationIn<QMMX3DNOWIntrinsics>,
    comp_func_solid_SourceOut<QMMX3DNOWIntrinsics>,
    comp_func_solid_DestinationOut<QMMX3DNOWIntrinsics>,
    comp_func_solid_SourceAtop<QMMX3DNOWIntrinsics>,
    comp_func_solid_DestinationAtop<QMMX3DNOWIntrinsics>,
    comp_func_solid_XOR<QMMX3DNOWIntrinsics>,

    // slots 12 - 23: svg 1.2 modes
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, 0, 0, 0,

    // slots 24 - 32: raster operations
    rasterop_solid_SourceOrDestination<QMMX3DNOWIntrinsics>,
    rasterop_solid_SourceAndDestination<QMMX3DNOWIntrinsics>,
    rasterop_solid_SourceXorDestination<QMMX3DNOWIntrinsics>,
    rasterop_solid_NotSourceAndNotDestination<QMMX3DNOWIntrinsics>,
    rasterop_solid_NotSourceOrNotDestination<QMMX3DNOWIntrinsics>,
    rasterop_solid_NotSourceXorDestination<QMMX3DNOWIntrinsics>,
    rasterop_solid_NotSource<QMMX3DNOWIntrinsics>,
    rasterop_solid_NotSourceAndDestination<QMMX3DNOWIntrinsics>,
    rasterop_solid_SourceAndNotDestination<QMMX3DNOWIntrinsics>
};
|
||||
|
||||
// Span composition functions for the MMX + 3DNow! path. Templated entries are
// instantiated for QMMX3DNOWIntrinsics; the remaining entries are the
// non-templated routines declared elsewhere.
CompositionFunction qt_functionForMode_MMX3DNOW[numCompositionFunctions] = {
    // slots 0 - 11: alpha compositing operators
    comp_func_SourceOver<QMMX3DNOWIntrinsics>,
    comp_func_DestinationOver<QMMX3DNOWIntrinsics>,
    comp_func_Clear<QMMX3DNOWIntrinsics>,
    comp_func_Source<QMMX3DNOWIntrinsics>,
    comp_func_Destination,
    comp_func_SourceIn<QMMX3DNOWIntrinsics>,
    comp_func_DestinationIn<QMMX3DNOWIntrinsics>,
    comp_func_SourceOut<QMMX3DNOWIntrinsics>,
    comp_func_DestinationOut<QMMX3DNOWIntrinsics>,
    comp_func_SourceAtop<QMMX3DNOWIntrinsics>,
    comp_func_DestinationAtop<QMMX3DNOWIntrinsics>,
    comp_func_XOR<QMMX3DNOWIntrinsics>,

    // slots 12 - 23: blend modes (no 3DNow! specialisation)
    comp_func_Plus,
    comp_func_Multiply,
    comp_func_Screen,
    comp_func_Overlay,
    comp_func_Darken,
    comp_func_Lighten,
    comp_func_ColorDodge,
    comp_func_ColorBurn,
    comp_func_HardLight,
    comp_func_SoftLight,
    comp_func_Difference,
    comp_func_Exclusion,

    // slots 24 - 32: raster operations (no 3DNow! specialisation)
    rasterop_SourceOrDestination,
    rasterop_SourceAndDestination,
    rasterop_SourceXorDestination,
    rasterop_NotSourceAndNotDestination,
    rasterop_NotSourceOrNotDestination,
    rasterop_NotSourceXorDestination,
    rasterop_NotSource,
    rasterop_NotSourceAndDestination,
    rasterop_SourceAndNotDestination
};
|
||||
|
||||
void qt_blend_color_argb_mmx3dnow(int count, const QSpan *spans, void *userData)
|
||||
{
|
||||
qt_blend_color_argb_x86<QMMX3DNOWIntrinsics>(count, spans, userData,
|
||||
(CompositionFunctionSolid*)qt_functionForModeSolid_MMX3DNOW);
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_HAVE_3DNOW
|
||||
|
||||
|
||||
|
|
@ -1,892 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef QDRAWHELPER_MMX_P_H
|
||||
#define QDRAWHELPER_MMX_P_H
|
||||
|
||||
//
|
||||
// W A R N I N G
|
||||
// -------------
|
||||
//
|
||||
// This file is not part of the Qt API. It exists purely as an
|
||||
// implementation detail. This header file may change from version to
|
||||
// version without notice, or even be removed.
|
||||
//
|
||||
// We mean it.
|
||||
//
|
||||
|
||||
#include <qdrawhelper_p.h>
|
||||
#include <qdrawhelper_x86_p.h>
|
||||
#include <qpaintengine_raster_p.h>
|
||||
|
||||
#ifdef QT_HAVE_MMX
|
||||
#include <mmintrin.h>
|
||||
#endif
|
||||
|
||||
#define C_FF const m64 mmx_0x00ff = _mm_set1_pi16(0xff)
|
||||
#define C_80 const m64 mmx_0x0080 = _mm_set1_pi16(0x80)
|
||||
#define C_00 const m64 mmx_0x0000 = _mm_setzero_si64()
|
||||
|
||||
#ifdef Q_CC_MSVC
|
||||
# pragma warning(disable: 4799) // No EMMS at end of function
|
||||
#endif
|
||||
|
||||
typedef __m64 m64;
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
struct QMMXCommonIntrinsics
|
||||
{
|
||||
static inline m64 alpha(m64 x) {
|
||||
x = _mm_unpackhi_pi16(x, x);
|
||||
x = _mm_unpackhi_pi16(x, x);
|
||||
return x;
|
||||
}
|
||||
|
||||
static inline m64 _negate(const m64 &x, const m64 &mmx_0x00ff) {
|
||||
return _mm_xor_si64(x, mmx_0x00ff);
|
||||
}
|
||||
|
||||
static inline m64 add(const m64 &a, const m64 &b) {
|
||||
return _mm_adds_pu16 (a, b);
|
||||
}
|
||||
|
||||
static inline m64 _byte_mul(const m64 &a, const m64 &b,
|
||||
const m64 &mmx_0x0080)
|
||||
{
|
||||
m64 res = _mm_mullo_pi16(a, b);
|
||||
res = _mm_adds_pu16(res, mmx_0x0080);
|
||||
res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
|
||||
return _mm_srli_pi16(res, 8);
|
||||
}
|
||||
|
||||
static inline m64 interpolate_pixel_256(const m64 &x, const m64 &a,
|
||||
const m64 &y, const m64 &b)
|
||||
{
|
||||
m64 res = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b));
|
||||
return _mm_srli_pi16(res, 8);
|
||||
}
|
||||
|
||||
static inline m64 _interpolate_pixel_255(const m64 &x, const m64 &a,
|
||||
const m64 &y, const m64 &b,
|
||||
const m64 &mmx_0x0080)
|
||||
{
|
||||
m64 res = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b));
|
||||
res = _mm_adds_pu16(res, mmx_0x0080);
|
||||
res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
|
||||
return _mm_srli_pi16(res, 8);
|
||||
}
|
||||
|
||||
static inline m64 _premul(m64 x, const m64 &mmx_0x0080) {
|
||||
m64 a = alpha(x);
|
||||
return _byte_mul(x, a, mmx_0x0080);
|
||||
}
|
||||
|
||||
static inline m64 _load(uint x, const m64 &mmx_0x0000) {
|
||||
return _mm_unpacklo_pi8(_mm_cvtsi32_si64(x), mmx_0x0000);
|
||||
}
|
||||
|
||||
static inline m64 _load_alpha(uint x, const m64 &) {
|
||||
x |= (x << 16);
|
||||
return _mm_set1_pi32(x);
|
||||
}
|
||||
|
||||
static inline uint _store(const m64 &x, const m64 &mmx_0x0000) {
|
||||
return _mm_cvtsi64_si32(_mm_packs_pu16(x, mmx_0x0000));
|
||||
}
|
||||
};
|
||||
|
||||
#define negate(x) _negate(x, mmx_0x00ff)
|
||||
#define byte_mul(a, b) _byte_mul(a, b, mmx_0x0080)
|
||||
#define interpolate_pixel_255(x, a, y, b) _interpolate_pixel_255(x, a, y, b, mmx_0x0080)
|
||||
#define premul(x) _premul(x, mmx_0x0080)
|
||||
#define load(x) _load(x, mmx_0x0000)
|
||||
#define load_alpha(x) _load_alpha(x, mmx_0x0000)
|
||||
#define store(x) _store(x, mmx_0x0000)
|
||||
|
||||
/*
|
||||
result = 0
|
||||
d = d * cia
|
||||
*/
|
||||
/*
  Shared body of comp_func_solid_Clear() and comp_func_Clear().
  Must be expanded where a template parameter named MM is in scope.
  Full coverage (255) zero-fills the span; otherwise every destination
  pixel is scaled by the negated coverage.
*/
#define comp_func_Clear_impl(dest, length, const_alpha)\
{\
    if (const_alpha != 255) {\
        C_FF; C_80; C_00;\
        const m64 inverseCoverage = MM::negate(MM::load_alpha(const_alpha));\
        for (int idx = 0; idx < length; ++idx)\
            dest[idx] = MM::store(MM::byte_mul(MM::load(dest[idx]), inverseCoverage));\
        MM::end();\
    } else {\
        qt_memfill(static_cast<quint32*>(dest), quint32(0), length);\
    }\
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_solid_Clear(uint *dest, int length, uint, uint const_alpha)
|
||||
{
|
||||
comp_func_Clear_impl(dest, length, const_alpha);
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_Clear(uint *dest, const uint *, int length, uint const_alpha)
|
||||
{
|
||||
comp_func_Clear_impl(dest, length, const_alpha);
|
||||
}
|
||||
|
||||
/*
|
||||
result = s
|
||||
dest = s * ca + d * cia
|
||||
*/
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_solid_Source(uint *dest, int length, uint src, uint const_alpha)
|
||||
{
|
||||
if (const_alpha == 255) {
|
||||
qt_memfill(static_cast<quint32*>(dest), quint32(src), length);
|
||||
} else {
|
||||
C_FF; C_80; C_00;
|
||||
const m64 a = MM::load_alpha(const_alpha);
|
||||
const m64 ia = MM::negate(a);
|
||||
const m64 s = MM::byte_mul(MM::load(src), a);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia)));
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_Source(uint *dest, const uint *src, int length, uint const_alpha)
|
||||
{
|
||||
if (const_alpha == 255) {
|
||||
::memcpy(dest, src, length * sizeof(uint));
|
||||
} else {
|
||||
C_FF; C_80; C_00;
|
||||
const m64 a = MM::load_alpha(const_alpha);
|
||||
const m64 ia = MM::negate(a);
|
||||
for (int i = 0; i < length; ++i)
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(MM::load(src[i]), a,
|
||||
MM::load(dest[i]), ia));
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
/*
|
||||
result = s + d * sia
|
||||
dest = (s + d * sia) * ca + d * cia
|
||||
= s * ca + d * (sia * ca + cia)
|
||||
= s * ca + d * (1 - sa*ca)
|
||||
*/
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_solid_SourceOver(uint *dest, int length, uint src, uint const_alpha)
|
||||
{
|
||||
if ((const_alpha & qAlpha(src)) == 255) {
|
||||
qt_memfill(static_cast<quint32*>(dest), quint32(src), length);
|
||||
} else {
|
||||
C_FF; C_80; C_00;
|
||||
m64 s = MM::load(src);
|
||||
if (const_alpha != 255) {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
s = MM::byte_mul(s, ca);
|
||||
}
|
||||
m64 a = MM::negate(MM::alpha(s));
|
||||
for (int i = 0; i < length; ++i)
|
||||
dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), a)));
|
||||
MM::end();
|
||||
}
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_SourceOver(uint *dest, const uint *src, int length, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
if (const_alpha == 255) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
const uint alphaMaskedSource = 0xff000000 & src[i];
|
||||
if (alphaMaskedSource == 0)
|
||||
continue;
|
||||
if (alphaMaskedSource == 0xff000000) {
|
||||
dest[i] = src[i];
|
||||
} else {
|
||||
m64 s = MM::load(src[i]);
|
||||
m64 ia = MM::negate(MM::alpha(s));
|
||||
dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia)));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
if ((0xff000000 & src[i]) == 0)
|
||||
continue;
|
||||
m64 s = MM::byte_mul(MM::load(src[i]), ca);
|
||||
m64 ia = MM::negate(MM::alpha(s));
|
||||
dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia)));
|
||||
}
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
/*
|
||||
result = d + s * dia
|
||||
dest = (d + s * dia) * ca + d * cia
|
||||
= d + s * dia * ca
|
||||
*/
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_solid_DestinationOver(uint *dest, int length, uint src, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
m64 s = MM::load(src);
|
||||
if (const_alpha != 255)
|
||||
s = MM::byte_mul(s, MM::load_alpha(const_alpha));
|
||||
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 d = MM::load(dest[i]);
|
||||
m64 dia = MM::negate(MM::alpha(d));
|
||||
dest[i] = MM::store(MM::add(d, MM::byte_mul(s, dia)));
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_DestinationOver(uint *dest, const uint *src, int length, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
if (const_alpha == 255) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 d = MM::load(dest[i]);
|
||||
m64 ia = MM::negate(MM::alpha(d));
|
||||
dest[i] = MM::store(MM::add(d, MM::byte_mul(MM::load(src[i]), ia)));
|
||||
}
|
||||
} else {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 d = MM::load(dest[i]);
|
||||
m64 dia = MM::negate(MM::alpha(d));
|
||||
dia = MM::byte_mul(dia, ca);
|
||||
dest[i] = MM::store(MM::add(d, MM::byte_mul(MM::load(src[i]), dia)));
|
||||
}
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
/*
|
||||
result = s * da
|
||||
dest = s * da * ca + d * cia
|
||||
*/
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_solid_SourceIn(uint *dest, int length, uint src, uint const_alpha)
|
||||
{
|
||||
C_80; C_00;
|
||||
if (const_alpha == 255) {
|
||||
m64 s = MM::load(src);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 da = MM::alpha(MM::load(dest[i]));
|
||||
dest[i] = MM::store(MM::byte_mul(s, da));
|
||||
}
|
||||
} else {
|
||||
C_FF;
|
||||
m64 s = MM::load(src);
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
s = MM::byte_mul(s, ca);
|
||||
m64 cia = MM::negate(ca);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 d = MM::load(dest[i]);
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d, cia));
|
||||
}
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_SourceIn(uint *dest, const uint *src, int length, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
if (const_alpha == 255) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 a = MM::alpha(MM::load(dest[i]));
|
||||
dest[i] = MM::store(MM::byte_mul(MM::load(src[i]), a));
|
||||
}
|
||||
} else {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
m64 cia = MM::negate(ca);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 d = MM::load(dest[i]);
|
||||
m64 da = MM::byte_mul(MM::alpha(d), ca);
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(
|
||||
MM::load(src[i]), da, d, cia));
|
||||
}
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
/*
|
||||
result = d * sa
|
||||
dest = d * sa * ca + d * cia
|
||||
= d * (sa * ca + cia)
|
||||
*/
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_solid_DestinationIn(uint *dest, int length, uint src, uint const_alpha)
|
||||
{
|
||||
C_80; C_00;
|
||||
m64 a = MM::alpha(MM::load(src));
|
||||
if (const_alpha != 255) {
|
||||
C_FF;
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
m64 cia = MM::negate(ca);
|
||||
a = MM::byte_mul(a, ca);
|
||||
a = MM::add(a, cia);
|
||||
}
|
||||
for (int i = 0; i < length; ++i)
|
||||
dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_DestinationIn(uint *dest, const uint *src, int length, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
if (const_alpha == 255) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 a = MM::alpha(MM::load(src[i]));
|
||||
dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
|
||||
}
|
||||
} else {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
m64 cia = MM::negate(ca);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 d = MM::load(dest[i]);
|
||||
m64 a = MM::alpha(MM::load(src[i]));
|
||||
a = MM::byte_mul(a, ca);
|
||||
a = MM::add(a, cia);
|
||||
dest[i] = MM::store(MM::byte_mul(d, a));
|
||||
}
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
/*
|
||||
result = s * dia
|
||||
dest = s * dia * ca + d * cia
|
||||
*/
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_solid_SourceOut(uint *dest, int length, uint src, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
m64 s = MM::load(src);
|
||||
if (const_alpha == 255) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 dia = MM::negate(MM::alpha(MM::load(dest[i])));
|
||||
dest[i] = MM::store(MM::byte_mul(s, dia));
|
||||
}
|
||||
} else {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
m64 cia = MM::negate(ca);
|
||||
s = MM::byte_mul(s, ca);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 d = MM::load(dest[i]);
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), d, cia));
|
||||
}
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_SourceOut(uint *dest, const uint *src, int length, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
if (const_alpha == 255) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 ia = MM::negate(MM::alpha(MM::load(dest[i])));
|
||||
dest[i] = MM::store(MM::byte_mul(MM::load(src[i]), ia));
|
||||
}
|
||||
} else {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
m64 cia = MM::negate(ca);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 d = MM::load(dest[i]);
|
||||
m64 dia = MM::byte_mul(MM::negate(MM::alpha(d)), ca);
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(MM::load(src[i]), dia, d, cia));
|
||||
}
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
/*
|
||||
result = d * sia
|
||||
dest = d * sia * ca + d * cia
|
||||
= d * (sia * ca + cia)
|
||||
*/
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_solid_DestinationOut(uint *dest, int length, uint src, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
m64 a = MM::negate(MM::alpha(MM::load(src)));
|
||||
if (const_alpha != 255) {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
a = MM::byte_mul(a, ca);
|
||||
a = MM::add(a, MM::negate(ca));
|
||||
}
|
||||
for (int i = 0; i < length; ++i)
|
||||
dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_DestinationOut(uint *dest, const uint *src, int length, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
if (const_alpha == 255) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 a = MM::negate(MM::alpha(MM::load(src[i])));
|
||||
dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
|
||||
}
|
||||
} else {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
m64 cia = MM::negate(ca);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 d = MM::load(dest[i]);
|
||||
m64 a = MM::negate(MM::alpha(MM::load(src[i])));
|
||||
a = MM::byte_mul(a, ca);
|
||||
a = MM::add(a, cia);
|
||||
dest[i] = MM::store(MM::byte_mul(d, a));
|
||||
}
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
/*
|
||||
result = s*da + d*sia
|
||||
dest = s*da*ca + d*sia*ca + d *cia
|
||||
= s*ca * da + d * (sia*ca + cia)
|
||||
= s*ca * da + d * (1 - sa*ca)
|
||||
*/
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_solid_SourceAtop(uint *dest, int length, uint src, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
m64 s = MM::load(src);
|
||||
if (const_alpha != 255) {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
s = MM::byte_mul(s, ca);
|
||||
}
|
||||
m64 a = MM::negate(MM::alpha(s));
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 d = MM::load(dest[i]);
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d, a));
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_SourceAtop(uint *dest, const uint *src, int length, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
if (const_alpha == 255) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 s = MM::load(src[i]);
|
||||
m64 d = MM::load(dest[i]);
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d,
|
||||
MM::negate(MM::alpha(s))));
|
||||
}
|
||||
} else {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 s = MM::load(src[i]);
|
||||
s = MM::byte_mul(s, ca);
|
||||
m64 d = MM::load(dest[i]);
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d,
|
||||
MM::negate(MM::alpha(s))));
|
||||
}
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
/*
|
||||
result = d*sa + s*dia
|
||||
dest = d*sa*ca + s*dia*ca + d *cia
|
||||
= s*ca * dia + d * (sa*ca + cia)
|
||||
*/
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_solid_DestinationAtop(uint *dest, int length, uint src, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
m64 s = MM::load(src);
|
||||
m64 a = MM::alpha(s);
|
||||
if (const_alpha != 255) {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
s = MM::byte_mul(s, ca);
|
||||
a = MM::alpha(s);
|
||||
a = MM::add(a, MM::negate(ca));
|
||||
}
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 d = MM::load(dest[i]);
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), d, a));
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_DestinationAtop(uint *dest, const uint *src, int length, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
if (const_alpha == 255) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 s = MM::load(src[i]);
|
||||
m64 d = MM::load(dest[i]);
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(d, MM::alpha(s), s,
|
||||
MM::negate(MM::alpha(d))));
|
||||
}
|
||||
} else {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 s = MM::load(src[i]);
|
||||
s = MM::byte_mul(s, ca);
|
||||
m64 d = MM::load(dest[i]);
|
||||
m64 a = MM::alpha(s);
|
||||
a = MM::add(a, MM::negate(ca));
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
|
||||
d, a));
|
||||
}
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
/*
|
||||
result = d*sia + s*dia
|
||||
dest = d*sia*ca + s*dia*ca + d *cia
|
||||
= s*ca * dia + d * (sia*ca + cia)
|
||||
= s*ca * dia + d * (1 - sa*ca)
|
||||
*/
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_solid_XOR(uint *dest, int length, uint src, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
m64 s = MM::load(src);
|
||||
if (const_alpha != 255) {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
s = MM::byte_mul(s, ca);
|
||||
}
|
||||
m64 a = MM::negate(MM::alpha(s));
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 d = MM::load(dest[i]);
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
|
||||
d, a));
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL comp_func_XOR(uint *dest, const uint *src, int length, uint const_alpha)
|
||||
{
|
||||
C_FF; C_80; C_00;
|
||||
if (const_alpha == 255) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 s = MM::load(src[i]);
|
||||
m64 d = MM::load(dest[i]);
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
|
||||
d, MM::negate(MM::alpha(s))));
|
||||
}
|
||||
} else {
|
||||
m64 ca = MM::load_alpha(const_alpha);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
m64 s = MM::load(src[i]);
|
||||
s = MM::byte_mul(s, ca);
|
||||
m64 d = MM::load(dest[i]);
|
||||
dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
|
||||
d, MM::negate(MM::alpha(s))));
|
||||
}
|
||||
}
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL rasterop_solid_SourceOrDestination(uint *dest,
|
||||
int length,
|
||||
uint color,
|
||||
uint const_alpha)
|
||||
{
|
||||
Q_UNUSED(const_alpha);
|
||||
|
||||
if ((quintptr)(dest) & 0x7) {
|
||||
*dest++ |= color;
|
||||
--length;
|
||||
}
|
||||
|
||||
const int length64 = length / 2;
|
||||
if (length64) {
|
||||
__m64 *dst64 = reinterpret_cast<__m64*>(dest);
|
||||
const __m64 color64 = _mm_set_pi32(color, color);
|
||||
|
||||
int n = (length64 + 3) / 4;
|
||||
switch (length64 & 0x3) {
|
||||
case 0: do { *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
|
||||
case 3: *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
|
||||
case 2: *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
|
||||
case 1: *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
|
||||
} while (--n > 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (length & 0x1) {
|
||||
dest[length - 1] |= color;
|
||||
}
|
||||
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL rasterop_solid_SourceAndDestination(uint *dest,
|
||||
int length,
|
||||
uint color,
|
||||
uint const_alpha)
|
||||
{
|
||||
Q_UNUSED(const_alpha);
|
||||
|
||||
color |= 0xff000000;
|
||||
|
||||
if ((quintptr)(dest) & 0x7) { // align
|
||||
*dest++ &= color;
|
||||
--length;
|
||||
}
|
||||
|
||||
const int length64 = length / 2;
|
||||
if (length64) {
|
||||
__m64 *dst64 = reinterpret_cast<__m64*>(dest);
|
||||
const __m64 color64 = _mm_set_pi32(color, color);
|
||||
|
||||
int n = (length64 + 3) / 4;
|
||||
switch (length64 & 0x3) {
|
||||
case 0: do { *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
|
||||
case 3: *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
|
||||
case 2: *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
|
||||
case 1: *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
|
||||
} while (--n > 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (length & 0x1) {
|
||||
dest[length - 1] &= color;
|
||||
}
|
||||
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL rasterop_solid_SourceXorDestination(uint *dest,
|
||||
int length,
|
||||
uint color,
|
||||
uint const_alpha)
|
||||
{
|
||||
Q_UNUSED(const_alpha);
|
||||
|
||||
color &= 0x00ffffff;
|
||||
|
||||
if ((quintptr)(dest) & 0x7) {
|
||||
*dest++ ^= color;
|
||||
--length;
|
||||
}
|
||||
|
||||
const int length64 = length / 2;
|
||||
if (length64) {
|
||||
__m64 *dst64 = reinterpret_cast<__m64*>(dest);
|
||||
const __m64 color64 = _mm_set_pi32(color, color);
|
||||
|
||||
int n = (length64 + 3) / 4;
|
||||
switch (length64 & 0x3) {
|
||||
case 0: do { *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
|
||||
case 3: *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
|
||||
case 2: *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
|
||||
case 1: *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
|
||||
} while (--n > 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (length & 0x1) {
|
||||
dest[length - 1] ^= color;
|
||||
}
|
||||
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL rasterop_solid_SourceAndNotDestination(uint *dest,
|
||||
int length,
|
||||
uint color,
|
||||
uint const_alpha)
|
||||
{
|
||||
|
||||
Q_UNUSED(const_alpha);
|
||||
|
||||
if ((quintptr)(dest) & 0x7) {
|
||||
*dest = (color & ~(*dest)) | 0xff000000;
|
||||
++dest;
|
||||
--length;
|
||||
}
|
||||
|
||||
const int length64 = length / 2;
|
||||
if (length64) {
|
||||
__m64 *dst64 = reinterpret_cast<__m64*>(dest);
|
||||
const __m64 color64 = _mm_set_pi32(color, color);
|
||||
const m64 mmx_0xff000000 = _mm_set1_pi32(0xff000000);
|
||||
__m64 tmp1, tmp2, tmp3, tmp4;
|
||||
|
||||
int n = (length64 + 3) / 4;
|
||||
switch (length64 & 0x3) {
|
||||
case 0: do { tmp1 = _mm_andnot_si64(*dst64, color64);
|
||||
*dst64++ = _mm_or_si64(tmp1, mmx_0xff000000);
|
||||
case 3: tmp2 = _mm_andnot_si64(*dst64, color64);
|
||||
*dst64++ = _mm_or_si64(tmp2, mmx_0xff000000);
|
||||
case 2: tmp3 = _mm_andnot_si64(*dst64, color64);
|
||||
*dst64++ = _mm_or_si64(tmp3, mmx_0xff000000);
|
||||
case 1: tmp4 = _mm_andnot_si64(*dst64, color64);
|
||||
*dst64++ = _mm_or_si64(tmp4, mmx_0xff000000);
|
||||
} while (--n > 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (length & 0x1) {
|
||||
dest[length - 1] = (color & ~(dest[length - 1])) | 0xff000000;
|
||||
}
|
||||
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL rasterop_solid_NotSourceAndNotDestination(uint *dest,
|
||||
int length,
|
||||
uint color,
|
||||
uint const_alpha)
|
||||
{
|
||||
rasterop_solid_SourceAndNotDestination<MM>(dest, length,
|
||||
~color, const_alpha);
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL rasterop_solid_NotSourceOrNotDestination(uint *dest,
|
||||
int length,
|
||||
uint color,
|
||||
uint const_alpha)
|
||||
{
|
||||
Q_UNUSED(const_alpha);
|
||||
color = ~color | 0xff000000;
|
||||
while (length--) {
|
||||
*dest = color | ~(*dest);
|
||||
++dest;
|
||||
}
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL rasterop_solid_NotSourceXorDestination(uint *dest,
|
||||
int length,
|
||||
uint color,
|
||||
uint const_alpha)
|
||||
{
|
||||
rasterop_solid_SourceXorDestination<MM>(dest, length, ~color, const_alpha);
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL rasterop_solid_NotSource(uint *dest, int length,
|
||||
uint color, uint const_alpha)
|
||||
{
|
||||
Q_UNUSED(const_alpha);
|
||||
qt_memfill((quint32*)dest, ~color | 0xff000000, length);
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static void QT_FASTCALL rasterop_solid_NotSourceAndDestination(uint *dest,
|
||||
int length,
|
||||
uint color,
|
||||
uint const_alpha)
|
||||
{
|
||||
rasterop_solid_SourceAndDestination<MM>(dest, length,
|
||||
~color, const_alpha);
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
static inline void qt_blend_color_argb_x86(int count, const QSpan *spans,
|
||||
void *userData,
|
||||
CompositionFunctionSolid *solidFunc)
|
||||
{
|
||||
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
|
||||
if (data->rasterBuffer->compositionMode == QPainter::CompositionMode_Source
|
||||
|| (data->rasterBuffer->compositionMode == QPainter::CompositionMode_SourceOver
|
||||
&& qAlpha(data->solid.color) == 255)) {
|
||||
// inline for performance
|
||||
C_FF; C_80; C_00;
|
||||
while (count--) {
|
||||
uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
|
||||
if (spans->coverage == 255) {
|
||||
qt_memfill(static_cast<quint32*>(target), quint32(data->solid.color), spans->len);
|
||||
} else {
|
||||
// dest = s * ca + d * (1 - sa*ca) --> dest = s * ca + d * (1-ca)
|
||||
m64 ca = MM::load_alpha(spans->coverage);
|
||||
m64 s = MM::byte_mul(MM::load(data->solid.color), ca);
|
||||
m64 ica = MM::negate(ca);
|
||||
for (int i = 0; i < spans->len; ++i)
|
||||
target[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(target[i]), ica)));
|
||||
}
|
||||
++spans;
|
||||
}
|
||||
MM::end();
|
||||
return;
|
||||
}
|
||||
CompositionFunctionSolid func = solidFunc[data->rasterBuffer->compositionMode];
|
||||
while (count--) {
|
||||
uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
|
||||
func(target, spans->len, data->solid.color, spans->coverage);
|
||||
++spans;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef QT_HAVE_MMX
// MMX flavour of the intrinsics policy class: inherits everything from
// QMMXCommonIntrinsics and only supplies end().
struct QMMXIntrinsics : public QMMXCommonIntrinsics
{
    // Issues _mm_empty(), except on Q_OS_WINCE builds without _X86_,
    // where it does nothing.
    static inline void end()
    {
#if defined(Q_OS_WINCE) && !defined(_X86_)
        // intentionally empty
#else
        _mm_empty();
#endif
    }
};
#endif // QT_HAVE_MMX
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QDRAWHELPER_MMX_P_H
|
File diff suppressed because it is too large
Load diff
|
@ -1,297 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
/* Prevent the stack from becoming executable for no reason... */
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
||||
|
||||
.text
|
||||
.fpu neon
|
||||
.arch armv7a
|
||||
.altmacro
|
||||
|
||||
/* void blend_8_pixels_argb32_on_rgb16_neon(quint16 *dst, const quint32 *src, int const_alpha) */
/* Blends 8 32-bit source pixels onto 8 r5g6b5 destination pixels, in place. */
/* Register use below: r0 = dst (loaded and stored), r1 = src, r2 = const_alpha. */
/* Per channel: result = saturating_add(src, dst * (255 - src_alpha) / 255). */
/* const_alpha == 256 skips the extra scaling of the source. */
|
||||
|
||||
.func blend_8_pixels_argb32_on_rgb16_neon
|
||||
.global blend_8_pixels_argb32_on_rgb16_neon
|
||||
/* For ELF format also set function visibility to hidden */
|
||||
#ifdef __ELF__
|
||||
.hidden blend_8_pixels_argb32_on_rgb16_neon
|
||||
.type blend_8_pixels_argb32_on_rgb16_neon, %function
|
||||
#endif
|
||||
blend_8_pixels_argb32_on_rgb16_neon:
|
||||
/* de-interleave the 4 bytes of each of the 8 source pixels into d0..d3 */
/* (d3, the fourth byte, is used as alpha below; d2/d1/d0 feed red/green/blue) */
vld4.8 { d0, d1, d2, d3 }, [r1]
|
||||
/* load the 8 destination r5g6b5 pixels into q2 (d4, d5) */
vld1.16 { d4, d5 }, [r0]
|
||||
|
||||
/* const_alpha == 256: source is used unscaled, go straight to the blend */
cmp r2, #256
|
||||
beq .blend_32_inner
|
||||
|
||||
/* broadcast the low byte of const_alpha to every lane of d6 */
vdup.8 d6, r2
|
||||
|
||||
/* multiply by const_alpha */
|
||||
vmull.u8 q8, d6, d0
|
||||
vmull.u8 q9, d6, d1
|
||||
vmull.u8 q10, d6, d2
|
||||
vmull.u8 q11, d6, d3
|
||||
|
||||
/* keep the high byte of each 16-bit product: (channel * const_alpha) >> 8 */
vshrn.u16 d0, q8, #8
|
||||
vshrn.u16 d1, q9, #8
|
||||
vshrn.u16 d2, q10, #8
|
||||
vshrn.u16 d3, q11, #8
|
||||
|
||||
.blend_32_inner:
|
||||
/* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
|
||||
and put data into d6 - red, d7 - green, d30 - blue */
|
||||
vshrn.u16 d6, q2, #8
|
||||
vshrn.u16 d7, q2, #3
|
||||
vsli.u16 q2, q2, #5
|
||||
vsri.u8 d6, d6, #5
|
||||
/* d3 = ~d3: inverse of the (possibly scaled) source alpha, interleaved with the unpacking */
vmvn.8 d3, d3
|
||||
vsri.u8 d7, d7, #6
|
||||
vshrn.u16 d30, q2, #2
|
||||
|
||||
pld [r0, #128]
|
||||
|
||||
/* now do alpha blending, storing results in 8-bit planar format
|
||||
into d16 - red, d19 - green, d18 - blue */
|
||||
vmull.u8 q10, d3, d6
|
||||
vmull.u8 q11, d3, d7
|
||||
vmull.u8 q12, d3, d30
|
||||
/* x + round(x >> 8), then rounding-narrow the high byte: divide-by-255 step */
vrshr.u16 q13, q10, #8
|
||||
vrshr.u16 q3, q11, #8
|
||||
vrshr.u16 q15, q12, #8
|
||||
vraddhn.u16 d20, q10, q13
|
||||
vraddhn.u16 d23, q11, q3
|
||||
vraddhn.u16 d22, q12, q15
|
||||
/* saturating add of the source channels: d16 = d2 + d20, then {d18, d19} = {d0, d1} + {d22, d23} */
vqadd.u8 d16, d2, d20
|
||||
vqadd.u8 q9, q0, q11
|
||||
/* convert the result to r5g6b5 and store it into {d28, d29} */
|
||||
vshll.u8 q14, d16, #8
|
||||
vshll.u8 q8, d19, #8
|
||||
vshll.u8 q9, d18, #8
|
||||
vsri.u16 q14, q8, #5
|
||||
vsri.u16 q14, q9, #11
|
||||
|
||||
/* write the 8 blended pixels back over the destination */
vst1.16 { d28, d29 }, [r0]
|
||||
|
||||
bx lr
|
||||
|
||||
.endfunc
|
||||
|
||||
/* void blend_8_pixels_rgb16_on_rgb16_neon(quint16 *dst, const quint16 *src, int const_alpha) */
/* Blends 8 r5g6b5 source pixels onto 8 r5g6b5 destination pixels, in place. */
/* Register use below: r0 = dst (loaded and stored), r1 = src, r2 = const_alpha. */
/* Per channel: result = ((src * const_alpha) >> 8) + ((dst * (256 - const_alpha)) >> 8). */
|
||||
|
||||
.func blend_8_pixels_rgb16_on_rgb16_neon
|
||||
.global blend_8_pixels_rgb16_on_rgb16_neon
|
||||
/* For ELF format also set function visibility to hidden */
|
||||
#ifdef __ELF__
|
||||
.hidden blend_8_pixels_rgb16_on_rgb16_neon
|
||||
.type blend_8_pixels_rgb16_on_rgb16_neon, %function
|
||||
#endif
|
||||
blend_8_pixels_rgb16_on_rgb16_neon:
|
||||
/* q0 = 8 destination pixels, q1 = 8 source pixels */
vld1.16 { d0, d1 }, [r0]
|
||||
vld1.16 { d2, d3 }, [r1]
|
||||
|
||||
/* r3 = 256 - const_alpha; d4 = const_alpha and d5 = its inverse, one byte per lane */
/* NOTE(review): vdup.8 keeps only the low 8 bits, so const_alpha of 0 or 256 */
/* wraps one of the two multipliers to 0 - presumably callers never pass those; confirm */
rsb r3, r2, #256
|
||||
vdup.8 d4, r2
|
||||
vdup.8 d5, r3
|
||||
|
||||
/* convert 8 r5g6b5 pixel data from {d0, d1} to planar 8-bit format
|
||||
and put data into d6 - red, d7 - green, d30 - blue */
|
||||
vshrn.u16 d6, q0, #8
|
||||
vshrn.u16 d7, q0, #3
|
||||
vsli.u16 q0, q0, #5
|
||||
vsri.u8 d6, d6, #5
|
||||
vsri.u8 d7, d7, #6
|
||||
vshrn.u16 d30, q0, #2
|
||||
|
||||
/* same from {d2, d3} into {d26, d27, d28} */
|
||||
vshrn.u16 d26, q1, #8
|
||||
vshrn.u16 d27, q1, #3
|
||||
vsli.u16 q1, q1, #5
|
||||
vsri.u8 d26, d26, #5
|
||||
vsri.u8 d27, d27, #6
|
||||
vshrn.u16 d28, q1, #2
|
||||
|
||||
/* multiply dst by inv const_alpha */
|
||||
vmull.u8 q10, d5, d6
|
||||
vmull.u8 q11, d5, d7
|
||||
vmull.u8 q12, d5, d30
|
||||
|
||||
/* keep the high byte of each product (>> 8, truncating) */
vshrn.u16 d6, q10, #8
|
||||
vshrn.u16 d7, q11, #8
|
||||
vshrn.u16 d30, q12, #8
|
||||
|
||||
/* multiply src by const_alpha */
|
||||
vmull.u8 q10, d4, d26
|
||||
vmull.u8 q11, d4, d27
|
||||
vmull.u8 q12, d4, d28
|
||||
|
||||
/* keep the high byte of each product (>> 8, truncating) */
vshrn.u16 d26, q10, #8
|
||||
vshrn.u16 d27, q11, #8
|
||||
vshrn.u16 d28, q12, #8
|
||||
|
||||
/* preload dst + 128 */
|
||||
pld [r0, #128]
|
||||
|
||||
/* add components, storing results in 8-bit planar format
|
||||
into d16 - red, d19 - green, d18 - blue */
|
||||
vadd.u8 d16, d26, d6
|
||||
vadd.u8 d19, d27, d7
|
||||
vadd.u8 d18, d28, d30
|
||||
|
||||
/* convert the result to r5g6b5 and store it into {d28, d29} */
|
||||
vshll.u8 q14, d16, #8
|
||||
vshll.u8 q8, d19, #8
|
||||
vshll.u8 q9, d18, #8
|
||||
vsri.u16 q14, q8, #5
|
||||
vsri.u16 q14, q9, #11
|
||||
|
||||
/* write the 8 blended pixels back over the destination */
vst1.16 { d28, d29 }, [r0]
|
||||
|
||||
bx lr
|
||||
|
||||
.endfunc
|
||||
|
||||
/* void qt_rotate90_16_neon(quint16 *dst, const quint16 *src, int sstride, int dstride, int count) */
/* Register use below: r0 = dst, r1 = src, r2 = sstride, r3 = dstride; count is read from the stack. */
/* Each source row read is 8 halfwords wide; they are spread over 8 destination rows */
/* that lie dstride bytes apart. The main loop takes 4 source rows per pass, the tail loop 2. */
|
||||
.func qt_rotate90_16_neon
|
||||
.global qt_rotate90_16_neon
|
||||
/* For ELF format also set function visibility to hidden */
|
||||
#ifdef __ELF__
|
||||
.hidden qt_rotate90_16_neon
|
||||
.type qt_rotate90_16_neon, %function
|
||||
#endif
|
||||
qt_rotate90_16_neon:
|
||||
/* 9 registers are pushed, so the fifth argument (count) now sits at sp + 9*4 */
push { r4-r11, lr }
|
||||
ldr r5, [sp, #(9*4)]
|
||||
|
||||
/* The preloads are the key to getting good performance */
|
||||
pld [r1]
|
||||
|
||||
/* r4 = count >> 2: number of 4-row passes of the main loop */
mov r4, r5, asr #2
|
||||
/* r6..r11 and (further down) r3 become dst + 1..7 * dstride: with r0, eight destination row pointers */
add r6, r0, r3
|
||||
add r7, r6, r3
|
||||
|
||||
add r8, r7, r3
|
||||
add r9, r8, r3
|
||||
|
||||
pld [r1, r2]
|
||||
|
||||
add r10, r9, r3
|
||||
add r11, r10, r3
|
||||
|
||||
/* r3 is reused as the eighth row pointer; r5 = count & 3: rows left for the tail loop */
add r3, r3, r11
|
||||
and r5, r5, #3
|
||||
|
||||
pld [r1, r2, lsl #1]
|
||||
|
||||
/* fewer than 4 rows in total: skip the main loop */
cmp r4, #0
|
||||
beq .rotate90_16_tail
|
||||
|
||||
.rotate90_16_loop:
|
||||
/* load 4 source rows into q8..q11, advancing src by sstride after each */
vld1.16 { q8 }, [r1], r2
|
||||
|
||||
pld [r1, r2, lsl #1]
|
||||
|
||||
vld1.16 { q9 }, [r1], r2
|
||||
vld1.16 { q10 }, [r1], r2
|
||||
vld1.16 { q11 }, [r1], r2
|
||||
|
||||
pld [r1]
|
||||
|
||||
/* Could have used four quad-word zips instead,
|
||||
but those take three cycles as opposed to one. */
|
||||
vzip.16 d16, d20
|
||||
vzip.16 d17, d21
|
||||
|
||||
vzip.16 d18, d22
|
||||
|
||||
pld [r1, r2]
|
||||
|
||||
vzip.16 d19, d23
|
||||
|
||||
vzip.16 d16, d18
|
||||
vzip.16 d17, d19
|
||||
|
||||
pld [r1, r2, lsl #1]
|
||||
|
||||
vzip.16 d20, d22
|
||||
vzip.16 d21, d23
|
||||
|
||||
/* one 4-halfword register per destination row; every row pointer advances by 8 bytes */
vst1.16 { d23 }, [r0]!
|
||||
vst1.16 { d21 }, [r6]!
|
||||
vst1.16 { d19 }, [r7]!
|
||||
vst1.16 { d17 }, [r8]!
|
||||
vst1.16 { d22 }, [r9]!
|
||||
vst1.16 { d20 }, [r10]!
|
||||
vst1.16 { d18 }, [r11]!
|
||||
vst1.16 { d16 }, [r3]!
|
||||
|
||||
sub r4, r4, #1
|
||||
cmp r4, #0
|
||||
bne .rotate90_16_loop
|
||||
b .rotate90_16_tail
|
||||
|
||||
.rotate90_16_tail_loop:
|
||||
/* each tail pass consumes two source rows */
/* NOTE(review): an odd remainder (1 or 3) still enters a pass that loads two rows - */
/* presumably callers pass an even count; confirm */
sub r5, r5, #2
|
||||
|
||||
vld1.16 { q8 }, [r1], r2
|
||||
vld1.16 { q9 }, [r1], r2
|
||||
|
||||
vzip.16 d16, d18
|
||||
vzip.16 d17, d19
|
||||
|
||||
/* one 32-bit lane (two halfwords) per destination row; every row pointer advances by 4 bytes */
vst1.32 { d19[1] }, [r0]!
|
||||
vst1.32 { d19[0] }, [r6]!
|
||||
vst1.32 { d17[1] }, [r7]!
|
||||
vst1.32 { d17[0] }, [r8]!
|
||||
vst1.32 { d18[1] }, [r9]!
|
||||
vst1.32 { d18[0] }, [r10]!
|
||||
vst1.32 { d16[1] }, [r11]!
|
||||
vst1.32 { d16[0] }, [r3]!
|
||||
|
||||
.rotate90_16_tail:
|
||||
/* keep going while rows remain (r5 > 0) */
cmp r5, #0
|
||||
bgt .rotate90_16_tail_loop
|
||||
|
||||
/* restore the saved registers and return by popping lr into pc */
pop { r4-r11, pc }
|
||||
|
||||
.endfunc
|
|
@ -1,146 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef QDRAWHELPER_NEON_P_H
|
||||
#define QDRAWHELPER_NEON_P_H
|
||||
|
||||
//
|
||||
// W A R N I N G
|
||||
// -------------
|
||||
//
|
||||
// This file is not part of the Qt API. It exists purely as an
|
||||
// implementation detail. This header file may change from version to
|
||||
// version without notice, or even be removed.
|
||||
//
|
||||
// We mean it.
|
||||
//
|
||||
|
||||
#include <qdrawhelper_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
#ifdef QT_HAVE_NEON

// Prototypes only: this header declares the NEON entry points and defines
// none of them.

// ---- image blending -------------------------------------------------------

void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl,
                                    const uchar *srcPixels, int sbpl,
                                    int w, int h,
                                    int const_alpha);

void qt_blend_rgb32_on_rgb32_neon(uchar *destPixels, int dbpl,
                                  const uchar *srcPixels, int sbpl,
                                  int w, int h,
                                  int const_alpha);

void qt_blend_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
                                   const uchar *srcPixels, int sbpl,
                                   int w, int h,
                                   int const_alpha);

void qt_blend_argb32_on_argb32_scanline_neon(uint *dest,
                                             const uint *src,
                                             int length,
                                             uint const_alpha);

void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl,
                                   const uchar *srcPixels, int sbpl,
                                   int w, int h,
                                   int const_alpha);

void qt_blend_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
                                  const uchar *srcPixels, int sbpl,
                                  int w, int h,
                                  int const_alpha);

// ---- alpha-map blit -------------------------------------------------------

void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer,
                                  int x, int y, quint32 color,
                                  const uchar *bitmap,
                                  int mapWidth, int mapHeight, int mapStride,
                                  const QClipData *clip);

// ---- scaled and transformed image drawing ---------------------------------

void qt_scale_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
                                         const uchar *srcPixels, int sbpl, int sh,
                                         const QRectF &targetRect,
                                         const QRectF &sourceRect,
                                         const QRect &clip,
                                         int const_alpha);

void qt_scale_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
                                        const uchar *srcPixels, int sbpl, int sh,
                                        const QRectF &targetRect,
                                        const QRectF &sourceRect,
                                        const QRect &clip,
                                        int const_alpha);

void qt_transform_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
                                             const uchar *srcPixels, int sbpl,
                                             const QRectF &targetRect,
                                             const QRectF &sourceRect,
                                             const QRect &clip,
                                             const QTransform &targetRectTransform,
                                             int const_alpha);

void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
                                            const uchar *srcPixels, int sbpl,
                                            const QRectF &targetRect,
                                            const QRectF &sourceRect,
                                            const QRect &clip,
                                            const QTransform &targetRectTransform,
                                            int const_alpha);

// ---- memory fill and rotation ---------------------------------------------

void qt_memfill32_neon(quint32 *dest, quint32 value, int count);
void qt_memrotate90_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl);
void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl);

// ---- RGB16 destination fetch and store ------------------------------------

uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer,
                                          QRasterBuffer *rasterBuffer,
                                          int x, int y, int length);

void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer,
                                        int x, int y, const uint *buffer, int length);

// ---- composition functions ------------------------------------------------

void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_Plus_neon(uint *dst, const uint *src, int length, uint const_alpha);

#endif // QT_HAVE_NEON
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QDRAWHELPER_NEON_P_H
|
|
@ -62,7 +62,6 @@
|
|||
#define QT_FT_END_HEADER
|
||||
#endif
|
||||
#include "qrasterdefs_p.h"
|
||||
#include <qsimd_p.h>
|
||||
#include <qmath_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
|
|
@ -1,172 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include <qdrawhelper_p.h>
|
||||
|
||||
#ifdef QT_HAVE_SSE
|
||||
|
||||
#include <qdrawhelper_sse_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
// Solid-colour composition functions for the SSE code path, one slot per
// composition mode. The order of the entries is significant: the table is
// indexed by the composition mode. Empty slots are 0.
CompositionFunctionSolid qt_functionForModeSolid_SSE[numCompositionFunctions] = {
    comp_func_solid_SourceOver<QSSEIntrinsics>,
    comp_func_solid_DestinationOver<QSSEIntrinsics>,
    comp_func_solid_Clear<QSSEIntrinsics>,
    comp_func_solid_Source<QSSEIntrinsics>,
    0,
    comp_func_solid_SourceIn<QSSEIntrinsics>,
    comp_func_solid_DestinationIn<QSSEIntrinsics>,
    comp_func_solid_SourceOut<QSSEIntrinsics>,
    comp_func_solid_DestinationOut<QSSEIntrinsics>,
    comp_func_solid_SourceAtop<QSSEIntrinsics>,
    comp_func_solid_DestinationAtop<QSSEIntrinsics>,
    comp_func_solid_XOR<QSSEIntrinsics>,

    // svg 1.2 modes: twelve empty slots
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, 0, 0, 0,

    // raster operations (instantiated with QMMXIntrinsics)
    rasterop_solid_SourceOrDestination<QMMXIntrinsics>,
    rasterop_solid_SourceAndDestination<QMMXIntrinsics>,
    rasterop_solid_SourceXorDestination<QMMXIntrinsics>,
    rasterop_solid_NotSourceAndNotDestination<QMMXIntrinsics>,
    rasterop_solid_NotSourceOrNotDestination<QMMXIntrinsics>,
    rasterop_solid_NotSourceXorDestination<QMMXIntrinsics>,
    rasterop_solid_NotSource<QMMXIntrinsics>,
    rasterop_solid_NotSourceAndDestination<QMMXIntrinsics>,
    rasterop_solid_SourceAndNotDestination<QMMXIntrinsics>
};
|
||||
|
||||
// Per-pixel composition functions for the SSE code path, one slot per
// composition mode. The order of the entries is significant: the table is
// indexed by the composition mode. Entries without a template argument are
// not SSE-specialised here.
CompositionFunction qt_functionForMode_SSE[numCompositionFunctions] = {
    comp_func_SourceOver<QSSEIntrinsics>,
    comp_func_DestinationOver<QSSEIntrinsics>,
    comp_func_Clear<QSSEIntrinsics>,
    comp_func_Source<QSSEIntrinsics>,
    comp_func_Destination,
    comp_func_SourceIn<QSSEIntrinsics>,
    comp_func_DestinationIn<QSSEIntrinsics>,
    comp_func_SourceOut<QSSEIntrinsics>,
    comp_func_DestinationOut<QSSEIntrinsics>,
    comp_func_SourceAtop<QSSEIntrinsics>,
    comp_func_DestinationAtop<QSSEIntrinsics>,
    comp_func_XOR<QSSEIntrinsics>,

    // twelve non-templated blend modes
    comp_func_Plus,
    comp_func_Multiply,
    comp_func_Screen,
    comp_func_Overlay,
    comp_func_Darken,
    comp_func_Lighten,
    comp_func_ColorDodge,
    comp_func_ColorBurn,
    comp_func_HardLight,
    comp_func_SoftLight,
    comp_func_Difference,
    comp_func_Exclusion,

    // raster operations
    rasterop_SourceOrDestination,
    rasterop_SourceAndDestination,
    rasterop_SourceXorDestination,
    rasterop_NotSourceAndNotDestination,
    rasterop_NotSourceOrNotDestination,
    rasterop_NotSourceXorDestination,
    rasterop_NotSource,
    rasterop_NotSourceAndDestination,
    rasterop_SourceAndNotDestination
};
|
||||
|
||||
void qt_blend_color_argb_sse(int count, const QSpan *spans, void *userData)
|
||||
{
|
||||
qt_blend_color_argb_x86<QSSEIntrinsics>(count, spans, userData,
|
||||
(CompositionFunctionSolid*)qt_functionForModeSolid_SSE);
|
||||
}
|
||||
|
||||
// SSE entry point for 32-bit fills: forwards to the shared template
// instantiated with QSSEIntrinsics.
void qt_memfill32_sse(quint32 *dest, quint32 value, int count)
{
    qt_memfill32_sse_template<QSSEIntrinsics>(dest, value, count);
}
|
||||
|
||||
// SSE entry point for 16-bit bitmap blits: forwards every argument
// unchanged to the shared template instantiated with QSSEIntrinsics.
void qt_bitmapblit16_sse(QRasterBuffer *rasterBuffer, int x, int y,
                         quint32 color,
                         const uchar *src,
                         int width, int height, int stride)
{
    qt_bitmapblit16_sse_template<QSSEIntrinsics>(rasterBuffer, x, y, color,
                                                 src, width, height, stride);
}
|
||||
|
||||
// Blends a w x h block of 32-bit source pixels onto the destination, one
// scanline at a time, using the SSE SourceOver composition function.
//
//   destPixels / dbpl : destination start and bytes per destination line
//   srcPixels  / sbpl : source start and bytes per source line
//   const_alpha       : passed on to the composition function as
//                       const_alpha - 1 (so 256 becomes 255)
void qt_blend_argb32_on_argb32_sse(uchar *destPixels, int dbpl,
                                   const uchar *srcPixels, int sbpl,
                                   int w, int h,
                                   int const_alpha)
{
    // Zero alpha leaves the destination untouched (the SSE2 variant of this
    // function skips it the same way). It must be filtered out here because
    // `const_alpha - 1` would otherwise wrap to UINT_MAX.
    if (const_alpha <= 0)
        return;

    const uint *src = (const uint *) srcPixels;
    uint *dst = (uint *) destPixels;

    const uint ca = const_alpha - 1;

    for (int y = 0; y < h; ++y) {
        comp_func_SourceOver<QSSEIntrinsics>(dst, src, w, ca);
        // Strides are in bytes, so step through uchar pointers.
        dst = (quint32 *)(((uchar *) dst) + dbpl);
        src = (const quint32 *)(((const uchar *) src) + sbpl);
    }
}
|
||||
|
||||
// Blends a w x h block of 32-bit source pixels onto the destination, one
// scanline at a time, using the SSE Source composition function.
//
//   destPixels / dbpl : destination start and bytes per destination line
//   srcPixels  / sbpl : source start and bytes per source line
//   const_alpha       : passed on to the composition function as
//                       const_alpha - 1 (so 256 becomes 255)
void qt_blend_rgb32_on_rgb32_sse(uchar *destPixels, int dbpl,
                                 const uchar *srcPixels, int sbpl,
                                 int w, int h,
                                 int const_alpha)
{
    // `const_alpha - 1` would wrap to UINT_MAX for a zero alpha, handing the
    // composition function a meaningless value. A source with zero weight
    // contributes nothing, so return early instead.
    // NOTE(review): assumes callers expect a no-op for const_alpha == 0,
    // as qt_blend_argb32_on_argb32_sse2 does - confirm.
    if (const_alpha <= 0)
        return;

    const uint *src = (const uint *) srcPixels;
    uint *dst = (uint *) destPixels;

    const uint ca = const_alpha - 1;

    for (int y = 0; y < h; ++y) {
        comp_func_Source<QSSEIntrinsics>(dst, src, w, ca);
        // Strides are in bytes, so step through uchar pointers.
        dst = (quint32 *)(((uchar *) dst) + dbpl);
        src = (const quint32 *)(((const uchar *) src) + sbpl);
    }
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_HAVE_SSE
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,547 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include <qdrawhelper_x86_p.h>
|
||||
|
||||
#ifdef QT_HAVE_SSE2
|
||||
|
||||
#include <qdrawingprimitive_sse2_p.h>
|
||||
#include <qpaintengine_raster_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
// SSE2 source-over blend of a w x h rectangle of 32-bit ARGB pixels.
//
// destPixels/srcPixels point at the first pixel of the first row; dbpl/sbpl
// are the distances in bytes between consecutive rows of each buffer.
// const_alpha is expressed on a 0..256 scale: 256 blends the source as is,
// 0 leaves the destination untouched, anything else additionally scales the
// source contribution.
void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
                                    const uchar *srcPixels, int sbpl,
                                    int w, int h,
                                    int const_alpha)
{
    // Nothing to blend when the constant alpha is zero.
    if (const_alpha == 0)
        return;

    const quint32 *src = reinterpret_cast<const quint32 *>(srcPixels);
    quint32 *dst = reinterpret_cast<quint32 *>(destPixels);

    // Constants required by both blending macros.
    const __m128i nullVector = _mm_set1_epi32(0);
    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i one = _mm_set1_epi16(0xff);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

    if (const_alpha == 256) {
        const __m128i alphaMask = _mm_set1_epi32(0xff000000);
        for (int row = 0; row < h; ++row) {
            BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, w, nullVector, half, one, colorMask, alphaMask);
            dst = reinterpret_cast<quint32 *>(reinterpret_cast<uchar *>(dst) + dbpl);
            src = reinterpret_cast<const quint32 *>(reinterpret_cast<const uchar *>(src) + sbpl);
        }
        return;
    }

    // dest = (s + d * sia) * ca + d * cia
    //      = s * ca + d * (sia * ca + cia)
    //      = s * ca + d * (1 - sa*ca)
    // Rescale the constant alpha from the 0..256 range to 0..255 first.
    const_alpha = (const_alpha * 255) >> 8;
    const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
    for (int row = 0; row < h; ++row) {
        BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
        dst = reinterpret_cast<quint32 *>(reinterpret_cast<uchar *>(dst) + dbpl);
        src = reinterpret_cast<const quint32 *>(reinterpret_cast<const uchar *>(src) + sbpl);
    }
}
|
||||
|
||||
// qblendfunctions.cpp
|
||||
void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
|
||||
// SSE2 blend of a w x h rectangle of RGB32 pixels onto RGB32 pixels with a
// constant alpha: dest = src * ca + dest * (1 - ca).
//
// destPixels/srcPixels point at the first pixel of the first row; dbpl/sbpl
// are the distances in bytes between consecutive rows of each buffer.
// const_alpha is expressed on a 0..256 scale. A value of 256 is forwarded to
// qt_blend_rgb32_on_rgb32(); a value of 0 leaves the destination untouched.
void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
                                  const uchar *srcPixels, int sbpl,
                                  int w, int h,
                                  int const_alpha)
{
    if (const_alpha == 256) {
        qt_blend_rgb32_on_rgb32(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
        return;
    }
    if (const_alpha == 0)
        return;

    const quint32 *src = (const quint32 *) srcPixels;
    quint32 *dst = (quint32 *) destPixels;

    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

    // Rescale the constant alpha from the 0..256 range to 0..255.
    const_alpha = (const_alpha * 255) >> 8;
    int one_minus_const_alpha = 255 - const_alpha;
    const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
    const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha);

    for (int y = 0; y < h; ++y) {
        int x = 0;

        // First, align dest to 16 bytes:
        ALIGNMENT_PROLOGUE_16BYTES(dst, x, w) {
            dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha);
        }

        // Four pixels at a time. Every group is interpolated, including
        // groups whose source pixels are all zero: the destination still has
        // to be scaled by (1 - ca), exactly as the scalar loops around this
        // one do. Skipping such groups made the result depend on where the
        // pixels happened to fall relative to the 16-byte alignment.
        for (; x < w-3; x += 4) {
            const __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[x]);
            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
            __m128i result;
            INTERPOLATE_PIXEL_255_SSE2(result, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half);
            _mm_store_si128((__m128i *)&dst[x], result);
        }

        // Remaining pixels of the row.
        for (; x<w; ++x) {
            dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha);
        }

        dst = (quint32 *)(((uchar *) dst) + dbpl);
        src = (const quint32 *)(((const uchar *) src) + sbpl);
    }
}
|
||||
|
||||
// SSE2 source-over composition of `length` pixels from srcPixels onto
// destPixels. const_alpha must be below 256; 255 selects the plain
// source-over macro, any other value the constant-alpha variant.
void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha)
{
    Q_ASSERT(const_alpha < 256);

    quint32 *dst = reinterpret_cast<quint32 *>(destPixels);
    const quint32 *src = reinterpret_cast<const quint32 *>(srcPixels);

    // Constants required by both blending macros.
    const __m128i nullVector = _mm_set1_epi32(0);
    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i one = _mm_set1_epi16(0xff);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

    if (const_alpha != 255) {
        const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
        BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector);
        return;
    }

    const __m128i alphaMask = _mm_set1_epi32(0xff000000);
    BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask);
}
|
||||
|
||||
// SSE2 "Plus" composition: adds src to dst with per-byte unsigned saturation
// for `length` pixels. When const_alpha is not 255 the sum is additionally
// interpolated with the previous destination using const_alpha and
// 255 - const_alpha as weights.
void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha)
{
    int x = 0;
    // Last index (exclusive) at which a full group of four pixels still fits.
    const int vectorEnd = length - 3;

    if (const_alpha == 255) {
        // 1) Prologue: process single pixels until dst is 16-byte aligned
        ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) {
            dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
        }

        // 2) Four pixels per iteration with SSE2
        while (x < vectorEnd) {
            const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
            _mm_store_si128((__m128i *)&dst[x], _mm_adds_epu8(srcVector, dstVector));
            x += 4;
        }

        // 3) Epilogue: whatever is left
        while (x < length) {
            dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
            ++x;
        }
        return;
    }

    const int one_minus_const_alpha = 255 - const_alpha;
    const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
    const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha);

    // 1) Prologue: process single pixels until dst is 16-byte aligned
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) {
        dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
    }

    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

    // 2) Four pixels per iteration with SSE2
    while (x < vectorEnd) {
        const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
        const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);

        __m128i result = _mm_adds_epu8(srcVector, dstVector);
        INTERPOLATE_PIXEL_255_SSE2(result, result, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
        _mm_store_si128((__m128i *)&dst[x], result);
        x += 4;
    }

    // 3) Epilogue: whatever is left
    while (x < length) {
        dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
        ++x;
    }
}
|
||||
|
||||
// SSE2 "Source" composition for `length` pixels. With const_alpha == 255 the
// source is copied over the destination; otherwise each pixel becomes the
// interpolation of src (weight const_alpha) and dst (weight 255 - const_alpha).
void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha)
{
    if (const_alpha == 255) {
        ::memcpy(dst, src, length * sizeof(uint));
        return;
    }

    const int ialpha = 255 - const_alpha;
    int x = 0;

    // 1) Prologue: process single pixels until dst is 16-byte aligned
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) {
        dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
    }

    // 2) Interpolate four pixels per iteration with SSE2
    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
    const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
    const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha);
    const int vectorEnd = length - 3;
    while (x < vectorEnd) {
        const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
        __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
        INTERPOLATE_PIXEL_255_SSE2(dstVector, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
        _mm_store_si128((__m128i *)&dst[x], dstVector);
        x += 4;
    }

    // 3) Epilogue: whatever is left
    while (x < length) {
        dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
        ++x;
    }
}
|
||||
|
||||
// Fills `count` 32-bit words starting at dest with `value`.
//
// Fewer than seven words are written with plain stores. Longer runs are
// written in three phases: single words until dest is 16-byte aligned,
// 128-bit streaming stores for the bulk, and single words for the remainder.
void qt_memfill32_sse2(quint32 *dest, quint32 value, int count)
{
    if (count < 7) {
        // Short run (a non-positive count writes nothing).
        for (int i = 0; i < count; ++i)
            dest[i] = value;
        return;
    }

    // Bring dest up to the next 16-byte boundary. Only offsets that are a
    // multiple of four bytes are adjusted, as in a word-aligned buffer.
    const int align = (quintptr)(dest) & 0xf;
    if (align == 4 || align == 8 || align == 12) {
        for (int lead = (16 - align) >> 2; lead > 0; --lead) {
            *dest++ = value;
            --count;
        }
    }

    // Bulk: one streaming store per group of four words.
    const int count128 = count / 4;
    __m128i *dst128 = reinterpret_cast<__m128i*>(dest);
    const __m128i value128 = _mm_set1_epi32(value);
    for (int i = 0; i < count128; ++i)
        _mm_stream_si128(dst128 + i, value128);

    // Tail: the up to three words that did not fill a whole vector.
    const int rest = count & 0x3;
    for (int i = count - rest; i < count; ++i)
        dest[i] = value;
}
|
||||
|
||||
// SSE2 source-over composition of a single solid color onto `length`
// destination pixels. When both const_alpha and the color's alpha are 255 the
// run is simply filled with the color; otherwise each pixel becomes
// color + dest * (alpha of ~color), with the color scaled by const_alpha first.
void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha)
{
    if ((const_alpha & qAlpha(color)) == 255) {
        qt_memfill32_sse2(destPixels, color, length);
        return;
    }

    if (const_alpha != 255)
        color = BYTE_MUL(color, const_alpha);

    const quint32 minusAlphaOfColor = qAlpha(~color);
    quint32 *dst = (quint32 *) destPixels;

    const __m128i colorVector = _mm_set1_epi32(color);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor);

    int x = 0;

    // Single pixels until dst is 16-byte aligned.
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) {
        dst[x] = color + BYTE_MUL(dst[x], minusAlphaOfColor);
    }

    // Four pixels per iteration.
    const int vectorEnd = length - 3;
    while (x < vectorEnd) {
        __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
        BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half);
        dstVector = _mm_add_epi8(colorVector, dstVector);
        _mm_store_si128((__m128i *)&dst[x], dstVector);
        x += 4;
    }

    // Remaining pixels.
    while (x < length) {
        dst[x] = color + BYTE_MUL(dst[x], minusAlphaOfColor);
        ++x;
    }
}
|
||||
|
||||
// Solid-color composition functions, one entry per composition mode.
// Only SourceOver has an SSE2 implementation in this table; every other
// entry refers to a function without the _sse2 suffix.
CompositionFunctionSolid qt_functionForModeSolid_onlySSE2[numCompositionFunctions] = {
    // first twelve entries
    comp_func_solid_SourceOver_sse2, comp_func_solid_DestinationOver,
    comp_func_solid_Clear,           comp_func_solid_Source,
    comp_func_solid_Destination,     comp_func_solid_SourceIn,
    comp_func_solid_DestinationIn,   comp_func_solid_SourceOut,
    comp_func_solid_DestinationOut,  comp_func_solid_SourceAtop,
    comp_func_solid_DestinationAtop, comp_func_solid_XOR,

    // next twelve entries
    comp_func_solid_Plus,       comp_func_solid_Multiply,
    comp_func_solid_Screen,     comp_func_solid_Overlay,
    comp_func_solid_Darken,     comp_func_solid_Lighten,
    comp_func_solid_ColorDodge, comp_func_solid_ColorBurn,
    comp_func_solid_HardLight,  comp_func_solid_SoftLight,
    comp_func_solid_Difference, comp_func_solid_Exclusion,

    // raster operations
    rasterop_solid_SourceOrDestination,
    rasterop_solid_SourceAndDestination,
    rasterop_solid_SourceXorDestination,
    rasterop_solid_NotSourceAndNotDestination,
    rasterop_solid_NotSourceOrNotDestination,
    rasterop_solid_NotSourceXorDestination,
    rasterop_solid_NotSource,
    rasterop_solid_NotSourceAndDestination,
    rasterop_solid_SourceAndNotDestination
};
|
||||
|
||||
// Span composition functions, one entry per composition mode.
// SourceOver, Source and Plus use the SSE2 implementations from this file;
// every other entry refers to a function without the _sse2 suffix.
CompositionFunction qt_functionForMode_onlySSE2[numCompositionFunctions] = {
    // first twelve entries
    comp_func_SourceOver_sse2, comp_func_DestinationOver,
    comp_func_Clear,           comp_func_Source_sse2,
    comp_func_Destination,     comp_func_SourceIn,
    comp_func_DestinationIn,   comp_func_SourceOut,
    comp_func_DestinationOut,  comp_func_SourceAtop,
    comp_func_DestinationAtop, comp_func_XOR,

    // next twelve entries
    comp_func_Plus_sse2,  comp_func_Multiply,
    comp_func_Screen,     comp_func_Overlay,
    comp_func_Darken,     comp_func_Lighten,
    comp_func_ColorDodge, comp_func_ColorBurn,
    comp_func_HardLight,  comp_func_SoftLight,
    comp_func_Difference, comp_func_Exclusion,

    // raster operations
    rasterop_SourceOrDestination,
    rasterop_SourceAndDestination,
    rasterop_SourceXorDestination,
    rasterop_NotSourceAndNotDestination,
    rasterop_NotSourceOrNotDestination,
    rasterop_NotSourceXorDestination,
    rasterop_NotSource,
    rasterop_NotSourceAndDestination,
    rasterop_SourceAndNotDestination
};
|
||||
|
||||
// Fills `count` 16-bit words starting at dest with `value`.
//
// Runs shorter than three words are written directly. Longer runs first
// write one word if dest sits two bytes past a 4-byte boundary, then hand
// pairs of words to qt_memfill32_sse2() and finally write the odd word left
// over, if any.
void qt_memfill16_sse2(quint16 *dest, quint16 value, int count)
{
    if (count < 3) {
        // Short run (a non-positive count writes nothing).
        for (int i = 0; i < count; ++i)
            dest[i] = value;
        return;
    }

    // Make dest 4-byte aligned so it can be treated as a quint32 buffer.
    if (((quintptr)(dest) & 0x3) == 2) {
        *dest++ = value;
        --count;
    }

    // Duplicate the 16-bit value into both halves of a 32-bit word.
    const quint32 value32 = (value << 16) | value;
    quint32 *dest32 = reinterpret_cast<quint32*>(dest);
    qt_memfill32_sse2(dest32, value32, count / 2);

    // An odd count leaves one trailing 16-bit word.
    if (count & 0x1)
        dest[count - 1] = value;
}
|
||||
|
||||
// Paints a 1-bit-per-pixel bitmap with a solid color into a 32-bit raster
// buffer, starting at pixel (x, y).
//
// src holds `height` rows of bitmap data, `stride` bytes apart. For every
// set bit the matching destination pixel is overwritten with `color`; pixels
// whose bit is clear are left untouched by the masked stores.
void qt_bitmapblit32_sse2(QRasterBuffer *rasterBuffer, int x, int y,
                          quint32 color,
                          const uchar *src, int width, int height, int stride)
{
    quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
    const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint32);

    const __m128i c128 = _mm_set1_epi32(color);

    // Masks for the first four pixels of a bitmap byte: the AND keeps, per
    // pixel, only that pixel's bit (the most significant bit belongs to the
    // lowest address); the ADD then moves a kept bit up to bit 7 of every
    // byte, which is the bit the masked store looks at.
    const __m128i maskmask1 = _mm_set_epi32(0x10101010, 0x20202020,
                                            0x40404040, 0x80808080);
    const __m128i maskadd1 = _mm_set_epi32(0x70707070, 0x60606060,
                                           0x40404040, 0x00000000);

    if (width <= 4) {
        // Narrow bitmap: only the first four bits of the first byte of each
        // row are used.
        while (height--) {
            const quint8 bits = *src;
            if (bits) {
                __m128i mask1 = _mm_and_si128(_mm_set1_epi8(bits), maskmask1);
                mask1 = _mm_add_epi8(mask1, maskadd1);
                _mm_maskmoveu_si128(c128, mask1, (char*)(dest));
            }
            dest += destStride;
            src += stride;
        }
        return;
    }

    // Same idea for the last four pixels of a bitmap byte.
    const __m128i maskmask2 = _mm_set_epi32(0x01010101, 0x02020202,
                                            0x04040404, 0x08080808);
    const __m128i maskadd2 = _mm_set_epi32(0x7f7f7f7f, 0x7e7e7e7e,
                                           0x7c7c7c7c, 0x78787878);
    while (height--) {
        for (int col = 0; col < width; col += 8) {
            const quint8 bits = src[col >> 3];
            if (!bits)
                continue;

            const __m128i spread = _mm_set1_epi8(bits);

            __m128i mask1 = _mm_and_si128(spread, maskmask1);
            mask1 = _mm_add_epi8(mask1, maskadd1);
            _mm_maskmoveu_si128(c128, mask1, (char*)(dest + col));

            __m128i mask2 = _mm_and_si128(spread, maskmask2);
            mask2 = _mm_add_epi8(mask2, maskadd2);
            _mm_maskmoveu_si128(c128, mask2, (char*)(dest + col + 4));
        }
        dest += destStride;
        src += stride;
    }
}
|
||||
|
||||
// Paints a 1-bit-per-pixel bitmap with a solid color into a 16-bit raster
// buffer, starting at pixel (x, y).
//
// The color is converted to 16 bits with qt_colorConvert(). src holds
// `height` rows of bitmap data, `stride` bytes apart; each bitmap byte
// controls eight destination pixels, written with one masked 128-bit store.
void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y,
                          quint32 color,
                          const uchar *src, int width, int height, int stride)
{
    const quint16 c = qt_colorConvert<quint16, quint32>(color, 0);
    quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
    const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16);

    const __m128i c128 = _mm_set1_epi16(c);
#if defined(Q_CC_MSVC)
#  pragma warning(disable: 4309) // truncation of constant value
#endif
    // The AND keeps, per pixel, only that pixel's bit of the bitmap byte (the
    // most significant bit belongs to the lowest address); the ADD then moves
    // a kept bit up to bit 7 of every byte, which is the bit the masked store
    // looks at.
    const __m128i maskmask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808,
                                           0x1010, 0x2020, 0x4040, 0x8080);
    const __m128i maskadd = _mm_set_epi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878,
                                          0x7070, 0x6060, 0x4040, 0x0000);

    while (height--) {
        for (int col = 0; col < width; col += 8) {
            const quint8 bits = src[col >> 3];
            if (bits) {
                __m128i mask = _mm_and_si128(_mm_set1_epi8(bits), maskmask);
                mask = _mm_add_epi8(mask, maskadd);
                _mm_maskmoveu_si128(c128, mask, (char*)(dest + col));
            }
        }
        dest += destStride;
        src += stride;
    }
}
|
||||
|
||||
// SSE2 backend for the SIMD radial gradient fetcher: a set of static
// four-lane vector primitives over __m128i (integer) and __m128 (float).
class QSimdSse2
{
public:
    typedef __m128i Int32x4;
    typedef __m128 Float32x4;

    // Helpers for reading individual lanes of a vector.
    union Vect_buffer_i { Int32x4 v; int i[4]; };
    union Vect_buffer_f { Float32x4 v; float f[4]; };

    // Broadcast a scalar to all four lanes.
    static inline Float32x4 v_dup(float x) { return _mm_set1_ps(x); }
    static inline Float32x4 v_dup(double x) { return _mm_set1_ps(float(x)); }
    static inline Int32x4 v_dup(int x) { return _mm_set1_epi32(x); }
    static inline Int32x4 v_dup(uint x) { return _mm_set1_epi32(x); }

    static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return _mm_add_ps(a, b); }
    static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return _mm_add_epi32(a, b); }

    static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return _mm_max_ps(a, b); }
    static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return _mm_min_ps(a, b); }
    // Minimum of the eight signed 16-bit lanes (not of the 32-bit lanes).
    static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return _mm_min_epi16(a, b); }

    static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return _mm_and_si128(a, b); }

    static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return _mm_sub_ps(a, b); }
    static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return _mm_sub_epi32(a, b); }

    static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return _mm_mul_ps(a, b); }

    static inline Float32x4 v_sqrt(Float32x4 x) { return _mm_sqrt_ps(x); }

    // Float to int conversion with truncation.
    static inline Int32x4 v_toInt(Float32x4 x) { return _mm_cvttps_epi32(x); }

    // Per-lane a >= b: all bits set in lanes where the comparison holds,
    // zero elsewhere. This uses the greater-or-equal compare so that equal
    // lanes pass, as the function name promises (a strict "greater than"
    // compare would reject them).
    //
    // pre-VS 2008 doesn't have cast intrinsics, whereas 2008 and later requires it
    // (same deal with gcc prior to 4.0)
#if (defined(Q_CC_MSVC) && _MSC_VER < 1500) || (defined(Q_CC_GNU) && __GNUC__ < 4)
    static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b)
    {
        union Convert { Int32x4 vi; Float32x4 vf; } convert;
        convert.vf = _mm_cmpge_ps(a, b);
        return convert.vi;
    }
#else
    static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b) { return _mm_castps_si128(_mm_cmpge_ps(a, b)); }
#endif
};
|
||||
|
||||
// Radial gradient fetch entry point for SSE2: forwards all arguments to the
// generic fetch template instantiated with the QSimdSse2 primitives and
// returns its result.
const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data,
                                                       int y, int x, int length)
{
    typedef QRadialFetchSimd<QSimdSse2> Sse2RadialFetch;
    return qt_fetch_radial_gradient_template<Sse2RadialFetch>(buffer, op, data, y, x, length);
}
|
||||
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_HAVE_SSE2
|
||||
|
||||
|
|
@ -1,145 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include <qdrawhelper_x86_p.h>
|
||||
|
||||
#if defined(QT_HAVE_3DNOW) && defined(QT_HAVE_SSE)
|
||||
|
||||
#include <qdrawhelper_sse_p.h>
|
||||
#include <mm3dnow.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
struct QSSE3DNOWIntrinsics : public QSSEIntrinsics
|
||||
{
|
||||
static inline void end() {
|
||||
_m_femms();
|
||||
}
|
||||
};
|
||||
|
||||
// Solid-color composition functions for SSE + 3DNow!, one entry per
// composition mode. Entries set to 0 have no implementation in this table.
CompositionFunctionSolid qt_functionForModeSolid_SSE3DNOW[numCompositionFunctions] = {
    // first twelve entries
    comp_func_solid_SourceOver<QSSE3DNOWIntrinsics>,
    comp_func_solid_DestinationOver<QSSE3DNOWIntrinsics>,
    comp_func_solid_Clear<QSSE3DNOWIntrinsics>,
    comp_func_solid_Source<QSSE3DNOWIntrinsics>,
    0, // fifth entry: not provided
    comp_func_solid_SourceIn<QSSE3DNOWIntrinsics>,
    comp_func_solid_DestinationIn<QSSE3DNOWIntrinsics>,
    comp_func_solid_SourceOut<QSSE3DNOWIntrinsics>,
    comp_func_solid_DestinationOut<QSSE3DNOWIntrinsics>,
    comp_func_solid_SourceAtop<QSSE3DNOWIntrinsics>,
    comp_func_solid_DestinationAtop<QSSE3DNOWIntrinsics>,
    comp_func_solid_XOR<QSSE3DNOWIntrinsics>,

    // svg 1.2 modes: twelve entries, none provided
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, 0, 0, 0,

    // raster operations
    rasterop_solid_SourceOrDestination<QSSE3DNOWIntrinsics>,
    rasterop_solid_SourceAndDestination<QSSE3DNOWIntrinsics>,
    rasterop_solid_SourceXorDestination<QSSE3DNOWIntrinsics>,
    rasterop_solid_NotSourceAndNotDestination<QSSE3DNOWIntrinsics>,
    rasterop_solid_NotSourceOrNotDestination<QSSE3DNOWIntrinsics>,
    rasterop_solid_NotSourceXorDestination<QSSE3DNOWIntrinsics>,
    rasterop_solid_NotSource<QSSE3DNOWIntrinsics>,
    rasterop_solid_NotSourceAndDestination<QSSE3DNOWIntrinsics>,
    rasterop_solid_SourceAndNotDestination<QSSE3DNOWIntrinsics>
};
|
||||
|
||||
// Span composition functions for SSE + 3DNow!, one entry per composition
// mode. Entries instantiated with QSSE3DNOWIntrinsics are the templated
// implementations; the remaining entries are the non-templated functions.
CompositionFunction qt_functionForMode_SSE3DNOW[numCompositionFunctions] = {
    // first twelve entries
    comp_func_SourceOver<QSSE3DNOWIntrinsics>,
    comp_func_DestinationOver<QSSE3DNOWIntrinsics>,
    comp_func_Clear<QSSE3DNOWIntrinsics>,
    comp_func_Source<QSSE3DNOWIntrinsics>,
    comp_func_Destination,
    comp_func_SourceIn<QSSE3DNOWIntrinsics>,
    comp_func_DestinationIn<QSSE3DNOWIntrinsics>,
    comp_func_SourceOut<QSSE3DNOWIntrinsics>,
    comp_func_DestinationOut<QSSE3DNOWIntrinsics>,
    comp_func_SourceAtop<QSSE3DNOWIntrinsics>,
    comp_func_DestinationAtop<QSSE3DNOWIntrinsics>,
    comp_func_XOR<QSSE3DNOWIntrinsics>,

    // next twelve entries
    comp_func_Plus,       comp_func_Multiply,
    comp_func_Screen,     comp_func_Overlay,
    comp_func_Darken,     comp_func_Lighten,
    comp_func_ColorDodge, comp_func_ColorBurn,
    comp_func_HardLight,  comp_func_SoftLight,
    comp_func_Difference, comp_func_Exclusion,

    // raster operations
    rasterop_SourceOrDestination,
    rasterop_SourceAndDestination,
    rasterop_SourceXorDestination,
    rasterop_NotSourceAndNotDestination,
    rasterop_NotSourceOrNotDestination,
    rasterop_NotSourceXorDestination,
    rasterop_NotSource,
    rasterop_NotSourceAndDestination,
    rasterop_SourceAndNotDestination
};
|
||||
|
||||
void qt_blend_color_argb_sse3dnow(int count, const QSpan *spans, void *userData)
|
||||
{
|
||||
qt_blend_color_argb_x86<QSSE3DNOWIntrinsics>(count, spans, userData,
|
||||
(CompositionFunctionSolid*)qt_functionForModeSolid_SSE3DNOW);
|
||||
}
|
||||
|
||||
// Fills `count` 32-bit words at dest with `value` through the shared SSE
// template, using the SSE + 3DNow! intrinsics policy.
void qt_memfill32_sse3dnow(quint32 *dest, quint32 value, int count)
{
    qt_memfill32_sse_template<QSSE3DNOWIntrinsics>(dest, value, count);
}
|
||||
|
||||
|
||||
// Bitmap blit into a 16-bit raster buffer through the shared SSE template,
// using the SSE + 3DNow! intrinsics policy. All arguments are passed on
// unchanged.
void qt_bitmapblit16_sse3dnow(QRasterBuffer *rasterBuffer, int x, int y,
                              quint32 color,
                              const uchar *src,
                              int width, int height, int stride)
{
    qt_bitmapblit16_sse_template<QSSE3DNOWIntrinsics>(rasterBuffer, x, y, color,
                                                      src, width, height, stride);
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_HAVE_3DNOW && QT_HAVE_SSE
|
||||
|
||||
|
|
@ -1,182 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef QDRAWHELPER_SSE_P_H
|
||||
#define QDRAWHELPER_SSE_P_H
|
||||
|
||||
//
|
||||
// W A R N I N G
|
||||
// -------------
|
||||
//
|
||||
// This file is not part of the Qt API. It exists purely as an
|
||||
// implementation detail. This header file may change from version to
|
||||
// version without notice, or even be removed.
|
||||
//
|
||||
// We mean it.
|
||||
//
|
||||
|
||||
#include <qdrawhelper_mmx_p.h>
|
||||
|
||||
#ifdef QT_HAVE_SSE
|
||||
|
||||
#ifdef QT_LINUXBASE
|
||||
// this is an evil hack - the posix_memalign declaration in LSB
|
||||
// is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431
|
||||
# define posix_memalign _lsb_hack_posix_memalign
|
||||
# include <xmmintrin.h>
|
||||
# undef posix_memalign
|
||||
#else
|
||||
# include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
// Fallback for headers that do not provide _MM_SHUFFLE: packs four 2-bit
// selectors into one 8-bit immediate, the first argument ending up in the
// two most significant bits.
#ifndef _MM_SHUFFLE
#  define _MM_SHUFFLE(z, y, x, w) \
    (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
#endif
|
||||
|
||||
struct QSSEIntrinsics : public QMMXIntrinsics
|
||||
{
|
||||
static inline m64 alpha(m64 x) {
|
||||
return _mm_shuffle_pi16 (x, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
static inline m64 _load_alpha(uint x, const m64 &mmx_0x0000) {
|
||||
m64 t = _mm_unpacklo_pi8(_mm_cvtsi32_si64(x), mmx_0x0000);
|
||||
return _mm_shuffle_pi16 (t, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
}
|
||||
};
|
||||
|
||||
template <class MM>
|
||||
inline void qt_memfill32_sse_template(quint32 *dest, quint32 value, int count)
|
||||
{
|
||||
if (count < 7) {
|
||||
switch (count) {
|
||||
case 6: *dest++ = value;
|
||||
case 5: *dest++ = value;
|
||||
case 4: *dest++ = value;
|
||||
case 3: *dest++ = value;
|
||||
case 2: *dest++ = value;
|
||||
case 1: *dest = value;
|
||||
}
|
||||
return;
|
||||
};
|
||||
|
||||
__m64 *dst64 = reinterpret_cast<__m64*>(dest);
|
||||
const __m64 value64 = _mm_set_pi32(value, value);
|
||||
int count64 = count / 2;
|
||||
|
||||
int n = (count64 + 3) / 4;
|
||||
switch (count64 & 0x3) {
|
||||
case 0: do { _mm_stream_pi(dst64++, value64);
|
||||
case 3: _mm_stream_pi(dst64++, value64);
|
||||
case 2: _mm_stream_pi(dst64++, value64);
|
||||
case 1: _mm_stream_pi(dst64++, value64);
|
||||
} while (--n > 0);
|
||||
}
|
||||
|
||||
if (count & 0x1)
|
||||
dest[count - 1] = value;
|
||||
|
||||
MM::end();
|
||||
}
|
||||
|
||||
template <class MM>
|
||||
inline void qt_bitmapblit16_sse_template(QRasterBuffer *rasterBuffer,
|
||||
int x, int y,
|
||||
quint32 color,
|
||||
const uchar *src,
|
||||
int width, int height, int stride)
|
||||
{
|
||||
const quint16 c = qt_colorConvert<quint16, quint32>(color, 0);
|
||||
quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
|
||||
const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16);
|
||||
|
||||
const __m64 c64 = _mm_set1_pi16(c);
|
||||
#ifdef Q_CC_MSVC
|
||||
# pragma warning(disable: 4309) // truncation of constant value
|
||||
#endif
|
||||
const __m64 maskmask1 = _mm_set_pi16(0x1010, 0x2020, 0x4040, 0x8080);
|
||||
const __m64 maskadd1 = _mm_set_pi16(0x7070, 0x6060, 0x4040, 0x0000);
|
||||
|
||||
if (width > 4) {
|
||||
const __m64 maskmask2 = _mm_set_pi16(0x0101, 0x0202, 0x0404, 0x0808);
|
||||
const __m64 maskadd2 = _mm_set_pi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878);
|
||||
|
||||
while (height--) {
|
||||
for (int x = 0; x < width; x += 8) {
|
||||
const quint8 s = src[x >> 3];
|
||||
if (!s)
|
||||
continue;
|
||||
__m64 mask1 = _mm_set1_pi8(s);
|
||||
__m64 mask2 = mask1;
|
||||
mask1 = _m_pand(mask1, maskmask1);
|
||||
mask1 = _mm_add_pi16(mask1, maskadd1);
|
||||
_mm_maskmove_si64(c64, mask1, (char*)(dest + x));
|
||||
mask2 = _m_pand(mask2, maskmask2);
|
||||
mask2 = _mm_add_pi16(mask2, maskadd2);
|
||||
_mm_maskmove_si64(c64, mask2, (char*)(dest + x + 4));
|
||||
}
|
||||
dest += destStride;
|
||||
src += stride;
|
||||
}
|
||||
} else {
|
||||
while (height--) {
|
||||
const quint8 s = *src;
|
||||
if (s) {
|
||||
__m64 mask1 = _mm_set1_pi8(s);
|
||||
mask1 = _m_pand(mask1, maskmask1);
|
||||
mask1 = _mm_add_pi16(mask1, maskadd1);
|
||||
_mm_maskmove_si64(c64, mask1, (char*)(dest));
|
||||
}
|
||||
dest += destStride;
|
||||
src += stride;
|
||||
}
|
||||
}
|
||||
|
||||
MM::end();
|
||||
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_HAVE_SSE
|
||||
#endif // QDRAWHELPER_SSE_P_H
|
|
@ -1,185 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include <qdrawhelper_x86_p.h>
|
||||
|
||||
#ifdef QT_HAVE_SSSE3
|
||||
|
||||
#include <qdrawingprimitive_sse2_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
/*
    Source-over blend of a single 32-bit pixel: src is composited onto dst.
    A zero src leaves dst untouched, a src whose top byte is 0xff simply
    replaces dst, anything else is added to dst scaled by the inverse of
    src's alpha.
*/
inline static void blend_pixel(quint32 &dst, const quint32 src)
{
    if (src == 0)
        return;

    if (src >= 0xff000000) {
        dst = src;
        return;
    }

    dst = src + BYTE_MUL(dst, qAlpha(~src));
}
|
||||
|
||||
|
||||
/* The palignr instruction takes its shift as an immediate operand, so we have to generate the code for the different
|
||||
shifts (4, 8, 12). Checking the alignment inside the loop is unfortunately way too slow.
|
||||
*/
|
||||
#define BLENDING_LOOP(palignrOffset, length)\
    for (; x-minusOffsetToAlignSrcOn16Bytes < length-7; x += 4) { \
        /* Load the next aligned 16 bytes of src and splice them with the previously \
           loaded ones to rebuild the four pixels starting at src[x]. */ \
        const __m128i srcVectorLastLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\
        const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \
        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
            /* all four pixels are opaque: plain copy */ \
            _mm_store_si128((__m128i *)&dst[x], srcVector); \
        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) == 0xffff) { \
            /* all four alphas are zero: dst stays as it is */ \
        } else { \
            /* result = s + d * (1 - alpha) */ \
            const __m128i oneMinusAlpha = _mm_sub_epi16(one, _mm_shuffle_epi8(srcVector, alphaShuffleMask)); \
            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
            __m128i destMultipliedByOneMinusAlpha; \
            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, oneMinusAlpha, colorMask, half); \
            _mm_store_si128((__m128i *)&dst[x], _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha)); \
        } \
        /* the block just loaded becomes the low half of the next splice */ \
        srcVectorPrevLoaded = srcVectorLastLoaded;\
    }
|
||||
|
||||
|
||||
// Blend src over dst (source-over), using the alpha stored in each source pixel.
|
||||
// nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as:
|
||||
//const __m128i nullVector = _mm_set1_epi32(0);
|
||||
//const __m128i half = _mm_set1_epi16(0x80);
|
||||
//const __m128i one = _mm_set1_epi16(0xff);
|
||||
//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
|
||||
//const __m128i alphaMask = _mm_set1_epi32(0xff000000);
|
||||
//
|
||||
// The computation being done is:
|
||||
// result = s + d * (1-alpha)
|
||||
// with shortcuts if fully opaque or fully transparent.
|
||||
#define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
    int x = 0; \
    \
    /* First, get dst aligned: blend the leading pixels one at a time. */ \
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \
        blend_pixel(dst[x], src[x]); \
    } \
    \
    /* Number of pixels (0..3) by which src[x] lies past a 16-byte boundary. */ \
    const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;\
    \
    /* Shuffle control copying each pixel's top byte into both 16-bit halves \
       of that pixel (0x00AA00AA). */ \
    const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\
    \
    if (minusOffsetToAlignSrcOn16Bytes == 0) {\
        /* src is aligned as well: usual algorithm but with aligned loads. \
           See the SSE2 version for more documentation on the algorithm itself. */\
        for (; x < length-3; x += 4) { \
            const __m128i srcVector = _mm_load_si128((__m128i *)&src[x]); \
            const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
            if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
                /* all four pixels are opaque: plain copy */ \
                _mm_store_si128((__m128i *)&dst[x], srcVector); \
            } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) == 0xffff) { \
                /* all four alphas are zero: dst stays as it is */ \
            } else { \
                const __m128i oneMinusAlpha = _mm_sub_epi16(one, _mm_shuffle_epi8(srcVector, alphaShuffleMask)); \
                const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
                __m128i destMultipliedByOneMinusAlpha; \
                BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, oneMinusAlpha, colorMask, half); \
                _mm_store_si128((__m128i *)&dst[x], _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha)); \
            } \
        } \
    } else if ((length - x) >= 8) {\
        /* src is misaligned: two aligned loads are combined per step, prevLoaded \
           for the first pixels and lastLoaded for the current ones. */\
        __m128i srcVectorPrevLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);\
        \
        /* The byte shift handed to BLENDING_LOOP is 4 bytes per pixel of offset. */\
        switch (minusOffsetToAlignSrcOn16Bytes) {\
        case 1:\
            BLENDING_LOOP(4, length)\
            break;\
        case 2:\
            BLENDING_LOOP(8, length)\
            break;\
        case 3:\
            BLENDING_LOOP(12, length)\
            break;\
        }\
    }\
    \
    /* Whatever is left is blended one pixel at a time. */ \
    for (; x < length; ++x) \
        blend_pixel(dst[x], src[x]); \
}
|
||||
|
||||
/*
    Source-over blend of a w x h block of 32-bit pixels from srcPixels onto
    destPixels. dbpl / sbpl are the byte distances between consecutive rows
    of the destination / source.

    const_alpha == 256 blends with the per-pixel alpha only (SSSE3 path),
    const_alpha == 0 does nothing, and any other value additionally scales
    the source by const_alpha (SSE2 const-alpha path).
*/
void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
                                     const uchar *srcPixels, int sbpl,
                                     int w, int h,
                                     int const_alpha)
{
    if (const_alpha == 0)
        return;

    const quint32 *src = (const quint32 *) srcPixels;
    quint32 *dst = (quint32 *) destPixels;

    // Constants shared by both blending macros.
    const __m128i nullVector = _mm_setzero_si128();
    const __m128i half = _mm_set1_epi16(0x80);
    const __m128i one = _mm_set1_epi16(0xff);
    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

    if (const_alpha == 256) {
        const __m128i alphaMask = _mm_set1_epi32(0xff000000);

        for (int y = 0; y < h; ++y) {
            BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask);
            dst = (quint32 *)(((uchar *) dst) + dbpl);
            src = (const quint32 *)(((const uchar *) src) + sbpl);
        }
        return;
    }

    // dest = (s + d * sia) * ca + d * cia
    //      = s * ca + d * (sia * ca + cia)
    //      = s * ca + d * (1 - sa*ca)
    const_alpha = (const_alpha * 255) >> 8;   // rescale from 0..256 to 0..255
    const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);

    for (int y = 0; y < h; ++y) {
        BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
        dst = (quint32 *)(((uchar *) dst) + dbpl);
        src = (const quint32 *)(((const uchar *) src) + sbpl);
    }
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_HAVE_SSSE3
|
||||
|
||||
|
|
@ -1,141 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef QDRAWHELPER_X86_P_H
|
||||
#define QDRAWHELPER_X86_P_H
|
||||
|
||||
//
|
||||
// W A R N I N G
|
||||
// -------------
|
||||
//
|
||||
// This file is not part of the Qt API. It exists purely as an
|
||||
// implementation detail. This header file may change from version to
|
||||
// version without notice, or even be removed.
|
||||
//
|
||||
// We mean it.
|
||||
//
|
||||
|
||||
#include <qdrawhelper_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
#ifdef QT_HAVE_MMX
|
||||
extern CompositionFunction qt_functionForMode_MMX[];
|
||||
extern CompositionFunctionSolid qt_functionForModeSolid_MMX[];
|
||||
void qt_blend_color_argb_mmx(int count, const QSpan *spans, void *userData);
|
||||
#endif
|
||||
|
||||
#ifdef QT_HAVE_SSE
|
||||
void qt_memfill32_mmxext(quint32 *dest, quint32 value, int count);
|
||||
void qt_bitmapblit16_mmxext(QRasterBuffer *rasterBuffer, int x, int y,
|
||||
quint32 color, const uchar *src,
|
||||
int width, int height, int stride);
|
||||
#endif
|
||||
|
||||
#ifdef QT_HAVE_3DNOW
|
||||
#if defined(QT_HAVE_MMX) || !defined(QT_HAVE_SSE)
|
||||
extern CompositionFunction qt_functionForMode_MMX3DNOW[];
|
||||
extern CompositionFunctionSolid qt_functionForModeSolid_MMX3DNOW[];
|
||||
|
||||
void qt_blend_color_argb_mmx3dnow(int count, const QSpan *spans,
|
||||
void *userData);
|
||||
#endif // MMX
|
||||
|
||||
#ifdef QT_HAVE_SSE
|
||||
extern CompositionFunction qt_functionForMode_SSE3DNOW[];
|
||||
extern CompositionFunctionSolid qt_functionForModeSolid_SSE3DNOW[];
|
||||
|
||||
void qt_memfill32_sse3dnow(quint32 *dest, quint32 value, int count);
|
||||
void qt_bitmapblit16_sse3dnow(QRasterBuffer *rasterBuffer, int x, int y,
|
||||
quint32 color,
|
||||
const uchar *src, int width, int height,
|
||||
int stride);
|
||||
void qt_blend_color_argb_sse3dnow(int count, const QSpan *spans,
|
||||
void *userData);
|
||||
#endif // SSE
|
||||
#endif // QT_HAVE_3DNOW
|
||||
|
||||
#ifdef QT_HAVE_SSE
|
||||
void qt_memfill32_sse(quint32 *dest, quint32 value, int count);
|
||||
void qt_bitmapblit16_sse(QRasterBuffer *rasterBuffer, int x, int y,
|
||||
quint32 color,
|
||||
const uchar *src, int width, int height, int stride);
|
||||
|
||||
void qt_blend_color_argb_sse(int count, const QSpan *spans, void *userData);
|
||||
|
||||
extern CompositionFunction qt_functionForMode_SSE[];
|
||||
extern CompositionFunctionSolid qt_functionForModeSolid_SSE[];
|
||||
#endif // QT_HAVE_SSE
|
||||
|
||||
#ifdef QT_HAVE_SSE2
|
||||
void qt_memfill32_sse2(quint32 *dest, quint32 value, int count);
|
||||
void qt_memfill16_sse2(quint16 *dest, quint16 value, int count);
|
||||
void qt_bitmapblit32_sse2(QRasterBuffer *rasterBuffer, int x, int y,
|
||||
quint32 color,
|
||||
const uchar *src, int width, int height, int stride);
|
||||
void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y,
|
||||
quint32 color,
|
||||
const uchar *src, int width, int height, int stride);
|
||||
void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
|
||||
extern CompositionFunction qt_functionForMode_onlySSE2[];
|
||||
extern CompositionFunctionSolid qt_functionForModeSolid_onlySSE2[];
|
||||
#endif // QT_HAVE_SSE2
|
||||
|
||||
#ifdef QT_HAVE_IWMMXT
|
||||
void qt_blend_color_argb_iwmmxt(int count, const QSpan *spans, void *userData);
|
||||
|
||||
extern CompositionFunction qt_functionForMode_IWMMXT[];
|
||||
extern CompositionFunctionSolid qt_functionForModeSolid_IWMMXT[];
|
||||
#endif
|
||||
|
||||
// NOTE(review): presumably the number of entries each qt_functionForMode_* /
// qt_functionForModeSolid_* table is expected to hold; inferred from the name
// only, the table definitions are not part of this header -- confirm.
static const int numCompositionFunctions = 33;
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QDRAWHELPER_X86_P_H
|
|
@ -1,241 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef QDRAWINGPRIMITIVE_SSE2_P_H
|
||||
#define QDRAWINGPRIMITIVE_SSE2_P_H
|
||||
|
||||
#include <qsimd_p.h>
|
||||
|
||||
#ifdef QT_HAVE_SSE2
|
||||
|
||||
//
|
||||
// W A R N I N G
|
||||
// -------------
|
||||
//
|
||||
// This file is not part of the Qt API. It exists purely as an
|
||||
// implementation detail. This header file may change from version to
|
||||
// version without notice, or even be removed.
|
||||
//
|
||||
// We mean it.
|
||||
//
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
/*
|
||||
* Multiply the components of pixelVector by alphaChannel
|
||||
* Each 32bits components of alphaChannel must be in the form 0x00AA00AA
|
||||
* colorMask must have 0x00ff00ff on each 32 bits component
|
||||
 * half must have the value 128 (0x80) for each 32 bits component
|
||||
*/
|
||||
#define BYTE_MUL_SSE2(result, pixelVector, alphaChannel, colorMask, half) \
{ \
    /* 1 + 2. Split the pixels so every colour sits in its own 16-bit lane \
       (AG lanes look like 0x00AA / 0x00GG, RB lanes like 0x00RR / 0x00BB) \
       and multiply each lane by the alpha channel. */ \
    __m128i pixelVectorAG = _mm_mullo_epi16(_mm_srli_epi16(pixelVector, 8), alphaChannel); \
    __m128i pixelVectorRB = _mm_mullo_epi16(_mm_and_si128(pixelVector, colorMask), alphaChannel); \
    \
    /* 3. Divide by 255 the same way BYTE_MUL() does, with shifts only: \
       X/255 ~= (X + X/256 + rounding)/256. First the numerator ... */ \
    pixelVectorAG = _mm_add_epi16(_mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)), half); \
    pixelVectorRB = _mm_add_epi16(_mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)), half); \
    \
    /* ... then the division by 256. RB is shifted down into place. For AG \
       the quotient already sits in the high byte of each lane, which is \
       where it belongs, so masking off the low byte replaces a >> 8 \
       followed by a << 8 with a single instruction. */ \
    pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); \
    pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); \
    \
    /* 4. Merge the two pairs of colours back into whole pixels. */ \
    result = _mm_or_si128(pixelVectorAG, pixelVectorRB); \
}
|
||||
|
||||
/*
|
||||
* Each 32bits components of alphaChannel must be in the form 0x00AA00AA
|
||||
* oneMinusAlphaChannel must be 255 - alpha for each 32 bits component
|
||||
* colorMask must have 0x00ff00ff on each 32 bits component
|
||||
 * half must have the value 128 (0x80) for each 32 bits component
|
||||
*/
|
||||
#define INTERPOLATE_PIXEL_255_SSE2(result, srcVector, dstVector, alphaChannel, oneMinusAlphaChannel, colorMask, half) { \
    /* AG lanes: src * alpha + dst * (255 - alpha), one colour per 16-bit lane */\
    __m128i finalAG = _mm_add_epi16(_mm_mullo_epi16(_mm_srli_epi16(srcVector, 8), alphaChannel), \
                                    _mm_mullo_epi16(_mm_srli_epi16(dstVector, 8), oneMinusAlphaChannel)); \
    /* approximate division by 255: (X + X/256 + 128) / 256; for AG the \
       quotient stays in the high byte, so the low byte is just masked off */\
    finalAG = _mm_add_epi16(_mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)), half); \
    finalAG = _mm_andnot_si128(colorMask, finalAG); \
    \
    /* RB lanes: same computation, with the quotient shifted down into place */\
    __m128i finalRB = _mm_add_epi16(_mm_mullo_epi16(_mm_and_si128(srcVector, colorMask), alphaChannel), \
                                    _mm_mullo_epi16(_mm_and_si128(dstVector, colorMask), oneMinusAlphaChannel)); \
    finalRB = _mm_add_epi16(_mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)), half); \
    finalRB = _mm_srli_epi16(finalRB, 8); \
    \
    /* combine both halves into whole pixels */\
    result = _mm_or_si128(finalAG, finalRB); \
}
|
||||
|
||||
// Blend src over dst (source-over), using the alpha stored in each source pixel.
|
||||
// nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as:
|
||||
//const __m128i nullVector = _mm_set1_epi32(0);
|
||||
//const __m128i half = _mm_set1_epi16(0x80);
|
||||
//const __m128i one = _mm_set1_epi16(0xff);
|
||||
//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
|
||||
//const __m128i alphaMask = _mm_set1_epi32(0xff000000);
|
||||
//
|
||||
// The computation being done is:
|
||||
// result = s + d * (1-alpha)
|
||||
// with shortcuts if fully opaque or fully transparent.
|
||||
#define BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
    int x = 0; \
    \
    /* First, get dst aligned: blend the leading pixels one at a time. */ \
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \
        const uint s = src[x]; \
        if (s != 0) { \
            /* a top byte of 0xff means opaque: copy, otherwise blend */ \
            dst[x] = (s >= 0xff000000) ? s : s + BYTE_MUL(dst[x], qAlpha(~s)); \
        } \
    } \
    \
    /* Four pixels per step; src is read unaligned, dst is accessed aligned. */ \
    for (; x < length-3; x += 4) { \
        const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
            /* all opaque: plain copy */ \
            _mm_store_si128((__m128i *)&dst[x], srcVector); \
        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) == 0xffff) { \
            /* fully transparent: dst stays as it is */ \
        } else { \
            /* Extract the alpha channel on 2 x 16 bits so there is room \
               for the multiplication: each 32 bits become 0x00AA00AA, \
               which is then turned into 1 - alpha. */ \
            __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
            alphaChannel = _mm_sub_epi16(one, _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16))); \
            \
            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
            __m128i destMultipliedByOneMinusAlpha; \
            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
            \
            /* result = s + d * (1-alpha) */\
            _mm_store_si128((__m128i *)&dst[x], _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha)); \
        } \
    } \
    \
    /* Whatever is left is blended one pixel at a time. */ \
    for (; x < length; ++x) { \
        const uint s = src[x]; \
        if (s != 0) { \
            dst[x] = (s >= 0xff000000) ? s : s + BYTE_MUL(dst[x], qAlpha(~s)); \
        } \
    } \
}
|
||||
|
||||
// Basically blend src over dst with the const alpha defined as constAlphaVector.
|
||||
// nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as:
|
||||
//const __m128i nullVector = _mm_set1_epi32(0);
|
||||
//const __m128i half = _mm_set1_epi16(0x80);
|
||||
//const __m128i one = _mm_set1_epi16(0xff);
|
||||
//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
|
||||
//
|
||||
// The computation being done is:
|
||||
// dest = (s + d * sia) * ca + d * cia
|
||||
// = s * ca + d * (sia * ca + cia)
|
||||
// = s * ca + d * (1 - sa*ca)
|
||||
#define BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector) \
{ \
    /* Note: the scalar loops read a variable named const_alpha from the \
       scope the macro is expanded in; it is not a macro parameter. */ \
    int x = 0; \
    \
    /* Leading pixels are blended one at a time. */ \
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \
        const quint32 s = src[x]; \
        if (s != 0) { \
            const quint32 scaled = BYTE_MUL(s, const_alpha); \
            dst[x] = scaled + BYTE_MUL(dst[x], qAlpha(~scaled)); \
        } \
    } \
    \
    /* Four pixels per step; src is read unaligned, dst is accessed aligned. */ \
    for (; x < length-3; x += 4) { \
        __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) == 0xffff) \
            continue; /* four zero pixels: nothing to blend */ \
        \
        /* s * ca */ \
        BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half); \
        \
        /* 1 - alpha of the scaled source, laid out as 0x00AA00AA per pixel */ \
        __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
        alphaChannel = _mm_sub_epi16(one, _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16))); \
        \
        const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
        __m128i destMultipliedByOneMinusAlpha; \
        BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
        \
        _mm_store_si128((__m128i *)&dst[x], _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha)); \
    } \
    \
    /* Whatever is left is blended one pixel at a time. */ \
    for (; x < length; ++x) { \
        const quint32 s = src[x]; \
        if (s != 0) { \
            const quint32 scaled = BYTE_MUL(s, const_alpha); \
            dst[x] = scaled + BYTE_MUL(dst[x], qAlpha(~scaled)); \
        } \
    } \
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
#endif // QT_HAVE_SSE2
|
||||
|
||||
#endif // QDRAWINGPRIMITIVE_SSE2_P_H
|
|
@ -1,57 +0,0 @@
|
|||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2015 The Qt Company Ltd.
|
||||
** Contact: http://www.qt.io/licensing/
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and The Qt Company. For licensing terms
|
||||
** and conditions see http://www.qt.io/terms-conditions. For further
|
||||
** information use the contact form at http://www.qt.io/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 or version 3 as published by the Free
|
||||
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
|
||||
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
|
||||
** following information to ensure the GNU Lesser General Public License
|
||||
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
|
||||
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** As a special exception, The Qt Company gives you certain additional
|
||||
** rights. These rights are described in The Qt Company LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef QWMATRIX_H
|
||||
#define QWMATRIX_H
|
||||
|
||||
#include <QtGui/qmatrix.h>
|
||||
|
||||
QT_BEGIN_HEADER
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
||||
QT_END_HEADER
|
||||
|
||||
#endif // QWMATRIX_H
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue