qdrawhelper_ssse3.cpp

Absolute File Name:/home/qt/qt5_coco/qt5/qtbase/src/gui/painting/qdrawhelper_ssse3.cpp
Source codeSwitch to Preprocessed file
LineSourceCount
1/****************************************************************************-
2**-
3** Copyright (C) 2015 The Qt Company Ltd.-
4** Contact: http://www.qt.io/licensing/-
5**-
6** This file is part of the QtGui module of the Qt Toolkit.-
7**-
8** $QT_BEGIN_LICENSE:LGPL21$-
9** Commercial License Usage-
10** Licensees holding valid commercial Qt licenses may use this file in-
11** accordance with the commercial license agreement provided with the-
12** Software or, alternatively, in accordance with the terms contained in-
13** a written agreement between you and The Qt Company. For licensing terms-
14** and conditions see http://www.qt.io/terms-conditions. For further-
15** information use the contact form at http://www.qt.io/contact-us.-
16**-
17** GNU Lesser General Public License Usage-
18** Alternatively, this file may be used under the terms of the GNU Lesser-
19** General Public License version 2.1 or version 3 as published by the Free-
20** Software Foundation and appearing in the file LICENSE.LGPLv21 and-
21** LICENSE.LGPLv3 included in the packaging of this file. Please review the-
22** following information to ensure the GNU Lesser General Public License-
23** requirements will be met: https://www.gnu.org/licenses/lgpl.html and-
24** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.-
25**-
26** As a special exception, The Qt Company gives you certain additional-
27** rights. These rights are described in The Qt Company LGPL Exception-
28** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.-
29**-
30** $QT_END_LICENSE$-
31**-
32****************************************************************************/-
33-
34#include <private/qdrawhelper_x86_p.h>-
35-
36#ifdef QT_COMPILER_SUPPORTS_SSSE3-
37-
38#include <private/qdrawingprimitive_sse2_p.h>-
39-
40QT_BEGIN_NAMESPACE-
41-
// Source-over blend of the single pixel `src` onto `dst`; `dst` is updated in place.
//  - src >= 0xff000000 (top byte is 0xff): dst is overwritten with src.
//  - src == 0: neither branch is taken and dst is left untouched.
//  - otherwise: dst = src + BYTE_MUL(dst, qAlpha(~src)).
// BYTE_MUL and qAlpha are not defined in this file; presumably qAlpha(~src) yields
// 255 - alpha(src), which would match the "s + d * (1-alpha)" formula quoted further
// down in this file. TODO confirm against their definitions.
42inline static void blend_pixel(quint32 &dst, const quint32 src)-
43{-
44 if (src >= 0xff000000)
src >= 0xff000000Description
TRUEnever evaluated
FALSEnever evaluated
0
45 dst = src;
never executed: dst = src;
0
46 else if (src != 0)
src != 0Description
TRUEnever evaluated
FALSEnever evaluated
0
47 dst = src + BYTE_MUL(dst, qAlpha(~src));
never executed: dst = src + BYTE_MUL(dst, qAlpha(~src));
0
48}
never executed: end of block
0
49-
50-
51/* The instruction palignr uses direct arguments, so we have to generate the code for the different-
52 shifts (4, 8, 12). Checking the alignment inside the loop is unfortunately way too slow.-
53 */-
// BLENDING_LOOP(palignrOffset, length): 4-pixels-per-iteration loop used when src is
// not 16-byte aligned. It expands to a bare for-loop and relies on these names being
// declared at the expansion site: x, src, dst, minusOffsetToAlignSrcOn16Bytes,
// srcVectorPrevLoaded, alphaMask, nullVector, alphaShuffleMask, one, colorMask, half.
// Each iteration:
//  - loads the next 16 bytes of src with an aligned load (srcVectorLastLoaded);
//  - combines it with srcVectorPrevLoaded through _mm_alignr_epi8(palignrOffset) to
//    form srcVector, the value blended into dst[x];
//  - if (srcVector & alphaMask) equals alphaMask in all four 32-bit lanes, stores
//    srcVector to dst[x] unchanged;
//  - else, unless (srcVector & alphaMask) is zero in all four lanes, stores
//    srcVector + BYTE_MUL_SSE2(dst, one - alpha) to dst[x];
//  - if all four lanes have zero alpha bits, dst[x] is not written;
//  - finally keeps srcVectorLastLoaded as srcVectorPrevLoaded for the next round.
// The loop condition (x - minusOffsetToAlignSrcOn16Bytes < length - 7) keeps the
// look-ahead load starting at &src[x - minusOffsetToAlignSrcOn16Bytes + 4] below
// index `length`.
// BYTE_MUL_SSE2 is defined outside this file; its exact rounding is not visible here.
54#define BLENDING_LOOP(palignrOffset, length)\-
55 for (; x-minusOffsetToAlignSrcOn16Bytes < length-7; x += 4) { \-
56 const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\-
57 const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \-
58 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \-
59 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \-
60 _mm_store_si128((__m128i *)&dst[x], srcVector); \-
61 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \-
62 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \-
63 alphaChannel = _mm_sub_epi16(one, alphaChannel); \-
64 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \-
65 __m128i destMultipliedByOneMinusAlpha; \-
66 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \-
67 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \-
68 _mm_store_si128((__m128i *)&dst[x], result); \-
69 } \-
70 srcVectorPrevLoaded = srcVectorLastLoaded;\-
71 }-
72-
73-
74// Blend src over dst using the per-pixel source alpha; this variant takes no constant alpha.-
75// nullVector, half, one, colorMask and alphaMask are constant across the whole image/texture, and should be defined as:-
76//const __m128i nullVector = _mm_set1_epi32(0);-
77//const __m128i half = _mm_set1_epi16(0x80);-
78//const __m128i one = _mm_set1_epi16(0xff);-
79//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);-
80//const __m128i alphaMask = _mm_set1_epi32(0xff000000);-
81//-
82// The computation being done is:-
83// result = s + d * (1-alpha)-
84// with shortcuts if fully opaque or fully transparent.-
// Macro arguments: dst and src are indexed per pixel (dst[x], src[x]); length is the
// number of pixels to process; the remaining arguments are the __m128i constants.
// The expansion is a braced block that declares its own `int x` and proceeds as follows:
//  1. ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) runs blend_pixel() on leading pixels;
//     per the comment inside the macro this gets dst aligned (the prologue macro itself
//     is defined outside this file).
//  2. minusOffsetToAlignSrcOn16Bytes = (address of src[x] >> 2) & 0x3, i.e. the number
//     of 4-byte units by which src[x] lies past a 16-byte boundary.
//  3. If that offset is 0, src is aligned too: a 4-pixel loop with aligned loads and
//     stores, using the same opaque / transparent / blend cases as BLENDING_LOOP.
//  4. Otherwise, and only when at least 8 pixels remain, the first source vector is
//     loaded from the aligned address just below src[x] and BLENDING_LOOP is
//     instantiated for a palignr offset of 4, 8 or 12 bytes (offset << 2).
//  5. Whatever is left (including the whole misaligned case with fewer than 8 pixels)
//     is handled one pixel at a time by blend_pixel().
85#define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \-
86 int x = 0; \-
87\-
88 /* First, get dst aligned. */ \-
89 ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \-
90 blend_pixel(dst[x], src[x]); \-
91 } \-
92\-
93 const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;\-
94\-
95 if (!minusOffsetToAlignSrcOn16Bytes) {\-
96 /* src is aligned, usual algorithm but with aligned operations.\-
97 See the SSE2 version for more documentation on the algorithm itself. */\-
98 const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\-
99 for (; x < length-3; x += 4) { \-
100 const __m128i srcVector = _mm_load_si128((const __m128i *)&src[x]); \-
101 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \-
102 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \-
103 _mm_store_si128((__m128i *)&dst[x], srcVector); \-
104 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \-
105 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \-
106 alphaChannel = _mm_sub_epi16(one, alphaChannel); \-
107 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \-
108 __m128i destMultipliedByOneMinusAlpha; \-
109 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \-
110 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \-
111 _mm_store_si128((__m128i *)&dst[x], result); \-
112 } \-
113 } /* end for() */\-
114 } else if ((length - x) >= 8) {\-
115 /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\-
116 __m128i srcVectorPrevLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);\-
117 const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\-
118\-
119 const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\-
120 switch (palignrOffset) {\-
121 case 4:\-
122 BLENDING_LOOP(4, length)\-
123 break;\-
124 case 8:\-
125 BLENDING_LOOP(8, length)\-
126 break;\-
127 case 12:\-
128 BLENDING_LOOP(12, length)\-
129 break;\-
130 }\-
131 }\-
132 for (; x < length; ++x) \-
133 blend_pixel(dst[x], src[x]); \-
134}-
135-
// Blends h rows of w 32-bit pixels from srcPixels onto destPixels.
//  destPixels, dbpl: start of the destination and the byte step added to reach the next row.
//  srcPixels, sbpl:  start of the source and the byte step added to reach the next row.
//  w, h:             pixels per row and number of rows.
//  const_alpha:
//    == 256  each row goes through BLEND_SOURCE_OVER_ARGB32_SSSE3 (per-pixel alpha only);
//    == 0    neither branch is taken, so nothing is written;
//    other   the value is rescaled with (const_alpha * 255) >> 8, broadcast into
//            constAlphaVector and each row goes through
//            BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2 (defined outside this file).
// The rows interleaved with the code below are coverage-report annotations for the
// expanded macros, not source statements.
136void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,-
137 const uchar *srcPixels, int sbpl,-
138 int w, int h,-
139 int const_alpha)-
140{-
141 const quint32 *src = (const quint32 *) srcPixels;-
142 quint32 *dst = (quint32 *) destPixels;-
143 if (const_alpha == 256) {
const_alpha == 256Description
TRUEnever evaluated
FALSEnever evaluated
0
144 const __m128i alphaMask = _mm_set1_epi32(0xff000000);-
145 const __m128i nullVector = _mm_setzero_si128();-
146 const __m128i half = _mm_set1_epi16(0x80);-
147 const __m128i one = _mm_set1_epi16(0xff);-
148 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);-
149-
150 for (int y = 0; y < h; ++y) {
y < hDescription
TRUEnever evaluated
FALSEnever evaluated
0
151 BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask);
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: break;
never executed: end of block
never executed: end of block
never executed: end of block
never executed: break;
never executed: end of block
never executed: end of block
never executed: end of block
never executed: break;
never executed: end of block
never executed: blend_pixel(dst[x], src[x]);
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
(w - x) >= 8Description
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
!minusOffsetTo...gnSrcOn16BytesDescription
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...sk)) == 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...sk)) == 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...sk)) == 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...sk)) == 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
x < static_cas...0x3)) & 0x3)))Description
TRUEnever evaluated
FALSEnever evaluated
x < w-3Description
TRUEnever evaluated
FALSEnever evaluated
x-minusOffsetT...n16Bytes < w-7Description
TRUEnever evaluated
FALSEnever evaluated
x-minusOffsetT...n16Bytes < w-7Description
TRUEnever evaluated
FALSEnever evaluated
x-minusOffsetT...n16Bytes < w-7Description
TRUEnever evaluated
FALSEnever evaluated
x < wDescription
TRUEnever evaluated
FALSEnever evaluated
never executed: case 4:
never executed: case 8:
never executed: case 12:
0
152 dst = (quint32 *)(((uchar *) dst) + dbpl);-
153 src = (const quint32 *)(((const uchar *) src) + sbpl);-
154 }
never executed: end of block
0
155 } else if (const_alpha != 0) {
never executed: end of block
const_alpha != 0Description
TRUEnever evaluated
FALSEnever evaluated
0
156 // dest = (s + d * sia) * ca + d * cia-
157 // = s * ca + d * (sia * ca + cia)-
158 // = s * ca + d * (1 - sa*ca)-
159 const_alpha = (const_alpha * 255) >> 8;-
160 const __m128i nullVector = _mm_setzero_si128();-
161 const __m128i half = _mm_set1_epi16(0x80);-
162 const __m128i one = _mm_set1_epi16(0xff);-
163 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);-
164 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);-
165 for (int y = 0; y < h; ++y) {
y < hDescription
TRUEnever evaluated
FALSEnever evaluated
0
166 BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
s != 0Description
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
s != 0Description
TRUEnever evaluated
FALSEnever evaluated
x < static_cas...0x3)) & 0x3)))Description
TRUEnever evaluated
FALSEnever evaluated
x < w-3Description
TRUEnever evaluated
FALSEnever evaluated
x < wDescription
TRUEnever evaluated
FALSEnever evaluated
0
167 dst = (quint32 *)(((uchar *) dst) + dbpl);-
168 src = (const quint32 *)(((const uchar *) src) + sbpl);-
169 }
never executed: end of block
0
170 }
never executed: end of block
0
171}
never executed: end of block
0
172-
173QT_END_NAMESPACE-
174-
175#endif // QT_COMPILER_SUPPORTS_SSSE3-
Source codeSwitch to Preprocessed file

Generated by Squish Coco Non-Commercial 4.3.0-BETA-master-30-08-2018-4cb69e9