qdrawhelper_ssse3.cpp

Absolute File Name:/home/qt/qt5_coco/qt5/qtbase/src/gui/painting/qdrawhelper_ssse3.cpp
Source codeSwitch to Preprocessed file
LineSourceCount
1/****************************************************************************-
2**-
3** Copyright (C) 2016 The Qt Company Ltd.-
4** Contact: https://www.qt.io/licensing/-
5**-
6** This file is part of the QtGui module of the Qt Toolkit.-
7**-
8** $QT_BEGIN_LICENSE:LGPL$-
9** Commercial License Usage-
10** Licensees holding valid commercial Qt licenses may use this file in-
11** accordance with the commercial license agreement provided with the-
12** Software or, alternatively, in accordance with the terms contained in-
13** a written agreement between you and The Qt Company. For licensing terms-
14** and conditions see https://www.qt.io/terms-conditions. For further-
15** information use the contact form at https://www.qt.io/contact-us.-
16**-
17** GNU Lesser General Public License Usage-
18** Alternatively, this file may be used under the terms of the GNU Lesser-
19** General Public License version 3 as published by the Free Software-
20** Foundation and appearing in the file LICENSE.LGPL3 included in the-
21** packaging of this file. Please review the following information to-
22** ensure the GNU Lesser General Public License version 3 requirements-
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.-
24**-
25** GNU General Public License Usage-
26** Alternatively, this file may be used under the terms of the GNU-
27** General Public License version 2.0 or (at your option) the GNU General-
28** Public license version 3 or any later version approved by the KDE Free-
29** Qt Foundation. The licenses are as published by the Free Software-
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3-
31** included in the packaging of this file. Please review the following-
32** information to ensure the GNU General Public License requirements will-
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and-
34** https://www.gnu.org/licenses/gpl-3.0.html.-
35**-
36** $QT_END_LICENSE$-
37**-
38****************************************************************************/-
39-
40#include <private/qdrawhelper_x86_p.h>-
41-
42#ifdef QT_COMPILER_SUPPORTS_SSSE3-
43-
44#include <private/qdrawingprimitive_sse2_p.h>-
45-
46QT_BEGIN_NAMESPACE-
47-
48inline static void blend_pixel(quint32 &dst, const quint32 src)-
49{-
50 if (src >= 0xff000000)
src >= 0xff000000Description
TRUEnever evaluated
FALSEnever evaluated
0
51 dst = src;
never executed: dst = src;
0
52 else if (src != 0)
src != 0Description
TRUEnever evaluated
FALSEnever evaluated
0
53 dst = src + BYTE_MUL(dst, qAlpha(~src));
never executed: dst = src + BYTE_MUL(dst, qAlpha(~src));
0
54}
never executed: end of block
0
55-
56-
57/* The instruction palignr uses direct arguments, so we have to generate the code for the different-
58 shifts (4, 8, 12). Checking the alignment inside the loop is unfortunately way too slow.-
59 */-
// Inner loop for the unaligned-source path of BLEND_SOURCE_OVER_ARGB32_SSSE3.
//
// _mm_alignr_epi8 takes its byte shift as an immediate, so this loop is instantiated once per
// shift value (palignrOffset) instead of selecting the shift inside the loop.
//
// Everything that originates from the caller (dst, src, length and the constant vectors) is an
// explicit macro parameter, so the loop no longer depends on the caller's variables having
// particular names. length, dst and src are parenthesized so that expression arguments expand
// with the intended precedence.
//
// The loop additionally uses these locals, which BLEND_SOURCE_OVER_ARGB32_SSSE3 declares itself
// before expanding this macro:
//   x                               current pixel index (advanced by 4 per iteration)
//   minusOffsetToAlignSrcOn16Bytes  number of pixels by which &src[x] is past a 16-byte boundary
//   srcVectorPrevLoaded             the previously loaded aligned block of source pixels
//   alphaShuffleMask                shuffle mask spreading each pixel's alpha byte over two 16-bit lanes
//
// Per iteration: load the next aligned 16-byte source block, combine it with the previous block
// to obtain src[x..x+3], then either copy (all four alphas are 0xff), skip (all four alphas are 0)
// or blend into dst[x..x+3]. The loop bound keeps the look-ahead load inside the first `length`
// source pixels.
#define BLENDING_LOOP(palignrOffset, dst, src, length, nullVector, half, one, colorMask, alphaMask)\
    for (; x - minusOffsetToAlignSrcOn16Bytes < (length) - 7; x += 4) { \
        const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&(src)[x - minusOffsetToAlignSrcOn16Bytes + 4]);\
        const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \
        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
            _mm_store_si128((__m128i *)&(dst)[x], srcVector); \
        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
            __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \
            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
            const __m128i dstVector = _mm_load_si128((__m128i *)&(dst)[x]); \
            __m128i destMultipliedByOneMinusAlpha; \
            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
            _mm_store_si128((__m128i *)&(dst)[x], result); \
        } \
        srcVectorPrevLoaded = srcVectorLastLoaded;\
    }


// Blend `length` pixels of src over dst (source-over). No constant alpha is involved.
// nullVector, half, one, colorMask and alphaMask are constant across the whole image/texture,
// and should be defined as:
//const __m128i nullVector = _mm_set1_epi32(0);
//const __m128i half = _mm_set1_epi16(0x80);
//const __m128i one = _mm_set1_epi16(0xff);
//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
//const __m128i alphaMask = _mm_set1_epi32(0xff000000);
//
// The computation being done is:
// result = s + d * (1-alpha)
// with shortcuts if fully opaque or fully transparent.
//
// Structure:
//  1. A scalar prologue (blend_pixel) runs until dst is 16-byte aligned.
//  2. If src is then 16-byte aligned as well, four pixels at a time are processed with aligned
//     loads and stores. Otherwise, provided at least 8 pixels remain, BLENDING_LOOP is used:
//     it only issues aligned source loads and realigns the data in registers. Its first load
//     starts at the 16-byte boundary preceding &src[x], i.e. up to three pixels before src[x].
//  3. A scalar epilogue (blend_pixel) handles whatever pixels are left.
//
// The arguments are expanded more than once, so they must be free of side effects.
#define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
    int x = 0; \
\
    /* First, get dst aligned. */ \
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \
        blend_pixel((dst)[x], (src)[x]); \
    } \
\
    const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&((src)[x])) >> 2) & 0x3;\
\
    if (!minusOffsetToAlignSrcOn16Bytes) {\
        /* src is aligned, usual algorithm but with aligned operations.\
           See the SSE2 version for more documentation on the algorithm itself. */\
        const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\
        for (; x < (length) - 3; x += 4) { \
            const __m128i srcVector = _mm_load_si128((const __m128i *)&(src)[x]); \
            const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
            if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
                _mm_store_si128((__m128i *)&(dst)[x], srcVector); \
            } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
                __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \
                alphaChannel = _mm_sub_epi16(one, alphaChannel); \
                const __m128i dstVector = _mm_load_si128((__m128i *)&(dst)[x]); \
                __m128i destMultipliedByOneMinusAlpha; \
                BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
                const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
                _mm_store_si128((__m128i *)&(dst)[x], result); \
            } \
        } /* end for() */\
    } else if (((length) - x) >= 8) {\
        /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\
        __m128i srcVectorPrevLoaded = _mm_load_si128((const __m128i *)&(src)[x - minusOffsetToAlignSrcOn16Bytes]);\
        const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\
\
        const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\
        switch (palignrOffset) {\
        case 4:\
            BLENDING_LOOP(4, dst, src, length, nullVector, half, one, colorMask, alphaMask)\
            break;\
        case 8:\
            BLENDING_LOOP(8, dst, src, length, nullVector, half, one, colorMask, alphaMask)\
            break;\
        case 12:\
            BLENDING_LOOP(12, dst, src, length, nullVector, half, one, colorMask, alphaMask)\
            break;\
        }\
    }\
    for (; x < (length); ++x) \
        blend_pixel((dst)[x], (src)[x]); \
}
141-
142void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,-
143 const uchar *srcPixels, int sbpl,-
144 int w, int h,-
145 int const_alpha)-
146{-
147 const quint32 *src = (const quint32 *) srcPixels;-
148 quint32 *dst = (quint32 *) destPixels;-
149 if (const_alpha == 256) {
const_alpha == 256Description
TRUEnever evaluated
FALSEnever evaluated
0
150 const __m128i alphaMask = _mm_set1_epi32(0xff000000);-
151 const __m128i nullVector = _mm_setzero_si128();-
152 const __m128i half = _mm_set1_epi16(0x80);-
153 const __m128i one = _mm_set1_epi16(0xff);-
154 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);-
155-
156 for (int y = 0; y < h; ++y) {
y < hDescription
TRUEnever evaluated
FALSEnever evaluated
0
157 BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask);
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: break;
never executed: end of block
never executed: end of block
never executed: end of block
never executed: break;
never executed: end of block
never executed: end of block
never executed: end of block
never executed: break;
never executed: end of block
never executed: blend_pixel(dst[x], src[x]);
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
(w - x) >= 8Description
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
!minusOffsetTo...gnSrcOn16BytesDescription
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...sk)) == 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...sk)) == 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...sk)) == 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...sk)) == 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
x < static_cas...0x3)) & 0x3)))Description
TRUEnever evaluated
FALSEnever evaluated
x < w-3Description
TRUEnever evaluated
FALSEnever evaluated
x-minusOffsetT...n16Bytes < w-7Description
TRUEnever evaluated
FALSEnever evaluated
x-minusOffsetT...n16Bytes < w-7Description
TRUEnever evaluated
FALSEnever evaluated
x-minusOffsetT...n16Bytes < w-7Description
TRUEnever evaluated
FALSEnever evaluated
x < wDescription
TRUEnever evaluated
FALSEnever evaluated
never executed: case 4:
never executed: case 8:
never executed: case 12:
0
158 dst = (quint32 *)(((uchar *) dst) + dbpl);-
159 src = (const quint32 *)(((const uchar *) src) + sbpl);-
160 }
never executed: end of block
0
161 } else if (const_alpha != 0) {
never executed: end of block
const_alpha != 0Description
TRUEnever evaluated
FALSEnever evaluated
0
162 // dest = (s + d * sia) * ca + d * cia-
163 // = s * ca + d * (sia * ca + cia)-
164 // = s * ca + d * (1 - sa*ca)-
165 const_alpha = (const_alpha * 255) >> 8;-
166 const __m128i nullVector = _mm_setzero_si128();-
167 const __m128i half = _mm_set1_epi16(0x80);-
168 const __m128i one = _mm_set1_epi16(0xff);-
169 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);-
170 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);-
171 for (int y = 0; y < h; ++y) {
y < hDescription
TRUEnever evaluated
FALSEnever evaluated
0
172 BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
never executed: end of block
s != 0Description
TRUEnever evaluated
FALSEnever evaluated
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
s != 0Description
TRUEnever evaluated
FALSEnever evaluated
x < static_cas...0x3)) & 0x3)))Description
TRUEnever evaluated
FALSEnever evaluated
x < w-3Description
TRUEnever evaluated
FALSEnever evaluated
x < wDescription
TRUEnever evaluated
FALSEnever evaluated
0
173 dst = (quint32 *)(((uchar *) dst) + dbpl);-
174 src = (const quint32 *)(((const uchar *) src) + sbpl);-
175 }
never executed: end of block
0
176 }
never executed: end of block
0
177}
never executed: end of block
0
178-
179static inline void store_uint24_ssse3(uchar *dst, const uint *src, int len)-
180{-
181 int i = 0;-
182-
183 quint24 *dst24 = reinterpret_cast<quint24*>(dst);-
184 // Align dst on 16 bytes-
185 for (; i < len && (reinterpret_cast<quintptr>(dst24) & 0xf); ++i)
i < lenDescription
TRUEnever evaluated
FALSEnever evaluated
(reinterpret_c...(dst24) & 0xf)Description
TRUEnever evaluated
FALSEnever evaluated
0
186 *dst24++ = quint24(*src++);
never executed: *dst24++ = quint24(*src++);
0
187-
188 // Shuffle masks for first and second half of every output, all outputs are aligned so the shuffled ends are not used.-
189 const __m128i shuffleMask1 = _mm_setr_epi8(char(0x80), char(0x80), char(0x80), char(0x80), 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12);-
190 const __m128i shuffleMask2 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, char(0x80), char(0x80), char(0x80), char(0x80));-
191-
192 const __m128i *inVectorPtr = (const __m128i *)src;-
193 __m128i *dstVectorPtr = (__m128i *)dst24;-
194-
195 for (; i < (len - 15); i += 16) {
i < (len - 15)Description
TRUEnever evaluated
FALSEnever evaluated
0
196 // Load four vectors, store three.-
197 // Create each output vector by combining two shuffled input vectors.-
198 __m128i srcVector1 = _mm_loadu_si128(inVectorPtr);-
199 ++inVectorPtr;-
200 __m128i srcVector2 = _mm_loadu_si128(inVectorPtr);-
201 ++inVectorPtr;-
202 __m128i outputVector1 = _mm_shuffle_epi8(srcVector1, shuffleMask1);-
203 __m128i outputVector2 = _mm_shuffle_epi8(srcVector2, shuffleMask2);-
204 __m128i outputVector = _mm_alignr_epi8(outputVector2, outputVector1, 4);-
205 _mm_store_si128(dstVectorPtr, outputVector);-
206 ++dstVectorPtr;-
207-
208 srcVector1 = _mm_loadu_si128(inVectorPtr);-
209 ++inVectorPtr;-
210 outputVector1 = _mm_shuffle_epi8(srcVector2, shuffleMask1);-
211 outputVector2 = _mm_shuffle_epi8(srcVector1, shuffleMask2);-
212 outputVector = _mm_alignr_epi8(outputVector2, outputVector1, 8);-
213 _mm_store_si128(dstVectorPtr, outputVector);-
214 ++dstVectorPtr;-
215-
216 srcVector2 = _mm_loadu_si128(inVectorPtr);-
217 ++inVectorPtr;-
218 outputVector1 = _mm_shuffle_epi8(srcVector1, shuffleMask1);-
219 outputVector2 = _mm_shuffle_epi8(srcVector2, shuffleMask2);-
220 outputVector = _mm_alignr_epi8(outputVector2, outputVector1, 12);-
221 _mm_store_si128(dstVectorPtr, outputVector);-
222 ++dstVectorPtr;-
223 }
never executed: end of block
0
224 dst24 = reinterpret_cast<quint24*>(dstVectorPtr);-
225 src = reinterpret_cast<const uint*>(inVectorPtr);-
226-
227 for (; i < len; ++i)
i < lenDescription
TRUEnever evaluated
FALSEnever evaluated
0
228 *dst24++ = quint24(*src++);
never executed: *dst24++ = quint24(*src++);
0
229}
never executed: end of block
0
230-
231void QT_FASTCALL storePixelsBPP24_ssse3(uchar *dest, const uint *src, int index, int count)-
232{-
233 store_uint24_ssse3(dest + index * 3, src, count);-
234}
never executed: end of block
0
235-
236extern void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len);-
237-
238const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Operator *, const QSpanData *data,-
239 int y, int x, int length)-
240{-
241 const uchar *line = data->texture.scanLine(y) + x * 3;-
242 qt_convert_rgb888_to_rgb32_ssse3(buffer, line, length);-
243 return buffer;
never executed: return buffer;
0
244}-
245-
246QT_END_NAMESPACE-
247-
248#endif // QT_COMPILER_SUPPORTS_SSSE3-
Source codeSwitch to Preprocessed file

Generated by Squish Coco Non-Commercial 4.3.0-BETA-master-30-08-2018-4cb69e9