Line | Source | Count |
1 | | - |
2 | | - |
3 | | - |
4 | | - |
5 | | - |
6 | | - |
7 | | - |
8 | | - |
9 | | - |
10 | | - |
11 | | - |
12 | | - |
13 | | - |
14 | | - |
15 | | - |
16 | | - |
17 | | - |
18 | | - |
19 | | - |
20 | | - |
21 | | - |
22 | | - |
23 | | - |
24 | | - |
25 | | - |
26 | | - |
27 | | - |
28 | | - |
29 | | - |
30 | | - |
31 | | - |
32 | | - |
33 | | - |
34 | | - |
35 | | - |
36 | | - |
37 | | - |
38 | | - |
39 | | - |
40 | #include <private/qdrawhelper_x86_p.h> | - |
41 | | - |
42 | #ifdef QT_COMPILER_SUPPORTS_SSSE3 | - |
43 | | - |
44 | #include <private/qdrawingprimitive_sse2_p.h> | - |
45 | | - |
46 | QT_BEGIN_NAMESPACE | - |
47 | | - |
48 | inline static void blend_pixel(quint32 &dst, const quint32 src) | - |
49 | { | - |
50 | if (src >= 0xff000000) | - |
51 | dst = src; | - |
52 | else if (src != 0) | - |
53 | dst = src + BYTE_MUL(dst, qAlpha(~src)); | - |
54 | } | - |
55 | | - |
56 | | - |
57 | | - |
58 | | - |
59 | | - |
/*
 * BLENDING_LOOP(palignrOffset, length)
 *
 * Vector loop used when dst is 16-byte aligned but src is not. Each iteration
 * handles four 32-bit pixels: it does an aligned load of the next 16 source
 * bytes and stitches them to the previously loaded vector with
 * _mm_alignr_epi8 to obtain the four source pixels that correspond to dst[x].
 *
 *   palignrOffset  byte shift handed to _mm_alignr_epi8; must be a
 *                  compile-time constant, which is why the caller switches
 *                  over the possible values.
 *   length         number of pixels in the row.
 *
 * The following identifiers are NOT parameters; they are picked up by name
 * from the scope in which the macro is expanded:
 *   x, src, dst, minusOffsetToAlignSrcOn16Bytes, srcVectorPrevLoaded,
 *   alphaShuffleMask, alphaMask, nullVector, one, colorMask, half.
 *
 * The loop stops while at least seven pixels may remain (see the bound), so
 * the aligned load of src[x - offset + 4 .. x - offset + 7] stays below
 * index `length`; the caller finishes the row with a scalar loop.
 *
 * `length` is parenthesized so that an expression argument keeps its meaning.
 */
#define BLENDING_LOOP(palignrOffset, length)\
    for (; x-minusOffsetToAlignSrcOn16Bytes < (length)-7; x += 4) { \
        /* aligned load of the next 16 source bytes */ \
        const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\
        /* combine previous and current loads into the 4 pixels for dst[x..x+3] */ \
        const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \
        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
            /* all four alpha bytes equal the mask: plain copy */ \
            _mm_store_si128((__m128i *)&dst[x], srcVector); \
        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
            /* at least one alpha byte is non-zero: dst = src + dst * (one - alpha) */ \
            __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \
            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
            __m128i destMultipliedByOneMinusAlpha; \
            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
            _mm_store_si128((__m128i *)&dst[x], result); \
        } \
        /* the current load becomes the low half of the next iteration */ \
        srcVectorPrevLoaded = srcVectorLastLoaded;\
    }
78 | | - |
79 | | - |
80 | | - |
81 | | - |
82 | | - |
83 | | - |
84 | | - |
85 | | - |
86 | | - |
87 | | - |
88 | | - |
89 | | - |
90 | | - |
/*
 * BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask)
 *
 * Blends one row of `length` 32-bit pixels from src onto dst.
 *
 *   1. Scalar prologue (ALIGNMENT_PROLOGUE_16BYTES) until dst is 16-byte aligned.
 *   2. If src is then 16-byte aligned as well: aligned 4-pixel vector loop.
 *      Otherwise, if at least 8 pixels remain: BLENDING_LOOP, which rebuilds
 *      the unaligned source pixels from two aligned loads.
 *   3. Scalar epilogue (blend_pixel) for whatever is left.
 *
 * Per group of four pixels: all alpha bytes 0xff -> copy; all alpha bytes
 * zero -> skip; otherwise dst = src + dst * (one - alpha) via BYTE_MUL_SSE2.
 *
 * NOTE: BLENDING_LOOP refers to src, dst, alphaMask, nullVector, one,
 * colorMask and half by their literal names, so on the unaligned path the
 * call site must have variables with exactly those names in scope.
 *
 * The macro expands to a braced block and declares `x`,
 * `minusOffsetToAlignSrcOn16Bytes` and `alphaShuffleMask` inside it.
 */
#define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
    int x = 0; \
    \
    /* First, get dst aligned. */ \
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \
        blend_pixel((dst)[x], (src)[x]); \
    } \
    \
    /* Number of pixels (0..3) by which &src[x] sits past a 16-byte boundary. */ \
    const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&((src)[x])) >> 2) & 0x3;\
    /* Spreads each pixel's byte 3 (alpha) into two zero-extended 16-bit lanes; shared by both vector paths. */ \
    const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\
    \
    if (!minusOffsetToAlignSrcOn16Bytes) {\
        /* src is aligned, usual algorithm but with aligned operations. */ \
        /* See the SSE2 version for more documentation on the algorithm itself. */ \
        for (; x < (length)-3; x += 4) { \
            const __m128i srcVector = _mm_load_si128((const __m128i *)&(src)[x]); \
            const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
            if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
                _mm_store_si128((__m128i *)&(dst)[x], srcVector); \
            } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
                __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \
                alphaChannel = _mm_sub_epi16(one, alphaChannel); \
                const __m128i dstVector = _mm_load_si128((__m128i *)&(dst)[x]); \
                __m128i destMultipliedByOneMinusAlpha; \
                BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
                const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
                _mm_store_si128((__m128i *)&(dst)[x], result); \
            } \
        } /* end for() */\
    } else if (((length) - x) >= 8) {\
        /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\
        __m128i srcVectorPrevLoaded = _mm_load_si128((const __m128i *)&(src)[x - minusOffsetToAlignSrcOn16Bytes]);\
        const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\
        \
        /* _mm_alignr_epi8 needs a constant shift, hence one expansion per offset. */ \
        switch (palignrOffset) {\
        case 4:\
            BLENDING_LOOP(4, length)\
            break;\
        case 8:\
            BLENDING_LOOP(8, length)\
            break;\
        case 12:\
            BLENDING_LOOP(12, length)\
            break;\
        default:\
            /* not reachable: the offset is 1..3 pixels in this branch */ \
            break;\
        }\
    }\
    /* Scalar tail. */ \
    for (; x < (length); ++x) \
        blend_pixel((dst)[x], (src)[x]); \
}
141 | | - |
142 | void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl, | - |
143 | const uchar *srcPixels, int sbpl, | - |
144 | int w, int h, | - |
145 | int const_alpha) | - |
146 | { | - |
147 | const quint32 *src = (const quint32 *) srcPixels; | - |
148 | quint32 *dst = (quint32 *) destPixels; | - |
149 | if (const_alpha == 256) { | - |
150 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - |
151 | const __m128i nullVector = _mm_setzero_si128(); | - |
152 | const __m128i half = _mm_set1_epi16(0x80); | - |
153 | const __m128i one = _mm_set1_epi16(0xff); | - |
154 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
155 | | - |
156 | for (int y = 0; y < h; ++y) { | - |
157 | BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask); | - |
158 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
159 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
160 | } | - |
161 | } else if (const_alpha != 0) { | - |
162 | | - |
163 | | - |
164 | | - |
165 | const_alpha = (const_alpha * 255) >> 8; | - |
166 | const __m128i nullVector = _mm_setzero_si128(); | - |
167 | const __m128i half = _mm_set1_epi16(0x80); | - |
168 | const __m128i one = _mm_set1_epi16(0xff); | - |
169 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
170 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
171 | for (int y = 0; y < h; ++y) { | - |
172 | BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector) | - |
173 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
174 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
175 | } | - |
176 | } | - |
177 | } | - |
178 | | - |
179 | static inline void store_uint24_ssse3(uchar *dst, const uint *src, int len) | - |
180 | { | - |
181 | int i = 0; | - |
182 | | - |
183 | quint24 *dst24 = reinterpret_cast<quint24*>(dst); | - |
184 | | - |
185 | for (; i < len && (reinterpret_cast<quintptr>(dst24) & 0xf); ++i)TRUE | never evaluated | FALSE | never evaluated |
TRUE | never evaluated | FALSE | never evaluated |
| 0 |
186 | *dst24++ = quint24(*src++); never executed: *dst24++ = quint24(*src++); | 0 |
187 | | - |
188 | | - |
189 | const __m128i shuffleMask1 = _mm_setr_epi8(char(0x80), char(0x80), char(0x80), char(0x80), 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12); | - |
190 | const __m128i shuffleMask2 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, char(0x80), char(0x80), char(0x80), char(0x80)); | - |
191 | | - |
192 | const __m128i *inVectorPtr = (const __m128i *)src; | - |
193 | __m128i *dstVectorPtr = (__m128i *)dst24; | - |
194 | | - |
195 | for (; i < (len - 15); i += 16) {TRUE | never evaluated | FALSE | never evaluated |
| 0 |
196 | | - |
197 | | - |
198 | __m128i srcVector1 = _mm_loadu_si128(inVectorPtr); | - |
199 | ++inVectorPtr; | - |
200 | __m128i srcVector2 = _mm_loadu_si128(inVectorPtr); | - |
201 | ++inVectorPtr; | - |
202 | __m128i outputVector1 = _mm_shuffle_epi8(srcVector1, shuffleMask1); | - |
203 | __m128i outputVector2 = _mm_shuffle_epi8(srcVector2, shuffleMask2); | - |
204 | __m128i outputVector = _mm_alignr_epi8(outputVector2, outputVector1, 4); | - |
205 | _mm_store_si128(dstVectorPtr, outputVector); | - |
206 | ++dstVectorPtr; | - |
207 | | - |
208 | srcVector1 = _mm_loadu_si128(inVectorPtr); | - |
209 | ++inVectorPtr; | - |
210 | outputVector1 = _mm_shuffle_epi8(srcVector2, shuffleMask1); | - |
211 | outputVector2 = _mm_shuffle_epi8(srcVector1, shuffleMask2); | - |
212 | outputVector = _mm_alignr_epi8(outputVector2, outputVector1, 8); | - |
213 | _mm_store_si128(dstVectorPtr, outputVector); | - |
214 | ++dstVectorPtr; | - |
215 | | - |
216 | srcVector2 = _mm_loadu_si128(inVectorPtr); | - |
217 | ++inVectorPtr; | - |
218 | outputVector1 = _mm_shuffle_epi8(srcVector1, shuffleMask1); | - |
219 | outputVector2 = _mm_shuffle_epi8(srcVector2, shuffleMask2); | - |
220 | outputVector = _mm_alignr_epi8(outputVector2, outputVector1, 12); | - |
221 | _mm_store_si128(dstVectorPtr, outputVector); | - |
222 | ++dstVectorPtr; | - |
223 | } never executed: end of block | 0 |
224 | dst24 = reinterpret_cast<quint24*>(dstVectorPtr); | - |
225 | src = reinterpret_cast<const uint*>(inVectorPtr); | - |
226 | | - |
227 | for (; i < len; ++i)TRUE | never evaluated | FALSE | never evaluated |
| 0 |
228 | *dst24++ = quint24(*src++); never executed: *dst24++ = quint24(*src++); | 0 |
229 | } never executed: end of block | 0 |
230 | | - |
231 | void QT_FASTCALL storePixelsBPP24_ssse3(uchar *dest, const uint *src, int index, int count) | - |
232 | { | - |
233 | store_uint24_ssse3(dest + index * 3, src, count); | - |
234 | } never executed: end of block | 0 |
235 | | - |
236 | extern void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len); | - |
237 | | - |
238 | const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Operator *, const QSpanData *data, | - |
239 | int y, int x, int length) | - |
240 | { | - |
241 | const uchar *line = data->texture.scanLine(y) + x * 3; | - |
242 | qt_convert_rgb888_to_rgb32_ssse3(buffer, line, length); | - |
243 | return buffer; never executed: return buffer; | 0 |
244 | } | - |
245 | | - |
246 | QT_END_NAMESPACE | - |
247 | | - |
248 | #endif // QT_COMPILER_SUPPORTS_SSSE3 | - |
| | |