painting/qdrawhelper_ssse3.cpp

Source codeSwitch to Preprocessed file
LineSource CodeCoverage
1/**************************************************************************** -
2** -
3** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies). -
4** Contact: http://www.qt-project.org/legal -
5** -
6** This file is part of the QtGui module of the Qt Toolkit. -
7** -
8** $QT_BEGIN_LICENSE:LGPL$ -
9** Commercial License Usage -
10** Licensees holding valid commercial Qt licenses may use this file in -
11** accordance with the commercial license agreement provided with the -
12** Software or, alternatively, in accordance with the terms contained in -
13** a written agreement between you and Digia. For licensing terms and -
14** conditions see http://qt.digia.com/licensing. For further information -
15** use the contact form at http://qt.digia.com/contact-us. -
16** -
17** GNU Lesser General Public License Usage -
18** Alternatively, this file may be used under the terms of the GNU Lesser -
19** General Public License version 2.1 as published by the Free Software -
20** Foundation and appearing in the file LICENSE.LGPL included in the -
21** packaging of this file. Please review the following information to -
22** ensure the GNU Lesser General Public License version 2.1 requirements -
23** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. -
24** -
25** In addition, as a special exception, Digia gives you certain additional -
26** rights. These rights are described in the Digia Qt LGPL Exception -
27** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. -
28** -
29** GNU General Public License Usage -
30** Alternatively, this file may be used under the terms of the GNU -
31** General Public License version 3.0 as published by the Free Software -
32** Foundation and appearing in the file LICENSE.GPL included in the -
33** packaging of this file. Please review the following information to -
34** ensure the GNU General Public License version 3.0 requirements will be -
35** met: http://www.gnu.org/copyleft/gpl.html. -
36** -
37** -
38** $QT_END_LICENSE$ -
39** -
40****************************************************************************/ -
41 -
42#include <private/qdrawhelper_x86_p.h> -
43 -
44#ifdef QT_COMPILER_SUPPORTS_SSSE3 -
45 -
46#include <private/qdrawingprimitive_sse2_p.h> -
47 -
48QT_BEGIN_NAMESPACE -
49 -
50inline static void blend_pixel(quint32 &dst, const quint32 src) -
51{ -
52 if (src >= 0xff000000)
evaluated: src >= 0xff000000
TRUEFALSE
yes
Evaluation Count:1646
yes
Evaluation Count:2877
1646-2877
53 dst = src;
executed: dst = src;
Execution Count:1646
1646
54 else if (src != 0)
evaluated: src != 0
TRUEFALSE
yes
Evaluation Count:605
yes
Evaluation Count:2272
605-2272
55 dst = src + BYTE_MUL(dst, qAlpha(~src));
executed: dst = src + BYTE_MUL(dst, qAlpha(~src));
Execution Count:605
605
56} -
57 -
58 -
59/* The palignr instruction takes its shift as a direct (immediate) argument, so we have to generate the code for each of the different -
60 shifts (4, 8, 12). Checking the alignment inside the loop is unfortunately way too slow. -
61 */ -
62#define BLENDING_LOOP(palignrOffset, length)\ -
63 for (; x-minusOffsetToAlignSrcOn16Bytes < length-7; x += 4) { \ -
64 const __m128i srcVectorLastLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\ -
65 const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \ -
66 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ -
67 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ -
68 _mm_store_si128((__m128i *)&dst[x], srcVector); \ -
69 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ -
70 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ -
71 alphaChannel = _mm_sub_epi16(one, alphaChannel); \ -
72 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ -
73 __m128i destMultipliedByOneMinusAlpha; \ -
74 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ -
75 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ -
76 _mm_store_si128((__m128i *)&dst[x], result); \ -
77 } \ -
78 srcVectorPrevLoaded = srcVectorLastLoaded;\ -
79 } -
80 -
81 -
82// Basically blend src over dst (this macro takes no constant-alpha vector). -
83// nullVector, half, one, colorMask and alphaMask are constant across the whole image/texture, and should be defined as: -
84//const __m128i nullVector = _mm_set1_epi32(0); -
85//const __m128i half = _mm_set1_epi16(0x80); -
86//const __m128i one = _mm_set1_epi16(0xff); -
87//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -
88//const __m128i alphaMask = _mm_set1_epi32(0xff000000); -
89// -
90// The computation being done is: -
91// result = s + d * (1-alpha) -
92// with shortcuts if fully opaque or fully transparent. -
93#define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ -
94 int x = 0; \ -
95\ -
96 /* First, get dst aligned. */ \ -
97 ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \ -
98 blend_pixel(dst[x], src[x]); \ -
99 } \ -
100\ -
101 const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;\ -
102\ -
103 if (!minusOffsetToAlignSrcOn16Bytes) {\ -
104 /* src is aligned, usual algorithm but with aligned operations.\ -
105 See the SSE2 version for more documentation on the algorithm itself. */\ -
106 const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3);\ -
107 for (; x < length-3; x += 4) { \ -
108 const __m128i srcVector = _mm_load_si128((__m128i *)&src[x]); \ -
109 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ -
110 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ -
111 _mm_store_si128((__m128i *)&dst[x], srcVector); \ -
112 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ -
113 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ -
114 alphaChannel = _mm_sub_epi16(one, alphaChannel); \ -
115 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ -
116 __m128i destMultipliedByOneMinusAlpha; \ -
117 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ -
118 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ -
119 _mm_store_si128((__m128i *)&dst[x], result); \ -
120 } \ -
121 } /* end for() */\ -
122 } else if ((length - x) >= 8) {\ -
123 /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\ -
124 __m128i srcVectorPrevLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);\ -
125 const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\ -
126\ -
127 const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3);\ -
128 switch (palignrOffset) {\ -
129 case 4:\ -
130 BLENDING_LOOP(4, length)\ -
131 break;\ -
132 case 8:\ -
133 BLENDING_LOOP(8, length)\ -
134 break;\ -
135 case 12:\ -
136 BLENDING_LOOP(12, length)\ -
137 break;\ -
138 }\ -
139 }\ -
140 for (; x < length; ++x) \ -
141 blend_pixel(dst[x], src[x]); \ -
142} -
143 -
144void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl, -
145 const uchar *srcPixels, int sbpl, -
146 int w, int h, -
147 int const_alpha) -
148{ -
149 const quint32 *src = (const quint32 *) srcPixels;
executed (the execution status of this line is deduced): const quint32 *src = (const quint32 *) srcPixels;
-
150 quint32 *dst = (quint32 *) destPixels;
executed (the execution status of this line is deduced): quint32 *dst = (quint32 *) destPixels;
-
151 if (const_alpha == 256) {
evaluated: const_alpha == 256
TRUEFALSE
yes
Evaluation Count:44
yes
Evaluation Count:7
7-44
152 const __m128i alphaMask = _mm_set1_epi32(0xff000000);
executed (the execution status of this line is deduced): const __m128i alphaMask = _mm_set1_epi32(0xff000000);
-
153 const __m128i nullVector = _mm_setzero_si128();
executed (the execution status of this line is deduced): const __m128i nullVector = _mm_setzero_si128();
-
154 const __m128i half = _mm_set1_epi16(0x80);
executed (the execution status of this line is deduced): const __m128i half = _mm_set1_epi16(0x80);
-
155 const __m128i one = _mm_set1_epi16(0xff);
executed (the execution status of this line is deduced): const __m128i one = _mm_set1_epi16(0xff);
-
156 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
executed (the execution status of this line is deduced): const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
-
157 -
158 for (int y = 0; y < h; ++y) {
evaluated: y < h
TRUEFALSE
yes
Evaluation Count:4052
yes
Evaluation Count:44
44-4052
159 BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask);
executed: }
Execution Count:1481
executed: }
Execution Count:81444
executed: }
Execution Count:802
executed: }
Execution Count:3242
executed: }
Execution Count:874
executed: }
Execution Count:1479
executed: }
Execution Count:6444
executed: break;
Execution Count:534
executed: }
Execution Count:304
executed: }
Execution Count:371
executed: }
Execution Count:860
executed: break;
Execution Count:166
executed: }
Execution Count:144
executed: }
Execution Count:323
executed: }
Execution Count:588
executed: break;
Execution Count:110
executed: }
Execution Count:810
executed: blend_pixel(dst[x], src[x]);
Execution Count:3042
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:802
yes
Evaluation Count:70109
partially evaluated: (w - x) >= 8
TRUEFALSE
yes
Evaluation Count:810
no
Evaluation Count:0
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:1479
yes
Evaluation Count:4091
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:371
yes
Evaluation Count:185
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:323
yes
Evaluation Count:121
evaluated: !minusOffsetToAlignSrcOn16Bytes
TRUEFALSE
yes
Evaluation Count:3242
yes
Evaluation Count:810
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
TRUEFALSE
yes
Evaluation Count:81444
yes
Evaluation Count:70911
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
TRUEFALSE
yes
Evaluation Count:874
yes
Evaluation Count:5570
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
TRUEFALSE
yes
Evaluation Count:304
yes
Evaluation Count:556
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
TRUEFALSE
yes
Evaluation Count:144
yes
Evaluation Count:444
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
TRUEFALSE
yes
Evaluation Count:1481
yes
Evaluation Count:4052
evaluated: x < w-3
TRUEFALSE
yes
Evaluation Count:152355
yes
Evaluation Count:3242
evaluated: x-minusOffsetToAlignSrcOn16Bytes < w-7
TRUEFALSE
yes
Evaluation Count:6444
yes
Evaluation Count:534
evaluated: x-minusOffsetToAlignSrcOn16Bytes < w-7
TRUEFALSE
yes
Evaluation Count:860
yes
Evaluation Count:166
evaluated: x-minusOffsetToAlignSrcOn16Bytes < w-7
TRUEFALSE
yes
Evaluation Count:588
yes
Evaluation Count:110
evaluated: x < w
TRUEFALSE
yes
Evaluation Count:3042
yes
Evaluation Count:4052
0-152355
160 dst = (quint32 *)(((uchar *) dst) + dbpl);
executed (the execution status of this line is deduced): dst = (quint32 *)(((uchar *) dst) + dbpl);
-
161 src = (const quint32 *)(((const uchar *) src) + sbpl);
executed (the execution status of this line is deduced): src = (const quint32 *)(((const uchar *) src) + sbpl);
-
162 }
executed: }
Execution Count:4052
4052
163 } else if (const_alpha != 0) {
executed: }
Execution Count:44
partially evaluated: const_alpha != 0
TRUEFALSE
yes
Evaluation Count:7
no
Evaluation Count:0
0-44
164 // dest = (s + d * sia) * ca + d * cia -
165 // = s * ca + d * (sia * ca + cia) -
166 // = s * ca + d * (1 - sa*ca) -
167 const_alpha = (const_alpha * 255) >> 8;
executed (the execution status of this line is deduced): const_alpha = (const_alpha * 255) >> 8;
-
168 const __m128i nullVector = _mm_setzero_si128();
executed (the execution status of this line is deduced): const __m128i nullVector = _mm_setzero_si128();
-
169 const __m128i half = _mm_set1_epi16(0x80);
executed (the execution status of this line is deduced): const __m128i half = _mm_set1_epi16(0x80);
-
170 const __m128i one = _mm_set1_epi16(0xff);
executed (the execution status of this line is deduced): const __m128i one = _mm_set1_epi16(0xff);
-
171 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
executed (the execution status of this line is deduced): const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
-
172 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
executed (the execution status of this line is deduced): const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
-
173 for (int y = 0; y < h; ++y) {
evaluated: y < h
TRUEFALSE
yes
Evaluation Count:780
yes
Evaluation Count:7
7-780
174 BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
executed: }
Execution Count:2304
executed: }
Execution Count:2304
executed: }
Execution Count:23076
executed: }
Execution Count:23076
executed: }
Execution Count:2304
executed: }
Execution Count:2304
partially evaluated: s != 0
TRUEFALSE
yes
Evaluation Count:2304
no
Evaluation Count:0
partially evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:23076
no
Evaluation Count:0
partially evaluated: s != 0
TRUEFALSE
yes
Evaluation Count:2304
no
Evaluation Count:0
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
TRUEFALSE
yes
Evaluation Count:2304
yes
Evaluation Count:780
evaluated: x < w-3
TRUEFALSE
yes
Evaluation Count:23076
yes
Evaluation Count:780
evaluated: x < w
TRUEFALSE
yes
Evaluation Count:2304
yes
Evaluation Count:780
0-23076
175 dst = (quint32 *)(((uchar *) dst) + dbpl);
executed (the execution status of this line is deduced): dst = (quint32 *)(((uchar *) dst) + dbpl);
-
176 src = (const quint32 *)(((const uchar *) src) + sbpl);
executed (the execution status of this line is deduced): src = (const quint32 *)(((const uchar *) src) + sbpl);
-
177 }
executed: }
Execution Count:780
780
178 }
executed: }
Execution Count:7
7
179} -
180 -
181QT_END_NAMESPACE -
182 -
183#endif // QT_COMPILER_SUPPORTS_SSSE3 -
184 -
Source codeSwitch to Preprocessed file

Generated by Squish Coco Non-Commercial