Line | Source Code | Coverage |
---|
1 | /**************************************************************************** | - |
2 | ** | - |
3 | ** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies). | - |
4 | ** Contact: http://www.qt-project.org/legal | - |
5 | ** | - |
6 | ** This file is part of the QtGui module of the Qt Toolkit. | - |
7 | ** | - |
8 | ** $QT_BEGIN_LICENSE:LGPL$ | - |
9 | ** Commercial License Usage | - |
10 | ** Licensees holding valid commercial Qt licenses may use this file in | - |
11 | ** accordance with the commercial license agreement provided with the | - |
12 | ** Software or, alternatively, in accordance with the terms contained in | - |
13 | ** a written agreement between you and Digia. For licensing terms and | - |
14 | ** conditions see http://qt.digia.com/licensing. For further information | - |
15 | ** use the contact form at http://qt.digia.com/contact-us. | - |
16 | ** | - |
17 | ** GNU Lesser General Public License Usage | - |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser | - |
19 | ** General Public License version 2.1 as published by the Free Software | - |
20 | ** Foundation and appearing in the file LICENSE.LGPL included in the | - |
21 | ** packaging of this file. Please review the following information to | - |
22 | ** ensure the GNU Lesser General Public License version 2.1 requirements | - |
23 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. | - |
24 | ** | - |
25 | ** In addition, as a special exception, Digia gives you certain additional | - |
26 | ** rights. These rights are described in the Digia Qt LGPL Exception | - |
27 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. | - |
28 | ** | - |
29 | ** GNU General Public License Usage | - |
30 | ** Alternatively, this file may be used under the terms of the GNU | - |
31 | ** General Public License version 3.0 as published by the Free Software | - |
32 | ** Foundation and appearing in the file LICENSE.GPL included in the | - |
33 | ** packaging of this file. Please review the following information to | - |
34 | ** ensure the GNU General Public License version 3.0 requirements will be | - |
35 | ** met: http://www.gnu.org/copyleft/gpl.html. | - |
36 | ** | - |
37 | ** | - |
38 | ** $QT_END_LICENSE$ | - |
39 | ** | - |
40 | ****************************************************************************/ | - |
41 | | - |
42 | #include <private/qdrawhelper_x86_p.h> | - |
43 | | - |
44 | #ifdef QT_COMPILER_SUPPORTS_SSE2 | - |
45 | | - |
46 | #include <private/qdrawingprimitive_sse2_p.h> | - |
47 | #include <private/qpaintengine_raster_p.h> | - |
48 | | - |
49 | QT_BEGIN_NAMESPACE | - |
50 | | - |
51 | #ifndef QDRAWHELPER_AVX | - |
52 | // in AVX mode, we'll use the SSSE3 code | - |
53 | void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, | - |
54 | const uchar *srcPixels, int sbpl, | - |
55 | int w, int h, | - |
56 | int const_alpha) | - |
57 | { | - |
58 | const quint32 *src = (const quint32 *) srcPixels; never executed (the execution status of this line is deduced): const quint32 *src = (const quint32 *) srcPixels; | - |
59 | quint32 *dst = (quint32 *) destPixels; never executed (the execution status of this line is deduced): quint32 *dst = (quint32 *) destPixels; | - |
60 | if (const_alpha == 256) { never evaluated: const_alpha == 256 | 0 |
61 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); never executed (the execution status of this line is deduced): const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - |
62 | const __m128i nullVector = _mm_set1_epi32(0); never executed (the execution status of this line is deduced): const __m128i nullVector = _mm_set1_epi32(0); | - |
63 | const __m128i half = _mm_set1_epi16(0x80); never executed (the execution status of this line is deduced): const __m128i half = _mm_set1_epi16(0x80); | - |
64 | const __m128i one = _mm_set1_epi16(0xff); never executed (the execution status of this line is deduced): const __m128i one = _mm_set1_epi16(0xff); | - |
65 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); never executed (the execution status of this line is deduced): const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
66 | for (int y = 0; y < h; ++y) { | 0 |
67 | BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, w, nullVector, half, one, colorMask, alphaMask); never executed: dst[x] = s; never executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); never executed: } never executed: } never executed: dst[x] = s; never executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); never evaluated: s != 0 never evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff never evaluated: s != 0 never evaluated: s >= 0xff000000 never evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff never evaluated: s >= 0xff000000 never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) never evaluated: x < w-3 never evaluated: x < w | 0 |
68 | dst = (quint32 *)(((uchar *) dst) + dbpl); never executed (the execution status of this line is deduced): dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
69 | src = (const quint32 *)(((const uchar *) src) + sbpl); never executed (the execution status of this line is deduced): src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
70 | } | 0 |
71 | } else if (const_alpha != 0) { never executed: } never evaluated: const_alpha != 0 | 0 |
72 | // dest = (s + d * sia) * ca + d * cia | - |
73 | // = s * ca + d * (sia * ca + cia) | - |
74 | // = s * ca + d * (1 - sa*ca) | - |
75 | const_alpha = (const_alpha * 255) >> 8; never executed (the execution status of this line is deduced): const_alpha = (const_alpha * 255) >> 8; | - |
76 | const __m128i nullVector = _mm_set1_epi32(0); never executed (the execution status of this line is deduced): const __m128i nullVector = _mm_set1_epi32(0); | - |
77 | const __m128i half = _mm_set1_epi16(0x80); never executed (the execution status of this line is deduced): const __m128i half = _mm_set1_epi16(0x80); | - |
78 | const __m128i one = _mm_set1_epi16(0xff); never executed (the execution status of this line is deduced): const __m128i one = _mm_set1_epi16(0xff); | - |
79 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); never executed (the execution status of this line is deduced): const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
80 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); never executed (the execution status of this line is deduced): const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
81 | for (int y = 0; y < h; ++y) { | 0 |
82 | BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector) never executed: } never executed: } never executed: } never executed: } never executed: } never executed: } never evaluated: s != 0 never evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff never evaluated: s != 0 never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) never evaluated: x < w-3 never evaluated: x < w | 0 |
83 | dst = (quint32 *)(((uchar *) dst) + dbpl); never executed (the execution status of this line is deduced): dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
84 | src = (const quint32 *)(((const uchar *) src) + sbpl); never executed (the execution status of this line is deduced): src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
85 | } | 0 |
86 | } | 0 |
87 | } | - |
88 | #endif | - |
89 | | - |
90 | // qblendfunctions.cpp | - |
91 | void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl, | - |
92 | const uchar *srcPixels, int sbpl, | - |
93 | int w, int h, | - |
94 | int const_alpha); | - |
95 | | - |
96 | void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, | - |
97 | const uchar *srcPixels, int sbpl, | - |
98 | int w, int h, | - |
99 | int const_alpha) | - |
100 | { | - |
101 | const quint32 *src = (const quint32 *) srcPixels; executed (the execution status of this line is deduced): const quint32 *src = (const quint32 *) srcPixels; | - |
102 | quint32 *dst = (quint32 *) destPixels; executed (the execution status of this line is deduced): quint32 *dst = (quint32 *) destPixels; | - |
103 | if (const_alpha != 256) { evaluated: const_alpha != 256 yes Evaluation Count:9 | yes Evaluation Count:40 |
| 9-40 |
104 | if (const_alpha != 0) { partially evaluated: const_alpha != 0 yes Evaluation Count:9 | no Evaluation Count:0 |
| 0-9 |
105 | const __m128i nullVector = _mm_set1_epi32(0); executed (the execution status of this line is deduced): const __m128i nullVector = _mm_set1_epi32(0); | - |
106 | const __m128i half = _mm_set1_epi16(0x80); executed (the execution status of this line is deduced): const __m128i half = _mm_set1_epi16(0x80); | - |
107 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); executed (the execution status of this line is deduced): const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
108 | | - |
109 | const_alpha = (const_alpha * 255) >> 8; executed (the execution status of this line is deduced): const_alpha = (const_alpha * 255) >> 8; | - |
110 | int one_minus_const_alpha = 255 - const_alpha; executed (the execution status of this line is deduced): int one_minus_const_alpha = 255 - const_alpha; | - |
111 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); executed (the execution status of this line is deduced): const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
112 | const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); executed (the execution status of this line is deduced): const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); | - |
113 | for (int y = 0; y < h; ++y) { evaluated: y < h yes Evaluation Count:958 | yes Evaluation Count:9 |
| 9-958 |
114 | int x = 0; executed (the execution status of this line is deduced): int x = 0; | - |
115 | | - |
116 | // First, align dest to 16 bytes: | - |
117 | ALIGNMENT_PROLOGUE_16BYTES(dst, x, w) { evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:2354 | yes Evaluation Count:958 |
| 958-2354 |
118 | dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); executed (the execution status of this line is deduced): dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); | - |
119 | } executed: } Execution Count:2354 | 2354 |
120 | | - |
121 | for (; x < w-3; x += 4) { evaluated: x < w-3 yes Evaluation Count:31868 | yes Evaluation Count:958 |
| 958-31868 |
122 | __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); executed (the execution status of this line is deduced): __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); | - |
123 | if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { partially evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff yes Evaluation Count:31868 | no Evaluation Count:0 |
| 0-31868 |
124 | const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); executed (the execution status of this line is deduced): const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
125 | __m128i result; executed (the execution status of this line is deduced): __m128i result; | - |
126 | INTERPOLATE_PIXEL_255_SSE2(result, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half); executed (the execution status of this line is deduced): { __m128i srcVectorAG = _mm_srli_epi16(srcVector, 8); __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, constAlphaVector); __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusConstAlpha); __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); finalAG = _mm_add_epi16(finalAG, half); finalAG = _mm_andnot_si128(colorMask, finalAG); __m128i srcVectorRB = _mm_and_si128(srcVector, colorMask); __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, constAlphaVector); __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusConstAlpha); __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); finalRB = _mm_add_epi16(finalRB, half); finalRB = _mm_srli_epi16(finalRB, 8); result = _mm_or_si128(finalAG, finalRB); }; | - |
127 | _mm_store_si128((__m128i *)&dst[x], result); executed (the execution status of this line is deduced): _mm_store_si128((__m128i *)&dst[x], result); | - |
128 | } executed: } Execution Count:31868 | 31868 |
129 | } executed: } Execution Count:31868 | 31868 |
130 | for (; x<w; ++x) { evaluated: x<w yes Evaluation Count:2354 | yes Evaluation Count:958 |
| 958-2354 |
131 | dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); executed (the execution status of this line is deduced): dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); | - |
132 | } executed: } Execution Count:2354 | 2354 |
133 | dst = (quint32 *)(((uchar *) dst) + dbpl); executed (the execution status of this line is deduced): dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
134 | src = (const quint32 *)(((const uchar *) src) + sbpl); executed (the execution status of this line is deduced): src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
135 | } executed: } Execution Count:958 | 958 |
136 | } executed: } Execution Count:9 | 9 |
137 | } else { executed: } Execution Count:9 | 9 |
138 | qt_blend_rgb32_on_rgb32(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); executed (the execution status of this line is deduced): qt_blend_rgb32_on_rgb32(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); | - |
139 | } executed: } Execution Count:40 | 40 |
140 | } | - |
141 | | - |
142 | void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) | - |
143 | { | - |
144 | Q_ASSERT(const_alpha < 256); executed (the execution status of this line is deduced): qt_noop(); | - |
145 | | - |
146 | const quint32 *src = (const quint32 *) srcPixels; executed (the execution status of this line is deduced): const quint32 *src = (const quint32 *) srcPixels; | - |
147 | quint32 *dst = (quint32 *) destPixels; executed (the execution status of this line is deduced): quint32 *dst = (quint32 *) destPixels; | - |
148 | | - |
149 | const __m128i nullVector = _mm_set1_epi32(0); executed (the execution status of this line is deduced): const __m128i nullVector = _mm_set1_epi32(0); | - |
150 | const __m128i half = _mm_set1_epi16(0x80); executed (the execution status of this line is deduced): const __m128i half = _mm_set1_epi16(0x80); | - |
151 | const __m128i one = _mm_set1_epi16(0xff); executed (the execution status of this line is deduced): const __m128i one = _mm_set1_epi16(0xff); | - |
152 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); executed (the execution status of this line is deduced): const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
153 | if (const_alpha == 255) { evaluated: const_alpha == 255 yes Evaluation Count:1136722 | yes Evaluation Count:63433 |
| 63433-1136722 |
154 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); executed (the execution status of this line is deduced): const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - |
155 | BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask); executed: dst[x] = s; Execution Count:11856 executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); Execution Count:56107 executed: } Execution Count:135056 executed: } Execution Count:2381974 executed: dst[x] = s; Execution Count:157247 executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); Execution Count:26316 evaluated: s != 0 yes Evaluation Count:56107 | yes Evaluation Count:11465 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff yes Evaluation Count:2381974 | yes Evaluation Count:128158 |
evaluated: s != 0 yes Evaluation Count:26316 | yes Evaluation Count:150873 |
evaluated: s >= 0xff000000 yes Evaluation Count:11856 | yes Evaluation Count:67572 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff yes Evaluation Count:135056 | yes Evaluation Count:2510132 |
evaluated: s >= 0xff000000 yes Evaluation Count:157247 | yes Evaluation Count:177189 |
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:79428 | yes Evaluation Count:1136722 |
evaluated: x < length-3 yes Evaluation Count:2645188 | yes Evaluation Count:1136722 |
evaluated: x < length yes Evaluation Count:334436 | yes Evaluation Count:1136722 |
| 11465-2645188 |
156 | } else { executed: } Execution Count:1136722 | 1136722 |
157 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); executed (the execution status of this line is deduced): const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
158 | BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector); executed: } Execution Count:39788 executed: } Execution Count:40704 executed: } Execution Count:1170552 executed: } Execution Count:1170552 executed: } Execution Count:144150 executed: } Execution Count:144461 evaluated: s != 0 yes Evaluation Count:39788 | yes Evaluation Count:916 |
partially evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff yes Evaluation Count:1170552 | no Evaluation Count:0 |
evaluated: s != 0 yes Evaluation Count:144150 | yes Evaluation Count:311 |
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:40704 | yes Evaluation Count:63433 |
evaluated: x < length-3 yes Evaluation Count:1170552 | yes Evaluation Count:63433 |
evaluated: x < length yes Evaluation Count:144461 | yes Evaluation Count:63433 |
| 0-1170552 |
159 | } executed: } Execution Count:63433 | 63433 |
160 | } | - |
161 | | - |
162 | void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha) | - |
163 | { | - |
164 | int x = 0; never executed (the execution status of this line is deduced): int x = 0; | - |
165 | | - |
166 | if (const_alpha == 255) { never evaluated: const_alpha == 255 | 0 |
167 | // 1) Prologue: align destination on 16 bytes | - |
168 | ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) | 0 |
169 | dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); never executed: dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); | 0 |
170 | | - |
171 | // 2) composition with SSE2 | - |
172 | for (; x < length - 3; x += 4) { never evaluated: x < length - 3 | 0 |
173 | const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); never executed (the execution status of this line is deduced): const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); | - |
174 | const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); never executed (the execution status of this line is deduced): const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
175 | | - |
176 | const __m128i result = _mm_adds_epu8(srcVector, dstVector); never executed (the execution status of this line is deduced): const __m128i result = _mm_adds_epu8(srcVector, dstVector); | - |
177 | _mm_store_si128((__m128i *)&dst[x], result); never executed (the execution status of this line is deduced): _mm_store_si128((__m128i *)&dst[x], result); | - |
178 | } | 0 |
179 | | - |
180 | // 3) Epilogue: | - |
181 | for (; x < length; ++x) never evaluated: x < length | 0 |
182 | dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); never executed: dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); | 0 |
183 | } else { | 0 |
184 | const int one_minus_const_alpha = 255 - const_alpha; never executed (the execution status of this line is deduced): const int one_minus_const_alpha = 255 - const_alpha; | - |
185 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); never executed (the execution status of this line is deduced): const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
186 | const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); never executed (the execution status of this line is deduced): const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); | - |
187 | | - |
188 | // 1) Prologue: align destination on 16 bytes | - |
189 | ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) | 0 |
190 | dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); never executed: dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); | 0 |
191 | | - |
192 | const __m128i half = _mm_set1_epi16(0x80); never executed (the execution status of this line is deduced): const __m128i half = _mm_set1_epi16(0x80); | - |
193 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); never executed (the execution status of this line is deduced): const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
194 | // 2) composition with SSE2 | - |
195 | for (; x < length - 3; x += 4) { never evaluated: x < length - 3 | 0 |
196 | const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); never executed (the execution status of this line is deduced): const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); | - |
197 | const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); never executed (the execution status of this line is deduced): const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
198 | | - |
199 | __m128i result = _mm_adds_epu8(srcVector, dstVector); never executed (the execution status of this line is deduced): __m128i result = _mm_adds_epu8(srcVector, dstVector); | - |
200 | INTERPOLATE_PIXEL_255_SSE2(result, result, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half) never executed (the execution status of this line is deduced): { __m128i srcVectorAG = _mm_srli_epi16(result, 8); __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, constAlphaVector); __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusConstAlpha); __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); finalAG = _mm_add_epi16(finalAG, half); finalAG = _mm_andnot_si128(colorMask, finalAG); __m128i srcVectorRB = _mm_and_si128(result, colorMask); __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, constAlphaVector); __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusConstAlpha); __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); finalRB = _mm_add_epi16(finalRB, half); finalRB = _mm_srli_epi16(finalRB, 8); result = _mm_or_si128(finalAG, finalRB); } | - |
201 | _mm_store_si128((__m128i *)&dst[x], result); never executed (the execution status of this line is deduced): _mm_store_si128((__m128i *)&dst[x], result); | - |
202 | } | 0 |
203 | | - |
204 | // 3) Epilogue: | - |
205 | for (; x < length; ++x) never evaluated: x < length | 0 |
206 | dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); never executed: dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); | 0 |
207 | } | 0 |
208 | } | - |
209 | | - |
210 | void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha) | - |
211 | { | - |
212 | if (const_alpha == 255) { evaluated: const_alpha == 255 yes Evaluation Count:63722 | yes Evaluation Count:3248 |
| 3248-63722 |
213 | ::memcpy(dst, src, length * sizeof(uint)); executed (the execution status of this line is deduced): ::memcpy(dst, src, length * sizeof(uint)); | - |
214 | } else { executed: } Execution Count:63728 | 63728 |
215 | const int ialpha = 255 - const_alpha; executed (the execution status of this line is deduced): const int ialpha = 255 - const_alpha; | - |
216 | | - |
217 | int x = 0; executed (the execution status of this line is deduced): int x = 0; | - |
218 | | - |
219 | // 1) prologue, align on 16 bytes | - |
220 | ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:2425 | yes Evaluation Count:3248 |
| 2425-3248 |
221 | dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); executed: dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); Execution Count:2425 | 2425 |
222 | | - |
223 | // 2) interpolate pixels with SSE2 | - |
224 | const __m128i half = _mm_set1_epi16(0x80); executed (the execution status of this line is deduced): const __m128i half = _mm_set1_epi16(0x80); | - |
225 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); executed (the execution status of this line is deduced): const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
226 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); executed (the execution status of this line is deduced): const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
227 | const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha); executed (the execution status of this line is deduced): const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha); | - |
228 | for (; x < length - 3; x += 4) { partially evaluated: x < length - 3 no Evaluation Count:0 | yes Evaluation Count:3248 |
| 0-3248 |
229 | const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); never executed (the execution status of this line is deduced): const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); | - |
230 | __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); never executed (the execution status of this line is deduced): __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
231 | INTERPOLATE_PIXEL_255_SSE2(dstVector, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half) never executed (the execution status of this line is deduced): { __m128i srcVectorAG = _mm_srli_epi16(srcVector, 8); __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, constAlphaVector); __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusConstAlpha); __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); finalAG = _mm_add_epi16(finalAG, half); finalAG = _mm_andnot_si128(colorMask, finalAG); __m128i srcVectorRB = _mm_and_si128(srcVector, colorMask); __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, constAlphaVector); __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusConstAlpha); __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); finalRB = _mm_add_epi16(finalRB, half); finalRB = _mm_srli_epi16(finalRB, 8); dstVector = _mm_or_si128(finalAG, finalRB); } | - |
232 | _mm_store_si128((__m128i *)&dst[x], dstVector); never executed (the execution status of this line is deduced): _mm_store_si128((__m128i *)&dst[x], dstVector); | - |
233 | } | 0 |
234 | | - |
235 | // 3) Epilogue | - |
236 | for (; x < length; ++x) evaluated: x < length yes Evaluation Count:835 | yes Evaluation Count:3248 |
| 835-3248 |
237 | dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); executed: dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); Execution Count:835 | 835 |
238 | } executed: } Execution Count:3248 | 3248 |
239 | } | - |
240 | | - |
241 | void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) | - |
242 | { | - |
243 | if (count < 7) { evaluated: count < 7 yes Evaluation Count:862603 | yes Evaluation Count:1366744 |
| 862603-1366744 |
244 | switch (count) { | - |
245 | case 6: *dest++ = value; executed (the execution status of this line is deduced): case 6: *dest++ = value; | - |
246 | case 5: *dest++ = value; code before this statement executed: case 5: Execution Count:161820 | 161820 |
247 | case 4: *dest++ = value; code before this statement executed: case 4: Execution Count:189515 | 189515 |
248 | case 3: *dest++ = value; code before this statement executed: case 3: Execution Count:215554 | 215554 |
249 | case 2: *dest++ = value; code before this statement executed: case 2: Execution Count:241583 | 241583 |
250 | case 1: *dest = value; code before this statement executed: case 1: Execution Count:323948 | 323948 |
251 | } executed: } Execution Count:862603 | 862603 |
252 | return; executed: return; Execution Count:862603 | 862603 |
253 | }; | - |
254 | | - |
255 | const int align = (quintptr)(dest) & 0xf; executed (the execution status of this line is deduced): const int align = (quintptr)(dest) & 0xf; | - |
256 | switch (align) { | - |
257 | case 4: *dest++ = value; --count; executed (the execution status of this line is deduced): case 4: *dest++ = value; --count; | - |
258 | case 8: *dest++ = value; --count; code before this statement executed: case 8: Execution Count:473513 | 473513 |
259 | case 12: *dest++ = value; --count; code before this statement executed: case 12: Execution Count:713293 | 713293 |
260 | } executed: } Execution Count:1036622 | 1036622 |
261 | | - |
262 | int count128 = count / 4; executed (the execution status of this line is deduced): int count128 = count / 4; | - |
263 | __m128i *dst128 = reinterpret_cast<__m128i*>(dest); executed (the execution status of this line is deduced): __m128i *dst128 = reinterpret_cast<__m128i*>(dest); | - |
264 | const __m128i value128 = _mm_set_epi32(value, value, value, value); executed (the execution status of this line is deduced): const __m128i value128 = _mm_set_epi32(value, value, value, value); | - |
265 | | - |
266 | int n = (count128 + 3) / 4; executed (the execution status of this line is deduced): int n = (count128 + 3) / 4; | - |
267 | switch (count128 & 0x3) { | - |
268 | case 0: do { _mm_stream_si128(dst128++, value128); executed (the execution status of this line is deduced): case 0: do { _mm_stream_si128(dst128++, value128); | - |
269 | case 3: _mm_stream_si128(dst128++, value128); code before this statement executed: case 3: Execution Count:14972429 | 14972429 |
270 | case 2: _mm_stream_si128(dst128++, value128); code before this statement executed: case 2: Execution Count:15192949 | 15192949 |
271 | case 1: _mm_stream_si128(dst128++, value128); code before this statement executed: case 1: Execution Count:15564464 | 15564464 |
272 | } while (--n > 0); executed: } Execution Count:16001814 evaluated: --n > 0 yes Evaluation Count:14635070 | yes Evaluation Count:1366744 |
| 1366744-16001814 |
273 | } executed: } Execution Count:1366744 | 1366744 |
274 | | - |
275 | const int rest = count & 0x3; executed (the execution status of this line is deduced): const int rest = count & 0x3; | - |
276 | if (rest) { evaluated: rest yes Evaluation Count:1091376 | yes Evaluation Count:275368 |
| 275368-1091376 |
277 | switch (rest) { | - |
278 | case 3: dest[count - 3] = value; executed (the execution status of this line is deduced): case 3: dest[count - 3] = value; | - |
279 | case 2: dest[count - 2] = value; code before this statement executed: case 2: Execution Count:543520 | 543520 |
280 | case 1: dest[count - 1] = value; code before this statement executed: case 1: Execution Count:811996 | 811996 |
281 | } executed: } Execution Count:1091376 | 1091376 |
282 | } executed: } Execution Count:1091376 | 1091376 |
283 | } executed: } Execution Count:1366744 | 1366744 |
284 | | - |
285 | void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha) | - |
286 | { | - |
287 | if ((const_alpha & qAlpha(color)) == 255) { evaluated: (const_alpha & qAlpha(color)) == 255 yes Evaluation Count:2037 | yes Evaluation Count:36156 |
| 2037-36156 |
288 | qt_memfill32_sse2(destPixels, color, length); executed (the execution status of this line is deduced): qt_memfill32_sse2(destPixels, color, length); | - |
289 | } else { executed: } Execution Count:2037 | 2037 |
290 | if (const_alpha != 255) evaluated: const_alpha != 255 yes Evaluation Count:18265 | yes Evaluation Count:17891 |
| 17891-18265 |
291 | color = BYTE_MUL(color, const_alpha); executed: color = BYTE_MUL(color, const_alpha); Execution Count:18265 | 18265 |
292 | | - |
293 | const quint32 minusAlphaOfColor = qAlpha(~color); executed (the execution status of this line is deduced): const quint32 minusAlphaOfColor = qAlpha(~color); | - |
294 | int x = 0; executed (the execution status of this line is deduced): int x = 0; | - |
295 | | - |
296 | quint32 *dst = (quint32 *) destPixels; executed (the execution status of this line is deduced): quint32 *dst = (quint32 *) destPixels; | - |
297 | const __m128i colorVector = _mm_set1_epi32(color); executed (the execution status of this line is deduced): const __m128i colorVector = _mm_set1_epi32(color); | - |
298 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); executed (the execution status of this line is deduced): const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
299 | const __m128i half = _mm_set1_epi16(0x80); executed (the execution status of this line is deduced): const __m128i half = _mm_set1_epi16(0x80); | - |
300 | const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor); executed (the execution status of this line is deduced): const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor); | - |
301 | | - |
302 | ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:32425 | yes Evaluation Count:36156 |
| 32425-36156 |
303 | destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); executed: destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); Execution Count:32425 | 32425 |
304 | | - |
305 | for (; x < length-3; x += 4) { evaluated: x < length-3 yes Evaluation Count:172971 | yes Evaluation Count:36156 |
| 36156-172971 |
306 | __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); executed (the execution status of this line is deduced): __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
307 | BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half); executed (the execution status of this line is deduced): { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, minusAlphaOfColorVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, minusAlphaOfColorVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); dstVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; | - |
308 | dstVector = _mm_add_epi8(colorVector, dstVector); executed (the execution status of this line is deduced): dstVector = _mm_add_epi8(colorVector, dstVector); | - |
309 | _mm_store_si128((__m128i *)&dst[x], dstVector); executed (the execution status of this line is deduced): _mm_store_si128((__m128i *)&dst[x], dstVector); | - |
310 | } executed: } Execution Count:172971 | 172971 |
311 | for (;x < length; ++x) evaluated: x < length yes Evaluation Count:18654 | yes Evaluation Count:36156 |
| 18654-36156 |
312 | destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); executed: destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); Execution Count:18654 | 18654 |
313 | } executed: } Execution Count:36156 | 36156 |
314 | } | - |
315 | | - |
316 | #ifndef QDRAWHELPER_AVX | - |
// Table of solid-colour composition functions used by the SSE2 code path.
// It is only compiled when QDRAWHELPER_AVX is not defined (see the
// enclosing #ifndef). Only the first entry (SourceOver) names an
// SSE2-specific function; every other slot names a function without the
// _sse2 suffix.
// NOTE(review): the entry order must match whatever enumeration indexes
// this array (presumably the composition-mode enum) -- do not reorder;
// confirm against the declaration of numCompositionFunctions.
CompositionFunctionSolid qt_functionForModeSolid_SSE2[numCompositionFunctions] = {
    comp_func_solid_SourceOver_sse2,
    comp_func_solid_DestinationOver,
    comp_func_solid_Clear,
    comp_func_solid_Source,
    comp_func_solid_Destination,
    comp_func_solid_SourceIn,
    comp_func_solid_DestinationIn,
    comp_func_solid_SourceOut,
    comp_func_solid_DestinationOut,
    comp_func_solid_SourceAtop,
    comp_func_solid_DestinationAtop,
    comp_func_solid_XOR,
    comp_func_solid_Plus,
    comp_func_solid_Multiply,
    comp_func_solid_Screen,
    comp_func_solid_Overlay,
    comp_func_solid_Darken,
    comp_func_solid_Lighten,
    comp_func_solid_ColorDodge,
    comp_func_solid_ColorBurn,
    comp_func_solid_HardLight,
    comp_func_solid_SoftLight,
    comp_func_solid_Difference,
    comp_func_solid_Exclusion,
    // Raster-operation entries follow the composition entries.
    rasterop_solid_SourceOrDestination,
    rasterop_solid_SourceAndDestination,
    rasterop_solid_SourceXorDestination,
    rasterop_solid_NotSourceAndNotDestination,
    rasterop_solid_NotSourceOrNotDestination,
    rasterop_solid_NotSourceXorDestination,
    rasterop_solid_NotSource,
    rasterop_solid_NotSourceAndDestination,
    rasterop_solid_SourceAndNotDestination,
    rasterop_solid_NotSourceOrDestination,
    rasterop_solid_SourceOrNotDestination,
    rasterop_solid_ClearDestination,
    rasterop_solid_SetDestination,
    rasterop_solid_NotDestination
};
357 | | - |
// Table of span composition functions used by the SSE2 code path.
// It is only compiled when QDRAWHELPER_AVX is not defined (see the
// enclosing #ifndef). The SourceOver, Source and Plus slots name
// SSE2-specific functions; every other slot names a function without the
// _sse2 suffix.
// NOTE(review): the entry order must match whatever enumeration indexes
// this array (presumably the composition-mode enum) -- do not reorder;
// confirm against the declaration of numCompositionFunctions.
CompositionFunction qt_functionForMode_SSE2[numCompositionFunctions] = {
    comp_func_SourceOver_sse2,
    comp_func_DestinationOver,
    comp_func_Clear,
    comp_func_Source_sse2,
    comp_func_Destination,
    comp_func_SourceIn,
    comp_func_DestinationIn,
    comp_func_SourceOut,
    comp_func_DestinationOut,
    comp_func_SourceAtop,
    comp_func_DestinationAtop,
    comp_func_XOR,
    comp_func_Plus_sse2,
    comp_func_Multiply,
    comp_func_Screen,
    comp_func_Overlay,
    comp_func_Darken,
    comp_func_Lighten,
    comp_func_ColorDodge,
    comp_func_ColorBurn,
    comp_func_HardLight,
    comp_func_SoftLight,
    comp_func_Difference,
    comp_func_Exclusion,
    // Raster-operation entries follow the composition entries.
    rasterop_SourceOrDestination,
    rasterop_SourceAndDestination,
    rasterop_SourceXorDestination,
    rasterop_NotSourceAndNotDestination,
    rasterop_NotSourceOrNotDestination,
    rasterop_NotSourceXorDestination,
    rasterop_NotSource,
    rasterop_NotSourceAndDestination,
    rasterop_SourceAndNotDestination,
    rasterop_NotSourceOrDestination,
    rasterop_SourceOrNotDestination,
    rasterop_ClearDestination,
    rasterop_SetDestination,
    rasterop_NotDestination
};
398 | #endif | - |
399 | | - |
400 | void qt_memfill16_sse2(quint16 *dest, quint16 value, int count) | - |
401 | { | - |
402 | if (count < 3) { evaluated: count < 3 yes Evaluation Count:7044494 | yes Evaluation Count:1413641 |
| 1413641-7044494 |
403 | switch (count) { | - |
404 | case 2: *dest++ = value; executed (the execution status of this line is deduced): case 2: *dest++ = value; | - |
405 | case 1: *dest = value; code before this statement executed: case 1: Execution Count:425755 | 425755 |
406 | } executed: } Execution Count:7044494 | 7044494 |
407 | return; executed: return; Execution Count:7044494 | 7044494 |
408 | } | - |
409 | | - |
410 | const int align = (quintptr)(dest) & 0x3; executed (the execution status of this line is deduced): const int align = (quintptr)(dest) & 0x3; | - |
411 | switch (align) { | - |
412 | case 2: *dest++ = value; --count; executed (the execution status of this line is deduced): case 2: *dest++ = value; --count; | - |
413 | } executed: } Execution Count:235663 | 235663 |
414 | | - |
415 | const quint32 value32 = (value << 16) | value; executed (the execution status of this line is deduced): const quint32 value32 = (value << 16) | value; | - |
416 | qt_memfill32_sse2(reinterpret_cast<quint32*>(dest), value32, count / 2); executed (the execution status of this line is deduced): qt_memfill32_sse2(reinterpret_cast<quint32*>(dest), value32, count / 2); | - |
417 | | - |
418 | if (count & 0x1) evaluated: count & 0x1 yes Evaluation Count:287920 | yes Evaluation Count:1125721 |
| 287920-1125721 |
419 | dest[count - 1] = value; executed: dest[count - 1] = value; Execution Count:287920 | 287920 |
420 | } executed: } Execution Count:1413641 | 1413641 |
421 | | - |
422 | void qt_bitmapblit32_sse2(QRasterBuffer *rasterBuffer, int x, int y, | - |
423 | quint32 color, | - |
424 | const uchar *src, int width, int height, int stride) | - |
425 | { | - |
426 | quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x; executed (the execution status of this line is deduced): quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x; | - |
427 | const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint32); executed (the execution status of this line is deduced): const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint32); | - |
428 | | - |
429 | const __m128i c128 = _mm_set1_epi32(color); executed (the execution status of this line is deduced): const __m128i c128 = _mm_set1_epi32(color); | - |
430 | const __m128i maskmask1 = _mm_set_epi32(0x10101010, 0x20202020, executed (the execution status of this line is deduced): const __m128i maskmask1 = _mm_set_epi32(0x10101010, 0x20202020, | - |
431 | 0x40404040, 0x80808080); executed (the execution status of this line is deduced): 0x40404040, 0x80808080); | - |
432 | const __m128i maskadd1 = _mm_set_epi32(0x70707070, 0x60606060, executed (the execution status of this line is deduced): const __m128i maskadd1 = _mm_set_epi32(0x70707070, 0x60606060, | - |
433 | 0x40404040, 0x00000000); executed (the execution status of this line is deduced): 0x40404040, 0x00000000); | - |
434 | | - |
435 | if (width > 4) { evaluated: width > 4 yes Evaluation Count:302 | yes Evaluation Count:68 |
| 68-302 |
436 | const __m128i maskmask2 = _mm_set_epi32(0x01010101, 0x02020202, executed (the execution status of this line is deduced): const __m128i maskmask2 = _mm_set_epi32(0x01010101, 0x02020202, | - |
437 | 0x04040404, 0x08080808); executed (the execution status of this line is deduced): 0x04040404, 0x08080808); | - |
438 | const __m128i maskadd2 = _mm_set_epi32(0x7f7f7f7f, 0x7e7e7e7e, executed (the execution status of this line is deduced): const __m128i maskadd2 = _mm_set_epi32(0x7f7f7f7f, 0x7e7e7e7e, | - |
439 | 0x7c7c7c7c, 0x78787878); executed (the execution status of this line is deduced): 0x7c7c7c7c, 0x78787878); | - |
440 | while (height--) { evaluated: height-- yes Evaluation Count:2738 | yes Evaluation Count:302 |
| 302-2738 |
441 | for (int x = 0; x < width; x += 8) { evaluated: x < width yes Evaluation Count:2754 | yes Evaluation Count:2738 |
| 2738-2754 |
442 | const quint8 s = src[x >> 3]; executed (the execution status of this line is deduced): const quint8 s = src[x >> 3]; | - |
443 | if (!s) evaluated: !s yes Evaluation Count:376 | yes Evaluation Count:2378 |
| 376-2378 |
444 | continue; executed: continue; Execution Count:376 | 376 |
445 | __m128i mask1 = _mm_set1_epi8(s); executed (the execution status of this line is deduced): __m128i mask1 = _mm_set1_epi8(s); | - |
446 | __m128i mask2 = mask1; executed (the execution status of this line is deduced): __m128i mask2 = mask1; | - |
447 | | - |
448 | mask1 = _mm_and_si128(mask1, maskmask1); executed (the execution status of this line is deduced): mask1 = _mm_and_si128(mask1, maskmask1); | - |
449 | mask1 = _mm_add_epi8(mask1, maskadd1); executed (the execution status of this line is deduced): mask1 = _mm_add_epi8(mask1, maskadd1); | - |
450 | _mm_maskmoveu_si128(c128, mask1, (char*)(dest + x)); executed (the execution status of this line is deduced): _mm_maskmoveu_si128(c128, mask1, (char*)(dest + x)); | - |
451 | mask2 = _mm_and_si128(mask2, maskmask2); executed (the execution status of this line is deduced): mask2 = _mm_and_si128(mask2, maskmask2); | - |
452 | mask2 = _mm_add_epi8(mask2, maskadd2); executed (the execution status of this line is deduced): mask2 = _mm_add_epi8(mask2, maskadd2); | - |
453 | _mm_maskmoveu_si128(c128, mask2, (char*)(dest + x + 4)); executed (the execution status of this line is deduced): _mm_maskmoveu_si128(c128, mask2, (char*)(dest + x + 4)); | - |
454 | } executed: } Execution Count:2378 | 2378 |
455 | dest += destStride; executed (the execution status of this line is deduced): dest += destStride; | - |
456 | src += stride; executed (the execution status of this line is deduced): src += stride; | - |
457 | } executed: } Execution Count:2738 | 2738 |
458 | } else { executed: } Execution Count:302 | 302 |
459 | while (height--) { evaluated: height-- yes Evaluation Count:660 | yes Evaluation Count:68 |
| 68-660 |
460 | const quint8 s = *src; executed (the execution status of this line is deduced): const quint8 s = *src; | - |
461 | if (s) { evaluated: s yes Evaluation Count:620 | yes Evaluation Count:40 |
| 40-620 |
462 | __m128i mask1 = _mm_set1_epi8(s); executed (the execution status of this line is deduced): __m128i mask1 = _mm_set1_epi8(s); | - |
463 | mask1 = _mm_and_si128(mask1, maskmask1); executed (the execution status of this line is deduced): mask1 = _mm_and_si128(mask1, maskmask1); | - |
464 | mask1 = _mm_add_epi8(mask1, maskadd1); executed (the execution status of this line is deduced): mask1 = _mm_add_epi8(mask1, maskadd1); | - |
465 | _mm_maskmoveu_si128(c128, mask1, (char*)(dest)); executed (the execution status of this line is deduced): _mm_maskmoveu_si128(c128, mask1, (char*)(dest)); | - |
466 | } executed: } Execution Count:620 | 620 |
467 | dest += destStride; executed (the execution status of this line is deduced): dest += destStride; | - |
468 | src += stride; executed (the execution status of this line is deduced): src += stride; | - |
469 | } executed: } Execution Count:660 | 660 |
470 | } executed: } Execution Count:68 | 68 |
471 | } | - |
472 | | - |
473 | void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y, | - |
474 | quint32 color, | - |
475 | const uchar *src, int width, int height, int stride) | - |
476 | { | - |
477 | const quint16 c = qConvertRgb32To16(color); never executed (the execution status of this line is deduced): const quint16 c = qConvertRgb32To16(color); | - |
478 | quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x; never executed (the execution status of this line is deduced): quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x; | - |
479 | const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16); never executed (the execution status of this line is deduced): const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16); | - |
480 | | - |
481 | const __m128i c128 = _mm_set1_epi16(c); never executed (the execution status of this line is deduced): const __m128i c128 = _mm_set1_epi16(c); | - |
482 | #if defined(Q_CC_MSVC) | - |
483 | # pragma warning(disable: 4309) // truncation of constant value | - |
484 | #endif | - |
485 | const __m128i maskmask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808, never executed (the execution status of this line is deduced): const __m128i maskmask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808, | - |
486 | 0x1010, 0x2020, 0x4040, 0x8080); never executed (the execution status of this line is deduced): 0x1010, 0x2020, 0x4040, 0x8080); | - |
487 | const __m128i maskadd = _mm_set_epi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878, never executed (the execution status of this line is deduced): const __m128i maskadd = _mm_set_epi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878, | - |
488 | 0x7070, 0x6060, 0x4040, 0x0000); never executed (the execution status of this line is deduced): 0x7070, 0x6060, 0x4040, 0x0000); | - |
489 | | - |
490 | while (height--) { never evaluated: height-- | 0 |
491 | for (int x = 0; x < width; x += 8) { never evaluated: x < width | 0 |
492 | const quint8 s = src[x >> 3]; never executed (the execution status of this line is deduced): const quint8 s = src[x >> 3]; | - |
493 | if (!s) | 0 |
494 | continue; never executed: continue; | 0 |
495 | __m128i mask = _mm_set1_epi8(s); never executed (the execution status of this line is deduced): __m128i mask = _mm_set1_epi8(s); | - |
496 | mask = _mm_and_si128(mask, maskmask); never executed (the execution status of this line is deduced): mask = _mm_and_si128(mask, maskmask); | - |
497 | mask = _mm_add_epi8(mask, maskadd); never executed (the execution status of this line is deduced): mask = _mm_add_epi8(mask, maskadd); | - |
498 | _mm_maskmoveu_si128(c128, mask, (char*)(dest + x)); never executed (the execution status of this line is deduced): _mm_maskmoveu_si128(c128, mask, (char*)(dest + x)); | - |
499 | } | 0 |
500 | dest += destStride; never executed (the execution status of this line is deduced): dest += destStride; | - |
501 | src += stride; never executed (the execution status of this line is deduced): src += stride; | - |
502 | } | 0 |
503 | } | 0 |
504 | | - |
/*!
    \internal

    Thin static wrapper exposing the SSE/SSE2 intrinsics under generic names
    (v_dup, v_add, v_max, ...) so that it can be passed as a policy type to
    templated code such as QRadialFetchSimd.

    Int32x4 is four packed 32-bit integers, Float32x4 four packed floats.
*/
class QSimdSse2
{
public:
    typedef __m128i Int32x4;
    typedef __m128 Float32x4;

    // Unions giving per-lane scalar access to a vector value.
    union Vect_buffer_i { Int32x4 v; int i[4]; };
    union Vect_buffer_f { Float32x4 v; float f[4]; };

    // Broadcast a scalar to all four lanes.
    static inline Float32x4 v_dup(float x) { return _mm_set1_ps(x); }
    // The double overload narrows to float before broadcasting.
    static inline Float32x4 v_dup(double x) { return _mm_set1_ps(static_cast<float>(x)); }
    static inline Int32x4 v_dup(int x) { return _mm_set1_epi32(x); }
    static inline Int32x4 v_dup(uint x) { return _mm_set1_epi32(x); }

    // Lane-wise addition.
    static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return _mm_add_ps(a, b); }
    static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return _mm_add_epi32(a, b); }

    // Lane-wise maximum / minimum. v_min_16 treats the register as eight
    // signed 16-bit values.
    static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return _mm_max_ps(a, b); }
    static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return _mm_min_ps(a, b); }
    static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return _mm_min_epi16(a, b); }

    // Bitwise AND of the full 128 bits.
    static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return _mm_and_si128(a, b); }

    // Lane-wise subtraction.
    static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return _mm_sub_ps(a, b); }
    static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return _mm_sub_epi32(a, b); }

    // Lane-wise multiplication.
    static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return _mm_mul_ps(a, b); }

    // Lane-wise square root.
    static inline Float32x4 v_sqrt(Float32x4 x) { return _mm_sqrt_ps(x); }

    // Float to int conversion with truncation.
    static inline Int32x4 v_toInt(Float32x4 x) { return _mm_cvttps_epi32(x); }

    // Lane-wise a >= b; each result lane is all ones when true, zero
    // otherwise. _mm_cmpge_ps is used so that equal lanes compare true, as
    // the function name promises.
    //
    // pre-VS 2008 doesn't have cast intrinsics, whereas 2008 and later requires it
    // (same deal with gcc prior to 4.0)
#if (defined(Q_CC_MSVC) && _MSC_VER < 1500) || (defined(Q_CC_GNU) && __GNUC__ < 4)
    static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b)
    {
        union Convert { Int32x4 vi; Float32x4 vf; } convert;
        convert.vf = _mm_cmpge_ps(a, b);
        return convert.vi;
    }
#else
    static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b) { return _mm_castps_si128(_mm_cmpge_ps(a, b)); }
#endif
};
550 | | - |
551 | const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data, | - |
552 | int y, int x, int length) | - |
553 | { | - |
554 | return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2> >(buffer, op, data, y, x, length); executed: return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2> >(buffer, op, data, y, x, length); Execution Count:28769 | 28769 |
555 | } | - |
556 | | - |
557 | void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, | - |
558 | const uchar *srcPixels, int sbpl, | - |
559 | const QRectF &targetRect, | - |
560 | const QRectF &sourceRect, | - |
561 | const QRect &clip, | - |
562 | int const_alpha) | - |
563 | { | - |
564 | if (const_alpha != 256) { partially evaluated: const_alpha != 256 no Evaluation Count:0 | yes Evaluation Count:1358 |
| 0-1358 |
565 | // from qblendfunctions.cpp | - |
566 | extern void qt_scale_image_argb32_on_argb32(uchar *destPixels, int dbpl, never executed (the execution status of this line is deduced): extern void qt_scale_image_argb32_on_argb32(uchar *destPixels, int dbpl, | - |
567 | const uchar *srcPixels, int sbpl, never executed (the execution status of this line is deduced): const uchar *srcPixels, int sbpl, | - |
568 | const QRectF &targetRect, never executed (the execution status of this line is deduced): const QRectF &targetRect, | - |
569 | const QRectF &sourceRect, never executed (the execution status of this line is deduced): const QRectF &sourceRect, | - |
570 | const QRect &clip, never executed (the execution status of this line is deduced): const QRect &clip, | - |
571 | int const_alpha); never executed (the execution status of this line is deduced): int const_alpha); | - |
572 | return qt_scale_image_argb32_on_argb32(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha); never executed: return qt_scale_image_argb32_on_argb32(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha); | 0 |
573 | } | - |
574 | | - |
575 | qreal sx = targetRect.width() / (qreal) sourceRect.width(); executed (the execution status of this line is deduced): qreal sx = targetRect.width() / (qreal) sourceRect.width(); | - |
576 | qreal sy = targetRect.height() / (qreal) sourceRect.height(); executed (the execution status of this line is deduced): qreal sy = targetRect.height() / (qreal) sourceRect.height(); | - |
577 | | - |
578 | int ix = 0x00010000 / sx; executed (the execution status of this line is deduced): int ix = 0x00010000 / sx; | - |
579 | int iy = 0x00010000 / sy; executed (the execution status of this line is deduced): int iy = 0x00010000 / sy; | - |
580 | | - |
581 | int cx1 = clip.x(); executed (the execution status of this line is deduced): int cx1 = clip.x(); | - |
582 | int cx2 = clip.x() + clip.width(); executed (the execution status of this line is deduced): int cx2 = clip.x() + clip.width(); | - |
583 | int cy1 = clip.top(); executed (the execution status of this line is deduced): int cy1 = clip.top(); | - |
584 | int cy2 = clip.y() + clip.height(); executed (the execution status of this line is deduced): int cy2 = clip.y() + clip.height(); | - |
585 | | - |
586 | int tx1 = qRound(targetRect.left()); executed (the execution status of this line is deduced): int tx1 = qRound(targetRect.left()); | - |
587 | int tx2 = qRound(targetRect.right()); executed (the execution status of this line is deduced): int tx2 = qRound(targetRect.right()); | - |
588 | int ty1 = qRound(targetRect.top()); executed (the execution status of this line is deduced): int ty1 = qRound(targetRect.top()); | - |
589 | int ty2 = qRound(targetRect.bottom()); executed (the execution status of this line is deduced): int ty2 = qRound(targetRect.bottom()); | - |
590 | | - |
591 | if (tx2 < tx1) evaluated: tx2 < tx1 yes Evaluation Count:6 | yes Evaluation Count:1352 |
| 6-1352 |
592 | qSwap(tx2, tx1); executed: qSwap(tx2, tx1); Execution Count:6 | 6 |
593 | if (ty2 < ty1) evaluated: ty2 < ty1 yes Evaluation Count:6 | yes Evaluation Count:1352 |
| 6-1352 |
594 | qSwap(ty2, ty1); executed: qSwap(ty2, ty1); Execution Count:6 | 6 |
595 | | - |
596 | if (tx1 < cx1) partially evaluated: tx1 < cx1 no Evaluation Count:0 | yes Evaluation Count:1358 |
| 0-1358 |
597 | tx1 = cx1; never executed: tx1 = cx1; | 0 |
598 | if (tx2 >= cx2) evaluated: tx2 >= cx2 yes Evaluation Count:1344 | yes Evaluation Count:14 |
| 14-1344 |
599 | tx2 = cx2; executed: tx2 = cx2; Execution Count:1344 | 1344 |
600 | | - |
601 | if (tx1 >= tx2) partially evaluated: tx1 >= tx2 no Evaluation Count:0 | yes Evaluation Count:1358 |
| 0-1358 |
602 | return; | 0 |
603 | | - |
604 | if (ty1 < cy1) partially evaluated: ty1 < cy1 no Evaluation Count:0 | yes Evaluation Count:1358 |
| 0-1358 |
605 | ty1 = cy1; never executed: ty1 = cy1; | 0 |
606 | if (ty2 >= cy2) evaluated: ty2 >= cy2 yes Evaluation Count:22 | yes Evaluation Count:1336 |
| 22-1336 |
607 | ty2 = cy2; executed: ty2 = cy2; Execution Count:22 | 22 |
608 | if (ty1 >= ty2) evaluated: ty1 >= ty2 yes Evaluation Count:449 | yes Evaluation Count:909 |
| 449-909 |
609 | return; executed: return; Execution Count:449 | 449 |
610 | | - |
611 | int h = ty2 - ty1; executed (the execution status of this line is deduced): int h = ty2 - ty1; | - |
612 | int w = tx2 - tx1; executed (the execution status of this line is deduced): int w = tx2 - tx1; | - |
613 | | - |
614 | quint32 basex; executed (the execution status of this line is deduced): quint32 basex; | - |
615 | quint32 srcy; executed (the execution status of this line is deduced): quint32 srcy; | - |
616 | | - |
617 | if (sx < 0) { evaluated: sx < 0 yes Evaluation Count:6 | yes Evaluation Count:903 |
| 6-903 |
618 | int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1; executed (the execution status of this line is deduced): int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1; | - |
619 | basex = quint32(sourceRect.right() * 65536) + dstx; executed (the execution status of this line is deduced): basex = quint32(sourceRect.right() * 65536) + dstx; | - |
620 | } else { executed: } Execution Count:6 | 6 |
621 | int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1; executed (the execution status of this line is deduced): int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1; | - |
622 | basex = quint32(sourceRect.left() * 65536) + dstx; executed (the execution status of this line is deduced): basex = quint32(sourceRect.left() * 65536) + dstx; | - |
623 | } executed: } Execution Count:903 | 903 |
624 | if (sy < 0) { evaluated: sy < 0 yes Evaluation Count:6 | yes Evaluation Count:903 |
| 6-903 |
625 | int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1; executed (the execution status of this line is deduced): int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1; | - |
626 | srcy = quint32(sourceRect.bottom() * 65536) + dsty; executed (the execution status of this line is deduced): srcy = quint32(sourceRect.bottom() * 65536) + dsty; | - |
627 | } else { executed: } Execution Count:6 | 6 |
628 | int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1; executed (the execution status of this line is deduced): int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1; | - |
629 | srcy = quint32(sourceRect.top() * 65536) + dsty; executed (the execution status of this line is deduced): srcy = quint32(sourceRect.top() * 65536) + dsty; | - |
630 | } executed: } Execution Count:903 | 903 |
631 | | - |
632 | quint32 *dst = ((quint32 *) (destPixels + ty1 * dbpl)) + tx1; executed (the execution status of this line is deduced): quint32 *dst = ((quint32 *) (destPixels + ty1 * dbpl)) + tx1; | - |
633 | | - |
634 | const __m128i nullVector = _mm_set1_epi32(0); executed (the execution status of this line is deduced): const __m128i nullVector = _mm_set1_epi32(0); | - |
635 | const __m128i half = _mm_set1_epi16(0x80); executed (the execution status of this line is deduced): const __m128i half = _mm_set1_epi16(0x80); | - |
636 | const __m128i one = _mm_set1_epi16(0xff); executed (the execution status of this line is deduced): const __m128i one = _mm_set1_epi16(0xff); | - |
637 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); executed (the execution status of this line is deduced): const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
638 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); executed (the execution status of this line is deduced): const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - |
639 | const __m128i ixVector = _mm_set1_epi32(4*ix); executed (the execution status of this line is deduced): const __m128i ixVector = _mm_set1_epi32(4*ix); | - |
640 | | - |
641 | | - |
642 | while (h--) { evaluated: h-- yes Evaluation Count:1764 | yes Evaluation Count:909 |
| 909-1764 |
643 | const uint *src = (const quint32 *) (srcPixels + (srcy >> 16) * sbpl); executed (the execution status of this line is deduced): const uint *src = (const quint32 *) (srcPixels + (srcy >> 16) * sbpl); | - |
644 | int srcx = basex; executed (the execution status of this line is deduced): int srcx = basex; | - |
645 | int x = 0; executed (the execution status of this line is deduced): int x = 0; | - |
646 | | - |
647 | ALIGNMENT_PROLOGUE_16BYTES(dst, x, w) { evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:752 | yes Evaluation Count:1764 |
| 752-1764 |
648 | uint s = src[srcx >> 16]; executed (the execution status of this line is deduced): uint s = src[srcx >> 16]; | - |
649 | dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); executed (the execution status of this line is deduced): dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); | - |
650 | srcx += ix; executed (the execution status of this line is deduced): srcx += ix; | - |
651 | } executed: } Execution Count:752 | 752 |
652 | | - |
653 | __m128i srcxVector = _mm_set_epi32(srcx, srcx + ix, srcx + ix + ix, srcx + ix + ix + ix); executed (the execution status of this line is deduced): __m128i srcxVector = _mm_set_epi32(srcx, srcx + ix, srcx + ix + ix, srcx + ix + ix + ix); | - |
654 | | - |
655 | for (; x<w - 3; x += 4) { evaluated: x<w - 3 yes Evaluation Count:40635 | yes Evaluation Count:1764 |
| 1764-40635 |
656 | union Vect_buffer { __m128i vect; quint32 i[4]; }; executed (the execution status of this line is deduced): union Vect_buffer { __m128i vect; quint32 i[4]; }; | - |
657 | Vect_buffer addr; executed (the execution status of this line is deduced): Vect_buffer addr; | - |
658 | addr.vect = _mm_srli_epi32(srcxVector, 16); executed (the execution status of this line is deduced): addr.vect = _mm_srli_epi32(srcxVector, 16); | - |
659 | srcxVector = _mm_add_epi32(srcxVector, ixVector); executed (the execution status of this line is deduced): srcxVector = _mm_add_epi32(srcxVector, ixVector); | - |
660 | | - |
661 | const __m128i srcVector = _mm_set_epi32(src[addr.i[0]], src[addr.i[1]], src[addr.i[2]], src[addr.i[3]]); executed (the execution status of this line is deduced): const __m128i srcVector = _mm_set_epi32(src[addr.i[0]], src[addr.i[1]], src[addr.i[2]], src[addr.i[3]]); | - |
662 | BLEND_SOURCE_OVER_ARGB32_SSE2_helper(dst, srcVector, nullVector, half, one, colorMask, alphaMask); executed: } Execution Count:34682 executed: } Execution Count:607 evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff yes Evaluation Count:607 | yes Evaluation Count:5346 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff yes Evaluation Count:34682 | yes Evaluation Count:5953 |
| 607-34682 |
663 | } | - |
664 | | - |
665 | for (; x<w; x++) { evaluated: x<w yes Evaluation Count:970 | yes Evaluation Count:1764 |
| 970-1764 |
666 | uint s = src[(basex + x*ix) >> 16]; executed (the execution status of this line is deduced): uint s = src[(basex + x*ix) >> 16]; | - |
667 | dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); executed (the execution status of this line is deduced): dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); | - |
668 | } executed: } Execution Count:970 | 970 |
669 | dst = (quint32 *)(((uchar *) dst) + dbpl); executed (the execution status of this line is deduced): dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
670 | srcy += iy; executed (the execution status of this line is deduced): srcy += iy; | - |
671 | } executed: } Execution Count:1764 | 1764 |
672 | } executed: } Execution Count:909 | 909 |
673 | | - |
674 | | - |
675 | QT_END_NAMESPACE | - |
676 | | - |
677 | #endif // QT_COMPILER_SUPPORTS_SSE2 | - |
678 | | - |
| | |