painting/qdrawhelper_sse2.cpp

Switch to Source codePreprocessed file
LineSource CodeCoverage
1 -
2 -
3 -
4 -
5 -
6 -
7 -
8 -
9void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, -
10 const uchar *srcPixels, int sbpl, -
11 int w, int h, -
12 int const_alpha) -
13{ -
14 const quint32 *src = (const quint32 *) srcPixels; -
15 quint32 *dst = (quint32 *) destPixels; -
16 if (const_alpha == 256) {
never evaluated: const_alpha == 256
0
17 const __m128i alphaMask = _mm_set1_epi32(0xff000000); -
18 const __m128i nullVector = _mm_set1_epi32(0); -
19 const __m128i half = _mm_set1_epi16(0x80); -
20 const __m128i one = _mm_set1_epi16(0xff); -
21 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -
22 for (int y = 0; y < h; ++y) {
never evaluated: y < h
0
23 { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { uint s = src[x]; if (s >= 0xff000000) dst[x] = s; else if (s != 0) dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } for (; x < w-3; x += 4) { const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); { const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } } for (; x < w; ++x) { uint s = src[x]; if (s >= 0xff000000) dst[x] = s; else if (s != 0) dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } };
never executed: dst[x] = s;
never executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
never evaluated: s != 0
never evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
never evaluated: s != 0
never evaluated: s >= 0xff000000
never evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
never evaluated: s >= 0xff000000
never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
never evaluated: x < w-3
never evaluated: x < w
never executed: }
never executed: }
never executed: dst[x] = s;
never executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
0
24 dst = (quint32 *)(((uchar *) dst) + dbpl); -
25 src = (const quint32 *)(((const uchar *) src) + sbpl); -
26 }
never executed: }
0
27 } else if (const_alpha != 0) {
never evaluated: const_alpha != 0
never executed: }
0
28 -
29 -
30 -
31 const_alpha = (const_alpha * 255) >> 8; -
32 const __m128i nullVector = _mm_set1_epi32(0); -
33 const __m128i half = _mm_set1_epi16(0x80); -
34 const __m128i one = _mm_set1_epi16(0xff); -
35 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -
36 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); -
37 for (int y = 0; y < h; ++y) {
never evaluated: y < h
0
38 { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } for (; x < w-3; x += 4) { __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { { __m128i pixelVectorAG = _mm_srli_epi16(srcVector, 8); __m128i pixelVectorRB = _mm_and_si128(srcVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, constAlphaVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, constAlphaVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); srcVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); 
destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } for (; x < w; ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } }
never executed: }
never executed: }
never executed: }
never evaluated: s != 0
never evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff
never evaluated: s != 0
never executed: }
never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
never evaluated: x < w-3
never evaluated: x < w
never executed: }
never executed: }
0
39 dst = (quint32 *)(((uchar *) dst) + dbpl); -
40 src = (const quint32 *)(((const uchar *) src) + sbpl); -
41 }
never executed: }
0
42 }
never executed: }
0
43} -
44 -
45 -
46 -
47void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl, -
48 const uchar *srcPixels, int sbpl, -
49 int w, int h, -
50 int const_alpha); -
51 -
52void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, -
53 const uchar *srcPixels, int sbpl, -
54 int w, int h, -
55 int const_alpha) -
56{ -
57 const quint32 *src = (const quint32 *) srcPixels; -
58 quint32 *dst = (quint32 *) destPixels; -
59 if (const_alpha != 256) {
evaluated: const_alpha != 256
TRUEFALSE
yes
Evaluation Count:9
yes
Evaluation Count:40
9-40
60 if (const_alpha != 0) {
partially evaluated: const_alpha != 0
TRUEFALSE
yes
Evaluation Count:9
no
Evaluation Count:0
0-9
61 const __m128i nullVector = _mm_set1_epi32(0); -
62 const __m128i half = _mm_set1_epi16(0x80); -
63 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -
64 -
65 const_alpha = (const_alpha * 255) >> 8; -
66 int one_minus_const_alpha = 255 - const_alpha; -
67 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); -
68 const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); -
69 for (int y = 0; y < h; ++y) {
evaluated: y < h
TRUEFALSE
yes
Evaluation Count:958
yes
Evaluation Count:9
9-958
70 int x = 0; -
71 -
72 -
73 for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) {
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
TRUEFALSE
yes
Evaluation Count:2354
yes
Evaluation Count:958
958-2354
74 dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); -
75 }
executed: }
Execution Count:2354
2354
76 -
77 for (; x < w-3; x += 4) {
evaluated: x < w-3
TRUEFALSE
yes
Evaluation Count:31868
yes
Evaluation Count:958
958-31868
78 __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); -
79 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) {
partially evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:31868
no
Evaluation Count:0
0-31868
80 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); -
81 __m128i result; -
82 { __m128i srcVectorAG = _mm_srli_epi16(srcVector, 8); __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, constAlphaVector); __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusConstAlpha); __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); finalAG = _mm_add_epi16(finalAG, half); finalAG = _mm_andnot_si128(colorMask, finalAG); __m128i srcVectorRB = _mm_and_si128(srcVector, colorMask); __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, constAlphaVector); __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusConstAlpha); __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); finalRB = _mm_add_epi16(finalRB, half); finalRB = _mm_srli_epi16(finalRB, 8); result = _mm_or_si128(finalAG, finalRB); }; -
83 _mm_store_si128((__m128i *)&dst[x], result); -
84 }
executed: }
Execution Count:31868
31868
85 }
executed: }
Execution Count:31868
31868
86 for (; x<w; ++x) {
evaluated: x<w
TRUEFALSE
yes
Evaluation Count:2354
yes
Evaluation Count:958
958-2354
87 dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); -
88 }
executed: }
Execution Count:2354
2354
89 dst = (quint32 *)(((uchar *) dst) + dbpl); -
90 src = (const quint32 *)(((const uchar *) src) + sbpl); -
91 }
executed: }
Execution Count:958
958
92 }
executed: }
Execution Count:9
9
93 } else {
executed: }
Execution Count:9
9
94 qt_blend_rgb32_on_rgb32(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); -
95 }
executed: }
Execution Count:40
40
96} -
97 -
98void comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) -
99{ -
100 qt_noop(); -
101 -
102 const quint32 *src = (const quint32 *) srcPixels; -
103 quint32 *dst = (quint32 *) destPixels; -
104 -
105 const __m128i nullVector = _mm_set1_epi32(0); -
106 const __m128i half = _mm_set1_epi16(0x80); -
107 const __m128i one = _mm_set1_epi16(0xff); -
108 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -
109 if (const_alpha == 255) {
evaluated: const_alpha == 255
TRUEFALSE
yes
Evaluation Count:1136722
yes
Evaluation Count:63433
63433-1136722
110 const __m128i alphaMask = _mm_set1_epi32(0xff000000); -
111 { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { uint s = src[x]; if (s >= 0xff000000) dst[x] = s; else if (s != 0) dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } for (; x < length-3; x += 4) { const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); { const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } } for (; x < length; ++x) { uint s = src[x]; if (s >= 0xff000000) dst[x] = s; else if (s != 0) dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } };
evaluated: s != 0
TRUEFALSE
yes
Evaluation Count:56107
yes
Evaluation Count:11465
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:2381974
yes
Evaluation Count:128158
evaluated: s != 0
TRUEFALSE
yes
Evaluation Count:26316
yes
Evaluation Count:150873
evaluated: s >= 0xff000000
TRUEFALSE
yes
Evaluation Count:11856
yes
Evaluation Count:67572
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
TRUEFALSE
yes
Evaluation Count:135056
yes
Evaluation Count:2510132
evaluated: s >= 0xff000000
TRUEFALSE
yes
Evaluation Count:157247
yes
Evaluation Count:177189
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
TRUEFALSE
yes
Evaluation Count:79428
yes
Evaluation Count:1136722
evaluated: x < length-3
TRUEFALSE
yes
Evaluation Count:2645188
yes
Evaluation Count:1136722
evaluated: x < length
TRUEFALSE
yes
Evaluation Count:334436
yes
Evaluation Count:1136722
executed: dst[x] = s;
Execution Count:11856
executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
Execution Count:56107
executed: }
Execution Count:135056
executed: }
Execution Count:2381974
executed: dst[x] = s;
Execution Count:157247
executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
Execution Count:26316
11465-2645188
112 } else {
executed: }
Execution Count:1136722
1136722
113 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); -
114 { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } for (; x < length-3; x += 4) { __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { { __m128i pixelVectorAG = _mm_srli_epi16(srcVector, 8); __m128i pixelVectorRB = _mm_and_si128(srcVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, constAlphaVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, constAlphaVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); srcVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, 
pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } for (; x < length; ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } };
evaluated: s != 0
TRUEFALSE
yes
Evaluation Count:39788
yes
Evaluation Count:916
partially evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:1170552
no
Evaluation Count:0
evaluated: s != 0
TRUEFALSE
yes
Evaluation Count:144150
yes
Evaluation Count:311
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
TRUEFALSE
yes
Evaluation Count:40704
yes
Evaluation Count:63433
evaluated: x < length-3
TRUEFALSE
yes
Evaluation Count:1170552
yes
Evaluation Count:63433
evaluated: x < length
TRUEFALSE
yes
Evaluation Count:144461
yes
Evaluation Count:63433
executed: }
Execution Count:39788
executed: }
Execution Count:40704
executed: }
Execution Count:1170552
executed: }
Execution Count:1170552
executed: }
Execution Count:144150
executed: }
Execution Count:144461
0-1170552
115 }
executed: }
Execution Count:63433
63433
116} -
117 -
118void comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha) -
119{ -
120 int x = 0; -
121 -
122 if (const_alpha == 255) {
never evaluated: const_alpha == 255
0
123 -
124 for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x)
never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
0
125 dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
never executed: dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
0
126 -
127 -
128 for (; x < length - 3; x += 4) {
never evaluated: x < length - 3
0
129 const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); -
130 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); -
131 -
132 const __m128i result = _mm_adds_epu8(srcVector, dstVector); -
133 _mm_store_si128((__m128i *)&dst[x], result); -
134 }
never executed: }
0
135 -
136 -
137 for (; x < length; ++x)
never evaluated: x < length
0
138 dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
never executed: dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
0
139 } else {
never executed: }
0
140 const int one_minus_const_alpha = 255 - const_alpha; -
141 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); -
142 const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); -
143 -
144 -
145 for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x)
never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
0
146 dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
never executed: dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
0
147 -
148 const __m128i half = _mm_set1_epi16(0x80); -
149 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -
150 -
151 for (; x < length - 3; x += 4) {
never evaluated: x < length - 3
0
152 const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); -
153 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); -
154 -
155 __m128i result = _mm_adds_epu8(srcVector, dstVector); -
156 { __m128i srcVectorAG = _mm_srli_epi16(result, 8); __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, constAlphaVector); __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusConstAlpha); __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); finalAG = _mm_add_epi16(finalAG, half); finalAG = _mm_andnot_si128(colorMask, finalAG); __m128i srcVectorRB = _mm_and_si128(result, colorMask); __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, constAlphaVector); __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusConstAlpha); __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); finalRB = _mm_add_epi16(finalRB, half); finalRB = _mm_srli_epi16(finalRB, 8); result = _mm_or_si128(finalAG, finalRB); } -
157 _mm_store_si128((__m128i *)&dst[x], result); -
158 }
never executed: }
0
159 -
160 -
161 for (; x < length; ++x)
never evaluated: x < length
0
162 dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
never executed: dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
0
163 }
never executed: }
0
164} -
165 -
166void comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha) -
167{ -
168 if (const_alpha == 255) {
evaluated: const_alpha == 255
TRUEFALSE
yes
Evaluation Count:63722
yes
Evaluation Count:3248
3248-63722
169 ::memcpy(dst, src, length * sizeof(uint)); -
170 } else {
executed: }
Execution Count:63728
63728
171 const int ialpha = 255 - const_alpha; -
172 -
173 int x = 0; -
174 -
175 -
176 for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x)
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
TRUEFALSE
yes
Evaluation Count:2425
yes
Evaluation Count:3248
2425-3248
177 dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
executed: dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
Execution Count:2425
2425
178 -
179 -
180 const __m128i half = _mm_set1_epi16(0x80); -
181 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -
182 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); -
183 const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha); -
184 for (; x < length - 3; x += 4) {
partially evaluated: x < length - 3
TRUEFALSE
no
Evaluation Count:0
yes
Evaluation Count:3248
0-3248
185 const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); -
186 __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); -
187 { __m128i srcVectorAG = _mm_srli_epi16(srcVector, 8); __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, constAlphaVector); __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusConstAlpha); __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); finalAG = _mm_add_epi16(finalAG, half); finalAG = _mm_andnot_si128(colorMask, finalAG); __m128i srcVectorRB = _mm_and_si128(srcVector, colorMask); __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, constAlphaVector); __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusConstAlpha); __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); finalRB = _mm_add_epi16(finalRB, half); finalRB = _mm_srli_epi16(finalRB, 8); dstVector = _mm_or_si128(finalAG, finalRB); } -
188 _mm_store_si128((__m128i *)&dst[x], dstVector); -
189 }
never executed: }
0
190 -
191 -
192 for (; x < length; ++x)
evaluated: x < length
TRUEFALSE
yes
Evaluation Count:835
yes
Evaluation Count:3248
835-3248
193 dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
executed: dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
Execution Count:835
835
194 }
executed: }
Execution Count:3248
3248
195} -
196 -
197void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) -
198{ -
199 if (count < 7) {
evaluated: count < 7
TRUEFALSE
yes
Evaluation Count:862603
yes
Evaluation Count:1366744
862603-1366744
200 switch (count) { -
201 case 6: *dest++ = value; -
202 case 5: *dest++ = value;
code before this statement executed: case 5:
Execution Count:161820
161820
203 case 4: *dest++ = value;
code before this statement executed: case 4:
Execution Count:189515
189515
204 case 3: *dest++ = value;
code before this statement executed: case 3:
Execution Count:215554
215554
205 case 2: *dest++ = value;
code before this statement executed: case 2:
Execution Count:241583
241583
206 case 1: *dest = value;
code before this statement executed: case 1:
Execution Count:323948
323948
207 }
executed: }
Execution Count:862603
862603
208 return;
executed: return;
Execution Count:862603
862603
209 }; -
210 -
211 const int align = (quintptr)(dest) & 0xf; -
212 switch (align) { -
213 case 4: *dest++ = value; --count; -
214 case 8: *dest++ = value; --count;
code before this statement executed: case 8:
Execution Count:473513
473513
215 case 12: *dest++ = value; --count;
code before this statement executed: case 12:
Execution Count:713293
713293
216 }
executed: }
Execution Count:1036622
1036622
217 -
218 int count128 = count / 4; -
219 __m128i *dst128 = reinterpret_cast<__m128i*>(dest); -
220 const __m128i value128 = _mm_set_epi32(value, value, value, value); -
221 -
222 int n = (count128 + 3) / 4; -
223 switch (count128 & 0x3) { -
224 case 0: do { _mm_stream_si128(dst128++, value128); -
225 case 3: _mm_stream_si128(dst128++, value128);
code before this statement executed: case 3:
Execution Count:14972429
14972429
226 case 2: _mm_stream_si128(dst128++, value128);
code before this statement executed: case 2:
Execution Count:15192949
15192949
227 case 1: _mm_stream_si128(dst128++, value128);
code before this statement executed: case 1:
Execution Count:15564464
15564464
228 } while (--n > 0);
evaluated: --n > 0
TRUEFALSE
yes
Evaluation Count:14635070
yes
Evaluation Count:1366744
executed: }
Execution Count:16001814
1366744-16001814
229 }
executed: }
Execution Count:1366744
1366744
230 -
231 const int rest = count & 0x3; -
232 if (rest) {
evaluated: rest
TRUEFALSE
yes
Evaluation Count:1091376
yes
Evaluation Count:275368
275368-1091376
233 switch (rest) { -
234 case 3: dest[count - 3] = value; -
235 case 2: dest[count - 2] = value;
code before this statement executed: case 2:
Execution Count:543520
543520
236 case 1: dest[count - 1] = value;
code before this statement executed: case 1:
Execution Count:811996
811996
237 }
executed: }
Execution Count:1091376
1091376
238 }
executed: }
Execution Count:1091376
1091376
239}
executed: }
Execution Count:1366744
1366744
240 -
241void comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha) -
242{ -
243 if ((const_alpha & qAlpha(color)) == 255) {
evaluated: (const_alpha & qAlpha(color)) == 255
TRUEFALSE
yes
Evaluation Count:2037
yes
Evaluation Count:36156
2037-36156
244 qt_memfill32_sse2(destPixels, color, length); -
245 } else {
executed: }
Execution Count:2037
2037
246 if (const_alpha != 255)
evaluated: const_alpha != 255
TRUEFALSE
yes
Evaluation Count:18265
yes
Evaluation Count:17891
17891-18265
247 color = BYTE_MUL(color, const_alpha);
executed: color = BYTE_MUL(color, const_alpha);
Execution Count:18265
18265
248 -
249 const quint32 minusAlphaOfColor = qAlpha(~color); -
250 int x = 0; -
251 -
252 quint32 *dst = (quint32 *) destPixels; -
253 const __m128i colorVector = _mm_set1_epi32(color); -
254 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -
255 const __m128i half = _mm_set1_epi16(0x80); -
256 const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor); -
257 -
258 for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x)
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
TRUEFALSE
yes
Evaluation Count:32425
yes
Evaluation Count:36156
32425-36156
259 destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
executed: destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
Execution Count:32425
32425
260 -
261 for (; x < length-3; x += 4) {
evaluated: x < length-3
TRUEFALSE
yes
Evaluation Count:172971
yes
Evaluation Count:36156
36156-172971
262 __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); -
263 { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, minusAlphaOfColorVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, minusAlphaOfColorVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); dstVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; -
264 dstVector = _mm_add_epi8(colorVector, dstVector); -
265 _mm_store_si128((__m128i *)&dst[x], dstVector); -
266 }
executed: }
Execution Count:172971
172971
267 for (;x < length; ++x)
evaluated: x < length
TRUEFALSE
yes
Evaluation Count:18654
yes
Evaluation Count:36156
18654-36156
268 destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
executed: destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
Execution Count:18654
18654
269 }
executed: }
Execution Count:36156
36156
270} -
271 -
272 -
273CompositionFunctionSolid qt_functionForModeSolid_SSE2[numCompositionFunctions] = { -
274 comp_func_solid_SourceOver_sse2, -
275 comp_func_solid_DestinationOver, -
276 comp_func_solid_Clear, -
277 comp_func_solid_Source, -
278 comp_func_solid_Destination, -
279 comp_func_solid_SourceIn, -
280 comp_func_solid_DestinationIn, -
281 comp_func_solid_SourceOut, -
282 comp_func_solid_DestinationOut, -
283 comp_func_solid_SourceAtop, -
284 comp_func_solid_DestinationAtop, -
285 comp_func_solid_XOR, -
286 comp_func_solid_Plus, -
287 comp_func_solid_Multiply, -
288 comp_func_solid_Screen, -
289 comp_func_solid_Overlay, -
290 comp_func_solid_Darken, -
291 comp_func_solid_Lighten, -
292 comp_func_solid_ColorDodge, -
293 comp_func_solid_ColorBurn, -
294 comp_func_solid_HardLight, -
295 comp_func_solid_SoftLight, -
296 comp_func_solid_Difference, -
297 comp_func_solid_Exclusion, -
298 rasterop_solid_SourceOrDestination, -
299 rasterop_solid_SourceAndDestination, -
300 rasterop_solid_SourceXorDestination, -
301 rasterop_solid_NotSourceAndNotDestination, -
302 rasterop_solid_NotSourceOrNotDestination, -
303 rasterop_solid_NotSourceXorDestination, -
304 rasterop_solid_NotSource, -
305 rasterop_solid_NotSourceAndDestination, -
306 rasterop_solid_SourceAndNotDestination, -
307 rasterop_solid_NotSourceOrDestination, -
308 rasterop_solid_SourceOrNotDestination, -
309 rasterop_solid_ClearDestination, -
310 rasterop_solid_SetDestination, -
311 rasterop_solid_NotDestination -
312}; -
313 -
314CompositionFunction qt_functionForMode_SSE2[numCompositionFunctions] = { -
315 comp_func_SourceOver_sse2, -
316 comp_func_DestinationOver, -
317 comp_func_Clear, -
318 comp_func_Source_sse2, -
319 comp_func_Destination, -
320 comp_func_SourceIn, -
321 comp_func_DestinationIn, -
322 comp_func_SourceOut, -
323 comp_func_DestinationOut, -
324 comp_func_SourceAtop, -
325 comp_func_DestinationAtop, -
326 comp_func_XOR, -
327 comp_func_Plus_sse2, -
328 comp_func_Multiply, -
329 comp_func_Screen, -
330 comp_func_Overlay, -
331 comp_func_Darken, -
332 comp_func_Lighten, -
333 comp_func_ColorDodge, -
334 comp_func_ColorBurn, -
335 comp_func_HardLight, -
336 comp_func_SoftLight, -
337 comp_func_Difference, -
338 comp_func_Exclusion, -
339 rasterop_SourceOrDestination, -
340 rasterop_SourceAndDestination, -
341 rasterop_SourceXorDestination, -
342 rasterop_NotSourceAndNotDestination, -
343 rasterop_NotSourceOrNotDestination, -
344 rasterop_NotSourceXorDestination, -
345 rasterop_NotSource, -
346 rasterop_NotSourceAndDestination, -
347 rasterop_SourceAndNotDestination, -
348 rasterop_NotSourceOrDestination, -
349 rasterop_SourceOrNotDestination, -
350 rasterop_ClearDestination, -
351 rasterop_SetDestination, -
352 rasterop_NotDestination -
353}; -
354 -
355 -
356void qt_memfill16_sse2(quint16 *dest, quint16 value, int count) -
357{ -
358 if (count < 3) {
evaluated: count < 3
TRUEFALSE
yes
Evaluation Count:7044494
yes
Evaluation Count:1413641
1413641-7044494
359 switch (count) { -
360 case 2: *dest++ = value; -
361 case 1: *dest = value;
code before this statement executed: case 1:
Execution Count:425755
425755
362 }
executed: }
Execution Count:7044494
7044494
363 return;
executed: return;
Execution Count:7044494
7044494
364 } -
365 -
366 const int align = (quintptr)(dest) & 0x3; -
367 switch (align) { -
368 case 2: *dest++ = value; --count; -
369 }
executed: }
Execution Count:235663
235663
370 -
371 const quint32 value32 = (value << 16) | value; -
372 qt_memfill32_sse2(reinterpret_cast<quint32*>(dest), value32, count / 2); -
373 -
374 if (count & 0x1)
evaluated: count & 0x1
TRUEFALSE
yes
Evaluation Count:287920
yes
Evaluation Count:1125721
287920-1125721
375 dest[count - 1] = value;
executed: dest[count - 1] = value;
Execution Count:287920
287920
376}
executed: }
Execution Count:1413641
1413641
377 -
378void qt_bitmapblit32_sse2(QRasterBuffer *rasterBuffer, int x, int y, -
379 quint32 color, -
380 const uchar *src, int width, int height, int stride) -
381{ -
382 quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x; -
383 const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint32); -
384 -
385 const __m128i c128 = _mm_set1_epi32(color); -
386 const __m128i maskmask1 = _mm_set_epi32(0x10101010, 0x20202020, -
387 0x40404040, 0x80808080); -
388 const __m128i maskadd1 = _mm_set_epi32(0x70707070, 0x60606060, -
389 0x40404040, 0x00000000); -
390 -
391 if (width > 4) {
evaluated: width > 4
TRUEFALSE
yes
Evaluation Count:302
yes
Evaluation Count:68
68-302
392 const __m128i maskmask2 = _mm_set_epi32(0x01010101, 0x02020202, -
393 0x04040404, 0x08080808); -
394 const __m128i maskadd2 = _mm_set_epi32(0x7f7f7f7f, 0x7e7e7e7e, -
395 0x7c7c7c7c, 0x78787878); -
396 while (height--) {
evaluated: height--
TRUEFALSE
yes
Evaluation Count:2738
yes
Evaluation Count:302
302-2738
397 for (int x = 0; x < width; x += 8) {
evaluated: x < width
TRUEFALSE
yes
Evaluation Count:2754
yes
Evaluation Count:2738
2738-2754
398 const quint8 s = src[x >> 3]; -
399 if (!s)
evaluated: !s
TRUEFALSE
yes
Evaluation Count:376
yes
Evaluation Count:2378
376-2378
400 continue;
executed: continue;
Execution Count:376
376
401 __m128i mask1 = _mm_set1_epi8(s); -
402 __m128i mask2 = mask1; -
403 -
404 mask1 = _mm_and_si128(mask1, maskmask1); -
405 mask1 = _mm_add_epi8(mask1, maskadd1); -
406 _mm_maskmoveu_si128(c128, mask1, (char*)(dest + x)); -
407 mask2 = _mm_and_si128(mask2, maskmask2); -
408 mask2 = _mm_add_epi8(mask2, maskadd2); -
409 _mm_maskmoveu_si128(c128, mask2, (char*)(dest + x + 4)); -
410 }
executed: }
Execution Count:2378
2378
411 dest += destStride; -
412 src += stride; -
413 }
executed: }
Execution Count:2738
2738
414 } else {
executed: }
Execution Count:302
302
415 while (height--) {
evaluated: height--
TRUEFALSE
yes
Evaluation Count:660
yes
Evaluation Count:68
68-660
416 const quint8 s = *src; -
417 if (s) {
evaluated: s
TRUEFALSE
yes
Evaluation Count:620
yes
Evaluation Count:40
40-620
418 __m128i mask1 = _mm_set1_epi8(s); -
419 mask1 = _mm_and_si128(mask1, maskmask1); -
420 mask1 = _mm_add_epi8(mask1, maskadd1); -
421 _mm_maskmoveu_si128(c128, mask1, (char*)(dest)); -
422 }
executed: }
Execution Count:620
620
423 dest += destStride; -
424 src += stride; -
425 }
executed: }
Execution Count:660
660
426 }
executed: }
Execution Count:68
68
427} -
428 -
429void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y, -
430 quint32 color, -
431 const uchar *src, int width, int height, int stride) -
432{ -
433 const quint16 c = qConvertRgb32To16(color); -
434 quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x; -
435 const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16); -
436 -
437 const __m128i c128 = _mm_set1_epi16(c); -
438 -
439 -
440 -
441 const __m128i maskmask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808, -
442 0x1010, 0x2020, 0x4040, 0x8080); -
443 const __m128i maskadd = _mm_set_epi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878, -
444 0x7070, 0x6060, 0x4040, 0x0000); -
445 -
446 while (height--) {
never evaluated: height--
0
447 for (int x = 0; x < width; x += 8) {
never evaluated: x < width
0
448 const quint8 s = src[x >> 3]; -
449 if (!s)
never evaluated: !s
0
450 continue;
never executed: continue;
0
451 __m128i mask = _mm_set1_epi8(s); -
452 mask = _mm_and_si128(mask, maskmask); -
453 mask = _mm_add_epi8(mask, maskadd); -
454 _mm_maskmoveu_si128(c128, mask, (char*)(dest + x)); -
455 }
never executed: }
0
456 dest += destStride; -
457 src += stride; -
458 }
never executed: }
0
459}
never executed: }
0
460 -
// SSE2 backend for the four-lane SIMD operations consumed by
// QRadialFetchSimd (see qt_fetch_radial_gradient_sse2()). Every operation is
// a thin wrapper around a single SSE/SSE2 intrinsic working on four 32-bit
// lanes at once.
class QSimdSse2
{
public:
    typedef __m128i Int32x4;
    typedef __m128 Float32x4;

    // Give per-lane access to a vector through an array of the same size.
    union Vect_buffer_i { Int32x4 v; int i[4]; };
    union Vect_buffer_f { Float32x4 v; float f[4]; };

    // Broadcast one scalar into all four lanes. The double overload narrows
    // to float because the vector holds single-precision lanes.
    static inline Float32x4 v_dup(float x) { return _mm_set1_ps(x); }
    static inline Float32x4 v_dup(double x) { return _mm_set1_ps(x); }
    static inline Int32x4 v_dup(int x) { return _mm_set1_epi32(x); }
    static inline Int32x4 v_dup(uint x) { return _mm_set1_epi32(x); }

    // Lane-wise addition.
    static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return _mm_add_ps(a, b); }
    static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return _mm_add_epi32(a, b); }

    // Lane-wise maximum / minimum. v_min_16 compares the register as eight
    // signed 16-bit values rather than four 32-bit ones.
    static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return _mm_max_ps(a, b); }
    static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return _mm_min_ps(a, b); }
    static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return _mm_min_epi16(a, b); }

    // Bitwise AND of the full 128-bit registers.
    static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return _mm_and_si128(a, b); }

    // Lane-wise subtraction (a - b).
    static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return _mm_sub_ps(a, b); }
    static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return _mm_sub_epi32(a, b); }

    // Lane-wise multiplication.
    static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return _mm_mul_ps(a, b); }

    // Lane-wise square root.
    static inline Float32x4 v_sqrt(Float32x4 x) { return _mm_sqrt_ps(x); }

    // Float to int conversion with truncation towards zero.
    static inline Int32x4 v_toInt(Float32x4 x) { return _mm_cvttps_epi32(x); }

    // Lane-wise a >= b: all bits set in lanes where the comparison holds,
    // zero elsewhere. This must be the "greater or equal" compare; the strict
    // "greater than" compare would reject lanes where a == b and so
    // contradict the name of the operation.
    static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b) { return _mm_castps_si128(_mm_cmpge_ps(a, b)); }
};
495 -
496const uint * qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data, -
497 int y, int x, int length) -
498{ -
499 return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2> >(buffer, op, data, y, x, length);
executed: return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2> >(buffer, op, data, y, x, length);
Execution Count:28769
28769
500} -
501 -
502void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, -
503 const uchar *srcPixels, int sbpl, -
504 const QRectF &targetRect, -
505 const QRectF &sourceRect, -
506 const QRect &clip, -
507 int const_alpha) -
508{ -
509 if (const_alpha != 256) {
partially evaluated: const_alpha != 256
TRUEFALSE
no
Evaluation Count:0
yes
Evaluation Count:1358
0-1358
510 -
511 extern void qt_scale_image_argb32_on_argb32(uchar *destPixels, int dbpl, -
512 const uchar *srcPixels, int sbpl, -
513 const QRectF &targetRect, -
514 const QRectF &sourceRect, -
515 const QRect &clip, -
516 int const_alpha); -
517 return qt_scale_image_argb32_on_argb32(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha);
never executed: return qt_scale_image_argb32_on_argb32(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha);
0
518 } -
519 -
520 qreal sx = targetRect.width() / (qreal) sourceRect.width(); -
521 qreal sy = targetRect.height() / (qreal) sourceRect.height(); -
522 -
523 int ix = 0x00010000 / sx; -
524 int iy = 0x00010000 / sy; -
525 -
526 int cx1 = clip.x(); -
527 int cx2 = clip.x() + clip.width(); -
528 int cy1 = clip.top(); -
529 int cy2 = clip.y() + clip.height(); -
530 -
531 int tx1 = qRound(targetRect.left()); -
532 int tx2 = qRound(targetRect.right()); -
533 int ty1 = qRound(targetRect.top()); -
534 int ty2 = qRound(targetRect.bottom()); -
535 -
536 if (tx2 < tx1)
evaluated: tx2 < tx1
TRUEFALSE
yes
Evaluation Count:6
yes
Evaluation Count:1352
6-1352
537 qSwap(tx2, tx1);
executed: qSwap(tx2, tx1);
Execution Count:6
6
538 if (ty2 < ty1)
evaluated: ty2 < ty1
TRUEFALSE
yes
Evaluation Count:6
yes
Evaluation Count:1352
6-1352
539 qSwap(ty2, ty1);
executed: qSwap(ty2, ty1);
Execution Count:6
6
540 -
541 if (tx1 < cx1)
partially evaluated: tx1 < cx1
TRUEFALSE
no
Evaluation Count:0
yes
Evaluation Count:1358
0-1358
542 tx1 = cx1;
never executed: tx1 = cx1;
0
543 if (tx2 >= cx2)
evaluated: tx2 >= cx2
TRUEFALSE
yes
Evaluation Count:1344
yes
Evaluation Count:14
14-1344
544 tx2 = cx2;
executed: tx2 = cx2;
Execution Count:1344
1344
545 -
546 if (tx1 >= tx2)
partially evaluated: tx1 >= tx2
TRUEFALSE
no
Evaluation Count:0
yes
Evaluation Count:1358
0-1358
547 return;
never executed: return;
0
548 -
549 if (ty1 < cy1)
partially evaluated: ty1 < cy1
TRUEFALSE
no
Evaluation Count:0
yes
Evaluation Count:1358
0-1358
550 ty1 = cy1;
never executed: ty1 = cy1;
0
551 if (ty2 >= cy2)
evaluated: ty2 >= cy2
TRUEFALSE
yes
Evaluation Count:22
yes
Evaluation Count:1336
22-1336
552 ty2 = cy2;
executed: ty2 = cy2;
Execution Count:22
22
553 if (ty1 >= ty2)
evaluated: ty1 >= ty2
TRUEFALSE
yes
Evaluation Count:449
yes
Evaluation Count:909
449-909
554 return;
executed: return;
Execution Count:449
449
555 -
556 int h = ty2 - ty1; -
557 int w = tx2 - tx1; -
558 -
559 quint32 basex; -
560 quint32 srcy; -
561 -
562 if (sx < 0) {
evaluated: sx < 0
TRUEFALSE
yes
Evaluation Count:6
yes
Evaluation Count:903
6-903
563 int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1; -
564 basex = quint32(sourceRect.right() * 65536) + dstx; -
565 } else {
executed: }
Execution Count:6
6
566 int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1; -
567 basex = quint32(sourceRect.left() * 65536) + dstx; -
568 }
executed: }
Execution Count:903
903
569 if (sy < 0) {
evaluated: sy < 0
TRUEFALSE
yes
Evaluation Count:6
yes
Evaluation Count:903
6-903
570 int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1; -
571 srcy = quint32(sourceRect.bottom() * 65536) + dsty; -
572 } else {
executed: }
Execution Count:6
6
573 int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1; -
574 srcy = quint32(sourceRect.top() * 65536) + dsty; -
575 }
executed: }
Execution Count:903
903
576 -
577 quint32 *dst = ((quint32 *) (destPixels + ty1 * dbpl)) + tx1; -
578 -
579 const __m128i nullVector = _mm_set1_epi32(0); -
580 const __m128i half = _mm_set1_epi16(0x80); -
581 const __m128i one = _mm_set1_epi16(0xff); -
582 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -
583 const __m128i alphaMask = _mm_set1_epi32(0xff000000); -
584 const __m128i ixVector = _mm_set1_epi32(4*ix); -
585 -
586 -
587 while (h--) {
evaluated: h--
TRUEFALSE
yes
Evaluation Count:1764
yes
Evaluation Count:909
909-1764
588 const uint *src = (const quint32 *) (srcPixels + (srcy >> 16) * sbpl); -
589 int srcx = basex; -
590 int x = 0; -
591 -
592 for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) {
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
TRUEFALSE
yes
Evaluation Count:752
yes
Evaluation Count:1764
752-1764
593 uint s = src[srcx >> 16]; -
594 dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); -
595 srcx += ix; -
596 }
executed: }
Execution Count:752
752
597 -
598 __m128i srcxVector = _mm_set_epi32(srcx, srcx + ix, srcx + ix + ix, srcx + ix + ix + ix); -
599 -
600 for (; x<w - 3; x += 4) {
evaluated: x<w - 3
TRUEFALSE
yes
Evaluation Count:40635
yes
Evaluation Count:1764
1764-40635
601 union Vect_buffer { __m128i vect; quint32 i[4]; }; -
602 Vect_buffer addr; -
603 addr.vect = _mm_srli_epi32(srcxVector, 16); -
604 srcxVector = _mm_add_epi32(srcxVector, ixVector); -
605 -
606 const __m128i srcVector = _mm_set_epi32(src[addr.i[0]], src[addr.i[1]], src[addr.i[2]], src[addr.i[3]]); -
607 { const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } };
executed: }
Execution Count:34682
executed: }
Execution Count:607
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:607
yes
Evaluation Count:5346
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
TRUEFALSE
yes
Evaluation Count:34682
yes
Evaluation Count:5953
607-34682
608 } -
609 -
610 for (; x<w; x++) {
evaluated: x<w
TRUEFALSE
yes
Evaluation Count:970
yes
Evaluation Count:1764
970-1764
611 uint s = src[(basex + x*ix) >> 16]; -
612 dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); -
613 }
executed: }
Execution Count:970
970
614 dst = (quint32 *)(((uchar *) dst) + dbpl); -
615 srcy += iy; -
616 }
executed: }
Execution Count:1764
1764
617}
executed: }
Execution Count:909
909
618 -
619 -
620 -
621 -
Switch to: Source code | Preprocessed file

Generated by Squish Coco Non-Commercial