Line | Source Code | Coverage |
---|
1 | | - |
2 | | - |
3 | | - |
4 | | - |
5 | | - |
6 | | - |
7 | | - |
8 | | - |
// qt_blend_argb32_on_argb32_sse2: blends a w x h block of 32-bit source pixels onto
// 32-bit destination pixels, one row at a time.
//   destPixels / srcPixels : start of the first destination / source row.
//   dbpl / sbpl            : byte offsets added to dst / src to advance to the next row.
//   w, h                   : pixels per row and number of rows.
//   const_alpha            : 256 selects the path without extra scaling; 0 leaves the
//                            destination untouched (no branch handles it); any other value
//                            is rescaled with (const_alpha * 255) >> 8 and multiplied into
//                            every source pixel before blending.
// Per pixel the result is s + BYTE_MUL(dst, qAlpha(~s)); BYTE_MUL and qAlpha are defined
// outside this view (presumably premultiplied source-over - confirm against their definitions).
// Each row runs three loops: a scalar loop of up to 3 pixels until dst reaches a 16-byte
// boundary (the count formula assumes dst is 4-byte aligned), an SSE2 loop handling 4 pixels
// per iteration (unaligned load from src, aligned load/store on dst), and a scalar tail.
// The coverage column shows this function was never executed in the recorded run.
9 | void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, | - |
10 | const uchar *srcPixels, int sbpl, | - |
11 | int w, int h, | - |
12 | int const_alpha) | - |
13 | { | - |
14 | const quint32 *src = (const quint32 *) srcPixels; | - |
15 | quint32 *dst = (quint32 *) destPixels; | - |
// Path for const_alpha == 256: the source pixels are used as-is.
16 | if (const_alpha == 256) { never evaluated: const_alpha == 256 | 0 |
17 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - |
18 | const __m128i nullVector = _mm_set1_epi32(0); | - |
19 | const __m128i half = _mm_set1_epi16(0x80); | - |
20 | const __m128i one = _mm_set1_epi16(0xff); | - |
21 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
22 | for (int y = 0; y < h; ++y) { | 0 |
// The next row holds the whole per-row loop nest on a single line.
// Scalar loops: s >= 0xff000000 (alpha byte 0xff) copies the source pixel, s == 0 is skipped,
// anything else is blended.
// SSE2 loop: if all four alpha bytes are 0xff the four source pixels are stored directly;
// if all four alpha bytes are 0 nothing is written; otherwise the destination is split into
// AG and RB 16-bit lanes, multiplied by (0xff - source alpha), rounded with the
// "+ (x >> 8) + 0x80, >> 8" sequence, recombined and added to the source with _mm_add_epi8.
23 | { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { uint s = src[x]; if (s >= 0xff000000) dst[x] = s; else if (s != 0) dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } for (; x < w-3; x += 4) { const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); { const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } } for (; x < w; ++x) { uint s = src[x]; if (s >= 0xff000000) dst[x] = s; else if (s != 0) dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } }; never executed: dst[x] = s; never executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); never evaluated: 
_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff never evaluated: s >= 0xff000000 never evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff never evaluated: s >= 0xff000000 never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) never executed: dst[x] = s; never executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); | 0 |
// Step both row pointers by their byte strides.
24 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
25 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
26 | } | 0 |
// Path for any other non-zero const_alpha.
27 | } else if (const_alpha != 0) { never evaluated: const_alpha != 0 | 0 |
28 | | - |
29 | | - |
30 | | - |
// Rescale so that 256 would map to 255 (presumably the input range is 0..256 - confirm with callers).
31 | const_alpha = (const_alpha * 255) >> 8; | - |
32 | const __m128i nullVector = _mm_set1_epi32(0); | - |
33 | const __m128i half = _mm_set1_epi16(0x80); | - |
34 | const __m128i one = _mm_set1_epi16(0xff); | - |
35 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
36 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
37 | for (int y = 0; y < h; ++y) { | 0 |
// The next row again holds the whole per-row loop nest on a single line.
// Scalar loops: non-zero source pixels are first scaled with BYTE_MUL(s, const_alpha), then blended.
// SSE2 loop: skipped when all four source pixels are zero; otherwise the source vector is
// multiplied by constAlphaVector (same AG/RB split and rounding sequence), and the scaled
// source is then blended over the destination exactly as in the const_alpha == 256 path.
38 | { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } for (; x < w-3; x += 4) { __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { { __m128i pixelVectorAG = _mm_srli_epi16(srcVector, 8); __m128i pixelVectorRB = _mm_and_si128(srcVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, constAlphaVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, constAlphaVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); srcVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); 
destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } for (; x < w; ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } } never evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) | 0 |
39 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
40 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
41 | } | 0 |
42 | } | 0 |
43 | } | - |
44 | | - |
45 | | - |
46 | | - |
// Forward declaration only; the definition is outside this view.
// It has the same parameter list as qt_blend_rgb32_on_rgb32_sse2, which calls it
// when const_alpha == 256.
47 | void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl, | - |
48 | const uchar *srcPixels, int sbpl, | - |
49 | int w, int h, | - |
50 | int const_alpha); | - |
51 | | - |
// qt_blend_rgb32_on_rgb32_sse2: mixes a w x h block of 32-bit source pixels into the
// destination with a constant weight.
//   destPixels / srcPixels : start of the first destination / source row.
//   dbpl / sbpl            : byte offsets added to dst / src to advance to the next row.
//   const_alpha            : 256 forwards the whole call to qt_blend_rgb32_on_rgb32;
//                            0 does nothing; any other value is rescaled with
//                            (const_alpha * 255) >> 8 and used as the source weight, with
//                            255 - const_alpha as the destination weight.
// Scalar pixels use INTERPOLATE_PIXEL_255 (defined outside this view); the SSE2 loop
// performs the weighted sum on AG and RB 16-bit lanes for 4 pixels at a time.
// NOTE(review): the SSE2 loop leaves the destination untouched when all four source pixels
// are zero, whereas the scalar loops have no such check - confirm this difference is
// intended. In the recorded run that skip never happened (the "no" count below is 0).
52 | void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, | - |
53 | const uchar *srcPixels, int sbpl, | - |
54 | int w, int h, | - |
55 | int const_alpha) | - |
56 | { | - |
57 | const quint32 *src = (const quint32 *) srcPixels; | - |
58 | quint32 *dst = (quint32 *) destPixels; | - |
59 | if (const_alpha != 256) { evaluated: const_alpha != 256 yes Evaluation Count:9 | yes Evaluation Count:40 |
| 9-40 |
60 | if (const_alpha != 0) { partially evaluated: const_alpha != 0 yes Evaluation Count:9 | no Evaluation Count:0 |
| 0-9 |
61 | const __m128i nullVector = _mm_set1_epi32(0); | - |
62 | const __m128i half = _mm_set1_epi16(0x80); | - |
63 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
64 | | - |
// Rescale so that 256 would map to 255, then derive the complementary destination weight.
65 | const_alpha = (const_alpha * 255) >> 8; | - |
66 | int one_minus_const_alpha = 255 - const_alpha; | - |
67 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
68 | const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); | - |
69 | for (int y = 0; y < h; ++y) { evaluated: y < h yes Evaluation Count:958 | yes Evaluation Count:9 |
| 9-958 |
70 | int x = 0; | - |
71 | | - |
72 | | - |
// Scalar prologue: up to 3 pixels until dst sits on a 16-byte boundary
// (the count formula assumes dst is 4-byte aligned), capped at w.
73 | for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:2354 | yes Evaluation Count:958 |
| 958-2354 |
74 | dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); | - |
75 | } executed: } Execution Count:2354 | 2354 |
76 | | - |
// SSE2 body: 4 pixels per iteration, unaligned load from src, aligned load/store on dst.
77 | for (; x < w-3; x += 4) { evaluated: x < w-3 yes Evaluation Count:31868 | yes Evaluation Count:958 |
| 958-31868 |
78 | __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); | - |
79 | if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { partially evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff yes Evaluation Count:31868 | no Evaluation Count:0 |
| 0-31868 |
80 | const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
81 | __m128i result; | - |
// result = src * const_alpha + dst * (255 - const_alpha), computed separately for the
// AG and RB byte pairs and rounded with the "+ (x >> 8) + 0x80" sequence before recombining.
82 | { __m128i srcVectorAG = _mm_srli_epi16(srcVector, 8); __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, constAlphaVector); __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusConstAlpha); __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); finalAG = _mm_add_epi16(finalAG, half); finalAG = _mm_andnot_si128(colorMask, finalAG); __m128i srcVectorRB = _mm_and_si128(srcVector, colorMask); __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, constAlphaVector); __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusConstAlpha); __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); finalRB = _mm_add_epi16(finalRB, half); finalRB = _mm_srli_epi16(finalRB, 8); result = _mm_or_si128(finalAG, finalRB); }; | - |
83 | _mm_store_si128((__m128i *)&dst[x], result); | - |
84 | } executed: } Execution Count:31868 | 31868 |
85 | } executed: } Execution Count:31868 | 31868 |
// Scalar tail for the last 0-3 pixels of the row.
86 | for (; x<w; ++x) { evaluated: x<w yes Evaluation Count:2354 | yes Evaluation Count:958 |
| 958-2354 |
87 | dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); | - |
88 | } executed: } Execution Count:2354 | 2354 |
89 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
90 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
91 | } executed: } Execution Count:958 | 958 |
92 | } executed: } Execution Count:9 | 9 |
93 | } else { executed: } Execution Count:9 | 9 |
// const_alpha == 256: delegate to the function declared above this one.
94 | qt_blend_rgb32_on_rgb32(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); | - |
95 | } executed: } Execution Count:40 | 40 |
96 | } | - |
97 | | - |
// comp_func_SourceOver_sse2: blends `length` source pixels over `length` destination pixels.
//   destPixels / srcPixels : the two pixel runs (dst is read and written, src is only read).
//   const_alpha            : 255 selects the path that uses the source pixels unchanged;
//                            any other value is multiplied into every source pixel first.
//                            Unlike the qt_blend_* functions in this file, the value is used
//                            as given (no rescale from 256).
// Per pixel the result is s + BYTE_MUL(dst, qAlpha(~s)); BYTE_MUL and qAlpha are defined
// outside this view. Both paths run a scalar prologue until dst reaches a 16-byte boundary
// (the count formula assumes dst is 4-byte aligned), an SSE2 loop over 4 pixels, and a
// scalar tail; each path's loop nest sits on a single row of this listing.
98 | void comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) | - |
99 | { | - |
100 | qt_noop(); | - |
101 | | - |
102 | const quint32 *src = (const quint32 *) srcPixels; | - |
103 | quint32 *dst = (quint32 *) destPixels; | - |
104 | | - |
105 | const __m128i nullVector = _mm_set1_epi32(0); | - |
106 | const __m128i half = _mm_set1_epi16(0x80); | - |
107 | const __m128i one = _mm_set1_epi16(0xff); | - |
108 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
109 | if (const_alpha == 255) { evaluated: const_alpha == 255 yes Evaluation Count:1136722 | yes Evaluation Count:63433 |
| 63433-1136722 |
110 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - |
// const_alpha == 255 path.
// Scalar loops: s >= 0xff000000 (alpha byte 0xff) copies the source pixel, s == 0 is skipped,
// anything else is blended.
// SSE2 loop: four source pixels whose alpha bytes are all 0xff are stored directly; four whose
// alpha bytes are all 0 are skipped; otherwise the destination is multiplied by
// (0xff - source alpha) on AG/RB 16-bit lanes and added to the source with _mm_add_epi8.
111 | { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { uint s = src[x]; if (s >= 0xff000000) dst[x] = s; else if (s != 0) dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } for (; x < length-3; x += 4) { const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); { const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } } for (; x < length; ++x) { uint s = src[x]; if (s >= 0xff000000) dst[x] = s; else if (s != 0) dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } }; evaluated: s != 0 yes Evaluation Count:56107 | yes Evaluation Count:11465 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff yes Evaluation Count:2381974 | yes Evaluation Count:128158 |
evaluated: s != 0 yes Evaluation Count:26316 | yes Evaluation Count:150873 |
evaluated: s >= 0xff000000 yes Evaluation Count:11856 | yes Evaluation Count:67572 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff yes Evaluation Count:135056 | yes Evaluation Count:2510132 |
evaluated: s >= 0xff000000 yes Evaluation Count:157247 | yes Evaluation Count:177189 |
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:79428 | yes Evaluation Count:1136722 |
evaluated: x < length-3 yes Evaluation Count:2645188 | yes Evaluation Count:1136722 |
evaluated: x < length yes Evaluation Count:334436 | yes Evaluation Count:1136722 |
executed: dst[x] = s; Execution Count:11856 executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); Execution Count:56107 executed: } Execution Count:135056 executed: } Execution Count:2381974 executed: dst[x] = s; Execution Count:157247 executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); Execution Count:26316 | 11465-2645188 |
112 | } else { executed: } Execution Count:1136722 | 1136722 |
113 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
// const_alpha != 255 path.
// Scalar loops: non-zero source pixels are scaled with BYTE_MUL(s, const_alpha), then blended.
// SSE2 loop: skipped when all four source pixels are zero; otherwise the source vector is
// multiplied by constAlphaVector and the scaled source is blended over the destination with
// the same AG/RB multiply-and-round sequence as the other path.
114 | { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } for (; x < length-3; x += 4) { __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { { __m128i pixelVectorAG = _mm_srli_epi16(srcVector, 8); __m128i pixelVectorRB = _mm_and_si128(srcVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, constAlphaVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, constAlphaVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); srcVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, 
pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } for (; x < length; ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } }; evaluated: s != 0 yes Evaluation Count:39788 | yes Evaluation Count:916 |
partially evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff yes Evaluation Count:1170552 | no Evaluation Count:0 |
evaluated: s != 0 yes Evaluation Count:144150 | yes Evaluation Count:311 |
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:40704 | yes Evaluation Count:63433 |
evaluated: x < length-3 yes Evaluation Count:1170552 | yes Evaluation Count:63433 |
evaluated: x < length yes Evaluation Count:144461 | yes Evaluation Count:63433 |
executed: } Execution Count:39788 executed: } Execution Count:40704 executed: } Execution Count:1170552 executed: } Execution Count:1170552 executed: } Execution Count:144150 executed: } Execution Count:144461 | 0-1170552 |
115 | } executed: } Execution Count:63433 | 63433 |
116 | } | - |
117 | | - |
// comp_func_Plus_sse2: additive composition of `length` source pixels into dst.
//   const_alpha == 255 : each destination byte becomes the saturating unsigned sum of the
//                        source and destination bytes (_mm_adds_epu8 in the SSE2 loop,
//                        comp_func_Plus_one_pixel for scalar pixels).
//   otherwise          : the saturated sum is interpolated with the original destination,
//                        weighted by const_alpha and 255 - const_alpha (inline SSE2 sequence,
//                        or comp_func_Plus_one_pixel_const_alpha for scalar pixels).
// Both scalar helpers are defined outside this view. Each path runs a scalar prologue until
// dst reaches a 16-byte boundary (the count formula assumes dst is 4-byte aligned), an SSE2
// loop over 4 pixels (unaligned load from src, aligned load/store on dst) and a scalar tail.
// The coverage column shows this function was never executed in the recorded run.
118 | void comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha) | - |
119 | { | - |
// x is shared by the prologue, SSE2 and tail loops of whichever branch runs.
120 | int x = 0; | - |
121 | | - |
122 | if (const_alpha == 255) { never evaluated: const_alpha == 255 | 0 |
123 | | - |
124 | for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) | 0 |
125 | dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); never executed: dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); | 0 |
126 | | - |
127 | | - |
128 | for (; x < length - 3; x += 4) { never evaluated: x < length - 3 | 0 |
129 | const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); | - |
130 | const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
131 | | - |
132 | const __m128i result = _mm_adds_epu8(srcVector, dstVector); | - |
133 | _mm_store_si128((__m128i *)&dst[x], result); | - |
134 | } | 0 |
135 | | - |
136 | | - |
137 | for (; x < length; ++x) never evaluated: x < length | 0 |
138 | dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); never executed: dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); | 0 |
139 | } else { | 0 |
140 | const int one_minus_const_alpha = 255 - const_alpha; | - |
141 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
142 | const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); | - |
143 | | - |
144 | | - |
145 | for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) | 0 |
146 | dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); never executed: dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); | 0 |
147 | | - |
148 | const __m128i half = _mm_set1_epi16(0x80); | - |
149 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
150 | | - |
151 | for (; x < length - 3; x += 4) { never evaluated: x < length - 3 | 0 |
152 | const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); | - |
153 | const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
154 | | - |
// First the saturated sum, then result = sum * const_alpha + dst * (255 - const_alpha),
// computed on AG and RB 16-bit lanes and rounded with the "+ (x >> 8) + 0x80" sequence.
155 | __m128i result = _mm_adds_epu8(srcVector, dstVector); | - |
156 | { __m128i srcVectorAG = _mm_srli_epi16(result, 8); __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, constAlphaVector); __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusConstAlpha); __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); finalAG = _mm_add_epi16(finalAG, half); finalAG = _mm_andnot_si128(colorMask, finalAG); __m128i srcVectorRB = _mm_and_si128(result, colorMask); __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, constAlphaVector); __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusConstAlpha); __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); finalRB = _mm_add_epi16(finalRB, half); finalRB = _mm_srli_epi16(finalRB, 8); result = _mm_or_si128(finalAG, finalRB); } | - |
157 | _mm_store_si128((__m128i *)&dst[x], result); | - |
158 | } | 0 |
159 | | - |
160 | | - |
161 | for (; x < length; ++x) never evaluated: x < length | 0 |
162 | dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); never executed: dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); | 0 |
163 | } | 0 |
164 | } | - |
165 | | - |
// comp_func_Source_sse2: replaces destination pixels with source pixels.
//   const_alpha == 255 : copies length * sizeof(uint) bytes from src to dst with memcpy.
//   otherwise          : each pixel becomes src * const_alpha + dst * (255 - const_alpha),
//                        via INTERPOLATE_PIXEL_255 (defined outside this view) for scalar
//                        pixels and an inline SSE2 sequence for groups of 4 pixels.
// The interpolating path runs a scalar prologue until dst reaches a 16-byte boundary (the
// count formula assumes dst is 4-byte aligned), the SSE2 loop, then a scalar tail.
// The coverage column shows the SSE2 loop body was never entered in the recorded run.
166 | void comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha) | - |
167 | { | - |
168 | if (const_alpha == 255) { evaluated: const_alpha == 255 yes Evaluation Count:63722 | yes Evaluation Count:3248 |
| 3248-63722 |
169 | ::memcpy(dst, src, length * sizeof(uint)); | - |
170 | } else { executed: } Execution Count:63728 | 63728 |
// Destination weight, complementary to const_alpha.
171 | const int ialpha = 255 - const_alpha; | - |
172 | | - |
173 | int x = 0; | - |
174 | | - |
175 | | - |
176 | for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:2425 | yes Evaluation Count:3248 |
| 2425-3248 |
177 | dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); executed: dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); Execution Count:2425 | 2425 |
178 | | - |
179 | | - |
180 | const __m128i half = _mm_set1_epi16(0x80); | - |
181 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
182 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
183 | const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha); | - |
184 | for (; x < length - 3; x += 4) { partially evaluated: x < length - 3 no Evaluation Count:0 | yes Evaluation Count:3248 |
| 0-3248 |
185 | const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); | - |
186 | __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
// Weighted sum on AG and RB 16-bit lanes, rounded with the "+ (x >> 8) + 0x80" sequence;
// the result overwrites dstVector.
187 | { __m128i srcVectorAG = _mm_srli_epi16(srcVector, 8); __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, constAlphaVector); __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusConstAlpha); __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); finalAG = _mm_add_epi16(finalAG, half); finalAG = _mm_andnot_si128(colorMask, finalAG); __m128i srcVectorRB = _mm_and_si128(srcVector, colorMask); __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, constAlphaVector); __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusConstAlpha); __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); finalRB = _mm_add_epi16(finalRB, half); finalRB = _mm_srli_epi16(finalRB, 8); dstVector = _mm_or_si128(finalAG, finalRB); } | - |
188 | _mm_store_si128((__m128i *)&dst[x], dstVector); | - |
189 | } | 0 |
190 | | - |
191 | | - |
192 | for (; x < length; ++x) evaluated: x < length yes Evaluation Count:835 | yes Evaluation Count:3248 |
| 835-3248 |
193 | dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); executed: dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); Execution Count:835 | 835 |
194 | } executed: } Execution Count:3248 | 3248 |
195 | } | - |
196 | | - |
// qt_memfill32_sse2: writes `value` into `count` consecutive 32-bit words starting at dest.
//   count < 7 : a switch with intentional fall-through writes exactly `count` words;
//               counts below 1 match no case, so nothing is written.
//   otherwise : (1) 0-3 leading words are written so that dest reaches a 16-byte boundary,
//               (2) whole 16-byte groups are written with _mm_stream_si128,
//               (3) the remaining count & 3 words are written individually.
// The alignment switch only handles low address nibbles 4, 8 and 12, so it assumes dest is
// at least 4-byte aligned; any other misalignment would leave dest unaligned for the stores.
197 | void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) | - |
198 | { | - |
199 | if (count < 7) { evaluated: count < 7 yes Evaluation Count:862603 | yes Evaluation Count:1366744 |
| 862603-1366744 |
// Every case falls through to the next, so entering at case N performs N stores.
200 | switch (count) { | - |
201 | case 6: *dest++ = value; | - |
202 | case 5: *dest++ = value; code before this statement executed: case 5: Execution Count:161820 | 161820 |
203 | case 4: *dest++ = value; code before this statement executed: case 4: Execution Count:189515 | 189515 |
204 | case 3: *dest++ = value; code before this statement executed: case 3: Execution Count:215554 | 215554 |
205 | case 2: *dest++ = value; code before this statement executed: case 2: Execution Count:241583 | 241583 |
206 | case 1: *dest = value; code before this statement executed: case 1: Execution Count:323948 | 323948 |
207 | } executed: } Execution Count:862603 | 862603 |
208 | return; executed: return; Execution Count:862603 | 862603 |
209 | }; | - |
210 | | - |
// Byte offset of dest within its 16-byte line. Offset 4 needs three leading words,
// 8 needs two, 12 needs one (fall-through again); offset 0 needs none.
211 | const int align = (quintptr)(dest) & 0xf; | - |
212 | switch (align) { | - |
213 | case 4: *dest++ = value; --count; | - |
214 | case 8: *dest++ = value; --count; code before this statement executed: case 8: Execution Count:473513 | 473513 |
215 | case 12: *dest++ = value; --count; code before this statement executed: case 12: Execution Count:713293 | 713293 |
216 | } executed: } Execution Count:1036622 | 1036622 |
217 | | - |
// count was at least 7 and at most 3 words were consumed above, so count128 is at least 1.
218 | int count128 = count / 4; | - |
219 | __m128i *dst128 = reinterpret_cast<__m128i*>(dest); | - |
220 | const __m128i value128 = _mm_set_epi32(value, value, value, value); | - |
221 | | - |
// Duff's-device style unrolling: n is the number of passes through the do/while body, and the
// switch jumps into the middle of the first pass so that exactly count128 stores are issued.
222 | int n = (count128 + 3) / 4; | - |
223 | switch (count128 & 0x3) { | - |
224 | case 0: do { _mm_stream_si128(dst128++, value128); | - |
225 | case 3: _mm_stream_si128(dst128++, value128); code before this statement executed: case 3: Execution Count:14972429 | 14972429 |
226 | case 2: _mm_stream_si128(dst128++, value128); code before this statement executed: case 2: Execution Count:15192949 | 15192949 |
227 | case 1: _mm_stream_si128(dst128++, value128); code before this statement executed: case 1: Execution Count:15564464 | 15564464 |
228 | } while (--n > 0); evaluated: --n > 0 yes Evaluation Count:14635070 | yes Evaluation Count:1366744 |
executed: } Execution Count:16001814 | 1366744-16001814 |
229 | } executed: } Execution Count:1366744 | 1366744 |
230 | | - |
// dest still points at the first aligned word and count is the number of words from there,
// so the trailing words are addressed from the end: dest[count - 3] .. dest[count - 1].
231 | const int rest = count & 0x3; | - |
232 | if (rest) { evaluated: rest yes Evaluation Count:1091376 | yes Evaluation Count:275368 |
| 275368-1091376 |
233 | switch (rest) { | - |
234 | case 3: dest[count - 3] = value; | - |
235 | case 2: dest[count - 2] = value; code before this statement executed: case 2: Execution Count:543520 | 543520 |
236 | case 1: dest[count - 1] = value; code before this statement executed: case 1: Execution Count:811996 | 811996 |
237 | } executed: } Execution Count:1091376 | 1091376 |
238 | } executed: } Execution Count:1091376 | 1091376 |
239 | } executed: } Execution Count:1366744 | 1366744 |
240 | | - |
// comp_func_solid_SourceOver_sse2: blends one color over `length` destination pixels.
//   destPixels  : pixels to update in place.
//   color       : the 32-bit color to apply.
//   const_alpha : extra factor; when it is not 255 the color is first scaled with
//                 BYTE_MUL(color, const_alpha).
// When (const_alpha & qAlpha(color)) == 255 the run is simply filled with color through
// qt_memfill32_sse2. Otherwise every pixel becomes color + BYTE_MUL(dst, qAlpha(~color)),
// using scalar code for the unaligned head and the tail and SSE2 for groups of 4 pixels.
// BYTE_MUL and qAlpha are defined outside this view.
241 | void comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha) | - |
242 | { | - |
243 | if ((const_alpha & qAlpha(color)) == 255) { evaluated: (const_alpha & qAlpha(color)) == 255 yes Evaluation Count:2037 | yes Evaluation Count:36156 |
| 2037-36156 |
244 | qt_memfill32_sse2(destPixels, color, length); | - |
245 | } else { executed: } Execution Count:2037 | 2037 |
246 | if (const_alpha != 255) evaluated: const_alpha != 255 yes Evaluation Count:18265 | yes Evaluation Count:17891 |
| 17891-18265 |
247 | color = BYTE_MUL(color, const_alpha); executed: color = BYTE_MUL(color, const_alpha); Execution Count:18265 | 18265 |
248 | | - |
// Destination weight, computed once from the (possibly scaled) color.
249 | const quint32 minusAlphaOfColor = qAlpha(~color); | - |
250 | int x = 0; | - |
251 | | - |
// dst aliases destPixels; the scalar loops index destPixels and the SSE2 loop indexes dst.
252 | quint32 *dst = (quint32 *) destPixels; | - |
253 | const __m128i colorVector = _mm_set1_epi32(color); | - |
254 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
255 | const __m128i half = _mm_set1_epi16(0x80); | - |
256 | const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor); | - |
257 | | - |
// Scalar prologue: up to 3 pixels until dst sits on a 16-byte boundary
// (the count formula assumes dst is 4-byte aligned), capped at length.
258 | for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:32425 | yes Evaluation Count:36156 |
| 32425-36156 |
259 | destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); executed: destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); Execution Count:32425 | 32425 |
260 | | - |
261 | for (; x < length-3; x += 4) { evaluated: x < length-3 yes Evaluation Count:172971 | yes Evaluation Count:36156 |
| 36156-172971 |
262 | __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
// dstVector *= minusAlphaOfColor on AG and RB 16-bit lanes, rounded with the
// "+ (x >> 8) + 0x80, >> 8" sequence; the color is then added with _mm_add_epi8.
263 | { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, minusAlphaOfColorVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, minusAlphaOfColorVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); dstVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; | - |
264 | dstVector = _mm_add_epi8(colorVector, dstVector); | - |
265 | _mm_store_si128((__m128i *)&dst[x], dstVector); | - |
266 | } executed: } Execution Count:172971 | 172971 |
// Scalar tail for the last 0-3 pixels.
267 | for (;x < length; ++x) evaluated: x < length yes Evaluation Count:18654 | yes Evaluation Count:36156 |
| 18654-36156 |
268 | destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); executed: destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); Execution Count:18654 | 18654 |
269 | } executed: } Execution Count:36156 | 36156 |
270 | } | - |
271 | | - |
272 | | - |
// Table of solid-color composition functions, declared with numCompositionFunctions entries.
// Only the first entry is the SSE2 routine defined in this file
// (comp_func_solid_SourceOver_sse2); every other entry names a function defined outside
// this view.
// NOTE(review): the entry order presumably mirrors the composition-mode enumeration used to
// index the table - confirm against the enum before reordering or inserting entries.
273 | CompositionFunctionSolid qt_functionForModeSolid_SSE2[numCompositionFunctions] = { | - |
274 | comp_func_solid_SourceOver_sse2, | - |
275 | comp_func_solid_DestinationOver, | - |
276 | comp_func_solid_Clear, | - |
277 | comp_func_solid_Source, | - |
278 | comp_func_solid_Destination, | - |
279 | comp_func_solid_SourceIn, | - |
280 | comp_func_solid_DestinationIn, | - |
281 | comp_func_solid_SourceOut, | - |
282 | comp_func_solid_DestinationOut, | - |
283 | comp_func_solid_SourceAtop, | - |
284 | comp_func_solid_DestinationAtop, | - |
285 | comp_func_solid_XOR, | - |
286 | comp_func_solid_Plus, | - |
287 | comp_func_solid_Multiply, | - |
288 | comp_func_solid_Screen, | - |
289 | comp_func_solid_Overlay, | - |
290 | comp_func_solid_Darken, | - |
291 | comp_func_solid_Lighten, | - |
292 | comp_func_solid_ColorDodge, | - |
293 | comp_func_solid_ColorBurn, | - |
294 | comp_func_solid_HardLight, | - |
295 | comp_func_solid_SoftLight, | - |
296 | comp_func_solid_Difference, | - |
297 | comp_func_solid_Exclusion, | - |
// The remaining entries are the rasterop_solid_* functions.
298 | rasterop_solid_SourceOrDestination, | - |
299 | rasterop_solid_SourceAndDestination, | - |
300 | rasterop_solid_SourceXorDestination, | - |
301 | rasterop_solid_NotSourceAndNotDestination, | - |
302 | rasterop_solid_NotSourceOrNotDestination, | - |
303 | rasterop_solid_NotSourceXorDestination, | - |
304 | rasterop_solid_NotSource, | - |
305 | rasterop_solid_NotSourceAndDestination, | - |
306 | rasterop_solid_SourceAndNotDestination, | - |
307 | rasterop_solid_NotSourceOrDestination, | - |
308 | rasterop_solid_SourceOrNotDestination, | - |
309 | rasterop_solid_ClearDestination, | - |
310 | rasterop_solid_SetDestination, | - |
311 | rasterop_solid_NotDestination | - |
312 | }; | - |
313 | | - |
314 | CompositionFunction qt_functionForMode_SSE2[numCompositionFunctions] = { | - |
315 | comp_func_SourceOver_sse2, | - |
316 | comp_func_DestinationOver, | - |
317 | comp_func_Clear, | - |
318 | comp_func_Source_sse2, | - |
319 | comp_func_Destination, | - |
320 | comp_func_SourceIn, | - |
321 | comp_func_DestinationIn, | - |
322 | comp_func_SourceOut, | - |
323 | comp_func_DestinationOut, | - |
324 | comp_func_SourceAtop, | - |
325 | comp_func_DestinationAtop, | - |
326 | comp_func_XOR, | - |
327 | comp_func_Plus_sse2, | - |
328 | comp_func_Multiply, | - |
329 | comp_func_Screen, | - |
330 | comp_func_Overlay, | - |
331 | comp_func_Darken, | - |
332 | comp_func_Lighten, | - |
333 | comp_func_ColorDodge, | - |
334 | comp_func_ColorBurn, | - |
335 | comp_func_HardLight, | - |
336 | comp_func_SoftLight, | - |
337 | comp_func_Difference, | - |
338 | comp_func_Exclusion, | - |
339 | rasterop_SourceOrDestination, | - |
340 | rasterop_SourceAndDestination, | - |
341 | rasterop_SourceXorDestination, | - |
342 | rasterop_NotSourceAndNotDestination, | - |
343 | rasterop_NotSourceOrNotDestination, | - |
344 | rasterop_NotSourceXorDestination, | - |
345 | rasterop_NotSource, | - |
346 | rasterop_NotSourceAndDestination, | - |
347 | rasterop_SourceAndNotDestination, | - |
348 | rasterop_NotSourceOrDestination, | - |
349 | rasterop_SourceOrNotDestination, | - |
350 | rasterop_ClearDestination, | - |
351 | rasterop_SetDestination, | - |
352 | rasterop_NotDestination | - |
353 | }; | - |
354 | | - |
355 | | - |
356 | void qt_memfill16_sse2(quint16 *dest, quint16 value, int count) | - |
357 | { | - |
358 | if (count < 3) { evaluated: count < 3 yes Evaluation Count:7044494 | yes Evaluation Count:1413641 |
| 1413641-7044494 |
359 | switch (count) { | - |
360 | case 2: *dest++ = value; | - |
361 | case 1: *dest = value; code before this statement executed: case 1: Execution Count:425755 | 425755 |
362 | } executed: } Execution Count:7044494 | 7044494 |
363 | return; executed: return; Execution Count:7044494 | 7044494 |
364 | } | - |
365 | | - |
366 | const int align = (quintptr)(dest) & 0x3; | - |
367 | switch (align) { | - |
368 | case 2: *dest++ = value; --count; | - |
369 | } executed: } Execution Count:235663 | 235663 |
370 | | - |
371 | const quint32 value32 = (value << 16) | value; | - |
372 | qt_memfill32_sse2(reinterpret_cast<quint32*>(dest), value32, count / 2); | - |
373 | | - |
374 | if (count & 0x1) evaluated: count & 0x1 yes Evaluation Count:287920 | yes Evaluation Count:1125721 |
| 287920-1125721 |
375 | dest[count - 1] = value; executed: dest[count - 1] = value; Execution Count:287920 | 287920 |
376 | } executed: } Execution Count:1413641 | 1413641 |
377 | | - |
378 | void qt_bitmapblit32_sse2(QRasterBuffer *rasterBuffer, int x, int y, | - |
379 | quint32 color, | - |
380 | const uchar *src, int width, int height, int stride) | - |
381 | { | - |
382 | quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x; | - |
383 | const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint32); | - |
384 | | - |
385 | const __m128i c128 = _mm_set1_epi32(color); | - |
386 | const __m128i maskmask1 = _mm_set_epi32(0x10101010, 0x20202020, | - |
387 | 0x40404040, 0x80808080); | - |
388 | const __m128i maskadd1 = _mm_set_epi32(0x70707070, 0x60606060, | - |
389 | 0x40404040, 0x00000000); | - |
390 | | - |
391 | if (width > 4) { evaluated: width > 4 yes Evaluation Count:302 | yes Evaluation Count:68 |
| 68-302 |
392 | const __m128i maskmask2 = _mm_set_epi32(0x01010101, 0x02020202, | - |
393 | 0x04040404, 0x08080808); | - |
394 | const __m128i maskadd2 = _mm_set_epi32(0x7f7f7f7f, 0x7e7e7e7e, | - |
395 | 0x7c7c7c7c, 0x78787878); | - |
396 | while (height--) { evaluated: height-- yes Evaluation Count:2738 | yes Evaluation Count:302 |
| 302-2738 |
397 | for (int x = 0; x < width; x += 8) { evaluated: x < width yes Evaluation Count:2754 | yes Evaluation Count:2738 |
| 2738-2754 |
398 | const quint8 s = src[x >> 3]; | - |
399 | if (!s) evaluated: !s yes Evaluation Count:376 | yes Evaluation Count:2378 |
| 376-2378 |
400 | continue; executed: continue; Execution Count:376 | 376 |
401 | __m128i mask1 = _mm_set1_epi8(s); | - |
402 | __m128i mask2 = mask1; | - |
403 | | - |
404 | mask1 = _mm_and_si128(mask1, maskmask1); | - |
405 | mask1 = _mm_add_epi8(mask1, maskadd1); | - |
406 | _mm_maskmoveu_si128(c128, mask1, (char*)(dest + x)); | - |
407 | mask2 = _mm_and_si128(mask2, maskmask2); | - |
408 | mask2 = _mm_add_epi8(mask2, maskadd2); | - |
409 | _mm_maskmoveu_si128(c128, mask2, (char*)(dest + x + 4)); | - |
410 | } executed: } Execution Count:2378 | 2378 |
411 | dest += destStride; | - |
412 | src += stride; | - |
413 | } executed: } Execution Count:2738 | 2738 |
414 | } else { executed: } Execution Count:302 | 302 |
415 | while (height--) { evaluated: height-- yes Evaluation Count:660 | yes Evaluation Count:68 |
| 68-660 |
416 | const quint8 s = *src; | - |
417 | if (s) { evaluated: s yes Evaluation Count:620 | yes Evaluation Count:40 |
| 40-620 |
418 | __m128i mask1 = _mm_set1_epi8(s); | - |
419 | mask1 = _mm_and_si128(mask1, maskmask1); | - |
420 | mask1 = _mm_add_epi8(mask1, maskadd1); | - |
421 | _mm_maskmoveu_si128(c128, mask1, (char*)(dest)); | - |
422 | } executed: } Execution Count:620 | 620 |
423 | dest += destStride; | - |
424 | src += stride; | - |
425 | } executed: } Execution Count:660 | 660 |
426 | } executed: } Execution Count:68 | 68 |
427 | } | - |
428 | | - |
429 | void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y, | - |
430 | quint32 color, | - |
431 | const uchar *src, int width, int height, int stride) | - |
432 | { | - |
433 | const quint16 c = qConvertRgb32To16(color); | - |
434 | quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x; | - |
435 | const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16); | - |
436 | | - |
437 | const __m128i c128 = _mm_set1_epi16(c); | - |
438 | | - |
439 | | - |
440 | | - |
441 | const __m128i maskmask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808, | - |
442 | 0x1010, 0x2020, 0x4040, 0x8080); | - |
443 | const __m128i maskadd = _mm_set_epi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878, | - |
444 | 0x7070, 0x6060, 0x4040, 0x0000); | - |
445 | | - |
446 | while (height--) { never evaluated: height-- | 0 |
447 | for (int x = 0; x < width; x += 8) { never evaluated: x < width | 0 |
448 | const quint8 s = src[x >> 3]; | - |
449 | if (!s) | 0 |
450 | continue; never executed: continue; | 0 |
451 | __m128i mask = _mm_set1_epi8(s); | - |
452 | mask = _mm_and_si128(mask, maskmask); | - |
453 | mask = _mm_add_epi8(mask, maskadd); | - |
454 | _mm_maskmoveu_si128(c128, mask, (char*)(dest + x)); | - |
455 | } | 0 |
456 | dest += destStride; | - |
457 | src += stride; | - |
458 | } | 0 |
459 | } | 0 |
460 | | - |
/*
    Static wrappers that expose a small set of four-lane vector operations in
    terms of SSE/SSE2 intrinsics. The class is passed as the template argument
    of QRadialFetchSimd (see qt_fetch_radial_gradient_sse2()).
*/
class QSimdSse2
{
public:
    typedef __m128i Int32x4;
    typedef __m128 Float32x4;

    // Give per-lane scalar access to a vector value.
    union Vect_buffer_i { Int32x4 v; int i[4]; };
    union Vect_buffer_f { Float32x4 v; float f[4]; };

    // Broadcast one scalar to all four lanes.
    static inline Float32x4 v_dup(float x) { return _mm_set1_ps(x); }
    // The double overload narrows to float, since the lanes are single precision.
    static inline Float32x4 v_dup(double x) { return _mm_set1_ps(static_cast<float>(x)); }
    static inline Int32x4 v_dup(int x) { return _mm_set1_epi32(x); }
    static inline Int32x4 v_dup(uint x) { return _mm_set1_epi32(x); }

    static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return _mm_add_ps(a, b); }
    static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return _mm_add_epi32(a, b); }

    static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return _mm_max_ps(a, b); }
    static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return _mm_min_ps(a, b); }
    // Minimum taken over signed 16-bit lanes, not 32-bit ones.
    static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return _mm_min_epi16(a, b); }

    static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return _mm_and_si128(a, b); }

    static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return _mm_sub_ps(a, b); }
    static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return _mm_sub_epi32(a, b); }

    static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return _mm_mul_ps(a, b); }

    static inline Float32x4 v_sqrt(Float32x4 x) { return _mm_sqrt_ps(x); }

    // Float to int conversion with truncation.
    static inline Int32x4 v_toInt(Float32x4 x) { return _mm_cvttps_epi32(x); }

    // Per-lane a >= b, returned as an all-ones / all-zeros integer mask.
    // This previously used _mm_cmpgt_ps, which rejected lanes where a == b
    // despite the function's name.
    static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b) { return _mm_castps_si128(_mm_cmpge_ps(a, b)); }
};
495 | | - |
496 | const uint * qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data, | - |
497 | int y, int x, int length) | - |
498 | { | - |
499 | return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2> >(buffer, op, data, y, x, length); executed: return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2> >(buffer, op, data, y, x, length); Execution Count:28769 | 28769 |
500 | } | - |
501 | | - |
502 | void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, | - |
503 | const uchar *srcPixels, int sbpl, | - |
504 | const QRectF &targetRect, | - |
505 | const QRectF &sourceRect, | - |
506 | const QRect &clip, | - |
507 | int const_alpha) | - |
508 | { | - |
509 | if (const_alpha != 256) { partially evaluated: const_alpha != 256 no Evaluation Count:0 | yes Evaluation Count:1358 |
| 0-1358 |
510 | | - |
511 | extern void qt_scale_image_argb32_on_argb32(uchar *destPixels, int dbpl, | - |
512 | const uchar *srcPixels, int sbpl, | - |
513 | const QRectF &targetRect, | - |
514 | const QRectF &sourceRect, | - |
515 | const QRect &clip, | - |
516 | int const_alpha); | - |
517 | return qt_scale_image_argb32_on_argb32(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha); never executed: return qt_scale_image_argb32_on_argb32(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha); | 0 |
518 | } | - |
519 | | - |
520 | qreal sx = targetRect.width() / (qreal) sourceRect.width(); | - |
521 | qreal sy = targetRect.height() / (qreal) sourceRect.height(); | - |
522 | | - |
523 | int ix = 0x00010000 / sx; | - |
524 | int iy = 0x00010000 / sy; | - |
525 | | - |
526 | int cx1 = clip.x(); | - |
527 | int cx2 = clip.x() + clip.width(); | - |
528 | int cy1 = clip.top(); | - |
529 | int cy2 = clip.y() + clip.height(); | - |
530 | | - |
531 | int tx1 = qRound(targetRect.left()); | - |
532 | int tx2 = qRound(targetRect.right()); | - |
533 | int ty1 = qRound(targetRect.top()); | - |
534 | int ty2 = qRound(targetRect.bottom()); | - |
535 | | - |
536 | if (tx2 < tx1) evaluated: tx2 < tx1 yes Evaluation Count:6 | yes Evaluation Count:1352 |
| 6-1352 |
537 | qSwap(tx2, tx1); executed: qSwap(tx2, tx1); Execution Count:6 | 6 |
538 | if (ty2 < ty1) evaluated: ty2 < ty1 yes Evaluation Count:6 | yes Evaluation Count:1352 |
| 6-1352 |
539 | qSwap(ty2, ty1); executed: qSwap(ty2, ty1); Execution Count:6 | 6 |
540 | | - |
541 | if (tx1 < cx1) partially evaluated: tx1 < cx1 no Evaluation Count:0 | yes Evaluation Count:1358 |
| 0-1358 |
542 | tx1 = cx1; never executed: tx1 = cx1; | 0 |
543 | if (tx2 >= cx2) evaluated: tx2 >= cx2 yes Evaluation Count:1344 | yes Evaluation Count:14 |
| 14-1344 |
544 | tx2 = cx2; executed: tx2 = cx2; Execution Count:1344 | 1344 |
545 | | - |
546 | if (tx1 >= tx2) partially evaluated: tx1 >= tx2 no Evaluation Count:0 | yes Evaluation Count:1358 |
| 0-1358 |
547 | return; | 0 |
548 | | - |
549 | if (ty1 < cy1) partially evaluated: ty1 < cy1 no Evaluation Count:0 | yes Evaluation Count:1358 |
| 0-1358 |
550 | ty1 = cy1; never executed: ty1 = cy1; | 0 |
551 | if (ty2 >= cy2) evaluated: ty2 >= cy2 yes Evaluation Count:22 | yes Evaluation Count:1336 |
| 22-1336 |
552 | ty2 = cy2; executed: ty2 = cy2; Execution Count:22 | 22 |
553 | if (ty1 >= ty2) evaluated: ty1 >= ty2 yes Evaluation Count:449 | yes Evaluation Count:909 |
| 449-909 |
554 | return; executed: return; Execution Count:449 | 449 |
555 | | - |
556 | int h = ty2 - ty1; | - |
557 | int w = tx2 - tx1; | - |
558 | | - |
559 | quint32 basex; | - |
560 | quint32 srcy; | - |
561 | | - |
562 | if (sx < 0) { evaluated: sx < 0 yes Evaluation Count:6 | yes Evaluation Count:903 |
| 6-903 |
563 | int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1; | - |
564 | basex = quint32(sourceRect.right() * 65536) + dstx; | - |
565 | } else { executed: } Execution Count:6 | 6 |
566 | int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1; | - |
567 | basex = quint32(sourceRect.left() * 65536) + dstx; | - |
568 | } executed: } Execution Count:903 | 903 |
569 | if (sy < 0) { evaluated: sy < 0 yes Evaluation Count:6 | yes Evaluation Count:903 |
| 6-903 |
570 | int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1; | - |
571 | srcy = quint32(sourceRect.bottom() * 65536) + dsty; | - |
572 | } else { executed: } Execution Count:6 | 6 |
573 | int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1; | - |
574 | srcy = quint32(sourceRect.top() * 65536) + dsty; | - |
575 | } executed: } Execution Count:903 | 903 |
576 | | - |
577 | quint32 *dst = ((quint32 *) (destPixels + ty1 * dbpl)) + tx1; | - |
578 | | - |
579 | const __m128i nullVector = _mm_set1_epi32(0); | - |
580 | const __m128i half = _mm_set1_epi16(0x80); | - |
581 | const __m128i one = _mm_set1_epi16(0xff); | - |
582 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
583 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - |
584 | const __m128i ixVector = _mm_set1_epi32(4*ix); | - |
585 | | - |
586 | | - |
587 | while (h--) { evaluated: h-- yes Evaluation Count:1764 | yes Evaluation Count:909 |
| 909-1764 |
588 | const uint *src = (const quint32 *) (srcPixels + (srcy >> 16) * sbpl); | - |
589 | int srcx = basex; | - |
590 | int x = 0; | - |
591 | | - |
592 | for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:752 | yes Evaluation Count:1764 |
| 752-1764 |
593 | uint s = src[srcx >> 16]; | - |
594 | dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); | - |
595 | srcx += ix; | - |
596 | } executed: } Execution Count:752 | 752 |
597 | | - |
598 | __m128i srcxVector = _mm_set_epi32(srcx, srcx + ix, srcx + ix + ix, srcx + ix + ix + ix); | - |
599 | | - |
600 | for (; x<w - 3; x += 4) { evaluated: x<w - 3 yes Evaluation Count:40635 | yes Evaluation Count:1764 |
| 1764-40635 |
601 | union Vect_buffer { __m128i vect; quint32 i[4]; }; | - |
602 | Vect_buffer addr; | - |
603 | addr.vect = _mm_srli_epi32(srcxVector, 16); | - |
604 | srcxVector = _mm_add_epi32(srcxVector, ixVector); | - |
605 | | - |
606 | const __m128i srcVector = _mm_set_epi32(src[addr.i[0]], src[addr.i[1]], src[addr.i[2]], src[addr.i[3]]); | - |
607 | { const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } }; executed: } Execution Count:34682 executed: } Execution Count:607 evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff yes Evaluation Count:607 | yes Evaluation Count:5346 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff yes Evaluation Count:34682 | yes Evaluation Count:5953 |
| 607-34682 |
608 | } | - |
609 | | - |
610 | for (; x<w; x++) { evaluated: x<w yes Evaluation Count:970 | yes Evaluation Count:1764 |
| 970-1764 |
611 | uint s = src[(basex + x*ix) >> 16]; | - |
612 | dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); | - |
613 | } executed: } Execution Count:970 | 970 |
614 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
615 | srcy += iy; | - |
616 | } executed: } Execution Count:1764 | 1764 |
617 | } executed: } Execution Count:909 | 909 |
618 | | - |
619 | | - |
620 | | - |
621 | | - |
| | |