painting/qdrawhelper_ssse3.cpp

Switch to Source codePreprocessed file
LineSource CodeCoverage
1 -
2 -
3 -
4 -
5 -
6 -
7inline static void blend_pixel(quint32 &dst, const quint32 src) -
8{ -
9 if (src >= 0xff000000)
evaluated: src >= 0xff000000
TRUEFALSE
yes
Evaluation Count:1646
yes
Evaluation Count:2877
1646-2877
10 dst = src;
executed: dst = src;
Execution Count:1646
1646
11 else if (src != 0)
evaluated: src != 0
TRUEFALSE
yes
Evaluation Count:605
yes
Evaluation Count:2272
605-2272
12 dst = src + BYTE_MUL(dst, qAlpha(~src));
executed: dst = src + BYTE_MUL(dst, qAlpha(~src));
Execution Count:605
605
13} -
14void qt_blend_argb32_on_argb32_avx(uchar *destPixels, int dbpl, -
15 const uchar *srcPixels, int sbpl, -
16 int w, int h, -
17 int const_alpha) -
18{ -
19 const quint32 *src = (const quint32 *) srcPixels; -
20 quint32 *dst = (quint32 *) destPixels; -
21 if (const_alpha == 256) {
evaluated: const_alpha == 256
TRUEFALSE
yes
Evaluation Count:44
yes
Evaluation Count:7
7-44
22 const __m128i alphaMask = _mm_set1_epi32(0xff000000); -
23 const __m128i nullVector = _mm_setzero_si128(); -
24 const __m128i half = _mm_set1_epi16(0x80); -
25 const __m128i one = _mm_set1_epi16(0xff); -
26 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -
27 -
28 for (int y = 0; y < h; ++y) {
evaluated: y < h
TRUEFALSE
yes
Evaluation Count:4052
yes
Evaluation Count:44
44-4052
29 { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { blend_pixel(dst[x], src[x]); } const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3; if (!minusOffsetToAlignSrcOn16Bytes) { const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3); for (; x < w-3; x += 4) { const __m128i srcVector = _mm_load_si128((__m128i *)&src[x]); const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } } else if ((w - x) >= 8) { __m128i srcVectorPrevLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]); const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2; const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3); switch (palignrOffset) { case 4: for (; x-minusOffsetToAlignSrcOn16Bytes < w-7; x += 4) { const __m128i srcVectorLastLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]); const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, 4); const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } srcVectorPrevLoaded = srcVectorLastLoaded; } break; case 8: for (; x-minusOffsetToAlignSrcOn16Bytes < w-7; x += 4) { const __m128i srcVectorLastLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]); const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, 8); const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } srcVectorPrevLoaded = srcVectorLastLoaded; } break; case 12: for (; x-minusOffsetToAlignSrcOn16Bytes < w-7; x += 4) { const __m128i srcVectorLastLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]); const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, 12); const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } srcVectorPrevLoaded = srcVectorLastLoaded; } break; } } for (; x < w; ++x) blend_pixel(dst[x], src[x]); };
executed: }
Execution Count:304
executed: }
Execution Count:371
executed: }
Execution Count:860
executed: break;
Execution Count:166
executed: }
Execution Count:144
executed: }
Execution Count:323
executed: }
Execution Count:588
executed: break;
Execution Count:110
executed: }
Execution Count:810
executed: blend_pixel(dst[x], src[x]);
Execution Count:3042
executed: }
Execution Count:1481
executed: }
Execution Count:81444
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:802
yes
Evaluation Count:70109
partially evaluated: (w - x) >= 8
TRUEFALSE
yes
Evaluation Count:810
no
Evaluation Count:0
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:1479
yes
Evaluation Count:4091
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:371
yes
Evaluation Count:185
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:323
yes
Evaluation Count:121
executed: }
Execution Count:802
evaluated: !minusOffsetToAlignSrcOn16Bytes
TRUEFALSE
yes
Evaluation Count:3242
yes
Evaluation Count:810
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
TRUEFALSE
yes
Evaluation Count:81444
yes
Evaluation Count:70911
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
TRUEFALSE
yes
Evaluation Count:874
yes
Evaluation Count:5570
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
TRUEFALSE
yes
Evaluation Count:304
yes
Evaluation Count:556
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
TRUEFALSE
yes
Evaluation Count:144
yes
Evaluation Count:444
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
TRUEFALSE
yes
Evaluation Count:1481
yes
Evaluation Count:4052
executed: }
Execution Count:3242
evaluated: x < w-3
TRUEFALSE
yes
Evaluation Count:152355
yes
Evaluation Count:3242
evaluated: x-minusOffsetToAlignSrcOn16Bytes < w-7
TRUEFALSE
yes
Evaluation Count:6444
yes
Evaluation Count:534
evaluated: x-minusOffsetToAlignSrcOn16Bytes < w-7
TRUEFALSE
yes
Evaluation Count:860
yes
Evaluation Count:166
evaluated: x-minusOffsetToAlignSrcOn16Bytes < w-7
TRUEFALSE
yes
Evaluation Count:588
yes
Evaluation Count:110
evaluated: x < w
TRUEFALSE
yes
Evaluation Count:3042
yes
Evaluation Count:4052
executed: }
Execution Count:874
executed: }
Execution Count:1479
executed: }
Execution Count:6444
executed: break;
Execution Count:534
0-152355
30 dst = (quint32 *)(((uchar *) dst) + dbpl); -
31 src = (const quint32 *)(((const uchar *) src) + sbpl); -
32 }
executed: }
Execution Count:4052
4052
33 } else if (const_alpha != 0) {
executed: }
Execution Count:44
partially evaluated: const_alpha != 0
TRUEFALSE
yes
Evaluation Count:7
no
Evaluation Count:0
0-44
34 -
35 -
36 -
37 const_alpha = (const_alpha * 255) >> 8; -
38 const __m128i nullVector = _mm_setzero_si128(); -
39 const __m128i half = _mm_set1_epi16(0x80); -
40 const __m128i one = _mm_set1_epi16(0xff); -
41 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -
42 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); -
43 for (int y = 0; y < h; ++y) {
evaluated: y < h
TRUEFALSE
yes
Evaluation Count:780
yes
Evaluation Count:7
7-780
44 { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } for (; x < w-3; x += 4) { __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { { __m128i pixelVectorAG = _mm_srli_epi16(srcVector, 8); __m128i pixelVectorRB = _mm_and_si128(srcVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, constAlphaVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, constAlphaVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); srcVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } for (; x < w; ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } }
executed: }
Execution Count:2304
executed: }
Execution Count:2304
executed: }
Execution Count:23076
executed: }
Execution Count:23076
executed: }
Execution Count:2304
executed: }
Execution Count:2304
partially evaluated: s != 0
TRUEFALSE
yes
Evaluation Count:2304
no
Evaluation Count:0
partially evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff
TRUEFALSE
yes
Evaluation Count:23076
no
Evaluation Count:0
partially evaluated: s != 0
TRUEFALSE
yes
Evaluation Count:2304
no
Evaluation Count:0
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
TRUEFALSE
yes
Evaluation Count:2304
yes
Evaluation Count:780
evaluated: x < w-3
TRUEFALSE
yes
Evaluation Count:23076
yes
Evaluation Count:780
evaluated: x < w
TRUEFALSE
yes
Evaluation Count:2304
yes
Evaluation Count:780
0-23076
45 dst = (quint32 *)(((uchar *) dst) + dbpl); -
46 src = (const quint32 *)(((const uchar *) src) + sbpl); -
47 }
executed: }
Execution Count:780
780
48 }
executed: }
Execution Count:7
7
49} -
50 -
51 -
52 -
Switch to Source codePreprocessed file

Generated by Squish Coco Non-Commercial