Line | Source Code | Coverage |
---|
1 | | - | 2 | | - | 3 | | - | 4 | | - | 5 | | - | 6 | | - | 7 | inline static void blend_pixel(quint32 &dst, const quint32 src) | - | 8 | { | - | 9 | if (src >= 0xff000000) evaluated: src >= 0xff000000 yes Evaluation Count:1646 | yes Evaluation Count:2877 |
| 1646-2877 | 10 | dst = src; executed: dst = src; Execution Count:1646 | 1646 | 11 | else if (src != 0) evaluated: src != 0 yes Evaluation Count:605 | yes Evaluation Count:2272 |
| 605-2272 | 12 | dst = src + BYTE_MUL(dst, qAlpha(~src)); executed: dst = src + BYTE_MUL(dst, qAlpha(~src)); Execution Count:605 | 605 | 13 | } | - | 14 | void qt_blend_argb32_on_argb32_avx(uchar *destPixels, int dbpl, | - | 15 | const uchar *srcPixels, int sbpl, | - | 16 | int w, int h, | - | 17 | int const_alpha) | - | 18 | { | - | 19 | const quint32 *src = (const quint32 *) srcPixels; | - | 20 | quint32 *dst = (quint32 *) destPixels; | - | 21 | if (const_alpha == 256) { evaluated: const_alpha == 256 yes Evaluation Count:44 | yes Evaluation Count:7 |
| 7-44 | 22 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - | 23 | const __m128i nullVector = _mm_setzero_si128(); | - | 24 | const __m128i half = _mm_set1_epi16(0x80); | - | 25 | const __m128i one = _mm_set1_epi16(0xff); | - | 26 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - | 27 | | - | 28 | for (int y = 0; y < h; ++y) { evaluated: y < h yes Evaluation Count:4052 | yes Evaluation Count:44 |
| 44-4052 | 29 | { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { blend_pixel(dst[x], src[x]); } const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3; if (!minusOffsetToAlignSrcOn16Bytes) { const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3); for (; x < w-3; x += 4) { const __m128i srcVector = _mm_load_si128((__m128i *)&src[x]); const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } } else if ((w - x) >= 8) { __m128i srcVectorPrevLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]); const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2; const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3); switch (palignrOffset) { case 4: for (; x-minusOffsetToAlignSrcOn16Bytes < w-7; x += 4) { const __m128i srcVectorLastLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]); const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, 4); const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } srcVectorPrevLoaded = srcVectorLastLoaded; } break; case 8: for (; x-minusOffsetToAlignSrcOn16Bytes < w-7; x += 4) { const __m128i srcVectorLastLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]); const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, 8); const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } srcVectorPrevLoaded = srcVectorLastLoaded; } break; case 12: for (; x-minusOffsetToAlignSrcOn16Bytes < w-7; x += 4) { const __m128i srcVectorLastLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]); const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, 12); const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } srcVectorPrevLoaded = srcVectorLastLoaded; } break; } } for (; x < w; ++x) blend_pixel(dst[x], src[x]); }; executed: } Execution Count:304 executed: } Execution Count:371 executed: } Execution Count:860 executed: break; Execution Count:166 executed: } Execution Count:144 executed: } Execution Count:323 executed: } Execution Count:588 executed: break; Execution Count:110 executed: } Execution Count:810 executed: blend_pixel(dst[x], src[x]); Execution Count:3042 executed: } Execution Count:1481 executed: } Execution Count:81444 evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff yes Evaluation Count:802 | yes Evaluation Count:70109 |
partially evaluated: (w - x) >= 8 yes Evaluation Count:810 | no Evaluation Count:0 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff yes Evaluation Count:1479 | yes Evaluation Count:4091 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff yes Evaluation Count:371 | yes Evaluation Count:185 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff yes Evaluation Count:323 | yes Evaluation Count:121 |
executed: } Execution Count:802 evaluated: !minusOffsetToAlignSrcOn16Bytes yes Evaluation Count:3242 | yes Evaluation Count:810 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff yes Evaluation Count:81444 | yes Evaluation Count:70911 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff yes Evaluation Count:874 | yes Evaluation Count:5570 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff yes Evaluation Count:304 | yes Evaluation Count:556 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff yes Evaluation Count:144 | yes Evaluation Count:444 |
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:1481 | yes Evaluation Count:4052 |
executed: } Execution Count:3242 evaluated: x < w-3 yes Evaluation Count:152355 | yes Evaluation Count:3242 |
evaluated: x-minusOffsetToAlignSrcOn16Bytes < w-7 yes Evaluation Count:6444 | yes Evaluation Count:534 |
evaluated: x-minusOffsetToAlignSrcOn16Bytes < w-7 yes Evaluation Count:860 | yes Evaluation Count:166 |
evaluated: x-minusOffsetToAlignSrcOn16Bytes < w-7 yes Evaluation Count:588 | yes Evaluation Count:110 |
evaluated: x < w yes Evaluation Count:3042 | yes Evaluation Count:4052 |
executed: } Execution Count:874 executed: } Execution Count:1479 executed: } Execution Count:6444 executed: break; Execution Count:534 | 0-152355 | 30 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - | 31 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - | 32 | } executed: } Execution Count:4052 | 4052 | 33 | } else if (const_alpha != 0) { executed: } Execution Count:44 partially evaluated: const_alpha != 0 yes Evaluation Count:7 | no Evaluation Count:0 |
| 0-44 | 34 | | - | 35 | | - | 36 | | - | 37 | const_alpha = (const_alpha * 255) >> 8; | - | 38 | const __m128i nullVector = _mm_setzero_si128(); | - | 39 | const __m128i half = _mm_set1_epi16(0x80); | - | 40 | const __m128i one = _mm_set1_epi16(0xff); | - | 41 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - | 42 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - | 43 | for (int y = 0; y < h; ++y) { evaluated: y < h yes Evaluation Count:780 | yes Evaluation Count:7 |
| 7-780 | 44 | { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } for (; x < w-3; x += 4) { __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { { __m128i pixelVectorAG = _mm_srli_epi16(srcVector, 8); __m128i pixelVectorRB = _mm_and_si128(srcVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, constAlphaVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, constAlphaVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); srcVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } for (; x < w; ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } } executed: } Execution Count:2304 executed: } Execution Count:2304 executed: } Execution Count:23076 executed: } Execution Count:23076 executed: } Execution Count:2304 executed: } Execution Count:2304 partially evaluated: s != 0 yes Evaluation Count:2304 | no Evaluation Count:0 |
partially evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff yes Evaluation Count:23076 | no Evaluation Count:0 |
partially evaluated: s != 0 yes Evaluation Count:2304 | no Evaluation Count:0 |
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) yes Evaluation Count:2304 | yes Evaluation Count:780 |
evaluated: x < w-3 yes Evaluation Count:23076 | yes Evaluation Count:780 |
evaluated: x < w yes Evaluation Count:2304 | yes Evaluation Count:780 |
| 0-23076 | 45 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - | 46 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - | 47 | } executed: } Execution Count:780 | 780 | 48 | } executed: } Execution Count:7 | 7 | 49 | } | - | 50 | | - | 51 | | - | 52 | | - | | | |
|