Line | Source | Count |
1 | | - |
2 | | - |
3 | | - |
4 | | - |
5 | | - |
6 | | - |
7 | inline static void blend_pixel(quint32 &dst, const quint32 src) | - |
8 | { | - |
9 | if (src >= 0xff000000)  TRUE: never evaluated, FALSE: never evaluated | 0
10 | dst = src; never executed: dst = src; | 0 |
11 | else if (src != 0)  TRUE: never evaluated, FALSE: never evaluated | 0
12 | dst = src + BYTE_MUL(dst, qAlpha(~src)); never executed: dst = src + BYTE_MUL(dst, qAlpha(~src)); | 0 |
13 | } never executed: end of block | 0 |
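blend_pixel above is the scalar fallback used for the head and tail pixels of the vectorised loops below: it implements source-over compositing for premultiplied ARGB32, i.e. dst' = src + dst * (255 - alpha(src)) / 255, with a fast path for fully opaque sources and a skip for fully transparent ones. A minimal standalone sketch of the same arithmetic, assuming BYTE_MUL is Qt's rounded per-byte multiply (byte_mul_sketch and blend_pixel_sketch are illustrative names, not Qt API):

    #include <cstdint>

    // Rounded multiply of every byte of a 0xAARRGGBB pixel by alpha/255,
    // two channels at a time (the same 0x00ff00ff trick the SIMD code uses).
    static inline uint32_t byte_mul_sketch(uint32_t x, uint32_t alpha)
    {
        uint32_t ag = ((x >> 8) & 0x00ff00ffu) * alpha;
        ag = (ag + ((ag >> 8) & 0x00ff00ffu) + 0x00800080u) & 0xff00ff00u;
        uint32_t rb = (x & 0x00ff00ffu) * alpha;
        rb = ((rb + ((rb >> 8) & 0x00ff00ffu) + 0x00800080u) >> 8) & 0x00ff00ffu;
        return ag | rb;
    }

    // Scalar source-over for premultiplied ARGB32, mirroring blend_pixel above.
    static inline void blend_pixel_sketch(uint32_t &dst, uint32_t src)
    {
        if (src >= 0xff000000u)        // source fully opaque: overwrite
            dst = src;
        else if (src != 0)             // source not fully transparent: composite
            dst = src + byte_mul_sketch(dst, (~src) >> 24);
    }

The 0x00ff00ff masks and the +0x80 rounding term are the scalar counterparts of the colorMask and half constants used by the SIMD code below.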
14 | void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl, | - |
15 | const uchar *srcPixels, int sbpl, | - |
16 | int w, int h, | - |
17 | int const_alpha) | - |
18 | { | - |
19 | const quint32 *src = (const quint32 *) srcPixels; | - |
20 | quint32 *dst = (quint32 *) destPixels; | - |
21 | if (const_alpha == 256) {  TRUE: never evaluated, FALSE: never evaluated | 0
22 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - |
23 | const __m128i nullVector = _mm_setzero_si128(); | - |
24 | const __m128i half = _mm_set1_epi16(0x80); | - |
25 | const __m128i one = _mm_set1_epi16(0xff); | - |
26 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
27 | | - |
28 | for (int y = 0; y < h; ++y) {  TRUE: never evaluated, FALSE: never evaluated | 0
29 | // Vector blend loop for the const_alpha == 256 case, apparently the expansion of Qt's
   | // BLEND_SOURCE_OVER_ARGB32_SSSE3 macro, reformatted here for readability. Every branch in
   | // this row was never evaluated and every block never executed (count 0).
   | { int x = 0;
   |   // Blend leading pixels one at a time until dst is 16-byte aligned.
   |   for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) {
   |       blend_pixel(dst[x], src[x]);
   |   }
   |   const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;
   |   if (!minusOffsetToAlignSrcOn16Bytes) {
   |       // src is 16-byte aligned as well: blend four pixels per iteration with aligned loads.
   |       const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);
   |       for (; x < w-3; x += 4) {
   |           const __m128i srcVector = _mm_load_si128((const __m128i *)&src[x]);
   |           const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask);
   |           if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) {
   |               // All four source pixels opaque: plain copy.
   |               _mm_store_si128((__m128i *)&dst[x], srcVector);
   |           } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) {
   |               // Not all four fully transparent: dst = src + dst * (255 - src.alpha) / 255.
   |               __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask);
   |               alphaChannel = _mm_sub_epi16(one, alphaChannel);
   |               const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
   |               __m128i destMultipliedByOneMinusAlpha;
   |               { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8);
   |                 __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask);
   |                 pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel);
   |                 pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel);
   |                 pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half);
   |                 pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half);
   |                 pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8);
   |                 pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG);
   |                 destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }
   |               const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha);
   |               _mm_store_si128((__m128i *)&dst[x], result);
   |           }
   |       }
   |   } else if ((w - x) >= 8) {
   |       // src and dst are misaligned relative to each other: keep the loads aligned and stitch
   |       // vectors together with palignr. The expansion spells _mm_alignr_epi8(a, b, N) as
   |       // __builtin_ia32_palignr128(a, b, N * 8); the three cases differ only in N.
   |       __m128i srcVectorPrevLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);
   |       const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;
   |       const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);
   |       switch (palignrOffset) {
   |       case 4:
   |           for (; x-minusOffsetToAlignSrcOn16Bytes < w-7; x += 4) {
   |               const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);
   |               const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, 4);
   |               const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask);
   |               if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) {
   |                   _mm_store_si128((__m128i *)&dst[x], srcVector);
   |               } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) {
   |                   __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask);
   |                   alphaChannel = _mm_sub_epi16(one, alphaChannel);
   |                   const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
   |                   __m128i destMultipliedByOneMinusAlpha;
   |                   { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8);
   |                     __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask);
   |                     pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel);
   |                     pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel);
   |                     pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half);
   |                     pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half);
   |                     pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8);
   |                     pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG);
   |                     destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }
   |                   const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha);
   |                   _mm_store_si128((__m128i *)&dst[x], result);
   |               }
   |               srcVectorPrevLoaded = srcVectorLastLoaded;
   |           }
   |           break;
   |       case 8:
   |           for (; x-minusOffsetToAlignSrcOn16Bytes < w-7; x += 4) {
   |               const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);
   |               const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, 8);
   |               const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask);
   |               if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) {
   |                   _mm_store_si128((__m128i *)&dst[x], srcVector);
   |               } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) {
   |                   __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask);
   |                   alphaChannel = _mm_sub_epi16(one, alphaChannel);
   |                   const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
   |                   __m128i destMultipliedByOneMinusAlpha;
   |                   { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8);
   |                     __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask);
   |                     pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel);
   |                     pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel);
   |                     pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half);
   |                     pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half);
   |                     pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8);
   |                     pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG);
   |                     destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }
   |                   const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha);
   |                   _mm_store_si128((__m128i *)&dst[x], result);
   |               }
   |               srcVectorPrevLoaded = srcVectorLastLoaded;
   |           }
   |           break;
   |       case 12:
   |           for (; x-minusOffsetToAlignSrcOn16Bytes < w-7; x += 4) {
   |               const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);
   |               const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, 12);
   |               const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask);
   |               if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) {
   |                   _mm_store_si128((__m128i *)&dst[x], srcVector);
   |               } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) {
   |                   __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask);
   |                   alphaChannel = _mm_sub_epi16(one, alphaChannel);
   |                   const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
   |                   __m128i destMultipliedByOneMinusAlpha;
   |                   { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8);
   |                     __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask);
   |                     pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel);
   |                     pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel);
   |                     pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half);
   |                     pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half);
   |                     pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8);
   |                     pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG);
   |                     destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }
   |                   const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha);
   |                   _mm_store_si128((__m128i *)&dst[x], result);
   |               }
   |               srcVectorPrevLoaded = srcVectorLastLoaded;
   |           }
   |           break;
   |       }
   |   }
   |   // Blend the remaining tail pixels one at a time.
   |   for (; x < w; ++x)
   |       blend_pixel(dst[x], src[x]);
   | } | 0
30 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
31 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
32 | } never executed: end of block | 0 |
33 | } else if (const_alpha != 0) {  never executed: end of block; TRUE: never evaluated, FALSE: never evaluated | 0
34 | | - |
35 | | - |
36 | | - |
37 | const_alpha = (const_alpha * 255) >> 8; | - |
38 | const __m128i nullVector = _mm_setzero_si128(); | - |
39 | const __m128i half = _mm_set1_epi16(0x80); | - |
40 | const __m128i one = _mm_set1_epi16(0xff); | - |
41 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
42 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
43 | for (int y = 0; y < h; ++y) {  TRUE: never evaluated, FALSE: never evaluated | 0
44 | // Vector blend loop for the const_alpha < 256 case, apparently the expansion of Qt's
   | // BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSSE3 macro, reformatted here for readability.
   | // Every branch in this row was never evaluated and every block never executed (count 0).
   | { int x = 0;
   |   // Scale and blend leading pixels one at a time until dst is 16-byte aligned.
   |   for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) {
   |       quint32 s = src[x];
   |       if (s != 0) {
   |           s = BYTE_MUL(s, const_alpha);
   |           dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
   |       }
   |   }
   |   // Four pixels per iteration: unaligned loads from src, aligned loads/stores on dst.
   |   for (; x < w-3; x += 4) {
   |       __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[x]);
   |       if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) {
   |           // Scale the source pixels by const_alpha / 255, per component.
   |           { __m128i pixelVectorAG = _mm_srli_epi16(srcVector, 8);
   |             __m128i pixelVectorRB = _mm_and_si128(srcVector, colorMask);
   |             pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, constAlphaVector);
   |             pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, constAlphaVector);
   |             pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half);
   |             pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half);
   |             pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8);
   |             pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG);
   |             srcVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }
   |           // Broadcast the scaled source alpha into both 16-bit halves and take 255 - alpha.
   |           __m128i alphaChannel = _mm_srli_epi32(srcVector, 24);
   |           alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16));
   |           alphaChannel = _mm_sub_epi16(one, alphaChannel);
   |           const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
   |           __m128i destMultipliedByOneMinusAlpha;
   |           { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8);
   |             __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask);
   |             pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel);
   |             pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel);
   |             pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half);
   |             pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half);
   |             pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8);
   |             pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG);
   |             destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }
   |           const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha);
   |           _mm_store_si128((__m128i *)&dst[x], result);
   |       }
   |   }
   |   // Scale and blend the remaining tail pixels one at a time.
   |   for (; x < w; ++x) {
   |       quint32 s = src[x];
   |       if (s != 0) {
   |           s = BYTE_MUL(s, const_alpha);
   |           dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
   |       }
   |   }
   | } | 0
45 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
46 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
47 | } never executed: end of block | 0 |
48 | } never executed: end of block | 0 |
49 | } never executed: end of block | 0 |
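Every vector path above applies the same per-component multiply: the four ARGB32 pixels are split into their AG and RB 16-bit halves (shift right by 8, respectively mask with colorMask = 0x00ff00ff), each half is multiplied by a 0..255 factor with _mm_mullo_epi16, and the division by 255 is approximated by t += t >> 8; t += 0x80 (the half constant); t >>= 8 before the halves are recombined. The one = 0xff constant turns a source alpha into the 255 - alpha factor for the destination term, const_alpha is remapped from the 0..256 range to 0..255 via (const_alpha * 255) >> 8 so it can be used as such a factor, and the misaligned-source branch keeps its loads aligned by stitching neighbouring vectors together with _mm_alignr_epi8. A small standalone sketch of the multiply step, using SSE2 intrinsics only (byte_mul_4pixels is an illustrative name, not part of Qt):

    #include <emmintrin.h>   // SSE2 suffices here; SSSE3 is only needed above for
                             // _mm_shuffle_epi8 and _mm_alignr_epi8.
    #include <cstdint>
    #include <cstdio>

    // Multiply every byte of four packed ARGB32 pixels by factor/255 (factor in
    // each 16-bit lane), mirroring the pixelVectorAG / pixelVectorRB blocks above.
    static __m128i byte_mul_4pixels(__m128i pixels, __m128i factor)
    {
        const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
        const __m128i half = _mm_set1_epi16(0x80);

        __m128i ag = _mm_srli_epi16(pixels, 8);          // alpha and green bytes
        __m128i rb = _mm_and_si128(pixels, colorMask);   // red and blue bytes
        ag = _mm_mullo_epi16(ag, factor);
        rb = _mm_mullo_epi16(rb, factor);
        ag = _mm_add_epi16(ag, _mm_srli_epi16(ag, 8));   // approximate /255 with rounding
        ag = _mm_add_epi16(ag, half);
        rb = _mm_add_epi16(rb, _mm_srli_epi16(rb, 8));
        rb = _mm_add_epi16(rb, half);
        rb = _mm_srli_epi16(rb, 8);
        ag = _mm_andnot_si128(colorMask, ag);            // keep the high byte of each lane
        return _mm_or_si128(ag, rb);
    }

    int main()
    {
        // Multiply four identical pixels by 128/255 (about 50% opacity).
        __m128i px = _mm_set1_epi32(0x80ff4020);
        alignas(16) uint32_t out[4];
        _mm_store_si128((__m128i *)out, byte_mul_4pixels(px, _mm_set1_epi16(128)));
        std::printf("0x%08x\n", out[0]);   // expect 0x40802010
    }

With a factor of 128 every byte is halved with rounding, so the example prints 0x40802010, the same result the scalar BYTE_MUL path would produce.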
50 | | - |
51 | | - |