qdrawhelper_ssse3.cpp

Absolute File Name:/home/qt/qt5_coco/qt5/qtbase/src/gui/painting/qdrawhelper_ssse3.cpp
Switch to Source codePreprocessed file
LineSourceCount
1-
2-
3-
4-
5-
6-
7inline static void blend_pixel(quint32 &dst, const quint32 src)-
8{-
9 if (src >= 0xff000000
src >= 0xff000000Description
TRUEnever evaluated
FALSEnever evaluated
)
0
10 dst = src;
never executed: dst = src;
0
11 else if (src != 0
src != 0Description
TRUEnever evaluated
FALSEnever evaluated
)
0
12 dst = src + BYTE_MUL(dst, qAlpha(~src));
never executed: dst = src + BYTE_MUL(dst, qAlpha(~src));
0
13}
never executed: end of block
0
14void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,-
15 const uchar *srcPixels, int sbpl,-
16 int w, int h,-
17 int const_alpha)-
18{-
19 const quint32 *src = (const quint32 *) srcPixels;-
20 quint32 *dst = (quint32 *) destPixels;-
21 if (const_alpha == 256
const_alpha == 256Description
TRUEnever evaluated
FALSEnever evaluated
) {
0
22 const __m128i alphaMask = _mm_set1_epi32(0xff000000);-
23 const __m128i nullVector = _mm_setzero_si128();-
24 const __m128i half = _mm_set1_epi16(0x80);-
25 const __m128i one = _mm_set1_epi16(0xff);-
26 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);-
27-
28 for (int y = 0; y < h
y < hDescription
TRUEnever evaluated
FALSEnever evaluated
; ++y) {
0
29 { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
x < static_cas...0x3)) & 0x3)))Description
TRUEnever evaluated
FALSEnever evaluated
; ++x) { blend_pixel(dst[x], src[x]); }
never executed: end of block
const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3; if (!minusOffsetToAlignSrcOn16Bytes
!minusOffsetTo...gnSrcOn16BytesDescription
TRUEnever evaluated
FALSEnever evaluated
) { const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3); for (; x < w-3
x < w-3Description
TRUEnever evaluated
FALSEnever evaluated
; x += 4) { const __m128i srcVector = _mm_load_si128((const __m128i *)&src[x]); const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
_mm_movemask_e...sk)) == 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
) { _mm_store_si128((__m128i *)&dst[x], srcVector); }
never executed: end of block
else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
) { __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); }
never executed: end of block
}
never executed: end of block
}
never executed: end of block
else if ((
(w - x) >= 8Description
TRUEnever evaluated
FALSEnever evaluated
w - x) >= 8
(w - x) >= 8Description
TRUEnever evaluated
FALSEnever evaluated
) { __m128i srcVectorPrevLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]); const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2; const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3); switch (palignrOffset) { case
never executed: case 4:
4:
never executed: case 4:
for (; x-minusOffsetToAlignSrcOn16Bytes < w-7
x-minusOffsetT...n16Bytes < w-7Description
TRUEnever evaluated
FALSEnever evaluated
; x += 4) { const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]); const __m128i srcVector = ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(srcVectorLastLoaded), (__v2di)(__m128i)(srcVectorPrevLoaded), (int)(4) * 8)); const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
_mm_movemask_e...sk)) == 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
) { _mm_store_si128((__m128i *)&dst[x], srcVector); }
never executed: end of block
else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
) { __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); }
never executed: end of block
srcVectorPrevLoaded = srcVectorLastLoaded; }
never executed: end of block
break;
never executed: break;
case
never executed: case 8:
8:
never executed: case 8:
for (; x-minusOffsetToAlignSrcOn16Bytes < w-7
x-minusOffsetT...n16Bytes < w-7Description
TRUEnever evaluated
FALSEnever evaluated
; x += 4) { const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]); const __m128i srcVector = ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(srcVectorLastLoaded), (__v2di)(__m128i)(srcVectorPrevLoaded), (int)(8) * 8)); const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
_mm_movemask_e...sk)) == 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
) { _mm_store_si128((__m128i *)&dst[x], srcVector); }
never executed: end of block
else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
) { __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); }
never executed: end of block
srcVectorPrevLoaded = srcVectorLastLoaded; }
never executed: end of block
break;
never executed: break;
case
never executed: case 12:
12:
never executed: case 12:
for (; x-minusOffsetToAlignSrcOn16Bytes < w-7
x-minusOffsetT...n16Bytes < w-7Description
TRUEnever evaluated
FALSEnever evaluated
; x += 4) { const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]); const __m128i srcVector = ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(srcVectorLastLoaded), (__v2di)(__m128i)(srcVectorPrevLoaded), (int)(12) * 8)); const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff
_mm_movemask_e...sk)) == 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
) { _mm_store_si128((__m128i *)&dst[x], srcVector); }
never executed: end of block
else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
) { __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); }
never executed: end of block
srcVectorPrevLoaded = srcVectorLastLoaded; }
never executed: end of block
break;
never executed: break;
} }
never executed: end of block
for (; x < w
x < wDescription
TRUEnever evaluated
FALSEnever evaluated
; ++x) blend_pixel(dst[x], src[x]);
never executed: blend_pixel(dst[x], src[x]);
};
0
30 dst = (quint32 *)(((uchar *) dst) + dbpl);-
31 src = (const quint32 *)(((const uchar *) src) + sbpl);-
32 }
never executed: end of block
0
33 }
never executed: end of block
else if (const_alpha != 0
const_alpha != 0Description
TRUEnever evaluated
FALSEnever evaluated
) {
0
34-
35-
36-
37 const_alpha = (const_alpha * 255) >> 8;-
38 const __m128i nullVector = _mm_setzero_si128();-
39 const __m128i half = _mm_set1_epi16(0x80);-
40 const __m128i one = _mm_set1_epi16(0xff);-
41 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);-
42 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);-
43 for (int y = 0; y < h
y < hDescription
TRUEnever evaluated
FALSEnever evaluated
; ++y) {
0
44 { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))
x < static_cas...0x3)) & 0x3)))Description
TRUEnever evaluated
FALSEnever evaluated
; ++x) { quint32 s = src[x]; if (s != 0
s != 0Description
TRUEnever evaluated
FALSEnever evaluated
) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); }
never executed: end of block
}
never executed: end of block
for (; x < w-3
x < w-3Description
TRUEnever evaluated
FALSEnever evaluated
; x += 4) { __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[x]); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff
_mm_movemask_e...or)) != 0xffffDescription
TRUEnever evaluated
FALSEnever evaluated
) { { __m128i pixelVectorAG = _mm_srli_epi16(srcVector, 8); __m128i pixelVectorRB = _mm_and_si128(srcVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, constAlphaVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, constAlphaVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); srcVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); }
never executed: end of block
}
never executed: end of block
for (; x < w
x < wDescription
TRUEnever evaluated
FALSEnever evaluated
; ++x) { quint32 s = src[x]; if (s != 0
s != 0Description
TRUEnever evaluated
FALSEnever evaluated
) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); }
never executed: end of block
}
never executed: end of block
}
0
45 dst = (quint32 *)(((uchar *) dst) + dbpl);-
46 src = (const quint32 *)(((const uchar *) src) + sbpl);-
47 }
never executed: end of block
0
48 }
never executed: end of block
0
49}
never executed: end of block
0
50-
51-
Switch to Source codePreprocessed file

Generated by Squish Coco Non-Commercial 4.3.0-BETA-master-30-08-2018-4cb69e9