| Line | Source Code | Coverage |
|---|
| 1 | | - |
| 2 | | - |
| 3 | | - |
| 4 | | - |
| 5 | | - |
| 6 | | - |
| 7 | | - |
| 8 | | - |
| 9 | void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, | - |
| 10 | const uchar *srcPixels, int sbpl, | - |
| 11 | int w, int h, | - |
| 12 | int const_alpha) | - |
| 13 | { | - |
| 14 | const quint32 *src = (const quint32 *) srcPixels; | - |
| 15 | quint32 *dst = (quint32 *) destPixels; | - |
| 16 | if (const_alpha == 256) { never evaluated: const_alpha == 256 | 0 |
| 17 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - |
| 18 | const __m128i nullVector = _mm_set1_epi32(0); | - |
| 19 | const __m128i half = _mm_set1_epi16(0x80); | - |
| 20 | const __m128i one = _mm_set1_epi16(0xff); | - |
| 21 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
| 22 | for (int y = 0; y < h; ++y) { | 0 |
| 23 | { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { uint s = src[x]; if (s >= 0xff000000) dst[x] = s; else if (s != 0) dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } for (; x < w-3; x += 4) { const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); { const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } } for (; x < w; ++x) { uint s = src[x]; if (s >= 0xff000000) dst[x] = s; else if (s != 0) dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } }; never executed: dst[x] = s; never executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); never evaluated: 
_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff never evaluated: s >= 0xff000000 never evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff never evaluated: s >= 0xff000000 never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) never executed: dst[x] = s; never executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); | 0 |
| 24 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
| 25 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
| 26 | } | 0 |
| 27 | } else if (const_alpha != 0) { never evaluated: const_alpha != 0 | 0 |
| 28 | | - |
| 29 | | - |
| 30 | | - |
| 31 | const_alpha = (const_alpha * 255) >> 8; | - |
| 32 | const __m128i nullVector = _mm_set1_epi32(0); | - |
| 33 | const __m128i half = _mm_set1_epi16(0x80); | - |
| 34 | const __m128i one = _mm_set1_epi16(0xff); | - |
| 35 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
| 36 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
| 37 | for (int y = 0; y < h; ++y) { | 0 |
| 38 | { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } for (; x < w-3; x += 4) { __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { { __m128i pixelVectorAG = _mm_srli_epi16(srcVector, 8); __m128i pixelVectorRB = _mm_and_si128(srcVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, constAlphaVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, constAlphaVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); srcVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); 
destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } for (; x < w; ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } } never evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) | 0 |
| 39 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
| 40 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
| 41 | } | 0 |
| 42 | } | 0 |
| 43 | } | - |
| 44 | | - |
| 45 | | - |
| 46 | | - |
| 47 | void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl, | - |
| 48 | const uchar *srcPixels, int sbpl, | - |
| 49 | int w, int h, | - |
| 50 | int const_alpha); | - |
| 51 | | - |
| 52 | void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, | - |
| 53 | const uchar *srcPixels, int sbpl, | - |
| 54 | int w, int h, | - |
| 55 | int const_alpha) | - |
| 56 | { | - |
| 57 | const quint32 *src = (const quint32 *) srcPixels; | - |
| 58 | quint32 *dst = (quint32 *) destPixels; | - |
| 59 | if (const_alpha != 256) { evaluated: const_alpha != 256| yes Evaluation Count:9 | yes Evaluation Count:40 |
| 9-40 |
| 60 | if (const_alpha != 0) { partially evaluated: const_alpha != 0| yes Evaluation Count:9 | no Evaluation Count:0 |
| 0-9 |
| 61 | const __m128i nullVector = _mm_set1_epi32(0); | - |
| 62 | const __m128i half = _mm_set1_epi16(0x80); | - |
| 63 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
| 64 | | - |
| 65 | const_alpha = (const_alpha * 255) >> 8; | - |
| 66 | int one_minus_const_alpha = 255 - const_alpha; | - |
| 67 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
| 68 | const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); | - |
| 69 | for (int y = 0; y < h; ++y) { evaluated: y < h| yes Evaluation Count:958 | yes Evaluation Count:9 |
| 9-958 |
| 70 | int x = 0; | - |
| 71 | | - |
| 72 | | - |
| 73 | for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))| yes Evaluation Count:2354 | yes Evaluation Count:958 |
| 958-2354 |
| 74 | dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); | - |
| 75 | } executed: }Execution Count:2354 | 2354 |
| 76 | | - |
| 77 | for (; x < w-3; x += 4) { evaluated: x < w-3| yes Evaluation Count:31868 | yes Evaluation Count:958 |
| 958-31868 |
| 78 | __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); | - |
| 79 | if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { partially evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff| yes Evaluation Count:31868 | no Evaluation Count:0 |
| 0-31868 |
| 80 | const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
| 81 | __m128i result; | - |
| 82 | { __m128i srcVectorAG = _mm_srli_epi16(srcVector, 8); __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, constAlphaVector); __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusConstAlpha); __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); finalAG = _mm_add_epi16(finalAG, half); finalAG = _mm_andnot_si128(colorMask, finalAG); __m128i srcVectorRB = _mm_and_si128(srcVector, colorMask); __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, constAlphaVector); __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusConstAlpha); __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); finalRB = _mm_add_epi16(finalRB, half); finalRB = _mm_srli_epi16(finalRB, 8); result = _mm_or_si128(finalAG, finalRB); }; | - |
| 83 | _mm_store_si128((__m128i *)&dst[x], result); | - |
| 84 | } executed: }Execution Count:31868 | 31868 |
| 85 | } executed: }Execution Count:31868 | 31868 |
| 86 | for (; x<w; ++x) { evaluated: x<w| yes Evaluation Count:2354 | yes Evaluation Count:958 |
| 958-2354 |
| 87 | dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); | - |
| 88 | } executed: }Execution Count:2354 | 2354 |
| 89 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
| 90 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
| 91 | } executed: }Execution Count:958 | 958 |
| 92 | } executed: }Execution Count:9 | 9 |
| 93 | } else { executed: }Execution Count:9 | 9 |
| 94 | qt_blend_rgb32_on_rgb32(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); | - |
| 95 | } executed: }Execution Count:40 | 40 |
| 96 | } | - |
| 97 | | - |
| 98 | void comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) | - |
| 99 | { | - |
| 100 | qt_noop(); | - |
| 101 | | - |
| 102 | const quint32 *src = (const quint32 *) srcPixels; | - |
| 103 | quint32 *dst = (quint32 *) destPixels; | - |
| 104 | | - |
| 105 | const __m128i nullVector = _mm_set1_epi32(0); | - |
| 106 | const __m128i half = _mm_set1_epi16(0x80); | - |
| 107 | const __m128i one = _mm_set1_epi16(0xff); | - |
| 108 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
| 109 | if (const_alpha == 255) { evaluated: const_alpha == 255| yes Evaluation Count:1136722 | yes Evaluation Count:63433 |
| 63433-1136722 |
| 110 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - |
| 111 | { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { uint s = src[x]; if (s >= 0xff000000) dst[x] = s; else if (s != 0) dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } for (; x < length-3; x += 4) { const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); { const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } } for (; x < length; ++x) { uint s = src[x]; if (s >= 0xff000000) dst[x] = s; else if (s != 0) dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } }; evaluated: s != 0| yes Evaluation Count:56107 | yes Evaluation Count:11465 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff| yes Evaluation Count:2381974 | yes Evaluation Count:128158 |
evaluated: s != 0| yes Evaluation Count:26316 | yes Evaluation Count:150873 |
evaluated: s >= 0xff000000| yes Evaluation Count:11856 | yes Evaluation Count:67572 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff| yes Evaluation Count:135056 | yes Evaluation Count:2510132 |
evaluated: s >= 0xff000000| yes Evaluation Count:157247 | yes Evaluation Count:177189 |
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))| yes Evaluation Count:79428 | yes Evaluation Count:1136722 |
evaluated: x < length-3| yes Evaluation Count:2645188 | yes Evaluation Count:1136722 |
evaluated: x < length| yes Evaluation Count:334436 | yes Evaluation Count:1136722 |
executed: dst[x] = s;Execution Count:11856 executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));Execution Count:56107 executed: }Execution Count:135056 executed: }Execution Count:2381974 executed: dst[x] = s;Execution Count:157247 executed: dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));Execution Count:26316 | 11465-2645188 |
| 112 | } else { executed: }Execution Count:1136722 | 1136722 |
| 113 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
| 114 | { int x = 0; for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } for (; x < length-3; x += 4) { __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { { __m128i pixelVectorAG = _mm_srli_epi16(srcVector, 8); __m128i pixelVectorRB = _mm_and_si128(srcVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, constAlphaVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, constAlphaVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); srcVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, 
pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } } for (; x < length; ++x) { quint32 s = src[x]; if (s != 0) { s = BYTE_MUL(s, const_alpha); dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); } } }; evaluated: s != 0| yes Evaluation Count:39788 | yes Evaluation Count:916 |
partially evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff| yes Evaluation Count:1170552 | no Evaluation Count:0 |
evaluated: s != 0| yes Evaluation Count:144150 | yes Evaluation Count:311 |
evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))| yes Evaluation Count:40704 | yes Evaluation Count:63433 |
evaluated: x < length-3| yes Evaluation Count:1170552 | yes Evaluation Count:63433 |
evaluated: x < length| yes Evaluation Count:144461 | yes Evaluation Count:63433 |
executed: }Execution Count:39788 executed: }Execution Count:40704 executed: }Execution Count:1170552 executed: }Execution Count:1170552 executed: }Execution Count:144150 executed: }Execution Count:144461 | 0-1170552 |
| 115 | } executed: }Execution Count:63433 | 63433 |
| 116 | } | - |
| 117 | | - |
| 118 | void comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha) | - |
| 119 | { | - |
| 120 | int x = 0; | - |
| 121 | | - |
| 122 | if (const_alpha == 255) { never evaluated: const_alpha == 255 | 0 |
| 123 | | - |
| 124 | for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) | 0 |
| 125 | dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); never executed: dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); | 0 |
| 126 | | - |
| 127 | | - |
| 128 | for (; x < length - 3; x += 4) { never evaluated: x < length - 3 | 0 |
| 129 | const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); | - |
| 130 | const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
| 131 | | - |
| 132 | const __m128i result = _mm_adds_epu8(srcVector, dstVector); | - |
| 133 | _mm_store_si128((__m128i *)&dst[x], result); | - |
| 134 | } | 0 |
| 135 | | - |
| 136 | | - |
| 137 | for (; x < length; ++x) never evaluated: x < length | 0 |
| 138 | dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); never executed: dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); | 0 |
| 139 | } else { | 0 |
| 140 | const int one_minus_const_alpha = 255 - const_alpha; | - |
| 141 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
| 142 | const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); | - |
| 143 | | - |
| 144 | | - |
| 145 | for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) never evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))) | 0 |
| 146 | dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); never executed: dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); | 0 |
| 147 | | - |
| 148 | const __m128i half = _mm_set1_epi16(0x80); | - |
| 149 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
| 150 | | - |
| 151 | for (; x < length - 3; x += 4) { never evaluated: x < length - 3 | 0 |
| 152 | const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); | - |
| 153 | const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
| 154 | | - |
| 155 | __m128i result = _mm_adds_epu8(srcVector, dstVector); | - |
| 156 | { __m128i srcVectorAG = _mm_srli_epi16(result, 8); __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, constAlphaVector); __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusConstAlpha); __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); finalAG = _mm_add_epi16(finalAG, half); finalAG = _mm_andnot_si128(colorMask, finalAG); __m128i srcVectorRB = _mm_and_si128(result, colorMask); __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, constAlphaVector); __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusConstAlpha); __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); finalRB = _mm_add_epi16(finalRB, half); finalRB = _mm_srli_epi16(finalRB, 8); result = _mm_or_si128(finalAG, finalRB); } | - |
| 157 | _mm_store_si128((__m128i *)&dst[x], result); | - |
| 158 | } | 0 |
| 159 | | - |
| 160 | | - |
| 161 | for (; x < length; ++x) never evaluated: x < length | 0 |
| 162 | dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); never executed: dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); | 0 |
| 163 | } | 0 |
| 164 | } | - |
| 165 | | - |
| 166 | void comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha) | - |
| 167 | { | - |
| 168 | if (const_alpha == 255) { evaluated: const_alpha == 255| yes Evaluation Count:63722 | yes Evaluation Count:3248 |
| 3248-63722 |
| 169 | ::memcpy(dst, src, length * sizeof(uint)); | - |
| 170 | } else { executed: }Execution Count:63728 | 63728 |
| 171 | const int ialpha = 255 - const_alpha; | - |
| 172 | | - |
| 173 | int x = 0; | - |
| 174 | | - |
| 175 | | - |
| 176 | for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))| yes Evaluation Count:2425 | yes Evaluation Count:3248 |
| 2425-3248 |
| 177 | dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); executed: dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);Execution Count:2425 | 2425 |
| 178 | | - |
| 179 | | - |
| 180 | const __m128i half = _mm_set1_epi16(0x80); | - |
| 181 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
| 182 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
| 183 | const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha); | - |
| 184 | for (; x < length - 3; x += 4) { partially evaluated: x < length - 3| no Evaluation Count:0 | yes Evaluation Count:3248 |
| 0-3248 |
| 185 | const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); | - |
| 186 | __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
| 187 | { __m128i srcVectorAG = _mm_srli_epi16(srcVector, 8); __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, constAlphaVector); __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusConstAlpha); __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); finalAG = _mm_add_epi16(finalAG, half); finalAG = _mm_andnot_si128(colorMask, finalAG); __m128i srcVectorRB = _mm_and_si128(srcVector, colorMask); __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, constAlphaVector); __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusConstAlpha); __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); finalRB = _mm_add_epi16(finalRB, half); finalRB = _mm_srli_epi16(finalRB, 8); dstVector = _mm_or_si128(finalAG, finalRB); } | - |
| 188 | _mm_store_si128((__m128i *)&dst[x], dstVector); | - |
| 189 | } | 0 |
| 190 | | - |
| 191 | | - |
| 192 | for (; x < length; ++x) evaluated: x < length| yes Evaluation Count:835 | yes Evaluation Count:3248 |
| 835-3248 |
| 193 | dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); executed: dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);Execution Count:835 | 835 |
| 194 | } executed: }Execution Count:3248 | 3248 |
| 195 | } | - |
| 196 | | - |
| 197 | void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) | - |
| 198 | { | - |
| 199 | if (count < 7) { evaluated: count < 7| yes Evaluation Count:862603 | yes Evaluation Count:1366744 |
| 862603-1366744 |
| 200 | switch (count) { | - |
| 201 | case 6: *dest++ = value; | - |
| 202 | case 5: *dest++ = value; code before this statement executed: case 5:Execution Count:161820 | 161820 |
| 203 | case 4: *dest++ = value; code before this statement executed: case 4:Execution Count:189515 | 189515 |
| 204 | case 3: *dest++ = value; code before this statement executed: case 3:Execution Count:215554 | 215554 |
| 205 | case 2: *dest++ = value; code before this statement executed: case 2:Execution Count:241583 | 241583 |
| 206 | case 1: *dest = value; code before this statement executed: case 1:Execution Count:323948 | 323948 |
| 207 | } executed: }Execution Count:862603 | 862603 |
| 208 | return; executed: return;Execution Count:862603 | 862603 |
| 209 | }; | - |
| 210 | | - |
| 211 | const int align = (quintptr)(dest) & 0xf; | - |
| 212 | switch (align) { | - |
| 213 | case 4: *dest++ = value; --count; | - |
| 214 | case 8: *dest++ = value; --count; code before this statement executed: case 8:Execution Count:473513 | 473513 |
| 215 | case 12: *dest++ = value; --count; code before this statement executed: case 12:Execution Count:713293 | 713293 |
| 216 | } executed: }Execution Count:1036622 | 1036622 |
| 217 | | - |
| 218 | int count128 = count / 4; | - |
| 219 | __m128i *dst128 = reinterpret_cast<__m128i*>(dest); | - |
| 220 | const __m128i value128 = _mm_set_epi32(value, value, value, value); | - |
| 221 | | - |
| 222 | int n = (count128 + 3) / 4; | - |
| 223 | switch (count128 & 0x3) { | - |
| 224 | case 0: do { _mm_stream_si128(dst128++, value128); | - |
| 225 | case 3: _mm_stream_si128(dst128++, value128); code before this statement executed: case 3:Execution Count:14972429 | 14972429 |
| 226 | case 2: _mm_stream_si128(dst128++, value128); code before this statement executed: case 2:Execution Count:15192949 | 15192949 |
| 227 | case 1: _mm_stream_si128(dst128++, value128); code before this statement executed: case 1:Execution Count:15564464 | 15564464 |
| 228 | } while (--n > 0); evaluated: --n > 0| yes Evaluation Count:14635070 | yes Evaluation Count:1366744 |
executed: }Execution Count:16001814 | 1366744-16001814 |
| 229 | } executed: }Execution Count:1366744 | 1366744 |
| 230 | | - |
| 231 | const int rest = count & 0x3; | - |
| 232 | if (rest) { evaluated: rest| yes Evaluation Count:1091376 | yes Evaluation Count:275368 |
| 275368-1091376 |
| 233 | switch (rest) { | - |
| 234 | case 3: dest[count - 3] = value; | - |
| 235 | case 2: dest[count - 2] = value; code before this statement executed: case 2:Execution Count:543520 | 543520 |
| 236 | case 1: dest[count - 1] = value; code before this statement executed: case 1:Execution Count:811996 | 811996 |
| 237 | } executed: }Execution Count:1091376 | 1091376 |
| 238 | } executed: }Execution Count:1091376 | 1091376 |
| 239 | } executed: }Execution Count:1366744 | 1366744 |
| 240 | | - |
| 241 | void comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha) | - |
| 242 | { | - |
| 243 | if ((const_alpha & qAlpha(color)) == 255) { evaluated: (const_alpha & qAlpha(color)) == 255| yes Evaluation Count:2037 | yes Evaluation Count:36156 |
| 2037-36156 |
| 244 | qt_memfill32_sse2(destPixels, color, length); | - |
| 245 | } else { executed: }Execution Count:2037 | 2037 |
| 246 | if (const_alpha != 255) evaluated: const_alpha != 255| yes Evaluation Count:18265 | yes Evaluation Count:17891 |
| 17891-18265 |
| 247 | color = BYTE_MUL(color, const_alpha); executed: color = BYTE_MUL(color, const_alpha);Execution Count:18265 | 18265 |
| 248 | | - |
| 249 | const quint32 minusAlphaOfColor = qAlpha(~color); | - |
| 250 | int x = 0; | - |
| 251 | | - |
| 252 | quint32 *dst = (quint32 *) destPixels; | - |
| 253 | const __m128i colorVector = _mm_set1_epi32(color); | - |
| 254 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
| 255 | const __m128i half = _mm_set1_epi16(0x80); | - |
| 256 | const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor); | - |
| 257 | | - |
| 258 | for (; x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))| yes Evaluation Count:32425 | yes Evaluation Count:36156 |
| 32425-36156 |
| 259 | destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); executed: destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);Execution Count:32425 | 32425 |
| 260 | | - |
| 261 | for (; x < length-3; x += 4) { evaluated: x < length-3| yes Evaluation Count:172971 | yes Evaluation Count:36156 |
| 36156-172971 |
| 262 | __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); | - |
| 263 | { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, minusAlphaOfColorVector); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, minusAlphaOfColorVector); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); dstVector = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; | - |
| 264 | dstVector = _mm_add_epi8(colorVector, dstVector); | - |
| 265 | _mm_store_si128((__m128i *)&dst[x], dstVector); | - |
| 266 | } executed: }Execution Count:172971 | 172971 |
| 267 | for (;x < length; ++x) evaluated: x < length| yes Evaluation Count:18654 | yes Evaluation Count:36156 |
| 18654-36156 |
| 268 | destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); executed: destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);Execution Count:18654 | 18654 |
| 269 | } executed: }Execution Count:36156 | 36156 |
| 270 | } | - |
| 271 | | - |
| 272 | | - |
| 273 | CompositionFunctionSolid qt_functionForModeSolid_SSE2[numCompositionFunctions] = { | - |
| 274 | comp_func_solid_SourceOver_sse2, | - |
| 275 | comp_func_solid_DestinationOver, | - |
| 276 | comp_func_solid_Clear, | - |
| 277 | comp_func_solid_Source, | - |
| 278 | comp_func_solid_Destination, | - |
| 279 | comp_func_solid_SourceIn, | - |
| 280 | comp_func_solid_DestinationIn, | - |
| 281 | comp_func_solid_SourceOut, | - |
| 282 | comp_func_solid_DestinationOut, | - |
| 283 | comp_func_solid_SourceAtop, | - |
| 284 | comp_func_solid_DestinationAtop, | - |
| 285 | comp_func_solid_XOR, | - |
| 286 | comp_func_solid_Plus, | - |
| 287 | comp_func_solid_Multiply, | - |
| 288 | comp_func_solid_Screen, | - |
| 289 | comp_func_solid_Overlay, | - |
| 290 | comp_func_solid_Darken, | - |
| 291 | comp_func_solid_Lighten, | - |
| 292 | comp_func_solid_ColorDodge, | - |
| 293 | comp_func_solid_ColorBurn, | - |
| 294 | comp_func_solid_HardLight, | - |
| 295 | comp_func_solid_SoftLight, | - |
| 296 | comp_func_solid_Difference, | - |
| 297 | comp_func_solid_Exclusion, | - |
| 298 | rasterop_solid_SourceOrDestination, | - |
| 299 | rasterop_solid_SourceAndDestination, | - |
| 300 | rasterop_solid_SourceXorDestination, | - |
| 301 | rasterop_solid_NotSourceAndNotDestination, | - |
| 302 | rasterop_solid_NotSourceOrNotDestination, | - |
| 303 | rasterop_solid_NotSourceXorDestination, | - |
| 304 | rasterop_solid_NotSource, | - |
| 305 | rasterop_solid_NotSourceAndDestination, | - |
| 306 | rasterop_solid_SourceAndNotDestination, | - |
| 307 | rasterop_solid_NotSourceOrDestination, | - |
| 308 | rasterop_solid_SourceOrNotDestination, | - |
| 309 | rasterop_solid_ClearDestination, | - |
| 310 | rasterop_solid_SetDestination, | - |
| 311 | rasterop_solid_NotDestination | - |
| 312 | }; | - |
| 313 | | - |
| 314 | CompositionFunction qt_functionForMode_SSE2[numCompositionFunctions] = { | - |
| 315 | comp_func_SourceOver_sse2, | - |
| 316 | comp_func_DestinationOver, | - |
| 317 | comp_func_Clear, | - |
| 318 | comp_func_Source_sse2, | - |
| 319 | comp_func_Destination, | - |
| 320 | comp_func_SourceIn, | - |
| 321 | comp_func_DestinationIn, | - |
| 322 | comp_func_SourceOut, | - |
| 323 | comp_func_DestinationOut, | - |
| 324 | comp_func_SourceAtop, | - |
| 325 | comp_func_DestinationAtop, | - |
| 326 | comp_func_XOR, | - |
| 327 | comp_func_Plus_sse2, | - |
| 328 | comp_func_Multiply, | - |
| 329 | comp_func_Screen, | - |
| 330 | comp_func_Overlay, | - |
| 331 | comp_func_Darken, | - |
| 332 | comp_func_Lighten, | - |
| 333 | comp_func_ColorDodge, | - |
| 334 | comp_func_ColorBurn, | - |
| 335 | comp_func_HardLight, | - |
| 336 | comp_func_SoftLight, | - |
| 337 | comp_func_Difference, | - |
| 338 | comp_func_Exclusion, | - |
| 339 | rasterop_SourceOrDestination, | - |
| 340 | rasterop_SourceAndDestination, | - |
| 341 | rasterop_SourceXorDestination, | - |
| 342 | rasterop_NotSourceAndNotDestination, | - |
| 343 | rasterop_NotSourceOrNotDestination, | - |
| 344 | rasterop_NotSourceXorDestination, | - |
| 345 | rasterop_NotSource, | - |
| 346 | rasterop_NotSourceAndDestination, | - |
| 347 | rasterop_SourceAndNotDestination, | - |
| 348 | rasterop_NotSourceOrDestination, | - |
| 349 | rasterop_SourceOrNotDestination, | - |
| 350 | rasterop_ClearDestination, | - |
| 351 | rasterop_SetDestination, | - |
| 352 | rasterop_NotDestination | - |
| 353 | }; | - |
| 354 | | - |
| 355 | | - |
| 356 | void qt_memfill16_sse2(quint16 *dest, quint16 value, int count) | - |
| 357 | { | - |
| 358 | if (count < 3) { evaluated: count < 3| yes Evaluation Count:7044494 | yes Evaluation Count:1413641 |
| 1413641-7044494 |
| 359 | switch (count) { | - |
| 360 | case 2: *dest++ = value; | - |
| 361 | case 1: *dest = value; code before this statement executed: case 1:Execution Count:425755 | 425755 |
| 362 | } executed: }Execution Count:7044494 | 7044494 |
| 363 | return; executed: return;Execution Count:7044494 | 7044494 |
| 364 | } | - |
| 365 | | - |
| 366 | const int align = (quintptr)(dest) & 0x3; | - |
| 367 | switch (align) { | - |
| 368 | case 2: *dest++ = value; --count; | - |
| 369 | } executed: }Execution Count:235663 | 235663 |
| 370 | | - |
| 371 | const quint32 value32 = (value << 16) | value; | - |
| 372 | qt_memfill32_sse2(reinterpret_cast<quint32*>(dest), value32, count / 2); | - |
| 373 | | - |
| 374 | if (count & 0x1) evaluated: count & 0x1| yes Evaluation Count:287920 | yes Evaluation Count:1125721 |
| 287920-1125721 |
| 375 | dest[count - 1] = value; executed: dest[count - 1] = value;Execution Count:287920 | 287920 |
| 376 | } executed: }Execution Count:1413641 | 1413641 |
| 377 | | - |
| 378 | void qt_bitmapblit32_sse2(QRasterBuffer *rasterBuffer, int x, int y, | - |
| 379 | quint32 color, | - |
| 380 | const uchar *src, int width, int height, int stride) | - |
| 381 | { | - |
| 382 | quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x; | - |
| 383 | const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint32); | - |
| 384 | | - |
| 385 | const __m128i c128 = _mm_set1_epi32(color); | - |
| 386 | const __m128i maskmask1 = _mm_set_epi32(0x10101010, 0x20202020, | - |
| 387 | 0x40404040, 0x80808080); | - |
| 388 | const __m128i maskadd1 = _mm_set_epi32(0x70707070, 0x60606060, | - |
| 389 | 0x40404040, 0x00000000); | - |
| 390 | | - |
| 391 | if (width > 4) { evaluated: width > 4| yes Evaluation Count:302 | yes Evaluation Count:68 |
| 68-302 |
| 392 | const __m128i maskmask2 = _mm_set_epi32(0x01010101, 0x02020202, | - |
| 393 | 0x04040404, 0x08080808); | - |
| 394 | const __m128i maskadd2 = _mm_set_epi32(0x7f7f7f7f, 0x7e7e7e7e, | - |
| 395 | 0x7c7c7c7c, 0x78787878); | - |
| 396 | while (height--) { evaluated: height--| yes Evaluation Count:2738 | yes Evaluation Count:302 |
| 302-2738 |
| 397 | for (int x = 0; x < width; x += 8) { evaluated: x < width| yes Evaluation Count:2754 | yes Evaluation Count:2738 |
| 2738-2754 |
| 398 | const quint8 s = src[x >> 3]; | - |
| 399 | if (!s) evaluated: !s| yes Evaluation Count:376 | yes Evaluation Count:2378 |
| 376-2378 |
| 400 | continue; executed: continue;Execution Count:376 | 376 |
| 401 | __m128i mask1 = _mm_set1_epi8(s); | - |
| 402 | __m128i mask2 = mask1; | - |
| 403 | | - |
| 404 | mask1 = _mm_and_si128(mask1, maskmask1); | - |
| 405 | mask1 = _mm_add_epi8(mask1, maskadd1); | - |
| 406 | _mm_maskmoveu_si128(c128, mask1, (char*)(dest + x)); | - |
| 407 | mask2 = _mm_and_si128(mask2, maskmask2); | - |
| 408 | mask2 = _mm_add_epi8(mask2, maskadd2); | - |
| 409 | _mm_maskmoveu_si128(c128, mask2, (char*)(dest + x + 4)); | - |
| 410 | } executed: }Execution Count:2378 | 2378 |
| 411 | dest += destStride; | - |
| 412 | src += stride; | - |
| 413 | } executed: }Execution Count:2738 | 2738 |
| 414 | } else { executed: }Execution Count:302 | 302 |
| 415 | while (height--) { evaluated: height--| yes Evaluation Count:660 | yes Evaluation Count:68 |
| 68-660 |
| 416 | const quint8 s = *src; | - |
| 417 | if (s) { evaluated: s| yes Evaluation Count:620 | yes Evaluation Count:40 |
| 40-620 |
| 418 | __m128i mask1 = _mm_set1_epi8(s); | - |
| 419 | mask1 = _mm_and_si128(mask1, maskmask1); | - |
| 420 | mask1 = _mm_add_epi8(mask1, maskadd1); | - |
| 421 | _mm_maskmoveu_si128(c128, mask1, (char*)(dest)); | - |
| 422 | } executed: }Execution Count:620 | 620 |
| 423 | dest += destStride; | - |
| 424 | src += stride; | - |
| 425 | } executed: }Execution Count:660 | 660 |
| 426 | } executed: }Execution Count:68 | 68 |
| 427 | } | - |
| 428 | | - |
| 429 | void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y, | - |
| 430 | quint32 color, | - |
| 431 | const uchar *src, int width, int height, int stride) | - |
| 432 | { | - |
| 433 | const quint16 c = qConvertRgb32To16(color); | - |
| 434 | quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x; | - |
| 435 | const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16); | - |
| 436 | | - |
| 437 | const __m128i c128 = _mm_set1_epi16(c); | - |
| 438 | | - |
| 439 | | - |
| 440 | | - |
| 441 | const __m128i maskmask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808, | - |
| 442 | 0x1010, 0x2020, 0x4040, 0x8080); | - |
| 443 | const __m128i maskadd = _mm_set_epi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878, | - |
| 444 | 0x7070, 0x6060, 0x4040, 0x0000); | - |
| 445 | | - |
| 446 | while (height--) { never evaluated: height-- | 0 |
| 447 | for (int x = 0; x < width; x += 8) { never evaluated: x < width | 0 |
| 448 | const quint8 s = src[x >> 3]; | - |
| 449 | if (!s) | 0 |
| 450 | continue; never executed: continue; | 0 |
| 451 | __m128i mask = _mm_set1_epi8(s); | - |
| 452 | mask = _mm_and_si128(mask, maskmask); | - |
| 453 | mask = _mm_add_epi8(mask, maskadd); | - |
| 454 | _mm_maskmoveu_si128(c128, mask, (char*)(dest + x)); | - |
| 455 | } | 0 |
| 456 | dest += destStride; | - |
| 457 | src += stride; | - |
| 458 | } | 0 |
| 459 | } | 0 |
| 460 | | - |
/*
    SSE2 implementation of the small vector-primitive interface that is passed
    as a template argument to QRadialFetchSimd.  Every member is a thin static
    wrapper around a single SSE/SSE2 intrinsic operating on four 32-bit lanes.
*/
class QSimdSse2
{
public:
    typedef __m128i Int32x4;
    typedef __m128 Float32x4;

    // Helpers for reading individual lanes of a vector.
    union Vect_buffer_i { Int32x4 v; int i[4]; };
    union Vect_buffer_f { Float32x4 v; float f[4]; };

    // Broadcast a scalar to all four lanes.
    static inline Float32x4 v_dup(float x) { return _mm_set1_ps(x); }
    // The double overload narrows to single precision before broadcasting.
    static inline Float32x4 v_dup(double x) { return _mm_set1_ps(static_cast<float>(x)); }
    static inline Int32x4 v_dup(int x) { return _mm_set1_epi32(x); }
    static inline Int32x4 v_dup(uint x) { return _mm_set1_epi32(x); }

    // Lane-wise addition.
    static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return _mm_add_ps(a, b); }
    static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return _mm_add_epi32(a, b); }

    // Lane-wise maximum / minimum.  v_min_16 compares signed 16-bit halves.
    static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return _mm_max_ps(a, b); }
    static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return _mm_min_ps(a, b); }
    static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return _mm_min_epi16(a, b); }

    // Bitwise AND of the full 128 bits.
    static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return _mm_and_si128(a, b); }

    // Lane-wise subtraction.
    static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return _mm_sub_ps(a, b); }
    static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return _mm_sub_epi32(a, b); }

    // Lane-wise multiplication.
    static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return _mm_mul_ps(a, b); }

    // Lane-wise square root.
    static inline Float32x4 v_sqrt(Float32x4 x) { return _mm_sqrt_ps(x); }

    // Float to int conversion with truncation toward zero.
    static inline Int32x4 v_toInt(Float32x4 x) { return _mm_cvttps_epi32(x); }

    // Per-lane mask: all bits set where a >= b, all bits clear otherwise
    // (including when either operand is NaN).  _mm_cmpge_ps is required here;
    // the strict _mm_cmpgt_ps would report lanes with a == b as false.
    static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b) { return _mm_castps_si128(_mm_cmpge_ps(a, b)); }
};
| 495 | | - |
| 496 | const uint * qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data, | - |
| 497 | int y, int x, int length) | - |
| 498 | { | - |
| 499 | return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2> >(buffer, op, data, y, x, length); executed: return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2> >(buffer, op, data, y, x, length);Execution Count:28769 | 28769 |
| 500 | } | - |
| 501 | | - |
| 502 | void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, | - |
| 503 | const uchar *srcPixels, int sbpl, | - |
| 504 | const QRectF &targetRect, | - |
| 505 | const QRectF &sourceRect, | - |
| 506 | const QRect &clip, | - |
| 507 | int const_alpha) | - |
| 508 | { | - |
| 509 | if (const_alpha != 256) { partially evaluated: const_alpha != 256| no Evaluation Count:0 | yes Evaluation Count:1358 |
| 0-1358 |
| 510 | | - |
| 511 | extern void qt_scale_image_argb32_on_argb32(uchar *destPixels, int dbpl, | - |
| 512 | const uchar *srcPixels, int sbpl, | - |
| 513 | const QRectF &targetRect, | - |
| 514 | const QRectF &sourceRect, | - |
| 515 | const QRect &clip, | - |
| 516 | int const_alpha); | - |
| 517 | return qt_scale_image_argb32_on_argb32(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha); never executed: return qt_scale_image_argb32_on_argb32(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha); | 0 |
| 518 | } | - |
| 519 | | - |
| 520 | qreal sx = targetRect.width() / (qreal) sourceRect.width(); | - |
| 521 | qreal sy = targetRect.height() / (qreal) sourceRect.height(); | - |
| 522 | | - |
| 523 | int ix = 0x00010000 / sx; | - |
| 524 | int iy = 0x00010000 / sy; | - |
| 525 | | - |
| 526 | int cx1 = clip.x(); | - |
| 527 | int cx2 = clip.x() + clip.width(); | - |
| 528 | int cy1 = clip.top(); | - |
| 529 | int cy2 = clip.y() + clip.height(); | - |
| 530 | | - |
| 531 | int tx1 = qRound(targetRect.left()); | - |
| 532 | int tx2 = qRound(targetRect.right()); | - |
| 533 | int ty1 = qRound(targetRect.top()); | - |
| 534 | int ty2 = qRound(targetRect.bottom()); | - |
| 535 | | - |
| 536 | if (tx2 < tx1) evaluated: tx2 < tx1| yes Evaluation Count:6 | yes Evaluation Count:1352 |
| 6-1352 |
| 537 | qSwap(tx2, tx1); executed: qSwap(tx2, tx1);Execution Count:6 | 6 |
| 538 | if (ty2 < ty1) evaluated: ty2 < ty1| yes Evaluation Count:6 | yes Evaluation Count:1352 |
| 6-1352 |
| 539 | qSwap(ty2, ty1); executed: qSwap(ty2, ty1);Execution Count:6 | 6 |
| 540 | | - |
| 541 | if (tx1 < cx1) partially evaluated: tx1 < cx1| no Evaluation Count:0 | yes Evaluation Count:1358 |
| 0-1358 |
| 542 | tx1 = cx1; never executed: tx1 = cx1; | 0 |
| 543 | if (tx2 >= cx2) evaluated: tx2 >= cx2| yes Evaluation Count:1344 | yes Evaluation Count:14 |
| 14-1344 |
| 544 | tx2 = cx2; executed: tx2 = cx2;Execution Count:1344 | 1344 |
| 545 | | - |
| 546 | if (tx1 >= tx2) partially evaluated: tx1 >= tx2| no Evaluation Count:0 | yes Evaluation Count:1358 |
| 0-1358 |
| 547 | return; | 0 |
| 548 | | - |
| 549 | if (ty1 < cy1) partially evaluated: ty1 < cy1| no Evaluation Count:0 | yes Evaluation Count:1358 |
| 0-1358 |
| 550 | ty1 = cy1; never executed: ty1 = cy1; | 0 |
| 551 | if (ty2 >= cy2) evaluated: ty2 >= cy2| yes Evaluation Count:22 | yes Evaluation Count:1336 |
| 22-1336 |
| 552 | ty2 = cy2; executed: ty2 = cy2;Execution Count:22 | 22 |
| 553 | if (ty1 >= ty2) evaluated: ty1 >= ty2| yes Evaluation Count:449 | yes Evaluation Count:909 |
| 449-909 |
| 554 | return; executed: return;Execution Count:449 | 449 |
| 555 | | - |
| 556 | int h = ty2 - ty1; | - |
| 557 | int w = tx2 - tx1; | - |
| 558 | | - |
| 559 | quint32 basex; | - |
| 560 | quint32 srcy; | - |
| 561 | | - |
| 562 | if (sx < 0) { evaluated: sx < 0| yes Evaluation Count:6 | yes Evaluation Count:903 |
| 6-903 |
| 563 | int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1; | - |
| 564 | basex = quint32(sourceRect.right() * 65536) + dstx; | - |
| 565 | } else { executed: }Execution Count:6 | 6 |
| 566 | int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1; | - |
| 567 | basex = quint32(sourceRect.left() * 65536) + dstx; | - |
| 568 | } executed: }Execution Count:903 | 903 |
| 569 | if (sy < 0) { evaluated: sy < 0| yes Evaluation Count:6 | yes Evaluation Count:903 |
| 6-903 |
| 570 | int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1; | - |
| 571 | srcy = quint32(sourceRect.bottom() * 65536) + dsty; | - |
| 572 | } else { executed: }Execution Count:6 | 6 |
| 573 | int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1; | - |
| 574 | srcy = quint32(sourceRect.top() * 65536) + dsty; | - |
| 575 | } executed: }Execution Count:903 | 903 |
| 576 | | - |
| 577 | quint32 *dst = ((quint32 *) (destPixels + ty1 * dbpl)) + tx1; | - |
| 578 | | - |
| 579 | const __m128i nullVector = _mm_set1_epi32(0); | - |
| 580 | const __m128i half = _mm_set1_epi16(0x80); | - |
| 581 | const __m128i one = _mm_set1_epi16(0xff); | - |
| 582 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
| 583 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - |
| 584 | const __m128i ixVector = _mm_set1_epi32(4*ix); | - |
| 585 | | - |
| 586 | | - |
| 587 | while (h--) { evaluated: h--| yes Evaluation Count:1764 | yes Evaluation Count:909 |
| 909-1764 |
| 588 | const uint *src = (const quint32 *) (srcPixels + (srcy >> 16) * sbpl); | - |
| 589 | int srcx = basex; | - |
| 590 | int x = 0; | - |
| 591 | | - |
| 592 | for (; x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3))); ++x) { evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))| yes Evaluation Count:752 | yes Evaluation Count:1764 |
| 752-1764 |
| 593 | uint s = src[srcx >> 16]; | - |
| 594 | dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); | - |
| 595 | srcx += ix; | - |
| 596 | } executed: }Execution Count:752 | 752 |
| 597 | | - |
| 598 | __m128i srcxVector = _mm_set_epi32(srcx, srcx + ix, srcx + ix + ix, srcx + ix + ix + ix); | - |
| 599 | | - |
| 600 | for (; x<w - 3; x += 4) { evaluated: x<w - 3| yes Evaluation Count:40635 | yes Evaluation Count:1764 |
| 1764-40635 |
| 601 | union Vect_buffer { __m128i vect; quint32 i[4]; }; | - |
| 602 | Vect_buffer addr; | - |
| 603 | addr.vect = _mm_srli_epi32(srcxVector, 16); | - |
| 604 | srcxVector = _mm_add_epi32(srcxVector, ixVector); | - |
| 605 | | - |
| 606 | const __m128i srcVector = _mm_set_epi32(src[addr.i[0]], src[addr.i[1]], src[addr.i[2]], src[addr.i[3]]); | - |
| 607 | { const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { _mm_store_si128((__m128i *)&dst[x], srcVector); } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); alphaChannel = _mm_sub_epi16(one, alphaChannel); const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); __m128i destMultipliedByOneMinusAlpha; { __m128i pixelVectorAG = _mm_srli_epi16(dstVector, 8); __m128i pixelVectorRB = _mm_and_si128(dstVector, colorMask); pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); destMultipliedByOneMinusAlpha = _mm_or_si128(pixelVectorAG, pixelVectorRB); }; const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); _mm_store_si128((__m128i *)&dst[x], result); } }; executed: }Execution Count:34682 executed: }Execution Count:607 evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff| yes Evaluation Count:607 | yes Evaluation Count:5346 |
evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff| yes Evaluation Count:34682 | yes Evaluation Count:5953 |
| 607-34682 |
| 608 | } | - |
| 609 | | - |
| 610 | for (; x<w; x++) { evaluated: x<w| yes Evaluation Count:970 | yes Evaluation Count:1764 |
| 970-1764 |
| 611 | uint s = src[(basex + x*ix) >> 16]; | - |
| 612 | dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); | - |
| 613 | } executed: }Execution Count:970 | 970 |
| 614 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
| 615 | srcy += iy; | - |
| 616 | } executed: }Execution Count:1764 | 1764 |
| 617 | } executed: }Execution Count:909 | 909 |
| 618 | | - |
| 619 | | - |
| 620 | | - |
| 621 | | - |
| | |