| Line | Source | Count |
| 1 | | - |
| 2 | | - |
| 3 | | - |
| 4 | | - |
| 5 | | - |
| 6 | | - |
| 7 | | - |
| 8 | | - |
| 9 | | - |
| 10 | | - |
| 11 | | - |
| 12 | | - |
| 13 | | - |
| 14 | | - |
| 15 | | - |
| 16 | | - |
| 17 | | - |
| 18 | | - |
| 19 | | - |
| 20 | | - |
| 21 | | - |
| 22 | | - |
| 23 | | - |
| 24 | | - |
| 25 | | - |
| 26 | | - |
| 27 | | - |
| 28 | | - |
| 29 | | - |
| 30 | | - |
| 31 | | - |
| 32 | | - |
| 33 | | - |
| 34 | | - |
| 35 | | - |
| 36 | | - |
| 37 | | - |
| 38 | | - |
| 39 | | - |
| 40 | #include <private/qdrawhelper_x86_p.h> | - |
| 41 | | - |
| 42 | #ifdef QT_COMPILER_SUPPORTS_SSSE3 | - |
| 43 | | - |
| 44 | #include <private/qdrawingprimitive_sse2_p.h> | - |
| 45 | | - |
| 46 | QT_BEGIN_NAMESPACE | - |
| 47 | /* Blends a single 32-bit pixel src onto dst. Presumably premultiplied ARGB32 source-over; confirm against BYTE_MUL and qAlpha, which are defined outside this file. */ | - |
| 48 | inline static void blend_pixel(quint32 &dst, const quint32 src) | - |
| 49 | { | - |
| 50 | if (src >= 0xff000000) /* top byte of src is 0xff: src replaces dst */ | - |
| 51 | dst = src; | - |
| 52 | else if (src != 0) /* src == 0 leaves dst untouched */ | - |
| 53 | dst = src + BYTE_MUL(dst, qAlpha(~src)); /* dst is scaled by qAlpha of the inverted src, then src is added */ | - |
| 54 | } | - |
| 55 | | - |
| 56 | /* BLENDING_LOOP(palignrOffset, length): blends 4 pixels per iteration when &src[x] is not 16-byte aligned; dst[x] is accessed with aligned loads and stores. */ | - |
| 57 | /* Each iteration does an aligned load of the next 16 bytes of src and combines it with the previously loaded vector through _mm_alignr_epi8 to rebuild the 4 pixels starting at src[x]. */ | - |
| 58 | /* palignrOffset is the byte shift given to _mm_alignr_epi8, which takes it as an immediate; the expansions visible in this file pass the literals 4, 8 and 12. */ | - |
| 59 | /* Expects x, src, dst, minusOffsetToAlignSrcOn16Bytes, srcVectorPrevLoaded, alphaShuffleMask, alphaMask, nullVector, one, colorMask and half to be in scope at the expansion site. */ | - |
| 60 | #define BLENDING_LOOP(palignrOffset, length)\ | - |
| 61 | for (; x-minusOffsetToAlignSrcOn16Bytes < length-7; x += 4) { \ | - |
| 62 | const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\ | - |
| 63 | const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \ | - |
| 64 | const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ | - |
| 65 | if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { /* all 4 pixels have every alphaMask bit set: store src as is */ \ | - |
| 66 | _mm_store_si128((__m128i *)&dst[x], srcVector); \ | - |
| 67 | } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { /* at least one pixel differs from nullVector under alphaMask: blend; otherwise dst is left as is */ \ | - |
| 68 | __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ | - |
| 69 | alphaChannel = _mm_sub_epi16(one, alphaChannel); \ | - |
| 70 | const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ | - |
| 71 | __m128i destMultipliedByOneMinusAlpha; \ | - |
| 72 | BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ | - |
| 73 | const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ | - |
| 74 | _mm_store_si128((__m128i *)&dst[x], result); \ | - |
| 75 | } \ | - |
| 76 | srcVectorPrevLoaded = srcVectorLastLoaded;\ | - |
| 77 | } | - |
| 78 | | - |
| 79 | | - |
| 80 | | - |
| 81 | | - |
| 82 | | - |
| 83 | | - |
| 84 | | - |
| 85 | /* BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask): blends length pixels of src onto dst, 4 at a time where possible. */ | - |
| 86 | /* Step 1: ALIGNMENT_PROLOGUE_16BYTES runs blend_pixel one pixel at a time (per the inline comment below, until dst is aligned). */ | - |
| 87 | /* Step 2: if &src[x] is then 16-byte aligned as well, the main loop uses aligned loads on both buffers. */ | - |
| 88 | /* Step 3: otherwise, when at least 8 pixels remain, BLENDING_LOOP is expanded with the matching byte shift (4, 8 or 12) so that src is still read with aligned loads. */ | - |
| 89 | /* Step 4: whatever is left over is finished with blend_pixel. */ | - |
| 90 | /* The vector arguments are constants prepared by the caller; qt_blend_argb32_on_argb32_ssse3 shows the values used in this file. */ | - |
| 91 | #define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ | - |
| 92 | int x = 0; \ | - |
| 93 | \ | - |
| 94 | /* First, get dst aligned. */ \ | - |
| 95 | ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \ | - |
| 96 | blend_pixel(dst[x], src[x]); \ | - |
| 97 | } \ | - |
| 98 | /* Number of 4-byte units (0..3) by which &src[x] lies past the preceding 16-byte boundary; 0 means src is aligned too. */ \ | - |
| 99 | const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;\ | - |
| 100 | \ | - |
| 101 | if (!minusOffsetToAlignSrcOn16Bytes) {\ | - |
| 102 | /* src is aligned, usual algorithm but with aligned operations.\ | - |
| 103 | See the SSE2 version for more documentation on the algorithm itself. */\ | - |
| 104 | const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\ | - |
| 105 | for (; x < length-3; x += 4) { \ | - |
| 106 | const __m128i srcVector = _mm_load_si128((const __m128i *)&src[x]); \ | - |
| 107 | const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ | - |
| 108 | if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ | - |
| 109 | _mm_store_si128((__m128i *)&dst[x], srcVector); \ | - |
| 110 | } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ | - |
| 111 | __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ | - |
| 112 | alphaChannel = _mm_sub_epi16(one, alphaChannel); \ | - |
| 113 | const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ | - |
| 114 | __m128i destMultipliedByOneMinusAlpha; \ | - |
| 115 | BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ | - |
| 116 | const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ | - |
| 117 | _mm_store_si128((__m128i *)&dst[x], result); \ | - |
| 118 | } \ | - |
| 119 | } /* end for() */\ | - |
| 120 | } else if ((length - x) >= 8) {\ | - |
| 121 | /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\ | - |
| 122 | __m128i srcVectorPrevLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);\ | - |
| 123 | const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\ | - |
| 124 | /* alphaShuffleMask: for each pixel, copies its byte 3 into the low byte of both of its 16-bit lanes and zeroes the high bytes. */ \ | - |
| 125 | const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\ | - |
| 126 | switch (palignrOffset) {\ | - |
| 127 | case 4:\ | - |
| 128 | BLENDING_LOOP(4, length)\ | - |
| 129 | break;\ | - |
| 130 | case 8:\ | - |
| 131 | BLENDING_LOOP(8, length)\ | - |
| 132 | break;\ | - |
| 133 | case 12:\ | - |
| 134 | BLENDING_LOOP(12, length)\ | - |
| 135 | break;\ | - |
| 136 | }\ | - |
| 137 | }\ | - |
| 138 | for (; x < length; ++x) \ | - |
| 139 | blend_pixel(dst[x], src[x]); \ | - |
| 140 | } | - |
| 141 | /* Blends a block of w x h 32-bit pixels from srcPixels onto destPixels; dbpl and sbpl are the byte strides added to dst and src after each row. const_alpha == 256 takes the SSSE3 macro, 0 does nothing, any other value takes the const-alpha SSE2 macro. */ | - |
| 142 | void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl, | - |
| 143 | const uchar *srcPixels, int sbpl, | - |
| 144 | int w, int h, | - |
| 145 | int const_alpha) | - |
| 146 | { | - |
| 147 | const quint32 *src = (const quint32 *) srcPixels; | - |
| 148 | quint32 *dst = (quint32 *) destPixels; | - |
| 149 | if (const_alpha == 256) { | - |
| 150 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); | - |
| 151 | const __m128i nullVector = _mm_setzero_si128(); | - |
| 152 | const __m128i half = _mm_set1_epi16(0x80); | - |
| 153 | const __m128i one = _mm_set1_epi16(0xff); | - |
| 154 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
| 155 | /* One row per iteration; the macro blends w pixels, then both pointers advance by their byte stride. */ | - |
| 156 | for (int y = 0; y < h; ++y) { | - |
| 157 | BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask); | - |
| 158 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
| 159 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
| 160 | } | - |
| 161 | } else if (const_alpha != 0) { | - |
| 162 | /* Partial opacity: const_alpha is rescaled by 255/256 (256 is handled above as the full-opacity case) before being broadcast into 16-bit lanes. */ | - |
| 163 | /* BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2 is not defined in this file; presumably it comes from qdrawingprimitive_sse2_p.h (confirm). */ | - |
| 164 | | - |
| 165 | const_alpha = (const_alpha * 255) >> 8; | - |
| 166 | const __m128i nullVector = _mm_setzero_si128(); | - |
| 167 | const __m128i half = _mm_set1_epi16(0x80); | - |
| 168 | const __m128i one = _mm_set1_epi16(0xff); | - |
| 169 | const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); | - |
| 170 | const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); | - |
| 171 | for (int y = 0; y < h; ++y) { | - |
| 172 | BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector) | - |
| 173 | dst = (quint32 *)(((uchar *) dst) + dbpl); | - |
| 174 | src = (const quint32 *)(((const uchar *) src) + sbpl); | - |
| 175 | } | - |
| 176 | } | - |
| 177 | } | - |
| 178 | /* Converts len 32-bit pixels from src into packed 3-byte pixels at dst: scalar quint24 stores until dst is 16-byte aligned, then 16 pixels (64 bytes in, 48 bytes out) per SSSE3 iteration, then a scalar tail. */ | - |
| 179 | static inline void store_uint24_ssse3(uchar *dst, const uint *src, int len) | - |
| 180 | { | - |
| 181 | int i = 0; | - |
| 182 | | - |
| 183 | quint24 *dst24 = reinterpret_cast<quint24*>(dst); | - |
| 184 | /* Prologue: advance one pixel at a time until dst24 sits on a 16-byte boundary or len is exhausted. */ | - |
| 185 | for (; i < len && (reinterpret_cast<quintptr>(dst24) & 0xf); ++i)| TRUE | never evaluated | | FALSE | never evaluated |
| TRUE | never evaluated | | FALSE | never evaluated |
| 0 |
| 186 | *dst24++ = quint24(*src++); never executed: *dst24++ = quint24(*src++); | 0 |
| 187 | /* Both masks pick bytes 2, 1, 0 of each 32-bit pixel (byte 3 is dropped) and pack 4 pixels into 12 bytes; the 0x80 entries produce zero bytes. */ | - |
| 188 | /* shuffleMask1 leaves the 12 bytes in the upper part of the register and shuffleMask2 in the lower part, so that _mm_alignr_epi8 can splice two shuffled vectors together. */ | - |
| 189 | const __m128i shuffleMask1 = _mm_setr_epi8(char(0x80), char(0x80), char(0x80), char(0x80), 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12); | - |
| 190 | const __m128i shuffleMask2 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, char(0x80), char(0x80), char(0x80), char(0x80)); | - |
| 191 | | - |
| 192 | const __m128i *inVectorPtr = (const __m128i *)src; | - |
| 193 | __m128i *dstVectorPtr = (__m128i *)dst24; | - |
| 194 | /* Main loop: unaligned loads from src, aligned stores to dst; four input vectors become three output vectors. */ | - |
| 195 | for (; i < (len - 15); i += 16) {| TRUE | never evaluated | | FALSE | never evaluated |
| 0 |
| 196 | /* Output 1: all 12 bytes of pixels 0-3 followed by the first 4 bytes of pixels 4-7. */ | - |
| 197 | | - |
| 198 | __m128i srcVector1 = _mm_loadu_si128(inVectorPtr); | - |
| 199 | ++inVectorPtr; | - |
| 200 | __m128i srcVector2 = _mm_loadu_si128(inVectorPtr); | - |
| 201 | ++inVectorPtr; | - |
| 202 | __m128i outputVector1 = _mm_shuffle_epi8(srcVector1, shuffleMask1); | - |
| 203 | __m128i outputVector2 = _mm_shuffle_epi8(srcVector2, shuffleMask2); | - |
| 204 | __m128i outputVector = _mm_alignr_epi8(outputVector2, outputVector1, 4); | - |
| 205 | _mm_store_si128(dstVectorPtr, outputVector); | - |
| 206 | ++dstVectorPtr; | - |
| 207 | /* Output 2: the remaining 8 bytes of pixels 4-7 followed by the first 8 bytes of pixels 8-11. */ | - |
| 208 | srcVector1 = _mm_loadu_si128(inVectorPtr); | - |
| 209 | ++inVectorPtr; | - |
| 210 | outputVector1 = _mm_shuffle_epi8(srcVector2, shuffleMask1); | - |
| 211 | outputVector2 = _mm_shuffle_epi8(srcVector1, shuffleMask2); | - |
| 212 | outputVector = _mm_alignr_epi8(outputVector2, outputVector1, 8); | - |
| 213 | _mm_store_si128(dstVectorPtr, outputVector); | - |
| 214 | ++dstVectorPtr; | - |
| 215 | /* Output 3: the remaining 4 bytes of pixels 8-11 followed by all 12 bytes of pixels 12-15. */ | - |
| 216 | srcVector2 = _mm_loadu_si128(inVectorPtr); | - |
| 217 | ++inVectorPtr; | - |
| 218 | outputVector1 = _mm_shuffle_epi8(srcVector1, shuffleMask1); | - |
| 219 | outputVector2 = _mm_shuffle_epi8(srcVector2, shuffleMask2); | - |
| 220 | outputVector = _mm_alignr_epi8(outputVector2, outputVector1, 12); | - |
| 221 | _mm_store_si128(dstVectorPtr, outputVector); | - |
| 222 | ++dstVectorPtr; | - |
| 223 | } never executed: end of block | 0 |
| 224 | dst24 = reinterpret_cast<quint24*>(dstVectorPtr); | - |
| 225 | src = reinterpret_cast<const uint*>(inVectorPtr); | - |
| 226 | /* Tail: fewer than 16 pixels remain; scalar conversion resumes from where the vector loop stopped. */ | - |
| 227 | for (; i < len; ++i)| TRUE | never evaluated | | FALSE | never evaluated |
| 0 |
| 228 | *dst24++ = quint24(*src++); never executed: *dst24++ = quint24(*src++); | 0 |
| 229 | } never executed: end of block | 0 |
| 230 | /* Stores count pixels from src into the 3-bytes-per-pixel buffer dest, starting at pixel index (byte offset index * 3), by delegating to store_uint24_ssse3. */ | - |
| 231 | void QT_FASTCALL storePixelsBPP24_ssse3(uchar *dest, const uint *src, int index, int count) | - |
| 232 | { | - |
| 233 | store_uint24_ssse3(dest + index * 3, src, count); | - |
| 234 | } never executed: end of block | 0 |
| 235 | /* Declared here and defined elsewhere; judging by its name and signature it expands len 3-byte pixels from src into 32-bit pixels at dst (confirm at the definition). */ | - |
| 236 | extern void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len); | - |
| 237 | /* Fetches length pixels of texture row y starting at column x (3 bytes per pixel, per the x * 3 offset) into buffer through qt_convert_rgb888_to_rgb32_ssse3 and returns buffer. The Operator argument is unused. */ | - |
| 238 | const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Operator *, const QSpanData *data, | - |
| 239 | int y, int x, int length) | - |
| 240 | { | - |
| 241 | const uchar *line = data->texture.scanLine(y) + x * 3; | - |
| 242 | qt_convert_rgb888_to_rgb32_ssse3(buffer, line, length); | - |
| 243 | return buffer; never executed: return buffer; | 0 |
| 244 | } | - |
| 245 | | - |
| 246 | QT_END_NAMESPACE | - |
| 247 | | - |
| 248 | #endif // QT_COMPILER_SUPPORTS_SSSE3 | - |
| | |