qdrawhelper_ssse3.cpp

Absolute File Name:

/home/qt/qt5_coco/qt5/qtbase/src/gui/painting/qdrawhelper_ssse3.cpp

Source code

Switch to Preprocessed file

Line Source Count

1 /**************************************************************************** -

2 ** -

4 ** Contact: https://www.qt.io/licensing/ -

5 ** -

6 ** This file is part of the QtGui module of the Qt Toolkit. -

7 ** -

8 ** $QT_BEGIN_LICENSE:LGPL$ -

9 ** Commercial License Usage -

10 ** Licensees holding valid commercial Qt licenses may use this file in -

11 ** accordance with the commercial license agreement provided with the -

12 ** Software or, alternatively, in accordance with the terms contained in -

13 ** a written agreement between you and The Qt Company. For licensing terms -

14 ** and conditions see https://www.qt.io/terms-conditions. For further -

15 ** information use the contact form at https://www.qt.io/contact-us. -

16 ** -

17 ** GNU Lesser General Public License Usage -

18 ** Alternatively, this file may be used under the terms of the GNU Lesser -

19 ** General Public License version 3 as published by the Free Software -

20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the -

21 ** packaging of this file. Please review the following information to -

22 ** ensure the GNU Lesser General Public License version 3 requirements -

23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -

24 ** -

25 ** GNU General Public License Usage -

26 ** Alternatively, this file may be used under the terms of the GNU -

27 ** General Public License version 2.0 or (at your option) the GNU General -

28 ** Public license version 3 or any later version approved by the KDE Free -

29 ** Qt Foundation. The licenses are as published by the Free Software -

30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -

31 ** included in the packaging of this file. Please review the following -

32 ** information to ensure the GNU General Public License requirements will -

33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and -

34 ** https://www.gnu.org/licenses/gpl-3.0.html. -

35 ** -

36 ** $QT_END_LICENSE$ -

37 ** -

38 ****************************************************************************/ -

39 -

40 #include <private/qdrawhelper_x86_p.h> -

41 -

42 #ifdef QT_COMPILER_SUPPORTS_SSSE3 -

43 -

44 #include <private/qdrawingprimitive_sse2_p.h> -

45 -

46 QT_BEGIN_NAMESPACE -

47 -

// Scalar fallback used for the pixels the vector loops below do not cover.
// - src >= 0xff000000 (top byte is 0xff): src simply overwrites dst.
// - src == 0: dst is left untouched.
// - otherwise: dst = src + BYTE_MUL(dst, qAlpha(~src)), which matches the
//   "result = s + d * (1-alpha)" formula documented further down in this file
//   (BYTE_MUL and qAlpha are declared elsewhere).

48 inline static void blend_pixel(quint32 &dst, const quint32 src) -

49 { -

50 if (src >= 0xff000000) -

51 dst = src; -

52 else if (src != 0) -

53 dst = src + BYTE_MUL(dst, qAlpha(~src)); -

54 } -

55 -

56 -

57 /* The instruction palignr uses direct arguments, so we have to generate the code for the different -

58 shift (4, 8, 12). Checking the alignment inside the loop is unfortunately way too slow. -

   Each iteration does one aligned load of the next four source pixels and combines it with the
   previously loaded vector (srcVectorPrevLoaded) through _mm_alignr_epi8 to obtain the four source
   pixels matching dst[x..x+3]; the freshly loaded vector then becomes srcVectorPrevLoaded.

   The macro is not self-contained: x, src, dst, minusOffsetToAlignSrcOn16Bytes, srcVectorPrevLoaded,
   alphaMask, nullVector, alphaShuffleMask, one, colorMask and half must all be in scope where it is
   expanded.

59 */ -

60 #define BLENDING_LOOP(palignrOffset, length)\ -

61 for (; x-minusOffsetToAlignSrcOn16Bytes < length-7; x += 4) { \ -

62 const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\ -

63 const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \ -

64 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ -

65 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ -

66 _mm_store_si128((__m128i *)&dst[x], srcVector); \ -

67 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ -

68 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ -

69 alphaChannel = _mm_sub_epi16(one, alphaChannel); \ -

70 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ -

71 __m128i destMultipliedByOneMinusAlpha; \ -

72 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ -

73 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ -

74 _mm_store_si128((__m128i *)&dst[x], result); \ -

75 } \ -

76 srcVectorPrevLoaded = srcVectorLastLoaded;\ -

77 } -

78 -

79 -

80 // Blend `length` pixels of src over dst (source-over). Unlike the SSE2 "WITH_CONST_ALPHA" macro, no constant alpha is applied here. -

81 // nullVector, half, one, colorMask and alphaMask are constant across the whole image/texture, and should be defined as: -

82 //const __m128i nullVector = _mm_set1_epi32(0); -

83 //const __m128i half = _mm_set1_epi16(0x80); -

84 //const __m128i one = _mm_set1_epi16(0xff); -

85 //const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -

86 //const __m128i alphaMask = _mm_set1_epi32(0xff000000); -

87 // -

88 // The computation being done is: -

89 // result = s + d * (1-alpha) -

90 // with shortcuts if fully opaque or fully transparent. -
//
// Structure of the expansion:
//  1. a scalar prologue (blend_pixel) until dst is 16-byte aligned;
//  2. if src is then 16-byte aligned too, a loop using aligned loads on both buffers;
//  3. otherwise, when at least 8 pixels remain, one BLENDING_LOOP instance selected by the
//     byte offset of src (4, 8 or 12) so that only aligned loads are issued;
//  4. a scalar epilogue (blend_pixel) for whatever pixels are left.

91 #define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ -

92 int x = 0; \ -

93 \ -

94 /* First, get dst aligned. */ \ -

95 ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \ -

96 blend_pixel(dst[x], src[x]); \ -

97 } \ -

98 \ -

99 const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;\ -

100 \ -

101 if (!minusOffsetToAlignSrcOn16Bytes) {\ -

102 /* src is aligned, usual algorithm but with aligned operations.\ -

103 See the SSE2 version for more documentation on the algorithm itself. */\ -

104 const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\ -

105 for (; x < length-3; x += 4) { \ -

106 const __m128i srcVector = _mm_load_si128((const __m128i *)&src[x]); \ -

107 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ -

108 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ -

109 _mm_store_si128((__m128i *)&dst[x], srcVector); \ -

110 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ -

111 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ -

112 alphaChannel = _mm_sub_epi16(one, alphaChannel); \ -

113 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ -

114 __m128i destMultipliedByOneMinusAlpha; \ -

115 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ -

116 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ -

117 _mm_store_si128((__m128i *)&dst[x], result); \ -

118 } \ -

119 } /* end for() */\ -

120 } else if ((length - x) >= 8) {\ -

121 /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\ -

122 __m128i srcVectorPrevLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);\ -

123 const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\ -

124 \ -

125 const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\ -

126 switch (palignrOffset) {\ -

127 case 4:\ -

128 BLENDING_LOOP(4, length)\ -

129 break;\ -

130 case 8:\ -

131 BLENDING_LOOP(8, length)\ -

132 break;\ -

133 case 12:\ -

134 BLENDING_LOOP(12, length)\ -

135 break;\ -

136 }\ -

137 }\ -

138 for (; x < length; ++x) \ -

139 blend_pixel(dst[x], src[x]); \ -

140 } -

141 -

// Blends h rows of w 32-bit pixels from srcPixels onto destPixels.
// dbpl and sbpl are byte strides: they are added to the destination and source
// row pointers (as uchar pointers) after each row.
// const_alpha selects the path:
//  - 256: plain source-over through BLEND_SOURCE_OVER_ARGB32_SSSE3;
//  - any other non-zero value: rescaled to (const_alpha * 255) >> 8 and applied
//    through the SSE2 constant-alpha macro (defined elsewhere);
//  - 0: nothing is written.

142 void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl, -

143 const uchar *srcPixels, int sbpl, -

144 int w, int h, -

145 int const_alpha) -

146 { -

147 const quint32 *src = (const quint32 *) srcPixels; -

148 quint32 *dst = (quint32 *) destPixels; -

149 if (const_alpha == 256) { -

150 const __m128i alphaMask = _mm_set1_epi32(0xff000000); -

151 const __m128i nullVector = _mm_setzero_si128(); -

152 const __m128i half = _mm_set1_epi16(0x80); -

153 const __m128i one = _mm_set1_epi16(0xff); -

154 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -

155 -

156 for (int y = 0; y < h; ++y) { -

157 BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask); -

158 dst = (quint32 *)(((uchar *) dst) + dbpl); -

159 src = (const quint32 *)(((const uchar *) src) + sbpl); -

160 } -

161 } else if (const_alpha != 0) { -

162 // dest = (s + d * sia) * ca + d * cia -

163 // = s * ca + d * (sia * ca + cia) -

164 // = s * ca + d * (1 - sa*ca) -

165 const_alpha = (const_alpha * 255) >> 8; -

166 const __m128i nullVector = _mm_setzero_si128(); -

167 const __m128i half = _mm_set1_epi16(0x80); -

168 const __m128i one = _mm_set1_epi16(0xff); -

169 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -

170 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); -

171 for (int y = 0; y < h; ++y) { -

172 BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector) -

173 dst = (quint32 *)(((uchar *) dst) + dbpl); -

174 src = (const quint32 *)(((const uchar *) src) + sbpl); -

175 } -

176 } -

177 } -

178 -

// Converts len 32-bit values from src into packed quint24 values written at dst.
// Three phases:
//  1. scalar stores until the destination pointer is 16-byte aligned (or len is exhausted);
//  2. a vector loop handling 16 values per iteration: four unaligned 16-byte loads are
//     shuffled (bytes 2, 1, 0 of every 32-bit element are kept, byte 3 is dropped) and
//     recombined with _mm_alignr_epi8 into three aligned 16-byte stores;
//  3. scalar stores for the remaining values.

179 static inline void store_uint24_ssse3(uchar *dst, const uint *src, int len) -

180 { -

181 int i = 0; -

182 -

183 quint24 *dst24 = reinterpret_cast<quint24*>(dst); -

184 // Align dst on 16 bytes -

185

i < len	Description
TRUE	never evaluated
FALSE	never evaluated

(reinterpret_c...(dst24) & 0xf)	Description
TRUE	never evaluated
FALSE	never evaluated

for (; i < len && (reinterpret_cast<quintptr>(dst24) & 0xf); ++i)

i < len	Description
TRUE	never evaluated
FALSE	never evaluated

(reinterpret_c...(dst24) & 0xf)	Description
TRUE	never evaluated
FALSE	never evaluated

186

*dst24++ = quint24(*src++);

never executed: *dst24++ = quint24(*src++);

187 -

188 // Shuffle masks for first and second half of every output, all outputs are aligned so the shuffled ends are not used. -

189 const __m128i shuffleMask1 = _mm_setr_epi8(char(0x80), char(0x80), char(0x80), char(0x80), 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12); -

190 const __m128i shuffleMask2 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, char(0x80), char(0x80), char(0x80), char(0x80)); -

191 -

192 const __m128i *inVectorPtr = (const __m128i *)src; -

193 __m128i *dstVectorPtr = (__m128i *)dst24; -

194 -

195

for (; i < (len - 15); i += 16) {

i < (len - 15)	Description
TRUE	never evaluated
FALSE	never evaluated

196 // Load four vectors, store three. -

197 // Create each output vector by combining two shuffled input vectors. -

198 __m128i srcVector1 = _mm_loadu_si128(inVectorPtr); -

199 ++inVectorPtr; -

200 __m128i srcVector2 = _mm_loadu_si128(inVectorPtr); -

201 ++inVectorPtr; -

202 __m128i outputVector1 = _mm_shuffle_epi8(srcVector1, shuffleMask1); -

203 __m128i outputVector2 = _mm_shuffle_epi8(srcVector2, shuffleMask2); -

204 __m128i outputVector = _mm_alignr_epi8(outputVector2, outputVector1, 4); -

205 _mm_store_si128(dstVectorPtr, outputVector); -

206 ++dstVectorPtr; -

207 -

208 srcVector1 = _mm_loadu_si128(inVectorPtr); -

209 ++inVectorPtr; -

210 outputVector1 = _mm_shuffle_epi8(srcVector2, shuffleMask1); -

211 outputVector2 = _mm_shuffle_epi8(srcVector1, shuffleMask2); -

212 outputVector = _mm_alignr_epi8(outputVector2, outputVector1, 8); -

213 _mm_store_si128(dstVectorPtr, outputVector); -

214 ++dstVectorPtr; -

215 -

216 srcVector2 = _mm_loadu_si128(inVectorPtr); -

217 ++inVectorPtr; -

218 outputVector1 = _mm_shuffle_epi8(srcVector1, shuffleMask1); -

219 outputVector2 = _mm_shuffle_epi8(srcVector2, shuffleMask2); -

220 outputVector = _mm_alignr_epi8(outputVector2, outputVector1, 12); -

221 _mm_store_si128(dstVectorPtr, outputVector); -

222 ++dstVectorPtr; -

223

}

never executed: end of block

224 dst24 = reinterpret_cast<quint24*>(dstVectorPtr); -

225 src = reinterpret_cast<const uint*>(inVectorPtr); -

226 -

227

for (; i < len; ++i)

i < len	Description
TRUE	never evaluated
FALSE	never evaluated

228

*dst24++ = quint24(*src++);

never executed: *dst24++ = quint24(*src++);

229

}

never executed: end of block

230 -

// Stores count values from src as 24-bit pixels into dest, starting at pixel
// position index (byte offset index * 3). All the work is delegated to
// store_uint24_ssse3.

231 void QT_FASTCALL storePixelsBPP24_ssse3(uchar *dest, const uint *src, int index, int count) -

232 { -

233 store_uint24_ssse3(dest + index * 3, src, count); -

234

}

never executed: end of block

235 -

236 extern void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len); -

237 -

// Fetches length pixels from scanline y of data->texture, starting at byte
// offset x * 3 (three bytes per pixel), and converts them into buffer with
// qt_convert_rgb888_to_rgb32_ssse3 (declared above, defined elsewhere).
// The Operator argument is unused. Returns buffer.

238 const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Operator *, const QSpanData *data, -

239 int y, int x, int length) -

240 { -

241 const uchar *line = data->texture.scanLine(y) + x * 3; -

242 qt_convert_rgb888_to_rgb32_ssse3(buffer, line, length); -

243

return buffer;

never executed: return buffer;

244 } -

245 -

246 QT_END_NAMESPACE -

247 -

248 #endif // QT_COMPILER_SUPPORTS_SSSE3 -

Source code

Switch to Preprocessed file

Generated by Squish Coco Non-Commercial 4.3.0-BETA-master-30-08-2018-4cb69e9