/home/roberto/qt5_coco/qtsdk/qtbase/src/gui/painting/qdrawhelper

painting/qdrawhelper_ssse3.cpp

Source code

Switch to Preprocessed file

Line Source Code Coverage

1 /**************************************************************************** -

2 ** -

4 ** Contact: http://www.qt-project.org/legal -

5 ** -

6 ** This file is part of the QtGui module of the Qt Toolkit. -

7 ** -

8 ** $QT_BEGIN_LICENSE:LGPL$ -

9 ** Commercial License Usage -

10 ** Licensees holding valid commercial Qt licenses may use this file in -

11 ** accordance with the commercial license agreement provided with the -

12 ** Software or, alternatively, in accordance with the terms contained in -

13 ** a written agreement between you and Digia. For licensing terms and -

14 ** conditions see http://qt.digia.com/licensing. For further information -

15 ** use the contact form at http://qt.digia.com/contact-us. -

16 ** -

17 ** GNU Lesser General Public License Usage -

18 ** Alternatively, this file may be used under the terms of the GNU Lesser -

19 ** General Public License version 2.1 as published by the Free Software -

20 ** Foundation and appearing in the file LICENSE.LGPL included in the -

21 ** packaging of this file. Please review the following information to -

22 ** ensure the GNU Lesser General Public License version 2.1 requirements -

23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. -

24 ** -

25 ** In addition, as a special exception, Digia gives you certain additional -

26 ** rights. These rights are described in the Digia Qt LGPL Exception -

27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. -

28 ** -

29 ** GNU General Public License Usage -

30 ** Alternatively, this file may be used under the terms of the GNU -

31 ** General Public License version 3.0 as published by the Free Software -

32 ** Foundation and appearing in the file LICENSE.GPL included in the -

33 ** packaging of this file. Please review the following information to -

34 ** ensure the GNU General Public License version 3.0 requirements will be -

35 ** met: http://www.gnu.org/copyleft/gpl.html. -

36 ** -

37 ** -

38 ** $QT_END_LICENSE$ -

39 ** -

40 ****************************************************************************/ -

41 -

42 #include <private/qdrawhelper_x86_p.h> -

43 -

44 #ifdef QT_COMPILER_SUPPORTS_SSSE3 -

45 -

46 #include <private/qdrawingprimitive_sse2_p.h> -

47 -

48 QT_BEGIN_NAMESPACE -

49 -

// Source-over blend of one pixel: composes src over dst and writes the result into dst.
//  - src >= 0xff000000 (the top byte of src is 0xff): dst is overwritten with src.
//  - src == 0: neither branch is taken, so dst is left untouched.
//  - otherwise: dst = src + BYTE_MUL(dst, qAlpha(~src)). BYTE_MUL and qAlpha are defined
//    outside this file; presumably this scales dst by the inverted source alpha, in line
//    with the "result = s + d * (1-alpha)" formula documented for the vector macro below.
50 inline static void blend_pixel(quint32 &dst, const quint32 src) -

51 { -

if (src >= 0xff000000)

evaluated: src >= 0xff000000

TRUE	FALSE
yes Evaluation Count:1646	yes Evaluation Count:2877

1646-2877

dst = src;

executed: dst = src;

Execution Count:1646

1646

else if (src != 0)

evaluated: src != 0

TRUE	FALSE
yes Evaluation Count:605	yes Evaluation Count:2272

605-2272

dst = src + BYTE_MUL(dst, qAlpha(~src));

executed: dst = src + BYTE_MUL(dst, qAlpha(~src));

Execution Count:605

605

56 } -

57 -

58 -

59 /* The instruction palignr uses direct arguments, so we have to generate the code for the different -

60 shift (4, 8, 12). Checking the alignment inside the loop is unfortunately way too slow. -

61 */ -

// BLENDING_LOOP is expanded in place and relies on names provided by the expansion site:
// x, src, dst, minusOffsetToAlignSrcOn16Bytes, srcVectorPrevLoaded, alphaMask, nullVector,
// alphaShuffleMask, one, colorMask and half.
// Each iteration performs one aligned 16-byte load from src and combines it with the
// previously loaded vector through _mm_alignr_epi8(last, prev, palignrOffset) to rebuild the
// four source pixels that correspond to dst[x..x+3]. Then:
//  - all four alpha bytes are 0xff: the source vector is stored to dst unchanged;
//  - all four alpha bytes are zero: nothing is stored;
//  - otherwise: dst is loaded, multiplied by (0xff - alpha) via BYTE_MUL_SSE2 (defined
//    elsewhere), added to the source vector and stored back.
// The last loaded vector becomes srcVectorPrevLoaded for the next iteration.
62 #define BLENDING_LOOP(palignrOffset, length)\ -

63 for (; x-minusOffsetToAlignSrcOn16Bytes < length-7; x += 4) { \ -

64 const __m128i srcVectorLastLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\ -

65 const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \ -

66 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ -

67 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ -

68 _mm_store_si128((__m128i *)&dst[x], srcVector); \ -

69 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ -

70 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ -

71 alphaChannel = _mm_sub_epi16(one, alphaChannel); \ -

72 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ -

73 __m128i destMultipliedByOneMinusAlpha; \ -

74 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ -

75 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ -

76 _mm_store_si128((__m128i *)&dst[x], result); \ -

77 } \ -

78 srcVectorPrevLoaded = srcVectorLastLoaded;\ -

79 } -

80 -

81 -

82 // Basically blend src over dst; unlike the const-alpha variant, this macro takes no constAlphaVector. -

83 // nullVector, half, one, colorMask, alphaMask are constant across the whole image/texture, and should be defined as: -

84 //const __m128i nullVector = _mm_set1_epi32(0); -

85 //const __m128i half = _mm_set1_epi16(0x80); -

86 //const __m128i one = _mm_set1_epi16(0xff); -

87 //const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -

88 //const __m128i alphaMask = _mm_set1_epi32(0xff000000); -

89 // -

90 // The computation being done is: -

91 // result = s + d * (1-alpha) -

92 // with shortcuts if fully opaque or fully transparent. -

// Expansion outline for one run of `length` pixels (the macro opens its own brace scope and
// declares `int x` inside it):
//  1. ALIGNMENT_PROLOGUE_16BYTES (defined elsewhere) drives a scalar loop that calls
//     blend_pixel on the leading pixels; per the inline comment this gets dst aligned.
//  2. minusOffsetToAlignSrcOn16Bytes = (address of src[x] >> 2) & 3, i.e. the position of
//     src[x], counted in 4-byte pixels, inside its 16-byte block (0 means src is aligned too).
//  3. Offset 0: four pixels per iteration with aligned loads/stores while x < length-3.
//     Offset 1..3 and at least 8 pixels left: one aligned load of the block containing
//     src[x], then BLENDING_LOOP with the byte shift (offset << 2) selected by the switch,
//     because the shift must be a compile-time constant.
//     Offset 1..3 with fewer than 8 pixels left: no vector code runs.
//  4. Whatever remains is blended pixel by pixel with blend_pixel.
93 #define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ -

94 int x = 0; \ -

95 \ -

96 /* First, get dst aligned. */ \ -

97 ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \ -

98 blend_pixel(dst[x], src[x]); \ -

99 } \ -

100 \ -

101 const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;\ -

102 \ -

103 if (!minusOffsetToAlignSrcOn16Bytes) {\ -

104 /* src is aligned, usual algorithm but with aligned operations.\ -

105 See the SSE2 version for more documentation on the algorithm itself. */\ -

106 const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3);\ -

107 for (; x < length-3; x += 4) { \ -

108 const __m128i srcVector = _mm_load_si128((__m128i *)&src[x]); \ -

109 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ -

110 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ -

111 _mm_store_si128((__m128i *)&dst[x], srcVector); \ -

112 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ -

113 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ -

114 alphaChannel = _mm_sub_epi16(one, alphaChannel); \ -

115 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ -

116 __m128i destMultipliedByOneMinusAlpha; \ -

117 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ -

118 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ -

119 _mm_store_si128((__m128i *)&dst[x], result); \ -

120 } \ -

121 } /* end for() */\ -

122 } else if ((length - x) >= 8) {\ -

123 /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\ -

124 __m128i srcVectorPrevLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);\ -

125 const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\ -

126 \ -

127 const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3);\ -

128 switch (palignrOffset) {\ -

129 case 4:\ -

130 BLENDING_LOOP(4, length)\ -

131 break;\ -

132 case 8:\ -

133 BLENDING_LOOP(8, length)\ -

134 break;\ -

135 case 12:\ -

136 BLENDING_LOOP(12, length)\ -

137 break;\ -

138 }\ -

139 }\ -

140 for (; x < length; ++x) \ -

141 blend_pixel(dst[x], src[x]); \ -

142 } -

143 -

// Blends a source pixel buffer over a destination pixel buffer, row by row.
// destPixels/srcPixels are reinterpreted as quint32 pixels. Each of the h rows processes
// w pixels, after which dst advances by dbpl bytes and src by sbpl bytes.
// const_alpha selects the code path:
//   256       -> BLEND_SOURCE_OVER_ARGB32_SSSE3 (plain source-over, defined in this file);
//   0         -> neither branch is taken, so the destination is not touched;
//   otherwise -> const_alpha is rescaled with (const_alpha * 255) >> 8 and each row is handled
//                by BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2, which is defined outside
//                this file (see the formula comment inside that branch).
144 void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl, -

145 const uchar *srcPixels, int sbpl, -

146 int w, int h, -

147 int const_alpha) -

148 { -

149

const quint32 *src = (const quint32 *) srcPixels;

executed (the execution status of this line is deduced): const quint32 *src = (const quint32 *) srcPixels;

150

quint32 *dst = (quint32 *) destPixels;

executed (the execution status of this line is deduced): quint32 *dst = (quint32 *) destPixels;

151

if (const_alpha == 256) {

evaluated: const_alpha == 256

TRUE	FALSE
yes Evaluation Count:44	yes Evaluation Count:7

7-44

152

const __m128i alphaMask = _mm_set1_epi32(0xff000000);

executed (the execution status of this line is deduced): const __m128i alphaMask = _mm_set1_epi32(0xff000000);

153

const __m128i nullVector = _mm_setzero_si128();

executed (the execution status of this line is deduced): const __m128i nullVector = _mm_setzero_si128();

154

const __m128i half = _mm_set1_epi16(0x80);

executed (the execution status of this line is deduced): const __m128i half = _mm_set1_epi16(0x80);

155

const __m128i one = _mm_set1_epi16(0xff);

executed (the execution status of this line is deduced): const __m128i one = _mm_set1_epi16(0xff);

156

const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

executed (the execution status of this line is deduced): const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

157 -

158

for (int y = 0; y < h; ++y) {

evaluated: y < h

TRUE	FALSE
yes Evaluation Count:4052	yes Evaluation Count:44

44-4052

159

BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask);

executed: }

Execution Count:1481

executed: }

Execution Count:81444

executed: }

Execution Count:802

executed: }

Execution Count:3242

executed: }

Execution Count:874

executed: }

Execution Count:1479

executed: }

Execution Count:6444

executed: break;

Execution Count:534

executed: }

Execution Count:304

executed: }

Execution Count:371

executed: }

Execution Count:860

executed: break;

Execution Count:166

executed: }

Execution Count:144

executed: }

Execution Count:323

executed: }

Execution Count:588

executed: break;

Execution Count:110

executed: }

Execution Count:810

executed: blend_pixel(dst[x], src[x]);

Execution Count:3042

evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff

TRUE	FALSE
yes Evaluation Count:802	yes Evaluation Count:70109

partially evaluated: (w - x) >= 8

TRUE	FALSE
yes Evaluation Count:810	no Evaluation Count:0

evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff

TRUE	FALSE
yes Evaluation Count:1479	yes Evaluation Count:4091

evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff

TRUE	FALSE
yes Evaluation Count:371	yes Evaluation Count:185

evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff

TRUE	FALSE
yes Evaluation Count:323	yes Evaluation Count:121

evaluated: !minusOffsetToAlignSrcOn16Bytes

TRUE	FALSE
yes Evaluation Count:3242	yes Evaluation Count:810

evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff

TRUE	FALSE
yes Evaluation Count:81444	yes Evaluation Count:70911

evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff

TRUE	FALSE
yes Evaluation Count:874	yes Evaluation Count:5570

evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff

TRUE	FALSE
yes Evaluation Count:304	yes Evaluation Count:556

evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff

TRUE	FALSE
yes Evaluation Count:144	yes Evaluation Count:444

evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))

TRUE	FALSE
yes Evaluation Count:1481	yes Evaluation Count:4052

evaluated: x < w-3

TRUE	FALSE
yes Evaluation Count:152355	yes Evaluation Count:3242

evaluated: x-minusOffsetToAlignSrcOn16Bytes < w-7

TRUE	FALSE
yes Evaluation Count:6444	yes Evaluation Count:534

evaluated: x-minusOffsetToAlignSrcOn16Bytes < w-7

TRUE	FALSE
yes Evaluation Count:860	yes Evaluation Count:166

evaluated: x-minusOffsetToAlignSrcOn16Bytes < w-7

TRUE	FALSE
yes Evaluation Count:588	yes Evaluation Count:110

evaluated: x < w

TRUE	FALSE
yes Evaluation Count:3042	yes Evaluation Count:4052

0-152355

160

dst = (quint32 *)(((uchar *) dst) + dbpl);

executed (the execution status of this line is deduced): dst = (quint32 *)(((uchar *) dst) + dbpl);

161

src = (const quint32 *)(((const uchar *) src) + sbpl);

executed (the execution status of this line is deduced): src = (const quint32 *)(((const uchar *) src) + sbpl);

162

}

executed: }

Execution Count:4052

4052

163

} else if (const_alpha != 0) {

executed: }

Execution Count:44

partially evaluated: const_alpha != 0

TRUE	FALSE
yes Evaluation Count:7	no Evaluation Count:0

0-44

164 // dest = (s + d * sia) * ca + d * cia -

165 // = s * ca + d * (sia * ca + cia) -

166 // = s * ca + d * (1 - sa*ca) -

167

const_alpha = (const_alpha * 255) >> 8;

executed (the execution status of this line is deduced): const_alpha = (const_alpha * 255) >> 8;

168

const __m128i nullVector = _mm_setzero_si128();

executed (the execution status of this line is deduced): const __m128i nullVector = _mm_setzero_si128();

169

const __m128i half = _mm_set1_epi16(0x80);

executed (the execution status of this line is deduced): const __m128i half = _mm_set1_epi16(0x80);

170

const __m128i one = _mm_set1_epi16(0xff);

executed (the execution status of this line is deduced): const __m128i one = _mm_set1_epi16(0xff);

171

const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

executed (the execution status of this line is deduced): const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);

172

const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);

executed (the execution status of this line is deduced): const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);

173

for (int y = 0; y < h; ++y) {

evaluated: y < h

TRUE	FALSE
yes Evaluation Count:780	yes Evaluation Count:7

7-780

174

BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)

executed: }

Execution Count:2304

executed: }

Execution Count:2304

executed: }

Execution Count:23076

executed: }

Execution Count:23076

executed: }

Execution Count:2304

executed: }

Execution Count:2304

partially evaluated: s != 0

TRUE	FALSE
yes Evaluation Count:2304	no Evaluation Count:0

partially evaluated: _mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff

TRUE	FALSE
yes Evaluation Count:23076	no Evaluation Count:0

partially evaluated: s != 0

TRUE	FALSE
yes Evaluation Count:2304	no Evaluation Count:0

evaluated: x < static_cast<int>(qMin(static_cast<quintptr>(w), ((4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3)))

TRUE	FALSE
yes Evaluation Count:2304	yes Evaluation Count:780

evaluated: x < w-3

TRUE	FALSE
yes Evaluation Count:23076	yes Evaluation Count:780

evaluated: x < w

TRUE	FALSE
yes Evaluation Count:2304	yes Evaluation Count:780

0-23076

175

dst = (quint32 *)(((uchar *) dst) + dbpl);

executed (the execution status of this line is deduced): dst = (quint32 *)(((uchar *) dst) + dbpl);

176

src = (const quint32 *)(((const uchar *) src) + sbpl);

executed (the execution status of this line is deduced): src = (const quint32 *)(((const uchar *) src) + sbpl);

177

}

executed: }

Execution Count:780

780

178

}

executed: }

Execution Count:7

179 } -

180 -

181 QT_END_NAMESPACE -

182 -

183 #endif // QT_COMPILER_SUPPORTS_SSSE3 -

184 -

Source code

Switch to Preprocessed file