qdrawhelper_ssse3.cpp

Absolute File Name:

/home/qt/qt5_coco/qt5/qtbase/src/gui/painting/qdrawhelper_ssse3.cpp

Source code

Switch to Preprocessed file

Line Source Count

1 /**************************************************************************** -

2 ** -

4 ** Contact: http://www.qt.io/licensing/ -

5 ** -

6 ** This file is part of the QtGui module of the Qt Toolkit. -

7 ** -

8 ** $QT_BEGIN_LICENSE:LGPL21$ -

9 ** Commercial License Usage -

10 ** Licensees holding valid commercial Qt licenses may use this file in -

11 ** accordance with the commercial license agreement provided with the -

12 ** Software or, alternatively, in accordance with the terms contained in -

13 ** a written agreement between you and The Qt Company. For licensing terms -

14 ** and conditions see http://www.qt.io/terms-conditions. For further -

15 ** information use the contact form at http://www.qt.io/contact-us. -

16 ** -

17 ** GNU Lesser General Public License Usage -

18 ** Alternatively, this file may be used under the terms of the GNU Lesser -

19 ** General Public License version 2.1 or version 3 as published by the Free -

20 ** Software Foundation and appearing in the file LICENSE.LGPLv21 and -

21 ** LICENSE.LGPLv3 included in the packaging of this file. Please review the -

22 ** following information to ensure the GNU Lesser General Public License -

23 ** requirements will be met: https://www.gnu.org/licenses/lgpl.html and -

24 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. -

25 ** -

26 ** As a special exception, The Qt Company gives you certain additional -

27 ** rights. These rights are described in The Qt Company LGPL Exception -

28 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. -

29 ** -

30 ** $QT_END_LICENSE$ -

31 ** -

32 ****************************************************************************/ -

33 -

34 #include <private/qdrawhelper_x86_p.h> -

35 -

36 #ifdef QT_COMPILER_SUPPORTS_SSSE3 -

37 -

38 #include <private/qdrawingprimitive_sse2_p.h> -

39 -

40 QT_BEGIN_NAMESPACE -

41 -

/* Blends one 32-bit source pixel onto one destination pixel.
 *  - src >= 0xff000000 (top byte is 0xff): dst is overwritten with src.
 *  - src == 0: dst is left unchanged.
 *  - otherwise: dst = src + BYTE_MUL(dst, qAlpha(~src)).
 * qAlpha and BYTE_MUL are defined outside this view; presumably qAlpha(~src)
 * is 255 minus the source alpha and BYTE_MUL scales each channel by it. */
42 inline static void blend_pixel(quint32 &dst, const quint32 src) -

43 { -

if (src >= 0xff000000)

src >= 0xff000000	Description
TRUE	never evaluated
FALSE	never evaluated

dst = src;

never executed: dst = src;

else if (src != 0)

src != 0	Description
TRUE	never evaluated
FALSE	never evaluated

dst = src + BYTE_MUL(dst, qAlpha(~src));

never executed: dst = src + BYTE_MUL(dst, qAlpha(~src));

}

never executed: end of block

49 -

50 -

51 /* The instruction palignr uses direct arguments, so we have to generate the code for the different -

52 shifts (4, 8, 12). Checking the alignment inside the loop is unfortunately way too slow. -

53 */ -

/* BLENDING_LOOP(palignrOffset, length): four-pixels-per-iteration loop for a source
 * whose address is not on a 16-byte boundary.
 * Expands in a scope that must already provide: x, src, dst,
 * minusOffsetToAlignSrcOn16Bytes, srcVectorPrevLoaded, alphaMask, nullVector,
 * alphaShuffleMask, one, colorMask and half.
 * Per iteration (x += 4):
 *  - loads the next source vector with an aligned load from
 *    &src[x - minusOffsetToAlignSrcOn16Bytes + 4];
 *  - combines it with the previously loaded vector through _mm_alignr_epi8 at byte
 *    offset palignrOffset, which must be a compile-time constant;
 *  - if all four masked alpha values equal alphaMask, stores the source vector to dst[x];
 *  - else, unless all four masked alpha values are zero, stores
 *    src + BYTE_MUL_SSE2(dst, one - alpha);
 *  - if all four masked alpha values are zero, dst is not written.
 * The loop runs while x - minusOffsetToAlignSrcOn16Bytes < length - 7.
 * dst[x] is accessed with aligned load/store, so it must be 16-byte aligned on entry. */
54 #define BLENDING_LOOP(palignrOffset, length)\ -

55 for (; x-minusOffsetToAlignSrcOn16Bytes < length-7; x += 4) { \ -

56 const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\ -

57 const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \ -

58 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ -

59 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ -

60 _mm_store_si128((__m128i *)&dst[x], srcVector); \ -

61 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ -

62 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ -

63 alphaChannel = _mm_sub_epi16(one, alphaChannel); \ -

64 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ -

65 __m128i destMultipliedByOneMinusAlpha; \ -

66 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ -

67 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ -

68 _mm_store_si128((__m128i *)&dst[x], result); \ -

69 } \ -

70 srcVectorPrevLoaded = srcVectorLastLoaded;\ -

71 } -

72 -

73 -

74 // Blend src over dst. No constant alpha is applied here: this macro takes no constAlphaVector argument. -

75 // nullVector, half, one, colorMask, alphaMask are constant across the whole image/texture, and should be defined as: -

76 //const __m128i nullVector = _mm_set1_epi32(0); -

77 //const __m128i half = _mm_set1_epi16(0x80); -

78 //const __m128i one = _mm_set1_epi16(0xff); -

79 //const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -

80 //const __m128i alphaMask = _mm_set1_epi32(0xff000000); -

81 // -

82 // The computation being done is: -

83 // result = s + d * (1-alpha) -

84 // with shortcuts if fully opaque or fully transparent. -

/* BLEND_SOURCE_OVER_ARGB32_SSSE3: blends `length` pixels of src onto dst.
 * Steps, in order:
 *  1. ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) runs blend_pixel per pixel
 *     (the prologue macro is defined elsewhere; presumably it stops once dst[x]
 *     is 16-byte aligned).
 *  2. minusOffsetToAlignSrcOn16Bytes = ((address of src[x]) >> 2) & 0x3, i.e. how many
 *     4-byte pixels &src[x] lies past a 16-byte boundary (0..3).
 *  3. Offset 0: aligned loads of src, four pixels per iteration while x < length - 3,
 *     with the same opaque / transparent shortcuts as BLENDING_LOOP.
 *  4. Offset != 0 and at least 8 pixels left: the first aligned vector is preloaded from
 *     &src[x - offset] and BLENDING_LOOP is expanded for a palignr offset of 4, 8 or 12.
 *  5. Whatever remains is handled one pixel at a time by blend_pixel. */
85 #define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ -

86 int x = 0; \ -

87 \ -

88 /* First, get dst aligned. */ \ -

89 ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \ -

90 blend_pixel(dst[x], src[x]); \ -

91 } \ -

92 \ -

93 const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;\ -

94 \ -

95 if (!minusOffsetToAlignSrcOn16Bytes) {\ -

96 /* src is aligned, usual algorithm but with aligned operations.\ -

97 See the SSE2 version for more documentation on the algorithm itself. */\ -

98 const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\ -

99 for (; x < length-3; x += 4) { \ -

100 const __m128i srcVector = _mm_load_si128((const __m128i *)&src[x]); \ -

101 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ -

102 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ -

103 _mm_store_si128((__m128i *)&dst[x], srcVector); \ -

104 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ -

105 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ -

106 alphaChannel = _mm_sub_epi16(one, alphaChannel); \ -

107 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ -

108 __m128i destMultipliedByOneMinusAlpha; \ -

109 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ -

110 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ -

111 _mm_store_si128((__m128i *)&dst[x], result); \ -

112 } \ -

113 } /* end for() */\ -

114 } else if ((length - x) >= 8) {\ -

115 /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\ -

116 __m128i srcVectorPrevLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);\ -

117 const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\ -

118 \ -

119 const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\ -

120 switch (palignrOffset) {\ -

121 case 4:\ -

122 BLENDING_LOOP(4, length)\ -

123 break;\ -

124 case 8:\ -

125 BLENDING_LOOP(8, length)\ -

126 break;\ -

127 case 12:\ -

128 BLENDING_LOOP(12, length)\ -

129 break;\ -

130 }\ -

131 }\ -

132 for (; x < length; ++x) \ -

133 blend_pixel(dst[x], src[x]); \ -

134 } -

135 -

/* Blends a rectangle of w x h 32-bit pixels from srcPixels onto destPixels, row by row.
 * After every row, dbpl and sbpl are added to the dst and src pointers as byte offsets.
 *  - const_alpha == 256: each row goes through BLEND_SOURCE_OVER_ARGB32_SSSE3.
 *  - const_alpha != 0 (and != 256): const_alpha is rescaled with (const_alpha * 255) >> 8,
 *    then each row goes through BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2
 *    (defined outside this view).
 *  - const_alpha == 0: nothing is written.
 * The report rows between the source rows are the coverage entries for the expanded macros. */
136 void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl, -

137 const uchar *srcPixels, int sbpl, -

138 int w, int h, -

139 int const_alpha) -

140 { -

141 const quint32 *src = (const quint32 *) srcPixels; -

142 quint32 *dst = (quint32 *) destPixels; -

143

if (const_alpha == 256) {

const_alpha == 256	Description
TRUE	never evaluated
FALSE	never evaluated

144 const __m128i alphaMask = _mm_set1_epi32(0xff000000); -

145 const __m128i nullVector = _mm_setzero_si128(); -

146 const __m128i half = _mm_set1_epi16(0x80); -

147 const __m128i one = _mm_set1_epi16(0xff); -

148 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -

149 -

150

for (int y = 0; y < h; ++y) {

y < h	Description
TRUE	never evaluated
FALSE	never evaluated

151

BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask);

never executed: end of block

never executed: break;

never executed: end of block

never executed: break;

never executed: end of block

never executed: break;

never executed: end of block

never executed: blend_pixel(dst[x], src[x]);

_mm_movemask_e...or)) != 0xffff	Description
TRUE	never evaluated
FALSE	never evaluated

(w - x) >= 8	Description
TRUE	never evaluated
FALSE	never evaluated

_mm_movemask_e...or)) != 0xffff	Description
TRUE	never evaluated
FALSE	never evaluated

_mm_movemask_e...or)) != 0xffff	Description
TRUE	never evaluated
FALSE	never evaluated

_mm_movemask_e...or)) != 0xffff	Description
TRUE	never evaluated
FALSE	never evaluated

!minusOffsetTo...gnSrcOn16Bytes	Description
TRUE	never evaluated
FALSE	never evaluated

_mm_movemask_e...sk)) == 0xffff	Description
TRUE	never evaluated
FALSE	never evaluated

_mm_movemask_e...sk)) == 0xffff	Description
TRUE	never evaluated
FALSE	never evaluated

_mm_movemask_e...sk)) == 0xffff	Description
TRUE	never evaluated
FALSE	never evaluated

_mm_movemask_e...sk)) == 0xffff	Description
TRUE	never evaluated
FALSE	never evaluated

x < static_cas...0x3)) & 0x3)))	Description
TRUE	never evaluated
FALSE	never evaluated

x < w-3	Description
TRUE	never evaluated
FALSE	never evaluated

x-minusOffsetT...n16Bytes < w-7	Description
TRUE	never evaluated
FALSE	never evaluated

x-minusOffsetT...n16Bytes < w-7	Description
TRUE	never evaluated
FALSE	never evaluated

x-minusOffsetT...n16Bytes < w-7	Description
TRUE	never evaluated
FALSE	never evaluated

x < w	Description
TRUE	never evaluated
FALSE	never evaluated

never executed: case 4:

never executed: case 8:

never executed: case 12:

152 dst = (quint32 *)(((uchar *) dst) + dbpl); -

153 src = (const quint32 *)(((const uchar *) src) + sbpl); -

154

}

never executed: end of block

155

} else if (const_alpha != 0) {

never executed: end of block

const_alpha != 0	Description
TRUE	never evaluated
FALSE	never evaluated

156 // dest = (s + d * sia) * ca + d * cia -

157 // = s * ca + d * (sia * ca + cia) -

158 // = s * ca + d * (1 - sa*ca) -

159 const_alpha = (const_alpha * 255) >> 8; -

160 const __m128i nullVector = _mm_setzero_si128(); -

161 const __m128i half = _mm_set1_epi16(0x80); -

162 const __m128i one = _mm_set1_epi16(0xff); -

163 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); -

164 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); -

165

for (int y = 0; y < h; ++y) {

y < h	Description
TRUE	never evaluated
FALSE	never evaluated

166

BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)

never executed: end of block

s != 0	Description
TRUE	never evaluated
FALSE	never evaluated

_mm_movemask_e...or)) != 0xffff	Description
TRUE	never evaluated
FALSE	never evaluated

s != 0	Description
TRUE	never evaluated
FALSE	never evaluated

x < static_cas...0x3)) & 0x3)))	Description
TRUE	never evaluated
FALSE	never evaluated

x < w-3	Description
TRUE	never evaluated
FALSE	never evaluated

x < w	Description
TRUE	never evaluated
FALSE	never evaluated

167 dst = (quint32 *)(((uchar *) dst) + dbpl); -

168 src = (const quint32 *)(((const uchar *) src) + sbpl); -

169

}

never executed: end of block

170

}

never executed: end of block

171

}

never executed: end of block

172 -

173 QT_END_NAMESPACE -

174 -

175 #endif // QT_COMPILER_SUPPORTS_SSSE3 -

Source code

Switch to Preprocessed file

Generated by Squish Coco Non-Commercial 4.3.0-BETA-master-30-08-2018-4cb69e9