Absolute File Name: | /home/qt/qt5_coco/qt5/qtbase/src/corelib/tools/qsimd.cpp |
Source code | Switch to Preprocessed file |
Line | Source | Count | ||||||
---|---|---|---|---|---|---|---|---|
1 | /**************************************************************************** | - | ||||||
2 | ** | - | ||||||
3 | ** Copyright (C) 2016 The Qt Company Ltd. | - | ||||||
4 | ** Copyright (C) 2016 Intel Corporation. | - | ||||||
5 | ** Contact: https://www.qt.io/licensing/ | - | ||||||
6 | ** | - | ||||||
7 | ** This file is part of the QtCore module of the Qt Toolkit. | - | ||||||
8 | ** | - | ||||||
9 | ** $QT_BEGIN_LICENSE:LGPL$ | - | ||||||
10 | ** Commercial License Usage | - | ||||||
11 | ** Licensees holding valid commercial Qt licenses may use this file in | - | ||||||
12 | ** accordance with the commercial license agreement provided with the | - | ||||||
13 | ** Software or, alternatively, in accordance with the terms contained in | - | ||||||
14 | ** a written agreement between you and The Qt Company. For licensing terms | - | ||||||
15 | ** and conditions see https://www.qt.io/terms-conditions. For further | - | ||||||
16 | ** information use the contact form at https://www.qt.io/contact-us. | - | ||||||
17 | ** | - | ||||||
18 | ** GNU Lesser General Public License Usage | - | ||||||
19 | ** Alternatively, this file may be used under the terms of the GNU Lesser | - | ||||||
20 | ** General Public License version 3 as published by the Free Software | - | ||||||
21 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the | - | ||||||
22 | ** packaging of this file. Please review the following information to | - | ||||||
23 | ** ensure the GNU Lesser General Public License version 3 requirements | - | ||||||
24 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. | - | ||||||
25 | ** | - | ||||||
26 | ** GNU General Public License Usage | - | ||||||
27 | ** Alternatively, this file may be used under the terms of the GNU | - | ||||||
28 | ** General Public License version 2.0 or (at your option) the GNU General | - | ||||||
29 | ** Public license version 3 or any later version approved by the KDE Free | - | ||||||
30 | ** Qt Foundation. The licenses are as published by the Free Software | - | ||||||
31 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 | - | ||||||
32 | ** included in the packaging of this file. Please review the following | - | ||||||
33 | ** information to ensure the GNU General Public License requirements will | - | ||||||
34 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and | - | ||||||
35 | ** https://www.gnu.org/licenses/gpl-3.0.html. | - | ||||||
36 | ** | - | ||||||
37 | ** $QT_END_LICENSE$ | - | ||||||
38 | ** | - | ||||||
39 | ****************************************************************************/ | - | ||||||
40 | - | |||||||
41 | #include "qsimd_p.h" | - | ||||||
42 | #include "qalgorithms.h" | - | ||||||
43 | #include <QByteArray> | - | ||||||
44 | #include <stdio.h> | - | ||||||
45 | - | |||||||
46 | #ifdef Q_OS_LINUX | - | ||||||
47 | # include "../testlib/3rdparty/valgrind_p.h" | - | ||||||
48 | #endif | - | ||||||
49 | - | |||||||
50 | #if defined(Q_OS_WIN) | - | ||||||
51 | # if defined(Q_OS_WINCE) | - | ||||||
52 | # include <qt_windows.h> | - | ||||||
53 | # if _WIN32_WCE < 0x800 | - | ||||||
54 | # include <cmnintrin.h> | - | ||||||
55 | # endif | - | ||||||
56 | # endif | - | ||||||
57 | # if !defined(Q_CC_GNU) | - | ||||||
58 | # ifndef Q_OS_WINCE | - | ||||||
59 | # include <intrin.h> | - | ||||||
60 | # endif | - | ||||||
61 | # endif | - | ||||||
62 | #elif defined(Q_OS_LINUX) && (defined(Q_PROCESSOR_ARM) || defined(Q_PROCESSOR_MIPS_32)) | - | ||||||
63 | #include "private/qcore_unix_p.h" | - | ||||||
64 | - | |||||||
65 | // the kernel header definitions for HWCAP_* | - | ||||||
66 | // (the ones we need/may need anyway) | - | ||||||
67 | - | |||||||
68 | // copied from <asm/hwcap.h> (ARM) | - | ||||||
69 | #define HWCAP_CRUNCH 1024 | - | ||||||
70 | #define HWCAP_THUMBEE 2048 | - | ||||||
71 | #define HWCAP_NEON 4096 | - | ||||||
72 | #define HWCAP_VFPv3 8192 | - | ||||||
73 | #define HWCAP_VFPv3D16 16384 | - | ||||||
74 | - | |||||||
75 | // copied from <asm/hwcap.h> (ARM): | - | ||||||
76 | #define HWCAP2_CRC32 (1 << 4) | - | ||||||
77 | - | |||||||
78 | // copied from <asm/hwcap.h> (Aarch64) | - | ||||||
79 | #define HWCAP_CRC32 (1 << 7) | - | ||||||
80 | - | |||||||
81 | // copied from <linux/auxvec.h> | - | ||||||
82 | #define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ | - | ||||||
83 | #define AT_HWCAP2 26 /* extension of AT_HWCAP */ | - | ||||||
84 | - | |||||||
85 | #elif defined(Q_CC_GHS) | - | ||||||
86 | #include <INTEGRITY_types.h> | - | ||||||
87 | #endif | - | ||||||
88 | - | |||||||
89 | QT_BEGIN_NAMESPACE | - | ||||||
90 | - | |||||||
91 | #if defined (Q_OS_NACL) | - | ||||||
92 | static inline uint detectProcessorFeatures() | - | ||||||
93 | { | - | ||||||
94 | return 0; | - | ||||||
95 | } | - | ||||||
96 | #elif defined (Q_OS_WINCE) | - | ||||||
97 | static inline quint64 detectProcessorFeatures() | - | ||||||
98 | { | - | ||||||
99 | quint64 features = 0; | - | ||||||
100 | - | |||||||
101 | #if defined (ARM) | - | ||||||
102 | # ifdef PF_ARM_NEON | - | ||||||
103 | if (IsProcessorFeaturePresent(PF_ARM_NEON)) | - | ||||||
104 | features |= Q_UINT64_C(1) << CpuFeatureNEON; | - | ||||||
105 | # endif | - | ||||||
106 | #elif defined(_X86_) | - | ||||||
107 | if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) | - | ||||||
108 | features |= Q_UINT64_C(1) << CpuFeatureSSE2; | - | ||||||
109 | if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) | - | ||||||
110 | features |= Q_UINT64_C(1) << CpuFeatureSSE3; | - | ||||||
111 | #endif | - | ||||||
112 | return features; | - | ||||||
113 | } | - | ||||||
114 | - | |||||||
115 | #elif defined(Q_PROCESSOR_ARM) | - | ||||||
116 | static inline quint64 detectProcessorFeatures() | - | ||||||
117 | { | - | ||||||
118 | quint64 features = 0; | - | ||||||
119 | - | |||||||
120 | #if defined(Q_OS_LINUX) | - | ||||||
121 | # if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64) | - | ||||||
122 | features |= Q_UINT64_C(1) << CpuFeatureNEON; // NEON is always available on ARMv8 64bit. | - | ||||||
123 | # endif | - | ||||||
124 | int auxv = qt_safe_open("/proc/self/auxv", O_RDONLY); | - | ||||||
125 | if (auxv != -1) { | - | ||||||
126 | unsigned long vector[64]; | - | ||||||
127 | int nread; | - | ||||||
128 | while (features == 0) { | - | ||||||
129 | nread = qt_safe_read(auxv, (char *)vector, sizeof vector); | - | ||||||
130 | if (nread <= 0) { | - | ||||||
131 | // EOF or error | - | ||||||
132 | break; | - | ||||||
133 | } | - | ||||||
134 | - | |||||||
135 | int max = nread / (sizeof vector[0]); | - | ||||||
136 | for (int i = 0; i < max; i += 2) { | - | ||||||
137 | if (vector[i] == AT_HWCAP) { | - | ||||||
138 | # if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64) | - | ||||||
139 | // For Aarch64: | - | ||||||
140 | if (vector[i+1] & HWCAP_CRC32) | - | ||||||
141 | features |= Q_UINT64_C(1) << CpuFeatureCRC32; | - | ||||||
142 | # endif | - | ||||||
143 | // Aarch32, or ARMv7 or before: | - | ||||||
144 | if (vector[i+1] & HWCAP_NEON) | - | ||||||
145 | features |= Q_UINT64_C(1) << CpuFeatureNEON; | - | ||||||
146 | } | - | ||||||
147 | # if defined(Q_PROCESSOR_ARM_32) | - | ||||||
148 | // For Aarch32: | - | ||||||
149 | if (vector[i] == AT_HWCAP2) { | - | ||||||
150 | if (vector[i+1] & HWCAP2_CRC32) | - | ||||||
151 | features |= Q_UINT64_C(1) << CpuFeatureCRC32; | - | ||||||
152 | } | - | ||||||
153 | # endif | - | ||||||
154 | } | - | ||||||
155 | } | - | ||||||
156 | - | |||||||
157 | qt_safe_close(auxv); | - | ||||||
158 | return features; | - | ||||||
159 | } | - | ||||||
160 | // fall back if /proc/self/auxv wasn't found | - | ||||||
161 | #endif | - | ||||||
162 | - | |||||||
163 | #if defined(__ARM_NEON__) | - | ||||||
164 | features |= Q_UINT64_C(1) << CpuFeatureNEON; | - | ||||||
165 | #endif | - | ||||||
166 | #if defined(__ARM_FEATURE_CRC32) | - | ||||||
167 | features |= Q_UINT64_C(1) << CpuFeatureCRC32; | - | ||||||
168 | #endif | - | ||||||
169 | - | |||||||
170 | return features; | - | ||||||
171 | } | - | ||||||
172 | - | |||||||
173 | #elif defined(Q_PROCESSOR_X86) | - | ||||||
174 | - | |||||||
175 | #ifdef Q_PROCESSOR_X86_32 | - | ||||||
176 | # define PICreg "%%ebx" | - | ||||||
177 | #else | - | ||||||
178 | # define PICreg "%%rbx" | - | ||||||
179 | #endif | - | ||||||
180 | - | |||||||
// Returns the value CPUID leaf 0 leaves in EAX, i.e. the highest basic CPUID
// leaf that may be queried. Returns 0 when CPUID is found to be unsupported
// (pre-Pentium builds only) or when no implementation exists for the compiler.
static int maxBasicCpuidSupported()
{
#if defined(Q_CC_GNU)
    // Scratch register: receives the PIC register's contents while CPUID
    // overwrites that register.
    qregisterint scratch;

#  if Q_PROCESSOR_X86 < 5
    // check if the CPUID instruction is supported: try to flip bit
    // 0x00200000 of the flags register and see whether the change sticks
    long flagToggled;
    asm ("pushf\n"
         "pop %0\n"
         "mov %0, %1\n"
         "xor $0x00200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "pop %0\n"
         "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
         : "=a" (flagToggled), "=r" (scratch)
         );
    if (flagToggled == 0)
        return 0;
#  endif

    int highestLeaf;
    asm ("xchg " PICreg", %1\n"
         "cpuid\n"
         "xchg " PICreg", %1\n"
         : "=&a" (highestLeaf), "=&r" (scratch)
         : "0" (0)
         : "ecx", "edx");
    return highestLeaf;
#elif defined(Q_OS_WIN)
    // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
    int regs[4];
    __cpuid(regs, 0);
    return regs[0];
#elif defined(Q_CC_GHS)
    unsigned int regs[4];
    __CPUID(0, regs);
    return regs[0];
#else
    return 0;
#endif
}
225 | - | |||||||
226 | static void cpuidFeatures01(uint &ecx, uint &edx) | - | ||||||
227 | { | - | ||||||
228 | #if defined(Q_CC_GNU) | - | ||||||
229 | qregisterint tmp1; | - | ||||||
230 | asm ("xchg " PICreg", %2\n" | - | ||||||
231 | "cpuid\n" | - | ||||||
232 | "xchg " PICreg", %2\n" | - | ||||||
233 | : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1) | - | ||||||
234 | : "a" (1)); | - | ||||||
235 | #elif defined(Q_OS_WIN) | - | ||||||
236 | int info[4]; | - | ||||||
237 | __cpuid(info, 1); | - | ||||||
238 | ecx = info[2]; | - | ||||||
239 | edx = info[3]; | - | ||||||
240 | #elif defined(Q_CC_GHS) | - | ||||||
241 | unsigned int info[4]; | - | ||||||
242 | __CPUID(1, info); | - | ||||||
243 | ecx = info[2]; | - | ||||||
244 | edx = info[3]; | - | ||||||
245 | #endif | - | ||||||
246 | } executed 1162 times by 97 tests: end of block Executed by:
| 1162 | ||||||
247 | - | |||||||
#ifdef Q_OS_WIN
// Stand-in __cpuidex(): ignores the requested leaf and sub-leaf and reports
// all four result registers as zero.
// NOTE(review): the __int64 last parameter looks chosen so that a real
// intrinsic declared with int parameters wins overload resolution - confirm.
inline void __cpuidex(int info[4], int, __int64)
{
    memset(info, 0, sizeof(int) * 4);
}
#endif
251 | - | |||||||
252 | static void cpuidFeatures07_00(uint &ebx, uint &ecx) | - | ||||||
253 | { | - | ||||||
254 | #if defined(Q_CC_GNU) | - | ||||||
255 | qregisteruint rbx; // in case it's 64-bit | - | ||||||
256 | qregisteruint rcx = 0; | - | ||||||
257 | asm ("xchg " PICreg", %0\n" | - | ||||||
258 | "cpuid\n" | - | ||||||
259 | "xchg " PICreg", %0\n" | - | ||||||
260 | : "=&r" (rbx), "+&c" (rcx) | - | ||||||
261 | : "a" (7) | - | ||||||
262 | : "%edx"); | - | ||||||
263 | ebx = rbx; | - | ||||||
264 | ecx = rcx; | - | ||||||
265 | #elif defined(Q_OS_WIN) | - | ||||||
266 | int info[4]; | - | ||||||
267 | __cpuidex(info, 7, 0); | - | ||||||
268 | ebx = info[1]; | - | ||||||
269 | ecx = info[2]; | - | ||||||
270 | #elif defined(Q_CC_GHS) | - | ||||||
271 | unsigned int info[4]; | - | ||||||
272 | __CPUIDEX(7, 0, info); | - | ||||||
273 | ebx = info[1]; | - | ||||||
274 | ecx = info[2]; | - | ||||||
275 | #endif | - | ||||||
276 | } executed 1162 times by 97 tests: end of block Executed by:
| 1162 | ||||||
277 | - | |||||||
#ifdef Q_OS_WIN
// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
// It reports every extended-control-register bit as clear.
inline quint64 _xgetbv(__int64)
{
    return 0;
}
#endif
282 | static void xgetbv(uint in, uint &eax, uint &edx) | - | ||||||
283 | { | - | ||||||
284 | #if defined(Q_CC_GNU) || defined(Q_CC_GHS) | - | ||||||
285 | asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction | - | ||||||
286 | : "=a" (eax), "=d" (edx) | - | ||||||
287 | : "c" (in)); | - | ||||||
288 | #elif defined(Q_OS_WIN) | - | ||||||
289 | quint64 result = _xgetbv(in); | - | ||||||
290 | eax = result; | - | ||||||
291 | edx = result >> 32; | - | ||||||
292 | #endif | - | ||||||
293 | } never executed: end of block | 0 | ||||||
294 | - | |||||||
295 | static quint64 detectProcessorFeatures() | - | ||||||
296 | { | - | ||||||
297 | // Flags from the CR0 / XCR0 state register | - | ||||||
298 | enum XCR0Flags { | - | ||||||
299 | X87 = 1 << 0, | - | ||||||
300 | XMM0_15 = 1 << 1, | - | ||||||
301 | YMM0_15Hi128 = 1 << 2, | - | ||||||
302 | BNDRegs = 1 << 3, | - | ||||||
303 | BNDCSR = 1 << 4, | - | ||||||
304 | OpMask = 1 << 5, | - | ||||||
305 | ZMM0_15Hi256 = 1 << 6, | - | ||||||
306 | ZMM16_31 = 1 << 7, | - | ||||||
307 | - | |||||||
308 | SSEState = XMM0_15, | - | ||||||
309 | AVXState = XMM0_15 | YMM0_15Hi128, | - | ||||||
310 | AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31 | - | ||||||
311 | }; | - | ||||||
312 | static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) | | - | ||||||
313 | (Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) | | - | ||||||
314 | (Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) | | - | ||||||
315 | (Q_UINT64_C(1) << CpuFeatureAVX512VL) | | - | ||||||
316 | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI); | - | ||||||
317 | static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512; | - | ||||||
318 | static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2; | - | ||||||
319 | - | |||||||
320 | quint64 features = 0; | - | ||||||
321 | int cpuidLevel = maxBasicCpuidSupported(); | - | ||||||
322 | #if Q_PROCESSOR_X86 < 5 | - | ||||||
323 | if (cpuidLevel < 1) | - | ||||||
324 | return 0; | - | ||||||
325 | #else | - | ||||||
326 | Q_ASSERT(cpuidLevel >= 1); | - | ||||||
327 | #endif | - | ||||||
328 | - | |||||||
329 | uint cpuid01ECX = 0, cpuid01EDX = 0; | - | ||||||
330 | cpuidFeatures01(cpuid01ECX, cpuid01EDX); | - | ||||||
331 | - | |||||||
332 | // the low 32-bits of features is cpuid01ECX | - | ||||||
333 | // note: we need to check OS support for saving the AVX register state | - | ||||||
334 | features = cpuid01ECX; | - | ||||||
335 | - | |||||||
336 | #if defined(Q_PROCESSOR_X86_32) | - | ||||||
337 | // x86 might not have SSE2 support | - | ||||||
338 | if (cpuid01EDX & (1u << 26)) | - | ||||||
339 | features |= Q_UINT64_C(1) << CpuFeatureSSE2; | - | ||||||
340 | else | - | ||||||
341 | features &= ~(Q_UINT64_C(1) << CpuFeatureSSE2); | - | ||||||
342 | // we should verify that the OS enabled saving of the SSE state... | - | ||||||
343 | #else | - | ||||||
344 | // x86-64 or x32 | - | ||||||
345 | features |= Q_UINT64_C(1) << CpuFeatureSSE2; | - | ||||||
346 | #endif | - | ||||||
347 | - | |||||||
348 | uint xgetbvA = 0, xgetbvD = 0; | - | ||||||
349 | if (cpuid01ECX & (1u << 27)) {
| 0-1162 | ||||||
350 | // XGETBV enabled | - | ||||||
351 | xgetbv(0, xgetbvA, xgetbvD); | - | ||||||
352 | } never executed: end of block | 0 | ||||||
353 | - | |||||||
354 | uint cpuid0700EBX = 0; | - | ||||||
355 | uint cpuid0700ECX = 0; | - | ||||||
356 | if (cpuidLevel >= 7) {
| 0-1162 | ||||||
357 | cpuidFeatures07_00(cpuid0700EBX, cpuid0700ECX); | - | ||||||
358 | - | |||||||
359 | // the high 32-bits of features is cpuid0700EBX | - | ||||||
360 | features |= quint64(cpuid0700EBX) << 32; | - | ||||||
361 | } executed 1162 times by 97 tests: end of block Executed by:
| 1162 | ||||||
362 | - | |||||||
363 | if ((xgetbvA & AVXState) != AVXState) {
| 0-1162 | ||||||
364 | // support for YMM registers is disabled, disable all AVX | - | ||||||
365 | features &= ~AllAVX; | - | ||||||
366 | } else if ((xgetbvA & AVX512State) != AVX512State) { executed 1162 times by 97 tests: end of block Executed by:
| 0-1162 | ||||||
367 | // support for ZMM registers or mask registers is disabled, disable all AVX512 | - | ||||||
368 | features &= ~AllAVX512; | - | ||||||
369 | } else { never executed: end of block | 0 | ||||||
370 | // this feature is out of order | - | ||||||
371 | if (cpuid0700ECX & (1u << 1))
| 0 | ||||||
372 | features |= Q_UINT64_C(1) << CpuFeatureAVX512VBMI; never executed: features |= static_cast<unsigned long long>(1ULL) << CpuFeatureAVX512VBMI; | 0 | ||||||
373 | else | - | ||||||
374 | features &= ~(Q_UINT64_C(1) << CpuFeatureAVX512VBMI); never executed: features &= ~(static_cast<unsigned long long>(1ULL) << CpuFeatureAVX512VBMI); | 0 | ||||||
375 | } | - | ||||||
376 | - | |||||||
377 | return features; executed 1162 times by 97 tests: return features; Executed by:
| 1162 | ||||||
378 | } | - | ||||||
379 | - | |||||||
380 | #elif defined(Q_PROCESSOR_MIPS_32) | - | ||||||
381 | - | |||||||
382 | #if defined(Q_OS_LINUX) | - | ||||||
//
// Do not use QByteArray: it could use SIMD instructions itself at
// some point, thus creating a recursive dependency. Instead, use a
// QSimpleBuffer, which has the bare minimum needed to use memory
// dynamically and read lines from /proc/cpuinfo of arbitrary sizes.
//
// "data" points to "alloc" bytes obtained from realloc(), of which the first
// "size" are in use. The contents are not NUL-terminated; call cString() to
// obtain a terminated view. Running out of memory aborts the process.
//
struct QSimpleBuffer {
    static const int chunk_size = 256;
    char *data;
    unsigned alloc;
    unsigned size;

    QSimpleBuffer(): data(0), alloc(0), size(0) {}
    ~QSimpleBuffer() { ::free(data); }

    // Makes sure at least "wanted" bytes are allocated, without touching
    // "size". The allocation grows in multiples of chunk_size.
    void reserve(unsigned wanted) {
        if (wanted <= alloc)
            return;
        unsigned newalloc = chunk_size * ((wanted / chunk_size) + 1);
        if (newalloc < wanted)
            newalloc = wanted;      // the rounding above wrapped around
        // Keep the old pointer until realloc() is known to have succeeded;
        // every caller writes through "data" right after growing, so there is
        // nothing sensible to do on failure other than stop.
        void *grown = ::realloc(data, newalloc);
        if (!grown)
            ::abort();
        data = static_cast<char*>(grown);
        alloc = newalloc;
    }

    // Sets the number of bytes in use, growing the allocation if needed.
    // Bytes added by growing are uninitialized.
    void resize(unsigned newsize) {
        reserve(newsize);
        size = newsize;
    }

    // Appends the first "appendsize" bytes of "other".
    void append(const QSimpleBuffer &other, unsigned appendsize) {
        const unsigned oldsize = size;
        resize(oldsize + appendsize);
        ::memcpy(data + oldsize, other.data, appendsize);
    }

    // Drops the first "amount" bytes, moving the remainder to the front.
    void popleft(unsigned amount) {
        if (amount >= size) {
            resize(0);
            return;
        }
        size -= amount;
        ::memmove(data, data + amount, size);
    }

    // Returns the contents as a NUL-terminated string. The terminator is
    // stored just past the used bytes, so "size" is not changed; room for it
    // is reserved first (also when the buffer is empty or exactly full).
    char* cString() {
        reserve(size + 1);
        data[size] = '\0';
        return data;
    }
};
424 | - | |||||||
425 | // | - | ||||||
426 | // Uses a scratch "buffer" (which must be used for all reads done in the | - | ||||||
427 | // same file descriptor) to read chunks of data from a file, to read | - | ||||||
428 | // one line at a time. Lines include the trailing newline character ('\n'). | - | ||||||
429 | // On EOF, line.size is zero. | - | ||||||
430 | // | - | ||||||
431 | static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer) | - | ||||||
432 | { | - | ||||||
433 | for (;;) { | - | ||||||
434 | char *newline = static_cast<char*>(::memchr(buffer.data, '\n', buffer.size)); | - | ||||||
435 | if (newline) { | - | ||||||
436 | unsigned piece_size = newline - buffer.data + 1; | - | ||||||
437 | line.append(buffer, piece_size); | - | ||||||
438 | buffer.popleft(piece_size); | - | ||||||
439 | line.resize(line.size - 1); | - | ||||||
440 | return; | - | ||||||
441 | } | - | ||||||
442 | if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) { | - | ||||||
443 | int oldsize = buffer.size; | - | ||||||
444 | buffer.resize(buffer.size + QSimpleBuffer::chunk_size); | - | ||||||
445 | buffer.size = oldsize; | - | ||||||
446 | } | - | ||||||
447 | ssize_t read_bytes = ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size); | - | ||||||
448 | if (read_bytes > 0) buffer.size += read_bytes; | - | ||||||
449 | else return; | - | ||||||
450 | } | - | ||||||
451 | } | - | ||||||
452 | - | |||||||
453 | // | - | ||||||
454 | // Checks if any line with a given prefix from /proc/cpuinfo contains | - | ||||||
455 | // a certain string, surrounded by spaces. | - | ||||||
456 | // | - | ||||||
457 | static bool procCpuinfoContains(const char *prefix, const char *string) | - | ||||||
458 | { | - | ||||||
459 | int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY); | - | ||||||
460 | if (cpuinfo_fd == -1) | - | ||||||
461 | return false; | - | ||||||
462 | - | |||||||
463 | unsigned string_len = ::strlen(string); | - | ||||||
464 | unsigned prefix_len = ::strlen(prefix); | - | ||||||
465 | QSimpleBuffer line, buffer; | - | ||||||
466 | bool present = false; | - | ||||||
467 | do { | - | ||||||
468 | line.resize(0); | - | ||||||
469 | bufReadLine(cpuinfo_fd, line, buffer); | - | ||||||
470 | char *colon = static_cast<char*>(::memchr(line.data, ':', line.size)); | - | ||||||
471 | if (colon && line.size > prefix_len + string_len) { | - | ||||||
472 | if (!::strncmp(prefix, line.data, prefix_len)) { | - | ||||||
473 | // prefix matches, next character must be ':' or space | - | ||||||
474 | if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) { | - | ||||||
475 | // Does it contain the string? | - | ||||||
476 | char *found = ::strstr(line.cString(), string); | - | ||||||
477 | if (found && ::isspace(found[-1]) && | - | ||||||
478 | (::isspace(found[string_len]) || found[string_len] == '\0')) { | - | ||||||
479 | present = true; | - | ||||||
480 | break; | - | ||||||
481 | } | - | ||||||
482 | } | - | ||||||
483 | } | - | ||||||
484 | } | - | ||||||
485 | } while (line.size); | - | ||||||
486 | - | |||||||
487 | ::qt_safe_close(cpuinfo_fd); | - | ||||||
488 | return present; | - | ||||||
489 | } | - | ||||||
490 | #endif | - | ||||||
491 | - | |||||||
492 | static inline quint64 detectProcessorFeatures() | - | ||||||
493 | { | - | ||||||
494 | // NOTE: MIPS 74K cores are the only ones supporting DSPr2. | - | ||||||
495 | quint64 flags = 0; | - | ||||||
496 | - | |||||||
497 | #if defined __mips_dsp | - | ||||||
498 | flags |= Q_UINT64_C(1) << CpuFeatureDSP; | - | ||||||
499 | # if defined __mips_dsp_rev && __mips_dsp_rev >= 2 | - | ||||||
500 | flags |= Q_UINT64_C(1) << CpuFeatureDSPR2; | - | ||||||
501 | # elif defined(Q_OS_LINUX) | - | ||||||
502 | if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf")) | - | ||||||
503 | flags |= Q_UINT64_C(1) << CpuFeatureDSPR2; | - | ||||||
504 | # endif | - | ||||||
505 | #elif defined(Q_OS_LINUX) | - | ||||||
506 | if (procCpuinfoContains("ASEs implemented", "dsp")) { | - | ||||||
507 | flags |= Q_UINT64_C(1) << CpuFeatureDSP; | - | ||||||
508 | if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf")) | - | ||||||
509 | flags |= Q_UINT64_C(1) << CpuFeatureDSPR2; | - | ||||||
510 | } | - | ||||||
511 | #endif | - | ||||||
512 | - | |||||||
513 | return flags; | - | ||||||
514 | } | - | ||||||
515 | - | |||||||
516 | #else | - | ||||||
517 | static inline uint detectProcessorFeatures() | - | ||||||
518 | { | - | ||||||
519 | return 0; | - | ||||||
520 | } | - | ||||||
521 | #endif | - | ||||||
522 | - | |||||||
523 | /* | - | ||||||
524 | * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note | - | ||||||
525 | * that the x86 version has a lot of blanks that must be kept and that the | - | ||||||
526 | * offset table's type is changed to make the table smaller. We also remove the | - | ||||||
527 | * terminating -1 that the script adds. | - | ||||||
528 | */ | - | ||||||
529 | - | |||||||
530 | // begin generated | - | ||||||
// Layout shared by all variants: features_string holds the feature names back
// to back, each one prefixed with a space and terminated by a NUL, and
// features_indices[bit] is the offset of the name that belongs to feature
// bit "bit".
#if defined(Q_PROCESSOR_ARM)
/* Data (one name per feature bit, starting at bit 0):
 neon
 crc32
 */
static const char features_string[] =
    " neon\0"
    " crc32\0"
    "\0";

static const int features_indices[] = {
    0, 6
};
#elif defined(Q_PROCESSOR_MIPS)
/* Data (one name per feature bit, starting at bit 0):
 dsp
 dspr2
 */
static const char features_string[] =
    " dsp\0"
    " dspr2\0"
    "\0";

static const int features_indices[] = {
    0, 5
};
#elif defined(Q_PROCESSOR_X86)
/* Data (bit number: name; bits that are not listed have no name):
  0: sse3
  1: sse2
  2: avx512vbmi
  9: ssse3
 12: fma
 13: cmpxchg16b
 19: sse4.1
 20: sse4.2
 22: movbe
 23: popcnt
 25: aes
 28: avx
 29: f16c
 30: rdrand
 35: bmi
 36: hle
 37: avx2
 40: bmi2
 43: rtm
 48: avx512f
 49: avx512dq
 50: rdseed
 53: avx512ifma
 58: avx512pf
 59: avx512er
 60: avx512cd
 61: sha
 62: avx512bw
 63: avx512vl
 */
static const char features_string[] =
    " sse3\0"
    " sse2\0"
    " avx512vbmi\0"
    " ssse3\0"
    " fma\0"
    " cmpxchg16b\0"
    " sse4.1\0"
    " sse4.2\0"
    " movbe\0"
    " popcnt\0"
    " aes\0"
    " avx\0"
    " f16c\0"
    " rdrand\0"
    " bmi\0"
    " hle\0"
    " avx2\0"
    " bmi2\0"
    " rtm\0"
    " avx512f\0"
    " avx512dq\0"
    " rdseed\0"
    " avx512ifma\0"
    " avx512pf\0"
    " avx512er\0"
    " avx512cd\0"
    " sha\0"
    " avx512bw\0"
    " avx512vl\0"
    "\0";

// Offset 5 is the NUL that terminates " sse3": unnamed bits point there and
// therefore yield an empty string.
static const quint8 features_indices[] = {
    /*  0 */   0,   6,  12,   5,   5,   5,   5,   5,
    /*  8 */   5,  24,   5,   5,  31,  36,   5,   5,
    /* 16 */   5,   5,   5,  48,  56,   5,  64,  71,
    /* 24 */   5,  79,   5,   5,  84,  89,  95,   5,
    /* 32 */   5,   5,   5, 103, 108, 113,   5,   5,
    /* 40 */ 119,   5,   5, 125,   5,   5,   5,   5,
    /* 48 */ 130, 139, 149,   5,   5, 157,   5,   5,
    /* 56 */   5,   5, 169, 179, 189, 199, 204, 214
};
#else
// No known feature names on this architecture.
static const char features_string[] = "";
static const int features_indices[] = { };
#endif
667 | // end generated | - | ||||||
668 | - | |||||||
669 | static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]); | - | ||||||
670 | - | |||||||
671 | // record what CPU features were enabled by default in this Qt build | - | ||||||
672 | static const quint64 minFeature = qCompilerCpuFeatures; | - | ||||||
673 | - | |||||||
674 | #ifdef Q_ATOMIC_INT64_IS_SUPPORTED | - | ||||||
675 | Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) }; | - | ||||||
676 | #else | - | ||||||
677 | Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) }; | - | ||||||
678 | #endif | - | ||||||
679 | - | |||||||
680 | void qDetectCpuFeatures() | - | ||||||
681 | { | - | ||||||
682 | #if defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && !defined(Q_CC_INTEL) | - | ||||||
683 | # if Q_CC_GNU < 403 | - | ||||||
684 | // GCC 4.2 (at least the one that comes with Apple's XCode, on Mac) is | - | ||||||
685 | // known to be broken beyond repair in dealing with the inline assembly | - | ||||||
686 | // above. It will generate bad code that could corrupt important registers | - | ||||||
687 | // like the PIC register. The behaviour of code after this function would | - | ||||||
688 | // be totally unpredictable. | - | ||||||
689 | // | - | ||||||
690 | // For that reason, simply forego the CPUID check at all and return the set | - | ||||||
691 | // of features that we found at compile time, through the #defines from the | - | ||||||
692 | // compiler. This should at least allow code to execute, even if none of | - | ||||||
693 | // the specialized code found in Qt GUI and elsewhere will ever be enabled | - | ||||||
694 | // (it's the user's fault for using a broken compiler). | - | ||||||
695 | // | - | ||||||
696 | // This also disables the runtime checking that the processor actually | - | ||||||
697 | // contains all the features that the code required. Qt 4 ran for years | - | ||||||
698 | // like that, so it shouldn't be a problem. | - | ||||||
699 | - | |||||||
700 | qt_cpu_features[0].store(minFeature | quint32(QSimdInitialized)); | - | ||||||
701 | #ifndef Q_ATOMIC_INT64_IS_SUPPORTED | - | ||||||
702 | qt_cpu_features[1].store(minFeature >> 32); | - | ||||||
703 | #endif | - | ||||||
704 | - | |||||||
705 | return; | - | ||||||
706 | # endif | - | ||||||
707 | #endif | - | ||||||
708 | quint64 f = detectProcessorFeatures(); | - | ||||||
709 | QByteArray disable = qgetenv("QT_NO_CPU_FEATURE"); | - | ||||||
710 | if (!disable.isEmpty()) {
| 0-1162 | ||||||
711 | disable.prepend(' '); | - | ||||||
712 | for (int i = 0; i < features_count; ++i) {
| 0 | ||||||
713 | if (disable.contains(features_string + features_indices[i]))
| 0 | ||||||
714 | f &= ~(Q_UINT64_C(1) << i); never executed: f &= ~(static_cast<unsigned long long>(1ULL) << i); | 0 | ||||||
715 | } never executed: end of block | 0 | ||||||
716 | } never executed: end of block | 0 | ||||||
717 | - | |||||||
718 | #ifdef RUNNING_ON_VALGRIND | - | ||||||
719 | bool runningOnValgrind = RUNNING_ON_VALGRIND; | - | ||||||
720 | #else | - | ||||||
721 | bool runningOnValgrind = false; | - | ||||||
722 | #endif | - | ||||||
723 | if (Q_UNLIKELY(!runningOnValgrind && minFeature != 0 && (f & minFeature) != minFeature)) {
| 0-1162 | ||||||
724 | quint64 missing = minFeature & ~f; | - | ||||||
725 | fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n "); | - | ||||||
726 | for (int i = 0; i < features_count; ++i) {
| 0 | ||||||
727 | if (missing & (Q_UINT64_C(1) << i))
| 0 | ||||||
728 | fprintf(stderr, "%s", features_string + features_indices[i]); never executed: fprintf(stderr, "%s", features_string + features_indices[i]); | 0 | ||||||
729 | } never executed: end of block | 0 | ||||||
730 | fprintf(stderr, "\n"); | - | ||||||
731 | fflush(stderr); | - | ||||||
732 | qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing, | - | ||||||
733 | features_string + features_indices[qCountTrailingZeroBits(missing)]); | - | ||||||
734 | } never executed: end of block | 0 | ||||||
735 | - | |||||||
736 | qt_cpu_features[0].store(f | quint32(QSimdInitialized)); | - | ||||||
737 | #ifndef Q_ATOMIC_INT64_IS_SUPPORTED | - | ||||||
738 | qt_cpu_features[1].store(f >> 32); | - | ||||||
739 | #endif | - | ||||||
740 | } executed 1162 times by 97 tests: end of block Executed by:
| 1162 | ||||||
741 | - | |||||||
742 | void qDumpCPUFeatures() | - | ||||||
743 | { | - | ||||||
744 | quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized); | - | ||||||
745 | printf("Processor features: "); | - | ||||||
746 | for (int i = 0; i < features_count; ++i) {
| 0 | ||||||
747 | if (features & (Q_UINT64_C(1) << i))
| 0 | ||||||
748 | printf("%s%s", features_string + features_indices[i], never executed: printf("%s%s", features_string + features_indices[i], minFeature & (static_cast<unsigned long long>(1ULL) << i) ? "[required]" : ""); | 0 | ||||||
749 | minFeature & (Q_UINT64_C(1) << i) ? "[required]" : ""); never executed: printf("%s%s", features_string + features_indices[i], minFeature & (static_cast<unsigned long long>(1ULL) << i) ? "[required]" : ""); | 0 | ||||||
750 | } never executed: end of block | 0 | ||||||
751 | puts(""); | - | ||||||
752 | } never executed: end of block | 0 | ||||||
753 | - | |||||||
754 | QT_END_NAMESPACE | - | ||||||
Source code | Switch to Preprocessed file |