tools/qsimd.cpp

Source code | Switch to Preprocessed file
Line | Source Code | Coverage
1/**************************************************************************** -
2** -
3** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies). -
4** Copyright (C) 2012 Intel Corporation. -
5** Contact: http://www.qt-project.org/legal -
6** -
7** This file is part of the QtCore module of the Qt Toolkit. -
8** -
9** $QT_BEGIN_LICENSE:LGPL$ -
10** Commercial License Usage -
11** Licensees holding valid commercial Qt licenses may use this file in -
12** accordance with the commercial license agreement provided with the -
13** Software or, alternatively, in accordance with the terms contained in -
14** a written agreement between you and Digia. For licensing terms and -
15** conditions see http://qt.digia.com/licensing. For further information -
16** use the contact form at http://qt.digia.com/contact-us. -
17** -
18** GNU Lesser General Public License Usage -
19** Alternatively, this file may be used under the terms of the GNU Lesser -
20** General Public License version 2.1 as published by the Free Software -
21** Foundation and appearing in the file LICENSE.LGPL included in the -
22** packaging of this file. Please review the following information to -
23** ensure the GNU Lesser General Public License version 2.1 requirements -
24** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. -
25** -
26** In addition, as a special exception, Digia gives you certain additional -
27** rights. These rights are described in the Digia Qt LGPL Exception -
28** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. -
29** -
30** GNU General Public License Usage -
31** Alternatively, this file may be used under the terms of the GNU -
32** General Public License version 3.0 as published by the Free Software -
33** Foundation and appearing in the file LICENSE.GPL included in the -
34** packaging of this file. Please review the following information to -
35** ensure the GNU General Public License version 3.0 requirements will be -
36** met: http://www.gnu.org/copyleft/gpl.html. -
37** -
38** -
39** $QT_END_LICENSE$ -
40** -
41****************************************************************************/ -
42 -
43#include "qsimd_p.h" -
44#include <QByteArray> -
45#include <stdio.h> -
46 -
47#if defined(Q_OS_WIN) -
48# if defined(Q_OS_WINCE) -
49# include <qt_windows.h> -
50# include <cmnintrin.h> -
51# endif -
52# if !defined(Q_CC_GNU) -
53# ifndef Q_OS_WINCE -
54# include <intrin.h> -
55# endif -
56# endif -
57#elif defined(Q_OS_LINUX) && defined(__arm__) -
58#include "private/qcore_unix_p.h" -
59 -
60// the kernel header definitions for HWCAP_* -
61// (the ones we need/may need anyway) -
62 -
63// copied from <asm/hwcap.h> (ARM) -
64#define HWCAP_IWMMXT 512 -
65#define HWCAP_CRUNCH 1024 -
66#define HWCAP_THUMBEE 2048 -
67#define HWCAP_NEON 4096 -
68#define HWCAP_VFPv3 8192 -
69#define HWCAP_VFPv3D16 16384 -
70 -
71// copied from <linux/auxvec.h> -
72#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ -
73 -
74#endif -
75 -
76QT_BEGIN_NAMESPACE -
77 -
78#if defined (Q_OS_NACL) -
79static inline uint detectProcessorFeatures() -
80{ -
81 return 0; -
82} -
83#elif defined (Q_OS_WINCE) -
84static inline uint detectProcessorFeatures() -
85{ -
86 uint features = 0; -
87 -
88#if defined (ARM) -
89 if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) { -
90 features = IWMMXT; -
91 return features; -
92 } -
93#elif defined(_X86_) -
94 features = 0; -
95 if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) -
96 features |= SSE2; -
97 if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) -
98 features |= SSE3; -
99 return features; -
100#endif -
101 features = 0; -
102 return features; -
103} -
104 -
105#elif defined(Q_PROCESSOR_ARM) || defined(QT_COMPILER_SUPPORTS_IWMMXT) || defined(QT_COMPILER_SUPPORTS_NEON) -
106static inline uint detectProcessorFeatures() -
107{ -
108 uint features = 0; -
109 -
110#if defined(Q_OS_LINUX) -
111 int auxv = qt_safe_open("/proc/self/auxv", O_RDONLY); -
112 if (auxv != -1) { -
113 unsigned long vector[64]; -
114 int nread; -
115 while (features == 0) { -
116 nread = qt_safe_read(auxv, (char *)vector, sizeof vector); -
117 if (nread <= 0) { -
118 // EOF or error -
119 break; -
120 } -
121 -
122 int max = nread / (sizeof vector[0]); -
123 for (int i = 0; i < max; i += 2) -
124 if (vector[i] == AT_HWCAP) { -
125 if (vector[i+1] & HWCAP_IWMMXT) -
126 features |= IWMMXT; -
127 if (vector[i+1] & HWCAP_NEON) -
128 features |= NEON; -
129 break; -
130 } -
131 } -
132 -
133 qt_safe_close(auxv); -
134 return features; -
135 } -
136 // fall back if /proc/self/auxv wasn't found -
137#endif -
138 -
139#if defined(QT_COMPILER_SUPPORTS_IWMMXT) -
140 // runtime detection only available when running as a previlegied process -
141 features = IWMMXT; -
142#elif defined(__ARM_NEON__) -
143 features = NEON; -
144#endif -
145 -
146 return features; -
147} -
148 -
149#elif defined(Q_PROCESSOR_X86) -
150 -
151#ifdef Q_PROCESSOR_X86_32 -
152# define PICreg "%%ebx" -
153#else -
154# define PICreg "%%rbx" -
155#endif -
156 -
// Executes CPUID with EAX = 0 and returns the value it leaves in EAX.
// Returns 0 when the CPUID instruction is not available (32-bit GCC builds
// test for that first) or when the compiler/OS combination is not handled.
static int maxBasicCpuidSupported()
{
#if defined(Q_CC_GNU)
    qintptr scratch;    // spare register used to park the PIC register

# ifdef Q_PROCESSOR_X86_32
    // check if the CPUID instruction is supported: try to flip bit
    // 0x00200000 of the flags register and see whether the change sticks
    long flagToggled;
    asm ("pushf\n"
         "pop %0\n"
         "mov %0, %1\n"
         "xor $0x00200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "pop %0\n"
         "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
        : "=a" (flagToggled), "=r" (scratch)
        );
    if (!flagToggled)
        return 0;
# endif

    // The PIC register is swapped out around CPUID and restored afterwards,
    // because CPUID overwrites it.
    int maxLeaf;
    asm ("xchg " PICreg", %1\n"
         "cpuid\n"
         "xchg " PICreg", %1\n"
        : "=&a" (maxLeaf), "=&r" (scratch)
        : "0" (0)
        : "ecx", "edx");
    return maxLeaf;
#elif defined(Q_OS_WIN)
    // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
    int regs[4];
    __cpuid(regs, 0);
    return regs[0];
#else
    return 0;
#endif
}
197 -
198static void cpuidFeatures01(uint &ecx, uint &edx) -
199{ -
200#if defined(Q_CC_GNU) -
201 qintptr tmp1;
executed (the execution status of this line is deduced): qintptr tmp1;
-
202 asm ("xchg " PICreg", %2\n"
executed (the execution status of this line is deduced): asm ("xchg " "%%rbx"", %2\n"
-
203 "cpuid\n"
executed (the execution status of this line is deduced): "cpuid\n"
-
204 "xchg " PICreg", %2\n"
executed (the execution status of this line is deduced): "xchg " "%%rbx"", %2\n"
-
205 : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
executed (the execution status of this line is deduced): : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
-
206 : "a" (1));
executed (the execution status of this line is deduced): : "a" (1));
-
207#elif defined(Q_OS_WIN) -
208 int info[4]; -
209 __cpuid(info, 1); -
210 ecx = info[2]; -
211 edx = info[3]; -
212#endif -
213}
executed: }
Execution Count:4
4
214 -
#ifdef Q_OS_WIN
// Stand-in overload of __cpuidex that zeroes all four output values.
// NOTE(review): presumably only selected when the real intrinsic is missing,
// like the _xgetbv fallback in this file - confirm.
inline void __cpuidex(int info[4], int, __int64)
{
    for (int reg = 0; reg < 4; ++reg)
        info[reg] = 0;
}
#endif
218 -
219static void cpuidFeatures07_00(uint &ebx) -
220{ -
221#if defined(Q_CC_GNU) -
222 quintptr rbx; // in case it's 64-bit
executed (the execution status of this line is deduced): quintptr rbx;
-
223 asm ("xchg " PICreg", %0\n"
executed (the execution status of this line is deduced): asm ("xchg " "%%rbx"", %0\n"
-
224 "cpuid\n"
executed (the execution status of this line is deduced): "cpuid\n"
-
225 "xchg " PICreg", %0\n"
executed (the execution status of this line is deduced): "xchg " "%%rbx"", %0\n"
-
226 : "=&r" (rbx)
executed (the execution status of this line is deduced): : "=&r" (rbx)
-
227 : "a" (7), "c" (0)
executed (the execution status of this line is deduced): : "a" (7), "c" (0)
-
228 : "%edx");
executed (the execution status of this line is deduced): : "%edx");
-
229 ebx = rbx;
executed (the execution status of this line is deduced): ebx = rbx;
-
230#elif defined(Q_OS_WIN) -
231 int info[4]; -
232 __cpuidex(info, 7, 0); -
233 ebx = info[1]; -
234#endif -
235}
executed: }
Execution Count:4
4
236 -
#ifdef Q_OS_WIN
// Fallback overload used in case the compiler intrinsic
//     unsigned __int64 _xgetbv(unsigned int);
// does not exist. It always reports zero.
inline quint64 _xgetbv(__int64)
{
    return quint64(0);
}
#endif
241static void xgetbv(uint in, uint &eax, uint &edx) -
242{ -
243#ifdef Q_OS_WIN -
244 quint64 result = _xgetbv(in); -
245 eax = result; -
246 edx = result >> 32; -
247#elif defined(Q_CC_GNU) -
248 asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
never executed (the execution status of this line is deduced): asm (".byte 0x0F, 0x01, 0xD0"
-
249 : "=a" (eax), "=d" (edx)
never executed (the execution status of this line is deduced): : "=a" (eax), "=d" (edx)
-
250 : "c" (in));
never executed (the execution status of this line is deduced): : "c" (in));
-
251#endif -
252}
never executed: }
0
253 -
254static inline uint detectProcessorFeatures() -
255{ -
256 uint features = 0;
executed (the execution status of this line is deduced): uint features = 0;
-
257 int cpuidLevel = maxBasicCpuidSupported();
executed (the execution status of this line is deduced): int cpuidLevel = maxBasicCpuidSupported();
-
258 if (cpuidLevel < 1)
partially evaluated: cpuidLevel < 1
TRUEFALSE
no
Evaluation Count:0
yes
Evaluation Count:4
0-4
259 return 0;
never executed: return 0;
0
260 -
261 uint cpuid01ECX = 0, cpuid01EDX = 0;
executed (the execution status of this line is deduced): uint cpuid01ECX = 0, cpuid01EDX = 0;
-
262 cpuidFeatures01(cpuid01ECX, cpuid01EDX);
executed (the execution status of this line is deduced): cpuidFeatures01(cpuid01ECX, cpuid01EDX);
-
263#if defined(Q_PROCESSOR_X86_32) -
264 // x86 might not have SSE2 support -
265 if (cpuid01EDX & (1u << 26)) -
266 features |= SSE2; -
267#else -
268 // x86-64 or x32 -
269 features = SSE2;
executed (the execution status of this line is deduced): features = SSE2;
-
270#endif -
271 -
272 // common part between 32- and 64-bit -
273 if (cpuid01ECX & (1u))
partially evaluated: cpuid01ECX & (1u)
TRUEFALSE
yes
Evaluation Count:4
no
Evaluation Count:0
0-4
274 features |= SSE3;
executed: features |= SSE3;
Execution Count:4
4
275 if (cpuid01ECX & (1u << 9))
partially evaluated: cpuid01ECX & (1u << 9)
TRUEFALSE
yes
Evaluation Count:4
no
Evaluation Count:0
0-4
276 features |= SSSE3;
executed: features |= SSSE3;
Execution Count:4
4
277 if (cpuid01ECX & (1u << 19))
partially evaluated: cpuid01ECX & (1u << 19)
TRUEFALSE
yes
Evaluation Count:4
no
Evaluation Count:0
0-4
278 features |= SSE4_1;
executed: features |= SSE4_1;
Execution Count:4
4
279 if (cpuid01ECX & (1u << 20))
partially evaluated: cpuid01ECX & (1u << 20)
TRUEFALSE
yes
Evaluation Count:4
no
Evaluation Count:0
0-4
280 features |= SSE4_2;
executed: features |= SSE4_2;
Execution Count:4
4
281 if (cpuid01ECX & (1u << 25))
partially evaluated: cpuid01ECX & (1u << 25)
TRUEFALSE
yes
Evaluation Count:4
no
Evaluation Count:0
0-4
282 features |= 0; // AES, enable if needed
executed: features |= 0;
Execution Count:4
4
283 -
284 uint xgetbvA = 0, xgetbvD = 0;
executed (the execution status of this line is deduced): uint xgetbvA = 0, xgetbvD = 0;
-
285 if (cpuid01ECX & (1u << 27)) {
partially evaluated: cpuid01ECX & (1u << 27)
TRUEFALSE
no
Evaluation Count:0
yes
Evaluation Count:4
0-4
286 // XGETBV enabled -
287 xgetbv(0, xgetbvA, xgetbvD);
never executed (the execution status of this line is deduced): xgetbv(0, xgetbvA, xgetbvD);
-
288 }
never executed: }
0
289 -
290 uint cpuid0700EBX = 0;
executed (the execution status of this line is deduced): uint cpuid0700EBX = 0;
-
291 if (cpuidLevel >= 7)
partially evaluated: cpuidLevel >= 7
TRUEFALSE
yes
Evaluation Count:4
no
Evaluation Count:0
0-4
292 cpuidFeatures07_00(cpuid0700EBX);
executed: cpuidFeatures07_00(cpuid0700EBX);
Execution Count:4
4
293 -
294 if ((xgetbvA & 6) == 6) {
partially evaluated: (xgetbvA & 6) == 6
TRUEFALSE
no
Evaluation Count:0
yes
Evaluation Count:4
0-4
295 // support for YMM and XMM registers is enabled -
296 if (cpuid01ECX & (1u << 28))
never evaluated: cpuid01ECX & (1u << 28)
0
297 features |= AVX;
never executed: features |= AVX;
0
298 -
299 if (cpuid0700EBX & (1u << 5))
never evaluated: cpuid0700EBX & (1u << 5)
0
300 features |= AVX2;
never executed: features |= AVX2;
0
301 }
never executed: }
0
302 -
303 if (cpuid0700EBX & (1u << 4))
partially evaluated: cpuid0700EBX & (1u << 4)
TRUEFALSE
no
Evaluation Count:0
yes
Evaluation Count:4
0-4
304 features |= HLE; // Hardware Lock Ellision
never executed: features |= HLE;
0
305 if (cpuid0700EBX & (1u << 11))
partially evaluated: cpuid0700EBX & (1u << 11)
TRUEFALSE
no
Evaluation Count:0
yes
Evaluation Count:4
0-4
306 features |= RTM; // Restricted Transactional Memory
never executed: features |= RTM;
0
307 -
308 return features;
executed: return features;
Execution Count:4
4
309} -
310 -
311 -
312#else -
313static inline uint detectProcessorFeatures() -
314{ -
315 return 0; -
316} -
317#endif -
318 -
319/* -
320 * Use kdesdk/scripts/generate_string_table.pl to update the table below. -
321 * Here's the data (don't forget the ONE leading space): -
322 iwmmxt -
323 neon -
324 sse2 -
325 sse3 -
326 ssse3 -
327 sse4.1 -
328 sse4.2 -
329 avx -
330 avx2 -
331 hle -
332 rtm -
333 */ -
334 -
// begin generated
// All feature names packed into one array; each name keeps its leading space
// and is NUL-terminated.
static const char features_string[] =
    " iwmmxt\0"     /* offset  0 */
    " neon\0"       /* offset  8 */
    " sse2\0"       /* offset 14 */
    " sse3\0"       /* offset 20 */
    " ssse3\0"      /* offset 26 */
    " sse4.1\0"     /* offset 33 */
    " sse4.2\0"     /* offset 41 */
    " avx\0"        /* offset 49 */
    " avx2\0"       /* offset 54 */
    " hle\0"        /* offset 60 */
    " rtm\0"        /* offset 65 */
    "\0";

// Start offset of each name inside features_string; the list ends with -1.
static const int features_indices[] = {
    0,
    8,
    14,
    20,
    26,
    33,
    41,
    49,
    54,
    60,
    65,
    -1
};
// end generated

// Number of named features: every entry of features_indices except the
// trailing -1.
static const int features_count =
    (int)(sizeof features_indices / sizeof features_indices[0]) - 1;
357 -
358// record what CPU features were enabled by default in this Qt build -
359// don't define for HLE, since the HLE prefix can be run on older CPUs -
360static const uint minFeature = qCompilerCpuFeatures & ~HLE; -
361 -
#ifdef Q_OS_WIN
#if defined(Q_CC_GNU)
#  define ffs __builtin_ffs
#else
// Replacement for ffs() on Windows compilers other than GCC.
//
// Returns the 1-based position of the least significant set bit of `i`
// (1 for bit 0), or 0 when `i` is zero. Callers subtract 1 from the result to
// obtain a bit index.
int ffs(int i)
{
#ifndef Q_OS_WINCE
    unsigned long index;
    // _BitScanForward stores a 0-based bit position, so add 1 to obtain the
    // 1-based numbering that ffs() uses.
    return _BitScanForward(&index, i) ? int(index) + 1 : 0;
#else
    // No intrinsic is used on Windows CE: scan the bits by hand.
    unsigned int bits = i;
    if (bits == 0)
        return 0;
    int position = 1;
    while (!(bits & 1u)) {
        bits >>= 1;
        ++position;
    }
    return position;
#endif
}
#endif
#endif // Q_OS_WIN
377 -
378QBasicAtomicInt qt_cpu_features = Q_BASIC_ATOMIC_INITIALIZER(0); -
379 -
380void qDetectCpuFeatures() -
381{ -
382#if defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && !defined(Q_CC_INTEL) -
383# if (__GNUC__ * 100 + __GNUC_MINOR__) < 403 -
384 // GCC 4.2 (at least the one that comes with Apple's XCode, on Mac) is -
385 // known to be broken beyond repair in dealing with the inline assembly -
386 // above. It will generate bad code that could corrupt important registers -
387 // like the PIC register. The behaviour of code after this function would -
388 // be totally unpredictable. -
389 // -
390 // For that reason, simply forego the CPUID check at all and return the set -
391 // of features that we found at compile time, through the #defines from the -
392 // compiler. This should at least allow code to execute, even if none of -
393 // the specialized code found in Qt GUI and elsewhere will ever be enabled -
394 // (it's the user's fault for using a broken compiler). -
395 // -
396 // This also disables the runtime checking that the processor actually -
397 // contains all the features that the code required. Qt 4 ran for years -
398 // like that, so it shouldn't be a problem. -
399 -
400 qt_cpu_features.store(minFeature | QSimdInitialized); -
401 return; -
402# endif -
403#endif -
404 uint f = detectProcessorFeatures();
executed (the execution status of this line is deduced): uint f = detectProcessorFeatures();
-
405 QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
executed (the execution status of this line is deduced): QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
-
406 if (!disable.isEmpty()) {
partially evaluated: !disable.isEmpty()
TRUEFALSE
no
Evaluation Count:0
yes
Evaluation Count:4
0-4
407 disable.prepend(' ');
never executed (the execution status of this line is deduced): disable.prepend(' ');
-
408 for (int i = 0; i < features_count; ++i) {
never evaluated: i < features_count
0
409 if (disable.contains(features_string + features_indices[i]))
never evaluated: disable.contains(features_string + features_indices[i])
0
410 f &= ~(1 << i);
never executed: f &= ~(1 << i);
0
411 }
never executed: }
0
412 }
never executed: }
0
413 -
414 if (minFeature != 0 && (f & minFeature) != minFeature) {
partially evaluated: minFeature != 0
TRUEFALSE
yes
Evaluation Count:4
no
Evaluation Count:0
partially evaluated: (f & minFeature) != minFeature
TRUEFALSE
no
Evaluation Count:0
yes
Evaluation Count:4
0-4
415 uint missing = minFeature & ~f;
never executed (the execution status of this line is deduced): uint missing = minFeature & ~f;
-
416 fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n ");
never executed (the execution status of this line is deduced): fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n ");
-
417 for (int i = 0; i < features_count; ++i) {
never evaluated: i < features_count
0
418 if (missing & (1 << i))
never evaluated: missing & (1 << i)
0
419 fprintf(stderr, "%s", features_string + features_indices[i]);
never executed: fprintf(stderr, "%s", features_string + features_indices[i]);
0
420 }
never executed: }
0
421 fprintf(stderr, "\n");
never executed (the execution status of this line is deduced): fprintf(stderr, "\n");
-
422 fflush(stderr);
never executed (the execution status of this line is deduced): fflush(stderr);
-
423 qFatal("Aborted. Incompatible processor: missing feature 0x%x -%s.", missing,
never executed (the execution status of this line is deduced): QMessageLogger("tools/qsimd.cpp", 423, __PRETTY_FUNCTION__).fatal("Aborted. Incompatible processor: missing feature 0x%x -%s.", missing,
-
424 features_string + features_indices[ffs(missing) - 1]);
never executed (the execution status of this line is deduced): features_string + features_indices[ffs(missing) - 1]);
-
425 }
never executed: }
0
426 -
427 qt_cpu_features.store(f | QSimdInitialized);
executed (the execution status of this line is deduced): qt_cpu_features.store(f | QSimdInitialized);
-
428}
executed: }
Execution Count:4
4
429 -
430void qDumpCPUFeatures() -
431{ -
432 uint features = qCpuFeatures();
never executed (the execution status of this line is deduced): uint features = qCpuFeatures();
-
433 printf("Processor features: ");
never executed (the execution status of this line is deduced): printf("Processor features: ");
-
434 for (int i = 0; i < features_count; ++i) {
never evaluated: i < features_count
0
435 if (features & (1 << i))
never evaluated: features & (1 << i)
0
436 printf("%s%s", features_string + features_indices[i],
never executed: printf("%s%s", features_string + features_indices[i], minFeature & (1 << i) ? "[required]" : "");
0
437 minFeature & (1 << i) ? "[required]" : "");
never executed: printf("%s%s", features_string + features_indices[i], minFeature & (1 << i) ? "[required]" : "");
0
438 }
never executed: }
0
439 puts("");
never executed (the execution status of this line is deduced): puts("");
-
440}
never executed: }
0
441 -
442QT_END_NAMESPACE -
443 -
Source code | Switch to Preprocessed file

Generated by Squish Coco Non-Commercial