Absolute File Name: | /home/qt/qt5_coco/qt5/qtbase/src/corelib/tools/qregexp.cpp |
Switch to Source code | Preprocessed file |
Line | Source | Count |
---|---|---|
1 | - | |
2 | - | |
3 | - | |
4 | - | |
5 | - | |
6 | - | |
7 | int qFindString(const QChar *haystack, int haystackLen, int from, | - |
8 | const QChar *needle, int needleLen, Qt::CaseSensitivity cs); | - |
// Length of the occ1 tables (see QRegExpEngine::setup()).
const int NumBadChars = 64;

// Sentinel values; the first three all equal the largest 32-bit signed int.
const int NoOccurrence = 0x7fffffff;
const int EmptyCapture = 0x7fffffff;
const int InftyLen = 0x7fffffff;
const int InftyRep = 1025;
const int EOS = -1;
17 | - | |
18 | static bool isWord(QChar ch) | - |
19 | { | - |
20 | return ch.isLetterOrNumber() || ch.isMark() || ch == QLatin1Char('_'); | - |
21 | } | - |
22 | - | |
23 | - | |
24 | - | |
25 | - | |
26 | - | |
27 | static void mergeInto(QVector<int> *a, const QVector<int> &b) | - |
28 | { | - |
29 | int asize = a->size(); | - |
30 | int bsize = b.size(); | - |
31 | if (asize == 0) { | - |
32 | *a = b; | - |
33 | - | |
34 | } else if (bsize == 1 && a->at(asize - 1) < b.at(0)) { | - |
35 | a->resize(asize + 1); | - |
36 | (*a)[asize] = b.at(0); | - |
37 | - | |
38 | } else if (bsize >= 1) { | - |
39 | int csize = asize + bsize; | - |
40 | QVector<int> c(csize); | - |
41 | int i = 0, j = 0, k = 0; | - |
42 | while (i < asize) { | - |
43 | if (j < bsize) { | - |
44 | if (a->at(i) == b.at(j)) { | - |
45 | ++i; | - |
46 | --csize; | - |
47 | } else if (a->at(i) < b.at(j)) { | - |
48 | c[k++] = a->at(i++); | - |
49 | } else { | - |
50 | c[k++] = b.at(j++); | - |
51 | } | - |
52 | } else { | - |
53 | memcpy(c.data() + k, a->constData() + i, (asize - i) * sizeof(int)); | - |
54 | break; | - |
55 | } | - |
56 | } | - |
57 | c.resize(csize); | - |
58 | if (j < bsize) | - |
59 | memcpy(c.data() + k, b.constData() + j, (bsize - j) * sizeof(int)); | - |
60 | *a = c; | - |
61 | } | - |
62 | } | - |
63 | static QString wc2rx(const QString &wc_str, const bool enableEscaping) | - |
64 | { | - |
65 | const int wclen = wc_str.length(); | - |
66 | QString rx; | - |
67 | int i = 0; | - |
68 | bool isEscaping = false; | - |
69 | const QChar *wc = wc_str.unicode(); | - |
70 | - | |
71 | while (i < wclen) { | - |
72 | const QChar c = wc[i++]; | - |
73 | switch (c.unicode()) { | - |
74 | case '\\': | - |
75 | if (enableEscaping) { | - |
76 | if (isEscaping) { | - |
77 | rx += QLatin1String("\\\\"); | - |
78 | } | - |
79 | if (i == wclen) { | - |
80 | rx += QLatin1String("\\\\"); | - |
81 | } | - |
82 | } else { | - |
83 | rx += QLatin1String("\\\\"); | - |
84 | } | - |
85 | isEscaping = true; | - |
86 | break; | - |
87 | case '*': | - |
88 | if (isEscaping) { | - |
89 | rx += QLatin1String("\\*"); | - |
90 | isEscaping = false; | - |
91 | } else { | - |
92 | rx += QLatin1String(".*"); | - |
93 | } | - |
94 | break; | - |
95 | case '?': | - |
96 | if (isEscaping) { | - |
97 | rx += QLatin1String("\\?"); | - |
98 | isEscaping = false; | - |
99 | } else { | - |
100 | rx += QLatin1Char('.'); | - |
101 | } | - |
102 | - | |
103 | break; | - |
104 | case '$': | - |
105 | case '(': | - |
106 | case ')': | - |
107 | case '+': | - |
108 | case '.': | - |
109 | case '^': | - |
110 | case '{': | - |
111 | case '|': | - |
112 | case '}': | - |
113 | if (isEscaping) { | - |
114 | isEscaping = false; | - |
115 | rx += QLatin1String("\\\\"); | - |
116 | } | - |
117 | rx += QLatin1Char('\\'); | - |
118 | rx += c; | - |
119 | break; | - |
120 | case '[': | - |
121 | if (isEscaping) { | - |
122 | isEscaping = false; | - |
123 | rx += QLatin1String("\\["); | - |
124 | } else { | - |
125 | rx += c; | - |
126 | if (wc[i] == QLatin1Char('^')) | - |
127 | rx += wc[i++]; | - |
128 | if (i < wclen) { | - |
129 | if (rx[i] == QLatin1Char(']')) | - |
130 | rx += wc[i++]; | - |
131 | while (i < wclen && wc[i] != QLatin1Char(']')) { | - |
132 | if (wc[i] == QLatin1Char('\\')) | - |
133 | rx += QLatin1Char('\\'); | - |
134 | rx += wc[i++]; | - |
135 | } | - |
136 | } | - |
137 | } | - |
138 | break; | - |
139 | - | |
140 | case ']': | - |
141 | if(isEscaping){ | - |
142 | isEscaping = false; | - |
143 | rx += QLatin1String("\\"); | - |
144 | } | - |
145 | rx += c; | - |
146 | break; | - |
147 | - | |
148 | default: | - |
149 | if(isEscaping){ | - |
150 | isEscaping = false; | - |
151 | rx += QLatin1String("\\\\"); | - |
152 | } | - |
153 | rx += c; | - |
154 | } | - |
155 | } | - |
156 | return rx; | - |
157 | } | - |
158 | - | |
159 | - | |
160 | static int caretIndex(int offset, QRegExp::CaretMode caretMode) | - |
161 | { | - |
162 | if (caretMode == QRegExp::CaretAtZero) { | - |
163 | return 0; | - |
164 | } else if (caretMode == QRegExp::CaretAtOffset) { | - |
165 | return offset; | - |
166 | } else { | - |
167 | return -1; | - |
168 | } | - |
169 | } | - |
170 | - | |
171 | - | |
172 | - | |
173 | - | |
174 | struct QRegExpEngineKey | - |
175 | { | - |
176 | QString pattern; | - |
177 | QRegExp::PatternSyntax patternSyntax; | - |
178 | Qt::CaseSensitivity cs; | - |
179 | - | |
180 | inline QRegExpEngineKey(const QString &pattern, QRegExp::PatternSyntax patternSyntax, | - |
181 | Qt::CaseSensitivity cs) | - |
182 | : pattern(pattern), patternSyntax(patternSyntax), cs(cs) {} | - |
183 | - | |
184 | inline void clear() { | - |
185 | pattern.clear(); | - |
186 | patternSyntax = QRegExp::RegExp; | - |
187 | cs = Qt::CaseSensitive; | - |
188 | } | - |
189 | }; | - |
190 | - | |
191 | static bool operator==(const QRegExpEngineKey &key1, const QRegExpEngineKey &key2) | - |
192 | { | - |
193 | return key1.pattern == key2.pattern && key1.patternSyntax == key2.patternSyntax | - |
194 | && key1.cs == key2.cs; | - |
195 | } | - |
196 | - | |
197 | static uint qHash(const QRegExpEngineKey &key, uint seed = 0) noexcept | - |
198 | { | - |
199 | QtPrivate::QHashCombine hash; | - |
200 | seed = hash(seed, key.pattern); | - |
201 | seed = hash(seed, key.patternSyntax); | - |
202 | seed = hash(seed, key.cs); | - |
203 | return executed 630297 times by 167 tests: seed;return seed; Executed by:
executed 630297 times by 167 tests: return seed; Executed by:
| 630297 |
204 | } | - |
205 | - | |
206 | class QRegExpEngine; | - |
207 | - | |
208 | - | |
209 | - | |
210 | - | |
211 | - | |
212 | - | |
213 | struct QRegExpMatchState | - |
214 | { | - |
215 | const QChar *in; | - |
216 | int pos; | - |
217 | int caretPos; | - |
218 | int len; | - |
219 | bool minimal; | - |
220 | int *bigArray; | - |
221 | int *inNextStack; | - |
222 | int *curStack; | - |
223 | int *nextStack; | - |
224 | int *curCapBegin; | - |
225 | int *nextCapBegin; | - |
226 | int *curCapEnd; | - |
227 | int *nextCapEnd; | - |
228 | int *tempCapBegin; | - |
229 | int *tempCapEnd; | - |
230 | int *capBegin; | - |
231 | int *capEnd; | - |
232 | int *slideTab; | - |
233 | int *captured; | - |
234 | int slideTabSize; | - |
235 | int capturedSize; | - |
236 | - | |
237 | QList<QVector<int> > sleeping; | - |
238 | - | |
239 | int matchLen; | - |
240 | int oneTestMatchedLen; | - |
241 | - | |
242 | const QRegExpEngine *eng; | - |
243 | - | |
244 | inline QRegExpMatchState() : bigArray(0), captured(0) {} | - |
245 | inline ~QRegExpMatchState() { free(bigArray); } | - |
246 | - | |
247 | void drain() { free(bigArray); bigArray = 0; captured = 0; } | - |
248 | void prepareForMatch(QRegExpEngine *eng); | - |
249 | void match(const QChar *str, int len, int pos, bool minimal, | - |
250 | bool oneTest, int caretIndex); | - |
251 | bool matchHere(); | - |
252 | bool testAnchor(int i, int a, const int *capBegin); | - |
253 | }; | - |
254 | - | |
255 | - | |
256 | - | |
257 | - | |
258 | - | |
259 | - | |
260 | - | |
261 | struct QRegExpAutomatonState | - |
262 | { | - |
263 | - | |
264 | int atom; | - |
265 | - | |
266 | int match; | - |
267 | QVector<int> outs; | - |
268 | QMap<int, int> reenter; | - |
269 | QMap<int, int> anchors; | - |
270 | - | |
271 | inline QRegExpAutomatonState() { } | - |
272 | - | |
273 | inline QRegExpAutomatonState(int a, int m) | - |
274 | : atom(a), match(m) { } | - |
275 | - | |
276 | - | |
277 | - | |
278 | - | |
279 | }; | - |
280 | - | |
281 | template<> class QTypeInfo<QRegExpAutomatonState > { public: enum { isComplex = (((Q_MOVABLE_TYPE) & Q_PRIMITIVE_TYPE) == 0), isStatic = (((Q_MOVABLE_TYPE) & (Q_MOVABLE_TYPE | Q_PRIMITIVE_TYPE)) == 0), isRelocatable = !isStatic || ((Q_MOVABLE_TYPE) & Q_RELOCATABLE_TYPE), isLarge = (sizeof(QRegExpAutomatonState)>sizeof(void*)), isPointer = false, isIntegral = QtPrivate::is_integral< QRegExpAutomatonState >::value, isDummy = (((Q_MOVABLE_TYPE) & Q_DUMMY_TYPE) != 0), sizeOf = sizeof(QRegExpAutomatonState) }; static inline const char *name() { return "QRegExpAutomatonState"; } }; | - |
282 | - | |
283 | - | |
284 | - | |
285 | - | |
286 | - | |
287 | struct QRegExpCharClassRange | - |
288 | { | - |
289 | ushort from; | - |
290 | ushort len; | - |
291 | }; | - |
292 | - | |
293 | template<> class QTypeInfo<QRegExpCharClassRange > { public: enum { isComplex = (((Q_PRIMITIVE_TYPE) & Q_PRIMITIVE_TYPE) == 0), isStatic = (((Q_PRIMITIVE_TYPE) & (Q_MOVABLE_TYPE | Q_PRIMITIVE_TYPE)) == 0), isRelocatable = !isStatic || ((Q_PRIMITIVE_TYPE) & Q_RELOCATABLE_TYPE), isLarge = (sizeof(QRegExpCharClassRange)>sizeof(void*)), isPointer = false, isIntegral = QtPrivate::is_integral< QRegExpCharClassRange >::value, isDummy = (((Q_PRIMITIVE_TYPE) & Q_DUMMY_TYPE) != 0), sizeOf = sizeof(QRegExpCharClassRange) }; static inline const char *name() { return "QRegExpCharClassRange"; } }; | - |
294 | - | |
295 | - | |
296 | - | |
297 | - | |
298 | - | |
299 | - | |
300 | struct QRegExpAtom | - |
301 | { | - |
302 | enum { NoCapture = -1, OfficialCapture = -2, UnofficialCapture = -3 }; | - |
303 | - | |
304 | int parent; | - |
305 | int capture; | - |
306 | }; | - |
307 | - | |
308 | template<> class QTypeInfo<QRegExpAtom > { public: enum { isComplex = (((Q_PRIMITIVE_TYPE) & Q_PRIMITIVE_TYPE) == 0), isStatic = (((Q_PRIMITIVE_TYPE) & (Q_MOVABLE_TYPE | Q_PRIMITIVE_TYPE)) == 0), isRelocatable = !isStatic || ((Q_PRIMITIVE_TYPE) & Q_RELOCATABLE_TYPE), isLarge = (sizeof(QRegExpAtom)>sizeof(void*)), isPointer = false, isIntegral = QtPrivate::is_integral< QRegExpAtom >::value, isDummy = (((Q_PRIMITIVE_TYPE) & Q_DUMMY_TYPE) != 0), sizeOf = sizeof(QRegExpAtom) }; static inline const char *name() { return "QRegExpAtom"; } }; | - |
309 | - | |
310 | - | |
311 | struct QRegExpLookahead; | - |
312 | - | |
313 | - | |
314 | - | |
315 | - | |
316 | - | |
317 | - | |
318 | struct QRegExpAnchorAlternation | - |
319 | { | - |
320 | int a; | - |
321 | int b; | - |
322 | }; | - |
323 | - | |
324 | template<> class QTypeInfo<QRegExpAnchorAlternation > { public: enum { isComplex = (((Q_PRIMITIVE_TYPE) & Q_PRIMITIVE_TYPE) == 0), isStatic = (((Q_PRIMITIVE_TYPE) & (Q_MOVABLE_TYPE | Q_PRIMITIVE_TYPE)) == 0), isRelocatable = !isStatic || ((Q_PRIMITIVE_TYPE) & Q_RELOCATABLE_TYPE), isLarge = (sizeof(QRegExpAnchorAlternation)>sizeof(void*)), isPointer = false, isIntegral = QtPrivate::is_integral< QRegExpAnchorAlternation >::value, isDummy = (((Q_PRIMITIVE_TYPE) & Q_DUMMY_TYPE) != 0), sizeOf = sizeof(QRegExpAnchorAlternation) }; static inline const char *name() { return "QRegExpAnchorAlternation"; } }; | - |
325 | class QRegExpCharClass | - |
326 | { | - |
327 | public: | - |
328 | QRegExpCharClass(); | - |
329 | - | |
330 | void clear(); | - |
331 | bool negative() const { return n; } | - |
332 | void setNegative(bool negative); | - |
333 | void addCategories(uint cats); | - |
334 | void addRange(ushort from, ushort to); | - |
335 | void addSingleton(ushort ch) { addRange(ch, ch); } | - |
336 | - | |
337 | bool in(QChar ch) const; | - |
338 | - | |
339 | const QVector<int> &firstOccurrence() const { return occ1; } | - |
340 | - | |
341 | - | |
342 | - | |
343 | void dump() const; | - |
344 | - | |
345 | - | |
346 | private: | - |
347 | QVector<QRegExpCharClassRange> r; | - |
348 | - | |
349 | QVector<int> occ1; | - |
350 | - | |
351 | uint c; | - |
352 | bool n; | - |
353 | }; | - |
354 | template<> class QTypeInfo<QRegExpCharClass > { public: enum { isComplex = (((Q_MOVABLE_TYPE) & Q_PRIMITIVE_TYPE) == 0), isStatic = (((Q_MOVABLE_TYPE) & (Q_MOVABLE_TYPE | Q_PRIMITIVE_TYPE)) == 0), isRelocatable = !isStatic || ((Q_MOVABLE_TYPE) & Q_RELOCATABLE_TYPE), isLarge = (sizeof(QRegExpCharClass)>sizeof(void*)), isPointer = false, isIntegral = QtPrivate::is_integral< QRegExpCharClass >::value, isDummy = (((Q_MOVABLE_TYPE) & Q_DUMMY_TYPE) != 0), sizeOf = sizeof(QRegExpCharClass) }; static inline const char *name() { return "QRegExpCharClass"; } }; | - |
355 | - | |
356 | - | |
357 | - | |
358 | - | |
359 | - | |
360 | class QRegExpEngine | - |
361 | { | - |
362 | public: | - |
363 | QRegExpEngine(Qt::CaseSensitivity cs, bool greedyQuantifiers) | - |
364 | : cs(cs), greedyQuantifiers(greedyQuantifiers) { setup(); } | - |
365 | - | |
366 | QRegExpEngine(const QRegExpEngineKey &key); | - |
367 | ~QRegExpEngine(); | - |
368 | - | |
369 | bool isValid() const { return valid; } | - |
370 | const QString &errorString() const { return yyError; } | - |
371 | int captureCount() const { return officialncap; } | - |
372 | - | |
373 | int createState(QChar ch); | - |
374 | int createState(const QRegExpCharClass &cc); | - |
375 | - | |
376 | int createState(int bref); | - |
377 | - | |
378 | - | |
379 | void addCatTransitions(const QVector<int> &from, const QVector<int> &to); | - |
380 | - | |
381 | void addPlusTransitions(const QVector<int> &from, const QVector<int> &to, int atom); | - |
382 | - | |
383 | - | |
384 | - | |
385 | int anchorAlternation(int a, int b); | - |
386 | int anchorConcatenation(int a, int b); | - |
387 | - | |
388 | - | |
389 | - | |
390 | - | |
391 | void addAnchors(int from, int to, int a); | - |
392 | - | |
393 | - | |
394 | void heuristicallyChooseHeuristic(); | - |
395 | - | |
396 | - | |
397 | - | |
398 | void dump() const; | - |
399 | - | |
400 | - | |
401 | QAtomicInt ref; | - |
402 | - | |
403 | private: | - |
404 | enum { CharClassBit = 0x10000, BackRefBit = 0x20000 }; | - |
405 | enum { InitialState = 0, FinalState = 1 }; | - |
406 | - | |
407 | void setup(); | - |
408 | int setupState(int match); | - |
409 | - | |
410 | - | |
411 | - | |
412 | - | |
413 | - | |
414 | enum { MaxLookaheads = 13, MaxBackRefs = 14 }; | - |
415 | enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002, Anchor_Word = 0x00000004, | - |
416 | Anchor_NonWord = 0x00000008, Anchor_FirstLookahead = 0x00000010, | - |
417 | Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads, | - |
418 | Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1, | - |
419 | Anchor_Alternation = unsigned(Anchor_BackRef1Empty) << MaxBackRefs, | - |
420 | - | |
421 | Anchor_LookaheadMask = (Anchor_FirstLookahead - 1) ^ | - |
422 | ((Anchor_FirstLookahead << MaxLookaheads) - 1) }; | - |
423 | - | |
424 | int startAtom(bool officialCapture); | - |
425 | void finishAtom(int atom, bool needCapture); | - |
426 | - | |
427 | - | |
428 | - | |
429 | int addLookahead(QRegExpEngine *eng, bool negative); | - |
430 | - | |
431 | - | |
432 | - | |
433 | bool goodStringMatch(QRegExpMatchState &matchState) const; | - |
434 | bool badCharMatch(QRegExpMatchState &matchState) const; | - |
435 | - | |
436 | - | |
437 | - | |
438 | - | |
439 | QVector<QRegExpAutomatonState> s; | - |
440 | - | |
441 | QVector<QRegExpAtom> f; | - |
442 | int nf; | - |
443 | int cf; | - |
444 | QVector<int> captureForOfficialCapture; | - |
445 | - | |
446 | int officialncap; | - |
447 | int ncap; | - |
448 | - | |
449 | QVector<QRegExpCharClass> cl; | - |
450 | - | |
451 | - | |
452 | QVector<QRegExpLookahead *> ahead; | - |
453 | - | |
454 | - | |
455 | QVector<QRegExpAnchorAlternation> aa; | - |
456 | - | |
457 | - | |
458 | bool caretAnchored; | - |
459 | bool trivial; | - |
460 | - | |
461 | bool valid; | - |
462 | Qt::CaseSensitivity cs; | - |
463 | bool greedyQuantifiers; | - |
464 | bool xmlSchemaExtensions; | - |
465 | - | |
466 | int nbrefs; | - |
467 | - | |
468 | - | |
469 | - | |
470 | bool useGoodStringHeuristic; | - |
471 | - | |
472 | int goodEarlyStart; | - |
473 | int goodLateStart; | - |
474 | QString goodStr; | - |
475 | - | |
476 | int minl; | - |
477 | QVector<int> occ1; | - |
478 | class Box | - |
479 | { | - |
480 | public: | - |
481 | Box(QRegExpEngine *engine); | - |
482 | Box(const Box &b) { operator=(b); } | - |
483 | - | |
484 | Box &operator=(const Box &b); | - |
485 | - | |
486 | void clear() { operator=(Box(eng)); } | - |
487 | void set(QChar ch); | - |
488 | void set(const QRegExpCharClass &cc); | - |
489 | - | |
490 | void set(int bref); | - |
491 | - | |
492 | - | |
493 | void cat(const Box &b); | - |
494 | void orx(const Box &b); | - |
495 | void plus(int atom); | - |
496 | void opt(); | - |
497 | void catAnchor(int a); | - |
498 | - | |
499 | void setupHeuristics(); | - |
500 | - | |
501 | - | |
502 | - | |
503 | void dump() const; | - |
504 | - | |
505 | - | |
506 | private: | - |
507 | void addAnchorsToEngine(const Box &to) const; | - |
508 | - | |
509 | QRegExpEngine *eng; | - |
510 | QVector<int> ls; | - |
511 | QVector<int> rs; | - |
512 | QMap<int, int> lanchors; | - |
513 | QMap<int, int> ranchors; | - |
514 | int skipanchors; | - |
515 | - | |
516 | - | |
517 | int earlyStart; | - |
518 | int lateStart; | - |
519 | QString str; | - |
520 | QString leftStr; | - |
521 | QString rightStr; | - |
522 | int maxl; | - |
523 | - | |
524 | - | |
525 | int minl; | - |
526 | - | |
527 | QVector<int> occ1; | - |
528 | - | |
529 | }; | - |
530 | - | |
531 | friend class Box; | - |
532 | - | |
533 | - | |
534 | - | |
535 | - | |
536 | enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen, Tok_PosLookahead, | - |
537 | Tok_NegLookahead, Tok_RightParen, Tok_CharClass, Tok_Caret, Tok_Quantifier, Tok_Bar, | - |
538 | Tok_Word, Tok_NonWord, Tok_Char = 0x10000, Tok_BackRef = 0x20000 }; | - |
539 | int getChar(); | - |
540 | int getEscape(); | - |
541 | - | |
542 | int getRep(int def); | - |
543 | - | |
544 | - | |
545 | void skipChars(int n); | - |
546 | - | |
547 | void error(const char *msg); | - |
548 | void startTokenizer(const QChar *rx, int len); | - |
549 | int getToken(); | - |
550 | - | |
551 | const QChar *yyIn; | - |
552 | int yyPos0; | - |
553 | int yyPos; | - |
554 | int yyLen; | - |
555 | int yyCh; | - |
556 | QScopedPointer<QRegExpCharClass> yyCharClass; | - |
557 | int yyMinRep; | - |
558 | int yyMaxRep; | - |
559 | QString yyError; | - |
560 | - | |
561 | - | |
562 | - | |
563 | - | |
564 | int parse(const QChar *rx, int len); | - |
565 | void parseAtom(Box *box); | - |
566 | void parseFactor(Box *box); | - |
567 | void parseTerm(Box *box); | - |
568 | void parseExpression(Box *box); | - |
569 | - | |
570 | int yyTok; | - |
571 | bool yyMayCapture; | - |
572 | - | |
573 | friend struct QRegExpMatchState; | - |
574 | }; | - |
575 | - | |
576 | - | |
577 | - | |
578 | - | |
579 | - | |
580 | - | |
581 | struct QRegExpLookahead | - |
582 | { | - |
583 | QRegExpEngine *eng; | - |
584 | bool neg; | - |
585 | - | |
586 | inline QRegExpLookahead(QRegExpEngine *eng0, bool neg0) | - |
587 | : eng(eng0), neg(neg0) { } | - |
588 | inline ~QRegExpLookahead() { delete eng; } | - |
589 | }; | - |
590 | __attribute__((visibility("default"))) QString qt_regexp_toCanonical(const QString &pattern, QRegExp::PatternSyntax patternSyntax) | - |
591 | { | - |
592 | switch (patternSyntax) { | - |
593 | - | |
594 | case QRegExp::Wildcard: | - |
595 | return wc2rx(pattern, false); | - |
596 | case QRegExp::WildcardUnix: | - |
597 | return wc2rx(pattern, true); | - |
598 | - | |
599 | case QRegExp::FixedString: | - |
600 | return QRegExp::escape(pattern); | - |
601 | case QRegExp::W3CXmlSchema11: | - |
602 | default: | - |
603 | return pattern; | - |
604 | } | - |
605 | } | - |
606 | - | |
607 | QRegExpEngine::QRegExpEngine(const QRegExpEngineKey &key) | - |
608 | : cs(key.cs), greedyQuantifiers(key.patternSyntax == QRegExp::RegExp2), | - |
609 | xmlSchemaExtensions(key.patternSyntax == QRegExp::W3CXmlSchema11) | - |
610 | { | - |
611 | setup(); | - |
612 | - | |
613 | QString rx = qt_regexp_toCanonical(key.pattern, key.patternSyntax); | - |
614 | - | |
615 | valid = (parse(rx.unicode(), rx.length()) == rx.length()); | - |
616 | if (!valid) { | - |
617 | - | |
618 | trivial = false; | - |
619 | - | |
620 | error("missing left delim"); | - |
621 | } | - |
622 | } | - |
623 | - | |
624 | QRegExpEngine::~QRegExpEngine() | - |
625 | { | - |
626 | - | |
627 | qDeleteAll(ahead); | - |
628 | - | |
629 | } | - |
630 | - | |
631 | void QRegExpMatchState::prepareForMatch(QRegExpEngine *eng) | - |
632 | { | - |
633 | - | |
634 | - | |
635 | - | |
636 | - | |
637 | int ns = eng->s.size(); | - |
638 | int ncap = eng->ncap; | - |
639 | - | |
640 | int newSlideTabSize = qMax(eng->minl + 1, 16); | - |
641 | - | |
642 | - | |
643 | - | |
644 | int numCaptures = eng->captureCount(); | - |
645 | int newCapturedSize = 2 + 2 * numCaptures; | - |
646 | bigArray = q_check_ptr((int *)realloc(bigArray, ((3 + 4 * ncap) * ns + 4 * ncap + newSlideTabSize + newCapturedSize)*sizeof(int))); | - |
647 | - | |
648 | - | |
649 | - | |
650 | - | |
651 | slideTabSize = newSlideTabSize; | - |
652 | capturedSize = newCapturedSize; | - |
653 | inNextStack = bigArray; | - |
654 | memset(inNextStack, -1, ns * sizeof(int)); | - |
655 | curStack = inNextStack + ns; | - |
656 | nextStack = inNextStack + 2 * ns; | - |
657 | - | |
658 | curCapBegin = inNextStack + 3 * ns; | - |
659 | nextCapBegin = curCapBegin + ncap * ns; | - |
660 | curCapEnd = curCapBegin + 2 * ncap * ns; | - |
661 | nextCapEnd = curCapBegin + 3 * ncap * ns; | - |
662 | - | |
663 | tempCapBegin = curCapBegin + 4 * ncap * ns; | - |
664 | tempCapEnd = tempCapBegin + ncap; | - |
665 | capBegin = tempCapBegin + 2 * ncap; | - |
666 | capEnd = tempCapBegin + 3 * ncap; | - |
667 | - | |
668 | slideTab = tempCapBegin + 4 * ncap; | - |
669 | captured = slideTab + slideTabSize; | - |
670 | memset(captured, -1, capturedSize*sizeof(int)); | - |
671 | this->eng = eng; | - |
672 | } | - |
673 | - | |
674 | - | |
675 | - | |
676 | - | |
677 | - | |
678 | void QRegExpMatchState::match(const QChar *str0, int len0, int pos0, | - |
679 | bool minimal0, bool oneTest, int caretIndex) | - |
680 | { | - |
681 | bool matched = false; | - |
682 | QChar char_null; | - |
683 | - | |
684 | - | |
685 | if (eng->trivial && !oneTest) { | - |
686 | pos = qFindString(str0, len0, pos0, eng->goodStr.unicode(), eng->goodStr.length(), eng->cs); | - |
687 | matchLen = eng->goodStr.length(); | - |
688 | matched = (pos != -1); | - |
689 | } else | - |
690 | - | |
691 | { | - |
692 | in = str0; | - |
693 | if (in == 0) | - |
694 | in = &char_null; | - |
695 | pos = pos0; | - |
696 | caretPos = caretIndex; | - |
697 | len = len0; | - |
698 | minimal = minimal0; | - |
699 | matchLen = 0; | - |
700 | oneTestMatchedLen = 0; | - |
701 | - | |
702 | if (eng->valid && pos >= 0 && pos <= len) { | - |
703 | - | |
704 | if (oneTest) { | - |
705 | matched = matchHere(); | - |
706 | } else { | - |
707 | if (pos <= len - eng->minl) { | - |
708 | if (eng->caretAnchored) { | - |
709 | matched = matchHere(); | - |
710 | } else if (eng->useGoodStringHeuristic) { | - |
711 | matched = eng->goodStringMatch(*this); | - |
712 | } else { | - |
713 | matched = eng->badCharMatch(*this); | - |
714 | } | - |
715 | } | - |
716 | } | - |
717 | - | |
718 | - | |
719 | - | |
720 | } | - |
721 | } | - |
722 | - | |
723 | if (matched) { | - |
724 | int *c = captured; | - |
725 | *c++ = pos; | - |
726 | *c++ = matchLen; | - |
727 | - | |
728 | int numCaptures = (capturedSize - 2) >> 1; | - |
729 | - | |
730 | for (int i = 0; i < numCaptures; ++i) { | - |
731 | int j = eng->captureForOfficialCapture.at(i); | - |
732 | if (capBegin[j] != EmptyCapture) { | - |
733 | int len = capEnd[j] - capBegin[j]; | - |
734 | *c++ = (len > 0) ? pos + capBegin[j] : 0; | - |
735 | *c++ = len; | - |
736 | } else { | - |
737 | *c++ = -1; | - |
738 | *c++ = -1; | - |
739 | } | - |
740 | } | - |
741 | - | |
742 | } else { | - |
743 | - | |
744 | memset(captured, -1, capturedSize * sizeof(int)); | - |
745 | } | - |
746 | } | - |
747 | - | |
748 | - | |
749 | - | |
750 | - | |
751 | - | |
752 | - | |
753 | int QRegExpEngine::createState(QChar ch) | - |
754 | { | - |
755 | return setupState(ch.unicode()); | - |
756 | } | - |
757 | - | |
758 | int QRegExpEngine::createState(const QRegExpCharClass &cc) | - |
759 | { | - |
760 | - | |
761 | int n = cl.size(); | - |
762 | cl += QRegExpCharClass(cc); | - |
763 | return setupState(CharClassBit | n); | - |
764 | - | |
765 | - | |
766 | - | |
767 | - | |
768 | } | - |
769 | - | |
770 | - | |
771 | int QRegExpEngine::createState(int bref) | - |
772 | { | - |
773 | if (bref > nbrefs) { | - |
774 | nbrefs = bref; | - |
775 | if (nbrefs > MaxBackRefs) { | - |
776 | error("met internal limit"); | - |
777 | return 0; | - |
778 | } | - |
779 | } | - |
780 | return setupState(BackRefBit | bref); | - |
781 | } | - |
782 | void QRegExpEngine::addCatTransitions(const QVector<int> &from, const QVector<int> &to) | - |
783 | { | - |
784 | for (int i = 0; i < from.size(); i++) | - |
785 | mergeInto(&s[from.at(i)].outs, to); | - |
786 | } | - |
787 | - | |
788 | - | |
789 | void QRegExpEngine::addPlusTransitions(const QVector<int> &from, const QVector<int> &to, int atom) | - |
790 | { | - |
791 | for (int i = 0; i < from.size(); i++) { | - |
792 | QRegExpAutomatonState &st = s[from.at(i)]; | - |
793 | const QVector<int> oldOuts = st.outs; | - |
794 | mergeInto(&st.outs, to); | - |
795 | if (f.at(atom).capture != QRegExpAtom::NoCapture) { | - |
796 | for (int j = 0; j < to.size(); j++) { | - |
797 | - | |
798 | if (!st.reenter.contains(to.at(j)) && | - |
799 | !std::binary_search(oldOuts.constBegin(), oldOuts.constEnd(), to.at(j))) | - |
800 | st.reenter.insert(to.at(j), atom); | - |
801 | } | - |
802 | } | - |
803 | } | - |
804 | } | - |
805 | - | |
806 | - | |
807 | - | |
808 | - | |
809 | - | |
810 | - | |
811 | int QRegExpEngine::anchorAlternation(int a, int b) | - |
812 | { | - |
813 | if (((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0) | - |
814 | return a & b; | - |
815 | - | |
816 | int n = aa.size(); | - |
817 | - | |
818 | if (n > 0 && aa.at(n - 1).a == a && aa.at(n - 1).b == b) | - |
819 | return Anchor_Alternation | (n - 1); | - |
820 | - | |
821 | - | |
822 | QRegExpAnchorAlternation element = {a, b}; | - |
823 | aa.append(element); | - |
824 | return Anchor_Alternation | n; | - |
825 | } | - |
826 | - | |
827 | - | |
828 | - | |
829 | - | |
830 | int QRegExpEngine::anchorConcatenation(int a, int b) | - |
831 | { | - |
832 | if (((a | b) & Anchor_Alternation) == 0) | - |
833 | return a | b; | - |
834 | if ((b & Anchor_Alternation) != 0) | - |
835 | qSwap(a, b); | - |
836 | - | |
837 | int aprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).a, b); | - |
838 | int bprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).b, b); | - |
839 | return anchorAlternation(aprime, bprime); | - |
840 | } | - |
841 | - | |
842 | - | |
843 | - | |
844 | - | |
845 | - | |
846 | - | |
847 | void QRegExpEngine::addAnchors(int from, int to, int a) | - |
848 | { | - |
849 | QRegExpAutomatonState &st = s[from]; | - |
850 | if (st.anchors.contains(to)) | - |
851 | a = anchorAlternation(st.anchors.value(to), a); | - |
852 | st.anchors.insert(to, a); | - |
853 | } | - |
854 | void QRegExpEngine::heuristicallyChooseHeuristic() | - |
855 | { | - |
856 | if (minl == 0) { | - |
857 | useGoodStringHeuristic = false; | - |
858 | } else if (trivial) { | - |
859 | useGoodStringHeuristic = true; | - |
860 | } else { | - |
861 | - | |
862 | - | |
863 | - | |
864 | - | |
865 | - | |
866 | int goodStringScore = (64 * goodStr.length() / minl) - | - |
867 | (goodLateStart - goodEarlyStart); | - |
868 | - | |
869 | - | |
870 | - | |
871 | - | |
872 | int badCharScore = 0; | - |
873 | int step = qMax(1, NumBadChars / 32); | - |
874 | for (int i = 1; i < NumBadChars; i += step) { | - |
875 | if (occ1.at(i) == NoOccurrence) | - |
876 | badCharScore += minl; | - |
877 | else | - |
878 | badCharScore += occ1.at(i); | - |
879 | } | - |
880 | badCharScore /= minl; | - |
881 | useGoodStringHeuristic = (goodStringScore > badCharScore); | - |
882 | } | - |
883 | } | - |
884 | - | |
885 | - | |
886 | - | |
887 | void QRegExpEngine::dump() const | - |
888 | { | - |
889 | int i, j; | - |
890 | QMessageLogger(__FILE__, 16501656, __PRETTY_FUNCTION__).debug("Case %ssensitive engine", cs ? "" : "in"); | - |
891 | QMessageLogger(__FILE__, 16511657, __PRETTY_FUNCTION__).debug(" States"); | - |
892 | for (i = 0; i < s.size(); i++) { | - |
893 | QMessageLogger(__FILE__, 16531659, __PRETTY_FUNCTION__).debug(" %d%s", i, i == InitialState ? " (initial)" : i == FinalState ? " (final)" : ""); | - |
894 | - | |
895 | if (nf > 0) | - |
896 | QMessageLogger(__FILE__, 16561662, __PRETTY_FUNCTION__).debug(" in atom %d", s[i].atom); | - |
897 | - | |
898 | int m = s[i].match; | - |
899 | if ((m & CharClassBit) != 0) { | - |
900 | QMessageLogger(__FILE__, 16601666, __PRETTY_FUNCTION__).debug(" match character class %d", m ^ CharClassBit); | - |
901 | - | |
902 | cl[m ^ CharClassBit].dump(); | - |
903 | - | |
904 | - | |
905 | - | |
906 | } else if ((m & BackRefBit) != 0) { | - |
907 | QMessageLogger(__FILE__, 16671673, __PRETTY_FUNCTION__).debug(" match back-reference %d", m ^ BackRefBit); | - |
908 | } else if (m >= 0x20 && m <= 0x7e) { | - |
909 | QMessageLogger(__FILE__, 16691675, __PRETTY_FUNCTION__).debug(" match 0x%.4x (%c)", m, m); | - |
910 | } else { | - |
911 | QMessageLogger(__FILE__, 16711677, __PRETTY_FUNCTION__).debug(" match 0x%.4x", m); | - |
912 | } | - |
913 | for (j = 0; j < s[i].outs.size(); j++) { | - |
914 | int next = s[i].outs[j]; | - |
915 | QMessageLogger(__FILE__, 16751681, __PRETTY_FUNCTION__).debug(" -> %d", next); | - |
916 | if (s[i].reenter.contains(next)) | - |
917 | QMessageLogger(__FILE__, 16771683, __PRETTY_FUNCTION__).debug(" [reenter %d]", s[i].reenter[next]); | - |
918 | if (s[i].anchors.value(next) != 0) | - |
919 | QMessageLogger(__FILE__, 16791685, __PRETTY_FUNCTION__).debug(" [anchors 0x%.8x]", s[i].anchors[next]); | - |
920 | } | - |
921 | } | - |
922 | - | |
923 | if (nf > 0) { | - |
924 | QMessageLogger(__FILE__, 16841690, __PRETTY_FUNCTION__).debug(" Atom Parent Capture"); | - |
925 | for (i = 0; i < nf; i++) { | - |
926 | if (f[i].capture == QRegExpAtom::NoCapture) { | - |
927 | QMessageLogger(__FILE__, 16871693, __PRETTY_FUNCTION__).debug(" %6d %6d nil", i, f[i].parent); | - |
928 | } else { | - |
929 | int cap = f[i].capture; | - |
930 | bool official = captureForOfficialCapture.contains(cap); | - |
931 | QMessageLogger(__FILE__, 16911697, __PRETTY_FUNCTION__).debug(" %6d %6d %6d %s", i, f[i].parent, f[i].capture, | - |
932 | official ? "official" : ""); | - |
933 | } | - |
934 | } | - |
935 | } | - |
936 | - | |
937 | - | |
938 | for (i = 0; i < aa.size(); i++) | - |
939 | QMessageLogger(__FILE__, 16991705, __PRETTY_FUNCTION__).debug(" Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, aa[i].b); | - |
940 | - | |
941 | } | - |
942 | - | |
943 | - | |
944 | void QRegExpEngine::setup() | - |
945 | { | - |
946 | ref.store(1); | - |
947 | - | |
948 | f.resize(32); | - |
949 | nf = 0; | - |
950 | cf = -1; | - |
951 | - | |
952 | officialncap = 0; | - |
953 | ncap = 0; | - |
954 | - | |
955 | caretAnchored = true; | - |
956 | trivial = true; | - |
957 | - | |
958 | valid = false; | - |
959 | - | |
960 | nbrefs = 0; | - |
961 | - | |
962 | - | |
963 | useGoodStringHeuristic = true; | - |
964 | minl = 0; | - |
965 | occ1.fill(0, NumBadChars); | - |
966 | - | |
967 | } | - |
968 | - | |
969 | int QRegExpEngine::setupState(int match) | - |
970 | { | - |
971 | - | |
972 | s += QRegExpAutomatonState(cf, match); | - |
973 | - | |
974 | - | |
975 | - | |
976 | return s.size() - 1; | - |
977 | } | - |
978 | - | |
979 | - | |
980 | - | |
981 | - | |
982 | - | |
983 | - | |
984 | - | |
985 | int QRegExpEngine::startAtom(bool officialCapture) | - |
986 | { | - |
987 | if ((nf & (nf + 1)) == 0 && nf + 1 >= f.size()) | - |
988 | f.resize((nf + 1) << 1); | - |
989 | f[nf].parent = cf; | - |
990 | cf = nf++; | - |
991 | f[cf].capture = officialCapture ? QRegExpAtom::OfficialCapture : QRegExpAtom::NoCapture; | - |
992 | return cf; | - |
993 | } | - |
994 | - | |
995 | void QRegExpEngine::finishAtom(int atom, bool needCapture) | - |
996 | { | - |
997 | if (greedyQuantifiers && needCapture && f[atom].capture == QRegExpAtom::NoCapture) | - |
998 | f[atom].capture = QRegExpAtom::UnofficialCapture; | - |
999 | cf = f.at(atom).parent; | - |
1000 | } | - |
1001 | - | |
1002 | - | |
1003 | - | |
1004 | - | |
1005 | - | |
1006 | - | |
1007 | int QRegExpEngine::addLookahead(QRegExpEngine *eng, bool negative) | - |
1008 | { | - |
1009 | int n = ahead.size(); | - |
1010 | if (n == MaxLookaheads) { | - |
1011 | error("met internal limit"); | - |
1012 | return 0; | - |
1013 | } | - |
1014 | ahead += new QRegExpLookahead(eng, negative); | - |
1015 | return Anchor_FirstLookahead << n; | - |
1016 | } | - |
1017 | - | |
1018 | - | |
1019 | - | |
1020 | - | |
1021 | - | |
1022 | - | |
/*
    Decides whether capture set 1 is preferable to capture set 2.

    The zones are compared in index order; the first zone in which the two
    sets differ decides. Within a zone, the set that begins earlier wins;
    if both begin at the same place, the set that ends later wins. Returns
    false when all ncap zones are identical (or ncap is 0).
*/
static bool isBetterCapture(int ncap, const int *begin1, const int *end1, const int *begin2,
                            const int *end2)
{
    int zone = 0;
    while (zone < ncap) {
        // Positive when set 1 begins earlier than set 2.
        int verdict = begin2[zone] - begin1[zone];
        if (verdict == 0) {
            // Same beginning: positive when set 1 ends later than set 2.
            verdict = end1[zone] - end2[zone];
        }

        if (verdict > 0)
            return true;
        if (verdict < 0)
            return false;
        ++zone;
    }
    return false;
}
1036 | - | |
1037 | - | |
1038 | - | |
1039 | - | |
1040 | - | |
1041 | - | |
1042 | bool QRegExpMatchState::testAnchor(int i, int a, const int *capBegin) | - |
1043 | { | - |
1044 | int j; | - |
1045 | - | |
1046 | - | |
1047 | if ((a & QRegExpEngine::Anchor_Alternation) != 0) | - |
1048 | return testAnchor(i, eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation).a, capBegin) | - |
1049 | || testAnchor(i, eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation).b, capBegin); | - |
1050 | - | |
1051 | - | |
1052 | if ((a & QRegExpEngine::Anchor_Caret) != 0) { | - |
1053 | if (pos + i != caretPos) | - |
1054 | return false; | - |
1055 | } | - |
1056 | if ((a & QRegExpEngine::Anchor_Dollar) != 0) { | - |
1057 | if (pos + i != len) | - |
1058 | return false; | - |
1059 | } | - |
1060 | - | |
1061 | if ((a & (QRegExpEngine::Anchor_Word | QRegExpEngine::Anchor_NonWord)) != 0) { | - |
1062 | bool before = false; | - |
1063 | bool after = false; | - |
1064 | if (pos + i != 0) | - |
1065 | before = isWord(in[pos + i - 1]); | - |
1066 | if (pos + i != len) | - |
1067 | after = isWord(in[pos + i]); | - |
1068 | if ((a & QRegExpEngine::Anchor_Word) != 0 && (before == after)) | - |
1069 | return false; | - |
1070 | if ((a & QRegExpEngine::Anchor_NonWord) != 0 && (before != after)) | - |
1071 | return false; | - |
1072 | } | - |
1073 | - | |
1074 | - | |
1075 | if ((a & QRegExpEngine::Anchor_LookaheadMask) != 0) { | - |
1076 | const QVector<QRegExpLookahead *> &ahead = eng->ahead; | - |
1077 | for (j = 0; j < ahead.size(); j++) { | - |
1078 | if ((a & (QRegExpEngine::Anchor_FirstLookahead << j)) != 0) { | - |
1079 | QRegExpMatchState matchState; | - |
1080 | matchState.prepareForMatch(ahead[j]->eng); | - |
1081 | matchState.match(in + pos + i, len - pos - i, 0, | - |
1082 | true, true, caretPos - pos - i); | - |
1083 | if ((matchState.captured[0] == 0) == ahead[j]->neg) | - |
1084 | return false; | - |
1085 | } | - |
1086 | } | - |
1087 | } | - |
1088 | - | |
1089 | - | |
1090 | - | |
1091 | for (j = 0; j < eng->nbrefs; j++) { | - |
1092 | if ((a & (QRegExpEngine::Anchor_BackRef1Empty << j)) != 0) { | - |
1093 | int i = eng->captureForOfficialCapture.at(j); | - |
1094 | if (capBegin[i] != EmptyCapture) | - |
1095 | return false; | - |
1096 | } | - |
1097 | } | - |
1098 | - | |
1099 | - | |
1100 | return true; | - |
1101 | } | - |
1102 | bool QRegExpEngine::goodStringMatch(QRegExpMatchState &matchState) const | - |
1103 | { | - |
1104 | int k = matchState.pos + goodEarlyStart; | - |
1105 | QStringMatcher matcher(goodStr.unicode(), goodStr.length(), cs); | - |
1106 | while ((k = matcher.indexIn(matchState.in, matchState.len, k)) != -1) { | - |
1107 | int from = k - goodLateStart; | - |
1108 | int to = k - goodEarlyStart; | - |
1109 | if (from > matchState.pos) | - |
1110 | matchState.pos = from; | - |
1111 | - | |
1112 | while (matchState.pos <= to) { | - |
1113 | if (matchState.matchHere()) | - |
1114 | return true; | - |
1115 | ++matchState.pos; | - |
1116 | } | - |
1117 | ++k; | - |
1118 | } | - |
1119 | return false; | - |
1120 | } | - |
1121 | - | |
1122 | bool QRegExpEngine::badCharMatch(QRegExpMatchState &matchState) const | - |
1123 | { | - |
1124 | int slideHead = 0; | - |
1125 | int slideNext = 0; | - |
1126 | int i; | - |
1127 | int lastPos = matchState.len - minl; | - |
1128 | memset(matchState.slideTab, 0, matchState.slideTabSize * sizeof(int)); | - |
1129 | - | |
1130 | - | |
1131 | - | |
1132 | - | |
1133 | - | |
1134 | for (i = 0; i < minl; i++) { | - |
1135 | int sk = occ1[((matchState.in[matchState.pos + i]).unicode() % NumBadChars)]; | - |
1136 | if (sk == NoOccurrence) | - |
1137 | sk = i + 1; | - |
1138 | if (sk > 0) { | - |
1139 | int k = i + 1 - sk; | - |
1140 | if (k < 0) { | - |
1141 | sk = i + 1; | - |
1142 | k = 0; | - |
1143 | } | - |
1144 | if (sk > matchState.slideTab[k]) | - |
1145 | matchState.slideTab[k] = sk; | - |
1146 | } | - |
1147 | } | - |
1148 | - | |
1149 | if (matchState.pos > lastPos) | - |
1150 | return false; | - |
1151 | - | |
1152 | for (;;) { | - |
1153 | if (++slideNext >= matchState.slideTabSize) | - |
1154 | slideNext = 0; | - |
1155 | if (matchState.slideTab[slideHead] > 0) { | - |
1156 | if (matchState.slideTab[slideHead] - 1 > matchState.slideTab[slideNext]) | - |
1157 | matchState.slideTab[slideNext] = matchState.slideTab[slideHead] - 1; | - |
1158 | matchState.slideTab[slideHead] = 0; | - |
1159 | } else { | - |
1160 | if (matchState.matchHere()) | - |
1161 | return true; | - |
1162 | } | - |
1163 | - | |
1164 | if (matchState.pos == lastPos) | - |
1165 | break; | - |
1166 | - | |
1167 | - | |
1168 | - | |
1169 | - | |
1170 | - | |
1171 | int sk = occ1[((matchState.in[matchState.pos + minl]).unicode() % NumBadChars)]; | - |
1172 | if (sk == NoOccurrence) { | - |
1173 | matchState.slideTab[slideNext] = minl; | - |
1174 | } else if (sk > 0) { | - |
1175 | int k = slideNext + minl - sk; | - |
1176 | if (k >= matchState.slideTabSize) | - |
1177 | k -= matchState.slideTabSize; | - |
1178 | if (sk > matchState.slideTab[k]) | - |
1179 | matchState.slideTab[k] = sk; | - |
1180 | } | - |
1181 | slideHead = slideNext; | - |
1182 | ++matchState.pos; | - |
1183 | } | - |
1184 | return false; | - |
1185 | } | - |
// Tries to match the automaton against the input starting exactly at in[pos].
// Each iteration of the outer loop consumes one input position (offset i) and
// turns the set of current states (curStack, ncur entries) into the set of
// next states (nextStack, nnext entries), carrying one block of ncap capture
// begin/end values per state.
// On exit, matchLen is the last offset at which FinalState was entered (-1 if
// never) and oneTestMatchedLen is i - 1. Returns true when matchLen >= 0.
1186 | bool QRegExpMatchState::matchHere() | - |
1187 | { | - |
1188 | int ncur = 1, nnext = 0; | - |
1189 | int i = 0, j, k, m; | - |
1190 | bool stop = false; | - |
1191 | - |
1192 | matchLen = -1; | - |
1193 | oneTestMatchedLen = -1; | - |
1194 | curStack[0] = QRegExpEngine::InitialState; | - |
1195 | - |
1196 | int ncap = eng->ncap; | - |
1197 | - |
// All capture zones of the single initial state start out empty.
1198 | if (ncap > 0) { | - |
1199 | for (j = 0; j < ncap; j++) { | - |
1200 | curCapBegin[j] = EmptyCapture; | - |
1201 | curCapEnd[j] = EmptyCapture; | - |
1202 | } | - |
1203 | } | - |
1204 | - |
1205 | - |
1206 | - |
// Keep going while some state is live or asleep, the offset has not passed the
// end of the input (i == len - pos is still processed, with ch == 0), and no
// stop was requested.
1207 | while ((ncur > 0 || !sleeping.isEmpty()) && i <= len - pos && !stop) | - |
1208 | - |
1209 | - |
1210 | - |
1211 | { | - |
1212 | int ch = (i < len - pos) ? in[pos + i].unicode() : 0; | - |
1213 | for (j = 0; j < ncur; j++) { | - |
1214 | int cur = curStack[j]; | - |
1215 | const QRegExpAutomatonState &scur = eng->s.at(cur); | - |
1216 | const QVector<int> &outs = scur.outs; | - |
1217 | for (k = 0; k < outs.size(); k++) { | - |
1218 | int next = outs.at(k); | - |
1219 | const QRegExpAutomatonState &snext = eng->s.at(next); | - |
1220 | bool inside = true; | - |
1221 | - |
1222 | int needSomeSleep = 0; | - |
1223 | - |
1224 | - |
1225 | - |
1226 | - |
1227 | - |
// Step 1: the anchors attached to the cur -> next transition must hold.
1228 | int a = scur.anchors.value(next); | - |
1229 | if (a != 0 && !testAnchor(i, a, curCapBegin + j * ncap)) | - |
1230 | inside = false; | - |
1231 | - |
1232 | - |
1233 | - |
1234 | - |
1235 | - |
// Step 2: the current character must be accepted by what `next` matches.
1236 | if (inside) { | - |
1237 | m = snext.match; | - |
// Neither flag bit set: m is compared directly with the character code.
1238 | if ((m & (QRegExpEngine::CharClassBit | QRegExpEngine::BackRefBit)) == 0) { | - |
1239 | if (eng->cs) | - |
1240 | inside = (m == ch); | - |
1241 | else | - |
1242 | inside = (QChar(m).toLower() == QChar(ch).toLower()); | - |
// Entering the final state records a match of length i; in minimal mode
// the search stops after this step.
1243 | } else if (next == QRegExpEngine::FinalState) { | - |
1244 | matchLen = i; | - |
1245 | stop = minimal; | - |
1246 | inside = true; | - |
// Character class: m with CharClassBit removed indexes eng->cl.
1247 | } else if ((m & QRegExpEngine::CharClassBit) != 0) { | - |
1248 | - |
1249 | const QRegExpCharClass &cc = eng->cl.at(m ^ QRegExpEngine::CharClassBit); | - |
1250 | if (eng->cs) | - |
1251 | inside = cc.in(ch); | - |
// Case-insensitive: a negative class must accept both case forms, a
// positive class at least one of them.
1252 | else if (cc.negative()) | - |
1253 | inside = cc.in(QChar(ch).toLower()) && | - |
1254 | cc.in(QChar(ch).toUpper()); | - |
1255 | else | - |
1256 | inside = cc.in(QChar(ch).toLower()) || | - |
1257 | cc.in(QChar(ch).toUpper()); | - |
1258 | - |
1259 | - |
// Back-reference: bref is m with BackRefBit removed; ell is the index of the
// referenced zone inside state j's capture block.
1260 | } else { | - |
1261 | int bref = m ^ QRegExpEngine::BackRefBit; | - |
1262 | int ell = j * ncap + eng->captureForOfficialCapture.at(bref - 1); | - |
1263 | - |
1264 | inside = bref <= ncap && curCapBegin[ell] != EmptyCapture; | - |
// Compare the first captured character with the current character.
1265 | if (inside) { | - |
1266 | if (eng->cs) | - |
1267 | inside = (in[pos + curCapBegin[ell]] == QChar(ch)); | - |
1268 | else | - |
1269 | inside = (in[pos + curCapBegin[ell]].toLower() | - |
1270 | == QChar(ch).toLower()); | - |
1271 | } | - |
1272 | - |
// delta is the length of the captured text (up to i if the zone is still
// open). It must fit in the remaining input, and characters 1 .. delta - 1
// must match as well.
1273 | if (inside) { | - |
1274 | int delta; | - |
1275 | if (curCapEnd[ell] == EmptyCapture) | - |
1276 | delta = i - curCapBegin[ell]; | - |
1277 | else | - |
1278 | delta = curCapEnd[ell] - curCapBegin[ell]; | - |
1279 | - |
1280 | inside = (delta <= len - (pos + i)); | - |
1281 | if (inside && delta > 1) { | - |
1282 | int n = 1; | - |
1283 | if (eng->cs) { | - |
1284 | while (n < delta) { | - |
1285 | if (in[pos + curCapBegin[ell] + n] | - |
1286 | != in[pos + i + n]) | - |
1287 | break; | - |
1288 | ++n; | - |
1289 | } | - |
1290 | } else { | - |
1291 | while (n < delta) { | - |
1292 | QChar a = in[pos + curCapBegin[ell] + n]; | - |
1293 | QChar b = in[pos + i + n]; | - |
1294 | if (a.toLower() != b.toLower()) | - |
1295 | break; | - |
1296 | ++n; | - |
1297 | } | - |
1298 | } | - |
1299 | inside = (n == delta); | - |
// The whole captured text matched: the state sleeps for delta - 1 steps.
1300 | if (inside) | - |
1301 | needSomeSleep = delta - 1; | - |
1302 | } | - |
1303 | } | - |
1304 | - |
1305 | } | - |
1306 | } | - |
1307 | - |
1308 | - |
1309 | - |
1310 | - |
// Step 3: the transition is taken; record `next` and its capture zones.
1311 | if (inside) { | - |
1312 | - |
1313 | int *capBegin, *capEnd; | - |
1314 | - |
1315 | - |
1316 | - |
1317 | - |
1318 | - |
// `next` not yet reached in this step: push it and write the captures
// directly into its slot of the next-capture arrays.
1319 | if ((m = inNextStack[next]) == -1) { | - |
1320 | m = nnext++; | - |
1321 | nextStack[m] = next; | - |
1322 | inNextStack[next] = m; | - |
1323 | - |
1324 | capBegin = nextCapBegin + m * ncap; | - |
1325 | capEnd = nextCapEnd + m * ncap; | - |
1326 | - |
1327 | - |
1328 | - |
1329 | - |
1330 | - |
1331 | - |
1332 | - |
// `next` already reached: build the captures in the temporary arrays; they
// replace the stored ones only if isBetterCapture() prefers them (below).
1333 | } else { | - |
1334 | capBegin = tempCapBegin; | - |
1335 | capEnd = tempCapEnd; | - |
1336 | - |
1337 | } | - |
1338 | - |
1339 | - |
1340 | - |
1341 | - |
1342 | - |
// Start from the capture zones of the current state j, then adjust them for
// the atoms left and entered by this transition.
1343 | if (ncap > 0) { | - |
1344 | memcpy(capBegin, curCapBegin + j * ncap, ncap * sizeof(int)); | - |
1345 | memcpy(capEnd, curCapEnd + j * ncap, ncap * sizeof(int)); | - |
1346 | int c = scur.atom, n = snext.atom; | - |
1347 | int p = -1, q = -1; | - |
1348 | int cap; | - |
// Re-entering atom q: mark q, then mark every later atom whose parent is
// marked, emptying the capture zone of each atom marked that way.
1349 | if ((q = scur.reenter.value(next)) != 0) { | - |
1350 | QBitArray b(eng->nf, false); | - |
1351 | b.setBit(q, true); | - |
1352 | for (int ell = q + 1; ell < eng->nf; ell++) { | - |
1353 | if (b.testBit(eng->f.at(ell).parent)) { | - |
1354 | b.setBit(ell, true); | - |
1355 | cap = eng->f.at(ell).capture; | - |
1356 | if (cap >= 0) { | - |
1357 | capBegin[cap] = EmptyCapture; | - |
1358 | capEnd[cap] = EmptyCapture; | - |
1359 | } | - |
1360 | } | - |
1361 | } | - |
1362 | p = eng->f.at(q).parent; | - |
// Otherwise walk p (from the current atom) and q (from the next atom) up the
// parent links until they meet. Each atom passed on the p side is closed at
// i; a zone that also began at i is emptied instead.
1363 | } else { | - |
1364 | p = c; | - |
1365 | q = n; | - |
1366 | while (p != q) { | - |
1367 | if (p > q) { | - |
1368 | cap = eng->f.at(p).capture; | - |
1369 | if (cap >= 0) { | - |
1370 | if (capBegin[cap] == i) { | - |
1371 | capBegin[cap] = EmptyCapture; | - |
1372 | capEnd[cap] = EmptyCapture; | - |
1373 | } else { | - |
1374 | capEnd[cap] = i; | - |
1375 | } | - |
1376 | } | - |
1377 | p = eng->f.at(p).parent; | - |
1378 | } else { | - |
1379 | q = eng->f.at(q).parent; | - |
1380 | } | - |
1381 | } | - |
1382 | } | - |
1383 | - |
1384 | - |
1385 | - |
1386 | - |
1387 | - |
1388 | - |
1389 | - |
// Open, at offset i, the zones of the atoms being entered: from the next
// state's atom up the parent links while the index stays above p.
1390 | while (n > p) { | - |
1391 | cap = eng->f.at(n).capture; | - |
1392 | if (cap >= 0) { | - |
1393 | capBegin[cap] = i; | - |
1394 | capEnd[cap] = EmptyCapture; | - |
1395 | } | - |
1396 | n = eng->f.at(n).parent; | - |
1397 | } | - |
1398 | - |
1399 | - |
1400 | - |
1401 | - |
1402 | - |
// Captures were built in the temporaries: keep them only if they beat the
// ones already stored for `next`.
1403 | if (capBegin == tempCapBegin && | - |
1404 | isBetterCapture(ncap, capBegin, capEnd, nextCapBegin + m * ncap, | - |
1405 | nextCapEnd + m * ncap)) { | - |
1406 | memcpy(nextCapBegin + m * ncap, capBegin, ncap * sizeof(int)); | - |
1407 | memcpy(nextCapEnd + m * ncap, capEnd, ncap * sizeof(int)); | - |
1408 | } | - |
1409 | } | - |
1410 | - |
1411 | - |
1412 | - |
1413 | - |
1414 | - |
1415 | - |
1416 | - |
// A multi-character back-reference matched: store [wake-up step, state,
// capture begins, capture ends] in `sleeping` and drop the most recently
// pushed nextStack entry.
1417 | if (needSomeSleep > 0) { | - |
1418 | QVector<int> zzZ(2 + 2 * ncap); | - |
1419 | zzZ[0] = i + needSomeSleep; | - |
1420 | zzZ[1] = next; | - |
1421 | if (ncap > 0) { | - |
1422 | memcpy(zzZ.data() + 2, capBegin, ncap * sizeof(int)); | - |
1423 | memcpy(zzZ.data() + 2 + ncap, capEnd, ncap * sizeof(int)); | - |
1424 | } | - |
1425 | inNextStack[nextStack[--nnext]] = -1; | - |
1426 | sleeping.append(zzZ); | - |
1427 | } | - |
1428 | - |
1429 | - |
1430 | } | - |
1431 | } | - |
1432 | } | - |
1433 | - |
1434 | - |
1435 | - |
1436 | - |
1437 | - |
// FinalState was reached in this step: copy its capture zones into the
// capBegin/capEnd arrays visible at function scope (declared outside this
// function).
1438 | if (ncap > 0 && (m = inNextStack[QRegExpEngine::FinalState]) != -1) { | - |
1439 | memcpy(capBegin, nextCapBegin + m * ncap, ncap * sizeof(int)); | - |
1440 | memcpy(capEnd, nextCapEnd + m * ncap, ncap * sizeof(int)); | - |
1441 | } | - |
1442 | - |
1443 | - |
1444 | - |
1445 | - |
// Wake every sleeper whose wake-up step equals i and merge it into
// nextStack; j only advances when nothing was removed.
1446 | j = 0; | - |
1447 | while (j < sleeping.count()) { | - |
1448 | if (sleeping.at(j)[0] == i) { | - |
1449 | const QVector<int> &zzZ = sleeping.at(j); | - |
1450 | int next = zzZ[1]; | - |
1451 | const int *capBegin = zzZ.data() + 2; | - |
1452 | const int *capEnd = zzZ.data() + 2 + ncap; | - |
1453 | bool copyOver = true; | - |
1454 | - |
1455 | if ((m = inNextStack[next]) == -1) { | - |
1456 | m = nnext++; | - |
1457 | nextStack[m] = next; | - |
1458 | inNextStack[next] = m; | - |
1459 | } else { | - |
1460 | copyOver = isBetterCapture(ncap, nextCapBegin + m * ncap, nextCapEnd + m * ncap, | - |
1461 | capBegin, capEnd); | - |
1462 | } | - |
1463 | if (copyOver) { | - |
1464 | memcpy(nextCapBegin + m * ncap, capBegin, ncap * sizeof(int)); | - |
1465 | memcpy(nextCapEnd + m * ncap, capEnd, ncap * sizeof(int)); | - |
1466 | } | - |
1467 | - |
1468 | sleeping.removeAt(j); | - |
1469 | } else { | - |
1470 | ++j; | - |
1471 | } | - |
1472 | } | - |
1473 | - |
1474 | - |
// Reset the membership table for the states pushed in this step.
1475 | for (j = 0; j < nnext; j++) | - |
1476 | inNextStack[nextStack[j]] = -1; | - |
1477 | - |
1478 | - |
// Only FinalState is left and nothing is asleep: stop after this step.
1479 | if (nnext == 1 && nextStack[0] == QRegExpEngine::FinalState | - |
1480 | - |
1481 | && sleeping.isEmpty() | - |
1482 | - |
1483 | ) | - |
1484 | stop = true; | - |
1485 | - |
// The next-state data becomes the current-state data for the following step.
1486 | qSwap(curStack, nextStack); | - |
1487 | - |
1488 | qSwap(curCapBegin, nextCapBegin); | - |
1489 | qSwap(curCapEnd, nextCapEnd); | - |
1490 | - |
1491 | ncur = nnext; | - |
1492 | nnext = 0; | - |
1493 | ++i; | - |
1494 | } | - |
1495 | - |
1496 | - |
1497 | - |
1498 | - |
1499 | - |
1500 | - |
// Drop any state still asleep when the loop ended.
1501 | if (!sleeping.isEmpty()) | - |
1502 | sleeping.clear(); | - |
1503 | - |
1504 | - |
1505 | oneTestMatchedLen = i - 1; | - |
1506 | return (matchLen >= 0); | - |
1507 | } | - |
1508 | - | |
1509 | - | |
1510 | - | |
1511 | QRegExpCharClass::QRegExpCharClass() | - |
1512 | : c(0), n(false) | - |
1513 | { | - |
1514 | - | |
1515 | occ1.fill(NoOccurrence, NumBadChars); | - |
1516 | - | |
1517 | } | - |
1518 | - | |
1519 | void QRegExpCharClass::clear() | - |
1520 | { | - |
1521 | c = 0; | - |
1522 | r.resize(0);clear(); | - |
1523 | n = false; | - |
1524 | } executed 34392 times by 116 tests: end of block Executed by:
| 34392 |
1525 | - | |
1526 | void QRegExpCharClass::setNegative(bool negative) | - |
1527 | { | - |
1528 | n = negative; | - |
1529 | - | |
1530 | occ1.fill(0, NumBadChars); | - |
1531 | - | |
1532 | } | - |
1533 | - | |
1534 | void QRegExpCharClass::addCategories(uint cats) | - |
1535 | { | - |
1536 | static const int all_cats = (1 << (QChar::Mark_NonSpacing)) | | - |
1537 | (1 << (QChar::Mark_SpacingCombining)) | | - |
1538 | (1 << (QChar::Mark_Enclosing)) | | - |
1539 | (1 << (QChar::Number_DecimalDigit)) | | - |
1540 | (1 << (QChar::Number_Letter)) | | - |
1541 | (1 << (QChar::Number_Other)) | | - |
1542 | (1 << (QChar::Separator_Space)) | | - |
1543 | (1 << (QChar::Separator_Line)) | | - |
1544 | (1 << (QChar::Separator_Paragraph)) | | - |
1545 | (1 << (QChar::Other_Control)) | | - |
1546 | (1 << (QChar::Other_Format)) | | - |
1547 | (1 << (QChar::Other_Surrogate)) | | - |
1548 | (1 << (QChar::Other_PrivateUse)) | | - |
1549 | (1 << (QChar::Other_NotAssigned)) | | - |
1550 | (1 << (QChar::Letter_Uppercase)) | | - |
1551 | (1 << (QChar::Letter_Lowercase)) | | - |
1552 | (1 << (QChar::Letter_Titlecase)) | | - |
1553 | (1 << (QChar::Letter_Modifier)) | | - |
1554 | (1 << (QChar::Letter_Other)) | | - |
1555 | (1 << (QChar::Punctuation_Connector)) | | - |
1556 | (1 << (QChar::Punctuation_Dash)) | | - |
1557 | (1 << (QChar::Punctuation_Open)) | | - |
1558 | (1 << (QChar::Punctuation_Close)) | | - |
1559 | (1 << (QChar::Punctuation_InitialQuote)) | | - |
1560 | (1 << (QChar::Punctuation_FinalQuote)) | | - |
1561 | (1 << (QChar::Punctuation_Other)) | | - |
1562 | (1 << (QChar::Symbol_Math)) | | - |
1563 | (1 << (QChar::Symbol_Currency)) | | - |
1564 | (1 << (QChar::Symbol_Modifier)) | | - |
1565 | (1 << (QChar::Symbol_Other)); | - |
1566 | c |= (all_cats & cats); | - |
1567 | - | |
1568 | occ1.fill(0, NumBadChars); | - |
1569 | - | |
1570 | } | - |
1571 | - | |
1572 | void QRegExpCharClass::addRange(ushort from, ushort to) | - |
1573 | { | - |
1574 | if (from > to) | - |
1575 | qSwap(from, to); | - |
1576 | int m = r.size(); | - |
1577 | r.resize(m + 1); | - |
1578 | r[m].from = from; | - |
1579 | r[m].len = to - from + 1; | - |
1580 | - | |
1581 | - | |
1582 | int i; | - |
1583 | - | |
1584 | if (to - from < NumBadChars) { | - |
1585 | if (from % NumBadChars <= to % NumBadChars) { | - |
1586 | for (i = from % NumBadChars; i <= to % NumBadChars; i++) | - |
1587 | occ1[i] = 0; | - |
1588 | } else { | - |
1589 | for (i = 0; i <= to % NumBadChars; i++) | - |
1590 | occ1[i] = 0; | - |
1591 | for (i = from % NumBadChars; i < NumBadChars; i++) | - |
1592 | occ1[i] = 0; | - |
1593 | } | - |
1594 | } else { | - |
1595 | occ1.fill(0, NumBadChars); | - |
1596 | } | - |
1597 | - | |
1598 | } | - |
1599 | - | |
1600 | bool QRegExpCharClass::in(QChar ch) const | - |
1601 | { | - |
1602 | - | |
1603 | if (occ1.at(((ch).unicode() % NumBadChars)) == NoOccurrence) | - |
1604 | return n; | - |
1605 | - | |
1606 | - | |
1607 | if (c != 0 && (c & (1 << (ch.category()))) != 0) | - |
1608 | return !n; | - |
1609 | - | |
1610 | const int uc = ch.unicode(); | - |
1611 | int size = r.size(); | - |
1612 | - | |
1613 | for (int i = 0; i < size; ++i) { | - |
1614 | const QRegExpCharClassRange &range = r.at(i); | - |
1615 | if (uint(uc - range.from) < uint(r.at(i).len)) | - |
1616 | return !n; | - |
1617 | } | - |
1618 | return n; | - |
1619 | } | - |
1620 | - | |
1621 | - | |
1622 | void QRegExpCharClass::dump() const | - |
1623 | { | - |
1624 | int i; | - |
1625 | QMessageLogger(__FILE__, 24352441, __PRETTY_FUNCTION__).debug(" %stive character class", n ? "nega" : "posi"); | - |
1626 | - | |
1627 | if (c != 0) | - |
1628 | QMessageLogger(__FILE__, 24382444, __PRETTY_FUNCTION__).debug(" categories 0x%.8x", c); | - |
1629 | - | |
1630 | for (i = 0; i < r.size(); i++) | - |
1631 | QMessageLogger(__FILE__, 24412447, __PRETTY_FUNCTION__).debug(" 0x%.4x through 0x%.4x", r[i].from, r[i].from + r[i].len - 1); | - |
1632 | } | - |
1633 | - | |
1634 | - | |
1635 | - | |
1636 | QRegExpEngine::Box::Box(QRegExpEngine *engine) | - |
1637 | : eng(engine), skipanchors(0) | - |
1638 | - | |
1639 | , earlyStart(0), lateStart(0), maxl(0) | - |
1640 | - | |
1641 | { | - |
1642 | - | |
1643 | occ1.fill(NoOccurrence, NumBadChars); | - |
1644 | - | |
1645 | minl = 0; | - |
1646 | } | - |
1647 | - | |
1648 | QRegExpEngine::Box &QRegExpEngine::Box::operator=(const Box &b) | - |
1649 | { | - |
1650 | eng = b.eng; | - |
1651 | ls = b.ls; | - |
1652 | rs = b.rs; | - |
1653 | lanchors = b.lanchors; | - |
1654 | ranchors = b.ranchors; | - |
1655 | skipanchors = b.skipanchors; | - |
1656 | - | |
1657 | earlyStart = b.earlyStart; | - |
1658 | lateStart = b.lateStart; | - |
1659 | str = b.str; | - |
1660 | leftStr = b.leftStr; | - |
1661 | rightStr = b.rightStr; | - |
1662 | maxl = b.maxl; | - |
1663 | occ1 = b.occ1; | - |
1664 | - | |
1665 | minl = b.minl; | - |
1666 | return *this; | - |
1667 | } | - |
1668 | - | |
1669 | void QRegExpEngine::Box::set(QChar ch) | - |
1670 | { | - |
1671 | ls.resize(1); | - |
1672 | ls[0] = eng->createState(ch); | - |
1673 | rs = ls; | - |
1674 | - | |
1675 | str = ch; | - |
1676 | leftStr = ch; | - |
1677 | rightStr = ch; | - |
1678 | maxl = 1; | - |
1679 | occ1[((ch).unicode() % NumBadChars)] = 0; | - |
1680 | - | |
1681 | minl = 1; | - |
1682 | } | - |
1683 | - | |
1684 | void QRegExpEngine::Box::set(const QRegExpCharClass &cc) | - |
1685 | { | - |
1686 | ls.resize(1); | - |
1687 | ls[0] = eng->createState(cc); | - |
1688 | rs = ls; | - |
1689 | - | |
1690 | maxl = 1; | - |
1691 | occ1 = cc.firstOccurrence(); | - |
1692 | - | |
1693 | minl = 1; | - |
1694 | } | - |
1695 | - | |
1696 | - | |
1697 | void QRegExpEngine::Box::set(int bref) | - |
1698 | { | - |
1699 | ls.resize(1); | - |
1700 | ls[0] = eng->createState(bref); | - |
1701 | rs = ls; | - |
1702 | if (bref >= 1 && bref <= MaxBackRefs) | - |
1703 | skipanchors = Anchor_BackRef0Empty << bref; | - |
1704 | - | |
1705 | maxl = InftyLen; | - |
1706 | - | |
1707 | minl = 0; | - |
1708 | } | - |
1709 | - | |
1710 | - | |
1711 | void QRegExpEngine::Box::cat(const Box &b) | - |
1712 | { | - |
1713 | eng->addCatTransitions(rs, b.ls); | - |
1714 | addAnchorsToEngine(b); | - |
1715 | if (minl == 0) { | - |
1716 | lanchors.unite(b.lanchors); | - |
1717 | if (skipanchors != 0) { | - |
1718 | for (int i = 0; i < b.ls.size(); i++) { | - |
1719 | int a = eng->anchorConcatenation(lanchors.value(b.ls.at(i), 0), skipanchors); | - |
1720 | lanchors.insert(b.ls.at(i), a); | - |
1721 | } | - |
1722 | } | - |
1723 | mergeInto(&ls, b.ls); | - |
1724 | } | - |
1725 | if (b.minl == 0) { | - |
1726 | ranchors.unite(b.ranchors); | - |
1727 | if (b.skipanchors != 0) { | - |
1728 | for (int i = 0; i < rs.size(); i++) { | - |
1729 | int a = eng->anchorConcatenation(ranchors.value(rs.at(i), 0), b.skipanchors); | - |
1730 | ranchors.insert(rs.at(i), a); | - |
1731 | } | - |
1732 | } | - |
1733 | mergeInto(&rs, b.rs); | - |
1734 | } else { | - |
1735 | ranchors = b.ranchors; | - |
1736 | rs = b.rs; | - |
1737 | } | - |
1738 | - | |
1739 | - | |
1740 | if (maxl != InftyLen) { | - |
1741 | if (rightStr.length() + b.leftStr.length() > | - |
1742 | qMax(str.length(), b.str.length())) { | - |
1743 | earlyStart = minl - rightStr.length(); | - |
1744 | lateStart = maxl - rightStr.length(); | - |
1745 | str = rightStr + b.leftStr; | - |
1746 | } else if (b.str.length() > str.length()) { | - |
1747 | earlyStart = minl + b.earlyStart; | - |
1748 | lateStart = maxl + b.lateStart; | - |
1749 | str = b.str; | - |
1750 | } | - |
1751 | } | - |
1752 | - | |
1753 | if (leftStr.length() == maxl) | - |
1754 | leftStr += b.leftStr; | - |
1755 | - | |
1756 | if (b.rightStr.length() == b.maxl) { | - |
1757 | rightStr += b.rightStr; | - |
1758 | } else { | - |
1759 | rightStr = b.rightStr; | - |
1760 | } | - |
1761 | - | |
1762 | if (maxl == InftyLen || b.maxl == InftyLen) { | - |
1763 | maxl = InftyLen; | - |
1764 | } else { | - |
1765 | maxl += b.maxl; | - |
1766 | } | - |
1767 | - | |
1768 | for (int i = 0; i < NumBadChars; i++) { | - |
1769 | if (b.occ1.at(i) != NoOccurrence && minl + b.occ1.at(i) < occ1.at(i)) | - |
1770 | occ1[i] = minl + b.occ1.at(i); | - |
1771 | } | - |
1772 | - | |
1773 | - | |
1774 | minl += b.minl; | - |
1775 | if (minl == 0) | - |
1776 | skipanchors = eng->anchorConcatenation(skipanchors, b.skipanchors); | - |
1777 | else | - |
1778 | skipanchors = 0; | - |
1779 | } | - |
1780 | - | |
1781 | void QRegExpEngine::Box::orx(const Box &b) | - |
1782 | { | - |
1783 | mergeInto(&ls, b.ls); | - |
1784 | lanchors.unite(b.lanchors); | - |
1785 | mergeInto(&rs, b.rs); | - |
1786 | ranchors.unite(b.ranchors); | - |
1787 | - | |
1788 | if (b.minl == 0) { | - |
1789 | if (minl == 0) | - |
1790 | skipanchors = eng->anchorAlternation(skipanchors, b.skipanchors); | - |
1791 | else | - |
1792 | skipanchors = b.skipanchors; | - |
1793 | } | - |
1794 | - | |
1795 | - | |
1796 | for (int i = 0; i < NumBadChars; i++) { | - |
1797 | if (occ1.at(i) > b.occ1.at(i)) | - |
1798 | occ1[i] = b.occ1.at(i); | - |
1799 | } | - |
1800 | earlyStart = 0; | - |
1801 | lateStart = 0; | - |
1802 | str = QString(); | - |
1803 | leftStr = QString(); | - |
1804 | rightStr = QString(); | - |
1805 | if (b.maxl > maxl) | - |
1806 | maxl = b.maxl; | - |
1807 | - | |
1808 | if (b.minl < minl) | - |
1809 | minl = b.minl; | - |
1810 | } | - |
1811 | - | |
1812 | void QRegExpEngine::Box::plus(int atom) | - |
1813 | { | - |
1814 | - | |
1815 | eng->addPlusTransitions(rs, ls, atom); | - |
1816 | - | |
1817 | - | |
1818 | - | |
1819 | - | |
1820 | addAnchorsToEngine(*this); | - |
1821 | - | |
1822 | maxl = InftyLen; | - |
1823 | - | |
1824 | } | - |
1825 | - | |
1826 | void QRegExpEngine::Box::opt() | - |
1827 | { | - |
1828 | - | |
1829 | earlyStart = 0; | - |
1830 | lateStart = 0; | - |
1831 | str = QString(); | - |
1832 | leftStr = QString(); | - |
1833 | rightStr = QString(); | - |
1834 | - | |
1835 | skipanchors = 0; | - |
1836 | minl = 0; | - |
1837 | } | - |
1838 | - | |
1839 | void QRegExpEngine::Box::catAnchor(int a) | - |
1840 | { | - |
1841 | if (a != 0) { | - |
1842 | for (int i = 0; i < rs.size(); i++) { | - |
1843 | a = eng->anchorConcatenation(ranchors.value(rs.at(i), 0), a); | - |
1844 | ranchors.insert(rs.at(i), a); | - |
1845 | } | - |
1846 | if (minl == 0) | - |
1847 | skipanchors = eng->anchorConcatenation(skipanchors, a); | - |
1848 | } | - |
1849 | } | - |
1850 | - | |
1851 | - | |
1852 | void QRegExpEngine::Box::setupHeuristics() | - |
1853 | { | - |
1854 | eng->goodEarlyStart = earlyStart; | - |
1855 | eng->goodLateStart = lateStart; | - |
1856 | eng->goodStr = eng->cs ? str : str.toLower(); | - |
1857 | - | |
1858 | eng->minl = minl; | - |
1859 | if (eng->cs) { | - |
1860 | for (int i = 0; i < NumBadChars; i++) { | - |
1861 | if (occ1.at(i) != NoOccurrence && occ1.at(i) >= minl) | - |
1862 | occ1[i] = minl; | - |
1863 | } | - |
1864 | eng->occ1 = occ1; | - |
1865 | } else { | - |
1866 | eng->occ1.fill(0, NumBadChars); | - |
1867 | } | - |
1868 | - | |
1869 | eng->heuristicallyChooseHeuristic(); | - |
1870 | } | - |
1871 | - | |
1872 | - | |
1873 | - | |
1874 | void QRegExpEngine::Box::dump() const | - |
1875 | { | - |
1876 | int i; | - |
1877 | QMessageLogger(__FILE__, 26962702, __PRETTY_FUNCTION__).debug("Box of at least %d character%s", minl, minl == 1 ? "" : "s"); | - |
1878 | QMessageLogger(__FILE__, 26972703, __PRETTY_FUNCTION__).debug(" Left states:"); | - |
1879 | for (i = 0; i < ls.size(); i++) { | - |
1880 | if (lanchors.value(ls[i], 0) == 0) | - |
1881 | QMessageLogger(__FILE__, 27002706, __PRETTY_FUNCTION__).debug(" %d", ls[i]); | - |
1882 | else | - |
1883 | QMessageLogger(__FILE__, 27022708, __PRETTY_FUNCTION__).debug(" %d [anchors 0x%.8x]", ls[i], lanchors[ls[i]]); | - |
1884 | } | - |
1885 | QMessageLogger(__FILE__, 27042710, __PRETTY_FUNCTION__).debug(" Right states:"); | - |
1886 | for (i = 0; i < rs.size(); i++) { | - |
1887 | if (ranchors.value(rs[i], 0) == 0) | - |
1888 | QMessageLogger(__FILE__, 27072713, __PRETTY_FUNCTION__).debug(" %d", rs[i]); | - |
1889 | else | - |
1890 | QMessageLogger(__FILE__, 27092715, __PRETTY_FUNCTION__).debug(" %d [anchors 0x%.8x]", rs[i], ranchors[rs[i]]); | - |
1891 | } | - |
1892 | QMessageLogger(__FILE__, 27112717, __PRETTY_FUNCTION__).debug(" Skip anchors: 0x%.8x", skipanchors); | - |
1893 | } | - |
1894 | - | |
1895 | - | |
1896 | void QRegExpEngine::Box::addAnchorsToEngine(const Box &to) const | - |
1897 | { | - |
1898 | for (int i = 0; i < to.ls.size(); i++) { | - |
1899 | for (int j = 0; j < rs.size(); j++) { | - |
1900 | int a = eng->anchorConcatenation(ranchors.value(rs.at(j), 0), | - |
1901 | to.lanchors.value(to.ls.at(i), 0)); | - |
1902 | eng->addAnchors(rs[j], to.ls[i], a); | - |
1903 | } | - |
1904 | } | - |
1905 | } | - |
1906 | - | |
1907 | - | |
1908 | - | |
1909 | - | |
1910 | static const struct CategoriesRangeMapEntry { | - |
1911 | const char name[40]; | - |
1912 | uint first, second; | - |
1913 | } categoriesRangeMap[] = { | - |
1914 | { "AegeanNumbers", 0x10100, 0x1013F }, | - |
1915 | { "AlphabeticPresentationForms", 0xFB00, 0xFB4F }, | - |
1916 | { "AncientGreekMusicalNotation", 0x1D200, 0x1D24F }, | - |
1917 | { "AncientGreekNumbers", 0x10140, 0x1018F }, | - |
1918 | { "Arabic", 0x0600, 0x06FF }, | - |
1919 | { "ArabicPresentationForms-A", 0xFB50, 0xFDFF }, | - |
1920 | { "ArabicPresentationForms-B", 0xFE70, 0xFEFF }, | - |
1921 | { "ArabicSupplement", 0x0750, 0x077F }, | - |
1922 | { "Armenian", 0x0530, 0x058F }, | - |
1923 | { "Arrows", 0x2190, 0x21FF }, | - |
1924 | { "BasicLatin", 0x0000, 0x007F }, | - |
1925 | { "Bengali", 0x0980, 0x09FF }, | - |
1926 | { "BlockElements", 0x2580, 0x259F }, | - |
1927 | { "Bopomofo", 0x3100, 0x312F }, | - |
1928 | { "BopomofoExtended", 0x31A0, 0x31BF }, | - |
1929 | { "BoxDrawing", 0x2500, 0x257F }, | - |
1930 | { "BraillePatterns", 0x2800, 0x28FF }, | - |
1931 | { "Buginese", 0x1A00, 0x1A1F }, | - |
1932 | { "Buhid", 0x1740, 0x175F }, | - |
1933 | { "ByzantineMusicalSymbols", 0x1D000, 0x1D0FF }, | - |
1934 | { "CJKCompatibility", 0x3300, 0x33FF }, | - |
1935 | { "CJKCompatibilityForms", 0xFE30, 0xFE4F }, | - |
1936 | { "CJKCompatibilityIdeographs", 0xF900, 0xFAFF }, | - |
1937 | { "CJKCompatibilityIdeographsSupplement", 0x2F800, 0x2FA1F }, | - |
1938 | { "CJKRadicalsSupplement", 0x2E80, 0x2EFF }, | - |
1939 | { "CJKStrokes", 0x31C0, 0x31EF }, | - |
1940 | { "CJKSymbolsandPunctuation", 0x3000, 0x303F }, | - |
1941 | { "CJKUnifiedIdeographs", 0x4E00, 0x9FFF }, | - |
1942 | { "CJKUnifiedIdeographsExtensionA", 0x3400, 0x4DB5 }, | - |
1943 | { "CJKUnifiedIdeographsExtensionB", 0x20000, 0x2A6DF }, | - |
1944 | { "Cherokee", 0x13A0, 0x13FF }, | - |
1945 | { "CombiningDiacriticalMarks", 0x0300, 0x036F }, | - |
1946 | { "CombiningDiacriticalMarksSupplement", 0x1DC0, 0x1DFF }, | - |
1947 | { "CombiningHalfMarks", 0xFE20, 0xFE2F }, | - |
1948 | { "CombiningMarksforSymbols", 0x20D0, 0x20FF }, | - |
1949 | { "ControlPictures", 0x2400, 0x243F }, | - |
1950 | { "Coptic", 0x2C80, 0x2CFF }, | - |
1951 | { "CurrencySymbols", 0x20A0, 0x20CF }, | - |
1952 | { "CypriotSyllabary", 0x10800, 0x1083F }, | - |
1953 | { "Cyrillic", 0x0400, 0x04FF }, | - |
1954 | { "CyrillicSupplement", 0x0500, 0x052F }, | - |
1955 | { "Deseret", 0x10400, 0x1044F }, | - |
1956 | { "Devanagari", 0x0900, 0x097F }, | - |
1957 | { "Dingbats", 0x2700, 0x27BF }, | - |
1958 | { "EnclosedAlphanumerics", 0x2460, 0x24FF }, | - |
1959 | { "EnclosedCJKLettersandMonths", 0x3200, 0x32FF }, | - |
1960 | { "Ethiopic", 0x1200, 0x137F }, | - |
1961 | { "EthiopicExtended", 0x2D80, 0x2DDF }, | - |
1962 | { "EthiopicSupplement", 0x1380, 0x139F }, | - |
1963 | { "GeneralPunctuation", 0x2000, 0x206F }, | - |
1964 | { "GeometricShapes", 0x25A0, 0x25FF }, | - |
1965 | { "Georgian", 0x10A0, 0x10FF }, | - |
1966 | { "GeorgianSupplement", 0x2D00, 0x2D2F }, | - |
1967 | { "Glagolitic", 0x2C00, 0x2C5F }, | - |
1968 | { "Gothic", 0x10330, 0x1034F }, | - |
1969 | { "Greek", 0x0370, 0x03FF }, | - |
1970 | { "GreekExtended", 0x1F00, 0x1FFF }, | - |
1971 | { "Gujarati", 0x0A80, 0x0AFF }, | - |
1972 | { "Gurmukhi", 0x0A00, 0x0A7F }, | - |
1973 | { "HalfwidthandFullwidthForms", 0xFF00, 0xFFEF }, | - |
1974 | { "HangulCompatibilityJamo", 0x3130, 0x318F }, | - |
1975 | { "HangulJamo", 0x1100, 0x11FF }, | - |
1976 | { "HangulSyllables", 0xAC00, 0xD7A3 }, | - |
1977 | { "Hanunoo", 0x1720, 0x173F }, | - |
1978 | { "Hebrew", 0x0590, 0x05FF }, | - |
1979 | { "Hiragana", 0x3040, 0x309F }, | - |
1980 | { "IPAExtensions", 0x0250, 0x02AF }, | - |
1981 | { "IdeographicDescriptionCharacters", 0x2FF0, 0x2FFF }, | - |
1982 | { "Kanbun", 0x3190, 0x319F }, | - |
1983 | { "KangxiRadicals", 0x2F00, 0x2FDF }, | - |
1984 | { "Kannada", 0x0C80, 0x0CFF }, | - |
1985 | { "Katakana", 0x30A0, 0x30FF }, | - |
1986 | { "KatakanaPhoneticExtensions", 0x31F0, 0x31FF }, | - |
1987 | { "Kharoshthi", 0x10A00, 0x10A5F }, | - |
1988 | { "Khmer", 0x1780, 0x17FF }, | - |
1989 | { "KhmerSymbols", 0x19E0, 0x19FF }, | - |
1990 | { "Lao", 0x0E80, 0x0EFF }, | - |
1991 | { "Latin-1Supplement", 0x0080, 0x00FF }, | - |
1992 | { "LatinExtended-A", 0x0100, 0x017F }, | - |
1993 | { "LatinExtended-B", 0x0180, 0x024F }, | - |
1994 | { "LatinExtendedAdditional", 0x1E00, 0x1EFF }, | - |
1995 | { "LetterlikeSymbols", 0x2100, 0x214F }, | - |
1996 | { "Limbu", 0x1900, 0x194F }, | - |
1997 | { "LinearBIdeograms", 0x10080, 0x100FF }, | - |
1998 | { "LinearBSyllabary", 0x10000, 0x1007F }, | - |
1999 | { "Malayalam", 0x0D00, 0x0D7F }, | - |
2000 | { "MathematicalAlphanumericSymbols", 0x1D400, 0x1D7FF }, | - |
2001 | { "MathematicalOperators", 0x2200, 0x22FF }, | - |
2002 | { "MiscellaneousMathematicalSymbols-A", 0x27C0, 0x27EF }, | - |
2003 | { "MiscellaneousMathematicalSymbols-B", 0x2980, 0x29FF }, | - |
2004 | { "MiscellaneousSymbols", 0x2600, 0x26FF }, | - |
2005 | { "MiscellaneousSymbolsandArrows", 0x2B00, 0x2BFF }, | - |
2006 | { "MiscellaneousTechnical", 0x2300, 0x23FF }, | - |
2007 | { "ModifierToneLetters", 0xA700, 0xA71F }, | - |
2008 | { "Mongolian", 0x1800, 0x18AF }, | - |
2009 | { "MusicalSymbols", 0x1D100, 0x1D1FF }, | - |
2010 | { "Myanmar", 0x1000, 0x109F }, | - |
2011 | { "NewTaiLue", 0x1980, 0x19DF }, | - |
2012 | { "NumberForms", 0x2150, 0x218F }, | - |
2013 | { "Ogham", 0x1680, 0x169F }, | - |
2014 | { "OldItalic", 0x10300, 0x1032F }, | - |
2015 | { "OldPersian", 0x103A0, 0x103DF }, | - |
2016 | { "OpticalCharacterRecognition", 0x2440, 0x245F }, | - |
2017 | { "Oriya", 0x0B00, 0x0B7F }, | - |
2018 | { "Osmanya", 0x10480, 0x104AF }, | - |
2019 | { "PhoneticExtensions", 0x1D00, 0x1D7F }, | - |
2020 | { "PhoneticExtensionsSupplement", 0x1D80, 0x1DBF }, | - |
2021 | { "PrivateUse", 0xE000, 0xF8FF }, | - |
2022 | { "Runic", 0x16A0, 0x16FF }, | - |
2023 | { "Shavian", 0x10450, 0x1047F }, | - |
2024 | { "Sinhala", 0x0D80, 0x0DFF }, | - |
2025 | { "SmallFormVariants", 0xFE50, 0xFE6F }, | - |
2026 | { "SpacingModifierLetters", 0x02B0, 0x02FF }, | - |
2027 | { "Specials", 0xFFF0, 0xFFFF }, | - |
2028 | { "SuperscriptsandSubscripts", 0x2070, 0x209F }, | - |
2029 | { "SupplementalArrows-A", 0x27F0, 0x27FF }, | - |
2030 | { "SupplementalArrows-B", 0x2900, 0x297F }, | - |
2031 | { "SupplementalMathematicalOperators", 0x2A00, 0x2AFF }, | - |
2032 | { "SupplementalPunctuation", 0x2E00, 0x2E7F }, | - |
2033 | { "SupplementaryPrivateUseArea-A", 0xF0000, 0xFFFFF }, | - |
2034 | { "SupplementaryPrivateUseArea-B", 0x100000, 0x10FFFF }, | - |
2035 | { "SylotiNagri", 0xA800, 0xA82F }, | - |
2036 | { "Syriac", 0x0700, 0x074F }, | - |
2037 | { "Tagalog", 0x1700, 0x171F }, | - |
2038 | { "Tagbanwa", 0x1760, 0x177F }, | - |
2039 | { "Tags", 0xE0000, 0xE007F }, | - |
2040 | { "TaiLe", 0x1950, 0x197F }, | - |
2041 | { "TaiXuanJingSymbols", 0x1D300, 0x1D35F }, | - |
2042 | { "Tamil", 0x0B80, 0x0BFF }, | - |
2043 | { "Telugu", 0x0C00, 0x0C7F }, | - |
2044 | { "Thaana", 0x0780, 0x07BF }, | - |
2045 | { "Thai", 0x0E00, 0x0E7F }, | - |
2046 | { "Tibetan", 0x0F00, 0x0FFF }, | - |
2047 | { "Tifinagh", 0x2D30, 0x2D7F }, | - |
2048 | { "Ugaritic", 0x10380, 0x1039F }, | - |
2049 | { "UnifiedCanadianAboriginalSyllabics", 0x1400, 0x167F }, | - |
2050 | { "VariationSelectors", 0xFE00, 0xFE0F }, | - |
2051 | { "VariationSelectorsSupplement", 0xE0100, 0xE01EF }, | - |
2052 | { "VerticalForms", 0xFE10, 0xFE1F }, | - |
2053 | { "YiRadicals", 0xA490, 0xA4CF }, | - |
2054 | { "YiSyllables", 0xA000, 0xA48F }, | - |
2055 | { "YijingHexagramSymbols", 0x4DC0, 0x4DFF } | - |
2056 | }; | - |
2057 | - | |
2058 | inline bool operator<(const CategoriesRangeMapEntry &entry1, const CategoriesRangeMapEntry &entry2) | - |
2059 | { return qstrcmp(entry1.name, entry2.name) < 0; } | - |
2060 | inline bool operator<(const char *name, const CategoriesRangeMapEntry &entry) | - |
2061 | { return qstrcmp(name, entry.name) < 0; } | - |
2062 | inline bool operator<(const CategoriesRangeMapEntry &entry, const char *name) | - |
2063 | { return qstrcmp(entry.name, name) < 0; } | - |
2064 | - | |
2065 | - | |
2066 | int QRegExpEngine::getChar() | - |
2067 | { | - |
2068 | return (yyPos == yyLen) ? EOS : yyIn[yyPos++].unicode(); | - |
2069 | } | - |
2070 | - | |
2071 | int QRegExpEngine::getEscape() | - |
2072 | { | - |
2073 | - | |
2074 | const char tab[] = "afnrtv"; | - |
2075 | const char backTab[] = "\a\f\n\r\t\v"; | - |
2076 | ushort low; | - |
2077 | int i; | - |
2078 | - | |
2079 | ushort val; | - |
2080 | int prevCh = yyCh; | - |
2081 | - | |
2082 | if (prevCh == EOS) { | - |
2083 | error("unexpected end"); | - |
2084 | return Tok_Char | '\\'; | - |
2085 | } | - |
2086 | yyCh = getChar(); | - |
2087 | - | |
2088 | if ((prevCh & ~0xff) == 0) { | - |
2089 | const char *p = strchr(tab, prevCh); | - |
2090 | if (p != 0) | - |
2091 | return Tok_Char | backTab[p - tab]; | - |
2092 | } | - |
2093 | - | |
2094 | - | |
2095 | switch (prevCh) { | - |
2096 | - | |
2097 | case '0': | - |
2098 | val = 0; | - |
2099 | for (i = 0; i < 3; i++) { | - |
2100 | if (yyCh >= '0' && yyCh <= '7') | - |
2101 | val = (val << 3) | (yyCh - '0'); | - |
2102 | else | - |
2103 | break; | - |
2104 | yyCh = getChar(); | - |
2105 | } | - |
2106 | if ((val & ~0377) != 0) | - |
2107 | error("invalid octal value"); | - |
2108 | return Tok_Char | val; | - |
2109 | - | |
2110 | - | |
2111 | case 'B': | - |
2112 | return Tok_NonWord; | - |
2113 | - | |
2114 | - | |
2115 | case 'D': | - |
2116 | - | |
2117 | yyCharClass->addCategories(uint(-1) ^ (1 << (QChar::Number_DecimalDigit))); | - |
2118 | return Tok_CharClass; | - |
2119 | case 'S': | - |
2120 | - | |
2121 | yyCharClass->addCategories(uint(-1) ^ ((1 << (QChar::Separator_Space)) | | - |
2122 | (1 << (QChar::Separator_Line)) | | - |
2123 | (1 << (QChar::Separator_Paragraph)) | | - |
2124 | (1 << (QChar::Other_Control)))); | - |
2125 | yyCharClass->addRange(0x0000, 0x0008); | - |
2126 | yyCharClass->addRange(0x000e, 0x001f); | - |
2127 | yyCharClass->addRange(0x007f, 0x0084); | - |
2128 | yyCharClass->addRange(0x0086, 0x009f); | - |
2129 | return Tok_CharClass; | - |
2130 | case 'W': | - |
2131 | - | |
2132 | yyCharClass->addCategories(uint(-1) ^ ((1 << (QChar::Mark_NonSpacing)) | | - |
2133 | (1 << (QChar::Mark_SpacingCombining)) | | - |
2134 | (1 << (QChar::Mark_Enclosing)) | | - |
2135 | (1 << (QChar::Number_DecimalDigit)) | | - |
2136 | (1 << (QChar::Number_Letter)) | | - |
2137 | (1 << (QChar::Number_Other)) | | - |
2138 | (1 << (QChar::Letter_Uppercase)) | | - |
2139 | (1 << (QChar::Letter_Lowercase)) | | - |
2140 | (1 << (QChar::Letter_Titlecase)) | | - |
2141 | (1 << (QChar::Letter_Modifier)) | | - |
2142 | (1 << (QChar::Letter_Other)) | | - |
2143 | (1 << (QChar::Punctuation_Connector)))); | - |
2144 | yyCharClass->addRange(0x203f, 0x2040); | - |
2145 | yyCharClass->addSingleton(0x2040); | - |
2146 | yyCharClass->addSingleton(0x2054); | - |
2147 | yyCharClass->addSingleton(0x30fb); | - |
2148 | yyCharClass->addRange(0xfe33, 0xfe34); | - |
2149 | yyCharClass->addRange(0xfe4d, 0xfe4f); | - |
2150 | yyCharClass->addSingleton(0xff3f); | - |
2151 | yyCharClass->addSingleton(0xff65); | - |
2152 | return Tok_CharClass; | - |
2153 | - | |
2154 | - | |
2155 | case 'b': | - |
2156 | return Tok_Word; | - |
2157 | - | |
2158 | - | |
2159 | case 'd': | - |
2160 | - | |
2161 | yyCharClass->addCategories((1 << (QChar::Number_DecimalDigit))); | - |
2162 | return Tok_CharClass; | - |
2163 | case 's': | - |
2164 | - | |
2165 | yyCharClass->addCategories((1 << (QChar::Separator_Space)) | | - |
2166 | (1 << (QChar::Separator_Line)) | | - |
2167 | (1 << (QChar::Separator_Paragraph))); | - |
2168 | yyCharClass->addRange(0x0009, 0x000d); | - |
2169 | yyCharClass->addSingleton(0x0085); | - |
2170 | return Tok_CharClass; | - |
2171 | case 'w': | - |
2172 | - | |
2173 | yyCharClass->addCategories((1 << (QChar::Mark_NonSpacing)) | | - |
2174 | (1 << (QChar::Mark_SpacingCombining)) | | - |
2175 | (1 << (QChar::Mark_Enclosing)) | | - |
2176 | (1 << (QChar::Number_DecimalDigit)) | | - |
2177 | (1 << (QChar::Number_Letter)) | | - |
2178 | (1 << (QChar::Number_Other)) | | - |
2179 | (1 << (QChar::Letter_Uppercase)) | | - |
2180 | (1 << (QChar::Letter_Lowercase)) | | - |
2181 | (1 << (QChar::Letter_Titlecase)) | | - |
2182 | (1 << (QChar::Letter_Modifier)) | | - |
2183 | (1 << (QChar::Letter_Other))); | - |
2184 | yyCharClass->addSingleton(0x005f); | - |
2185 | return Tok_CharClass; | - |
2186 | case 'I': | - |
2187 | if (xmlSchemaExtensions) { | - |
2188 | yyCharClass->setNegative(!yyCharClass->negative()); | - |
2189 | - | |
2190 | } else { | - |
2191 | break; | - |
2192 | } | - |
2193 | case 'i': | - |
2194 | if (xmlSchemaExtensions) { | - |
2195 | yyCharClass->addCategories((1 << (QChar::Mark_NonSpacing)) | | - |
2196 | (1 << (QChar::Mark_SpacingCombining)) | | - |
2197 | (1 << (QChar::Mark_Enclosing)) | | - |
2198 | (1 << (QChar::Number_DecimalDigit)) | | - |
2199 | (1 << (QChar::Number_Letter)) | | - |
2200 | (1 << (QChar::Number_Other)) | | - |
2201 | (1 << (QChar::Letter_Uppercase)) | | - |
2202 | (1 << (QChar::Letter_Lowercase)) | | - |
2203 | (1 << (QChar::Letter_Titlecase)) | | - |
2204 | (1 << (QChar::Letter_Modifier)) | | - |
2205 | (1 << (QChar::Letter_Other))); | - |
2206 | yyCharClass->addSingleton(0x003a); | - |
2207 | yyCharClass->addSingleton(0x005f); | - |
2208 | yyCharClass->addRange(0x0041, 0x005a); | - |
2209 | yyCharClass->addRange(0x0061, 0x007a); | - |
2210 | yyCharClass->addRange(0xc0, 0xd6); | - |
2211 | yyCharClass->addRange(0xd8, 0xf6); | - |
2212 | yyCharClass->addRange(0xf8, 0x2ff); | - |
2213 | yyCharClass->addRange(0x370, 0x37d); | - |
2214 | yyCharClass->addRange(0x37f, 0x1fff); | - |
2215 | yyCharClass->addRange(0x200c, 0x200d); | - |
2216 | yyCharClass->addRange(0x2070, 0x218f); | - |
2217 | yyCharClass->addRange(0x2c00, 0x2fef); | - |
2218 | yyCharClass->addRange(0x3001, 0xd7ff); | - |
2219 | yyCharClass->addRange(0xf900, 0xfdcf); | - |
2220 | yyCharClass->addRange(0xfdf0, 0xfffd); | - |
2221 | yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff); | - |
2222 | return Tok_CharClass; | - |
2223 | } else { | - |
2224 | break; | - |
2225 | } | - |
2226 | case 'C': | - |
2227 | if (xmlSchemaExtensions) { | - |
2228 | yyCharClass->setNegative(!yyCharClass->negative()); | - |
2229 | - | |
2230 | } else { | - |
2231 | break; | - |
2232 | } | - |
2233 | case 'c': | - |
2234 | if (xmlSchemaExtensions) { | - |
2235 | yyCharClass->addCategories((1 << (QChar::Mark_NonSpacing)) | | - |
2236 | (1 << (QChar::Mark_SpacingCombining)) | | - |
2237 | (1 << (QChar::Mark_Enclosing)) | | - |
2238 | (1 << (QChar::Number_DecimalDigit)) | | - |
2239 | (1 << (QChar::Number_Letter)) | | - |
2240 | (1 << (QChar::Number_Other)) | | - |
2241 | (1 << (QChar::Letter_Uppercase)) | | - |
2242 | (1 << (QChar::Letter_Lowercase)) | | - |
2243 | (1 << (QChar::Letter_Titlecase)) | | - |
2244 | (1 << (QChar::Letter_Modifier)) | | - |
2245 | (1 << (QChar::Letter_Other))); | - |
2246 | yyCharClass->addSingleton(0x002d); | - |
2247 | yyCharClass->addSingleton(0x002e); | - |
2248 | yyCharClass->addSingleton(0x003a); | - |
2249 | yyCharClass->addSingleton(0x005f); | - |
2250 | yyCharClass->addSingleton(0xb7); | - |
2251 | yyCharClass->addRange(0x0030, 0x0039); | - |
2252 | yyCharClass->addRange(0x0041, 0x005a); | - |
2253 | yyCharClass->addRange(0x0061, 0x007a); | - |
2254 | yyCharClass->addRange(0xc0, 0xd6); | - |
2255 | yyCharClass->addRange(0xd8, 0xf6); | - |
2256 | yyCharClass->addRange(0xf8, 0x2ff); | - |
2257 | yyCharClass->addRange(0x370, 0x37d); | - |
2258 | yyCharClass->addRange(0x37f, 0x1fff); | - |
2259 | yyCharClass->addRange(0x200c, 0x200d); | - |
2260 | yyCharClass->addRange(0x2070, 0x218f); | - |
2261 | yyCharClass->addRange(0x2c00, 0x2fef); | - |
2262 | yyCharClass->addRange(0x3001, 0xd7ff); | - |
2263 | yyCharClass->addRange(0xf900, 0xfdcf); | - |
2264 | yyCharClass->addRange(0xfdf0, 0xfffd); | - |
2265 | yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff); | - |
2266 | yyCharClass->addRange(0x0300, 0x036f); | - |
2267 | yyCharClass->addRange(0x203f, 0x2040); | - |
2268 | return Tok_CharClass; | - |
2269 | } else { | - |
2270 | break; | - |
2271 | } | - |
2272 | case 'P': | - |
2273 | if (xmlSchemaExtensions) { | - |
2274 | yyCharClass->setNegative(!yyCharClass->negative()); | - |
2275 | - | |
2276 | } else { | - |
2277 | break; | - |
2278 | } | - |
2279 | case 'p': | - |
2280 | if (xmlSchemaExtensions) { | - |
2281 | if (yyCh != '{') { | - |
2282 | error("bad char class syntax"); | - |
2283 | return Tok_CharClass; | - |
2284 | } | - |
2285 | - | |
2286 | QByteArray category; | - |
2287 | yyCh = getChar(); | - |
2288 | while (yyCh != '}') { | - |
2289 | if (yyCh == EOS) { | - |
2290 | error("unexpected end"); | - |
2291 | return Tok_CharClass; | - |
2292 | } | - |
2293 | category.append(yyCh); | - |
2294 | yyCh = getChar(); | - |
2295 | } | - |
2296 | yyCh = getChar(); | - |
2297 | - | |
2298 | int catlen = category.length(); | - |
2299 | if (catlen == 1 || catlen == 2) { | - |
2300 | switch (category.at(0)) { | - |
2301 | case 'M': | - |
2302 | if (catlen == 1) { | - |
2303 | yyCharClass->addCategories((1 << (QChar::Mark_NonSpacing)) | | - |
2304 | (1 << (QChar::Mark_SpacingCombining)) | | - |
2305 | (1 << (QChar::Mark_Enclosing))); | - |
2306 | } else { | - |
2307 | switch (category.at(1)) { | - |
2308 | case 'n': yyCharClass->addCategories((1 << (QChar::Mark_NonSpacing))); break; | - |
2309 | case 'c': yyCharClass->addCategories((1 << (QChar::Mark_SpacingCombining))); break; | - |
2310 | case 'e': yyCharClass->addCategories((1 << (QChar::Mark_Enclosing))); break; | - |
2311 | default: error("invalid category"); break; | - |
2312 | } | - |
2313 | } | - |
2314 | break; | - |
2315 | case 'N': | - |
2316 | if (catlen == 1) { | - |
2317 | yyCharClass->addCategories((1 << (QChar::Number_DecimalDigit)) | | - |
2318 | (1 << (QChar::Number_Letter)) | | - |
2319 | (1 << (QChar::Number_Other))); | - |
2320 | } else { | - |
2321 | switch (category.at(1)) { | - |
2322 | case 'd': yyCharClass->addCategories((1 << (QChar::Number_DecimalDigit))); break; | - |
2323 | case 'l': yyCharClass->addCategories((1 << (QChar::Number_Letter))); break; | - |
2324 | case 'o': yyCharClass->addCategories((1 << (QChar::Number_Other))); break; | - |
2325 | default: error("invalid category"); break; | - |
2326 | } | - |
2327 | } | - |
2328 | break; | - |
2329 | case 'Z': | - |
2330 | if (catlen == 1) { | - |
2331 | yyCharClass->addCategories((1 << (QChar::Separator_Space)) | | - |
2332 | (1 << (QChar::Separator_Line)) | | - |
2333 | (1 << (QChar::Separator_Paragraph))); | - |
2334 | } else { | - |
2335 | switch (category.at(1)) { | - |
2336 | case 's': yyCharClass->addCategories((1 << (QChar::Separator_Space))); break; | - |
2337 | case 'l': yyCharClass->addCategories((1 << (QChar::Separator_Line))); break; | - |
2338 | case 'p': yyCharClass->addCategories((1 << (QChar::Separator_Paragraph))); break; | - |
2339 | default: error("invalid category"); break; | - |
2340 | } | - |
2341 | } | - |
2342 | break; | - |
2343 | case 'C': | - |
2344 | if (catlen == 1) { | - |
2345 | yyCharClass->addCategories((1 << (QChar::Other_Control)) | | - |
2346 | (1 << (QChar::Other_Format)) | | - |
2347 | (1 << (QChar::Other_Surrogate)) | | - |
2348 | (1 << (QChar::Other_PrivateUse)) | | - |
2349 | (1 << (QChar::Other_NotAssigned))); | - |
2350 | } else { | - |
2351 | switch (category.at(1)) { | - |
2352 | case 'c': yyCharClass->addCategories((1 << (QChar::Other_Control))); break; | - |
2353 | case 'f': yyCharClass->addCategories((1 << (QChar::Other_Format))); break; | - |
2354 | case 's': yyCharClass->addCategories((1 << (QChar::Other_Surrogate))); break; | - |
2355 | case 'o': yyCharClass->addCategories((1 << (QChar::Other_PrivateUse))); break; | - |
2356 | case 'n': yyCharClass->addCategories((1 << (QChar::Other_NotAssigned))); break; | - |
2357 | default: error("invalid category"); break; | - |
2358 | } | - |
2359 | } | - |
2360 | break; | - |
2361 | case 'L': | - |
2362 | if (catlen == 1) { | - |
2363 | yyCharClass->addCategories((1 << (QChar::Letter_Uppercase)) | | - |
2364 | (1 << (QChar::Letter_Lowercase)) | | - |
2365 | (1 << (QChar::Letter_Titlecase)) | | - |
2366 | (1 << (QChar::Letter_Modifier)) | | - |
2367 | (1 << (QChar::Letter_Other))); | - |
2368 | } else { | - |
2369 | switch (category.at(1)) { | - |
2370 | case 'u': yyCharClass->addCategories((1 << (QChar::Letter_Uppercase))); break; | - |
2371 | case 'l': yyCharClass->addCategories((1 << (QChar::Letter_Lowercase))); break; | - |
2372 | case 't': yyCharClass->addCategories((1 << (QChar::Letter_Titlecase))); break; | - |
2373 | case 'm': yyCharClass->addCategories((1 << (QChar::Letter_Modifier))); break; | - |
2374 | case 'o': yyCharClass->addCategories((1 << (QChar::Letter_Other))); break; | - |
2375 | default: error("invalid category"); break; | - |
2376 | } | - |
2377 | } | - |
2378 | break; | - |
2379 | case 'P': | - |
2380 | if (catlen == 1) { | - |
2381 | yyCharClass->addCategories((1 << (QChar::Punctuation_Connector)) | | - |
2382 | (1 << (QChar::Punctuation_Dash)) | | - |
2383 | (1 << (QChar::Punctuation_Open)) | | - |
2384 | (1 << (QChar::Punctuation_Close)) | | - |
2385 | (1 << (QChar::Punctuation_InitialQuote)) | | - |
2386 | (1 << (QChar::Punctuation_FinalQuote)) | | - |
2387 | (1 << (QChar::Punctuation_Other))); | - |
2388 | } else { | - |
2389 | switch (category.at(1)) { | - |
2390 | case 'c': yyCharClass->addCategories((1 << (QChar::Punctuation_Connector))); break; | - |
2391 | case 'd': yyCharClass->addCategories((1 << (QChar::Punctuation_Dash))); break; | - |
2392 | case 's': yyCharClass->addCategories((1 << (QChar::Punctuation_Open))); break; | - |
2393 | case 'e': yyCharClass->addCategories((1 << (QChar::Punctuation_Close))); break; | - |
2394 | case 'i': yyCharClass->addCategories((1 << (QChar::Punctuation_InitialQuote))); break; | - |
2395 | case 'f': yyCharClass->addCategories((1 << (QChar::Punctuation_FinalQuote))); break; | - |
2396 | case 'o': yyCharClass->addCategories((1 << (QChar::Punctuation_Other))); break; | - |
2397 | default: error("invalid category"); break; | - |
2398 | } | - |
2399 | } | - |
2400 | break; | - |
2401 | case 'S': | - |
2402 | if (catlen == 1) { | - |
2403 | yyCharClass->addCategories((1 << (QChar::Symbol_Math)) | | - |
2404 | (1 << (QChar::Symbol_Currency)) | | - |
2405 | (1 << (QChar::Symbol_Modifier)) | | - |
2406 | (1 << (QChar::Symbol_Other))); | - |
2407 | } else { | - |
2408 | switch (category.at(1)) { | - |
2409 | case 'm': yyCharClass->addCategories((1 << (QChar::Symbol_Math))); break; | - |
2410 | case 'c': yyCharClass->addCategories((1 << (QChar::Symbol_Currency))); break; | - |
2411 | case 'k': yyCharClass->addCategories((1 << (QChar::Symbol_Modifier))); break; | - |
2412 | case 'o': yyCharClass->addCategories((1 << (QChar::Symbol_Other))); break; | - |
2413 | default: error("invalid category"); break; | - |
2414 | } | - |
2415 | } | - |
2416 | break; | - |
2417 | default: | - |
2418 | error("invalid category"); | - |
2419 | break; | - |
2420 | } | - |
2421 | } else if (catlen > 2 && category.at(0) == 'I' && category.at(1) == 's') { | - |
2422 | static const int N = sizeof(categoriesRangeMap) / sizeof(categoriesRangeMap[0]); | - |
2423 | const char * const categoryFamily = category.constData() + 2; | - |
2424 | const CategoriesRangeMapEntry *r = std::lower_bound(categoriesRangeMap, categoriesRangeMap + N, categoryFamily); | - |
2425 | if (r != categoriesRangeMap + N && qstrcmp(r->name, categoryFamily) == 0) | - |
2426 | yyCharClass->addRange(r->first, r->second); | - |
2427 | else | - |
2428 | error("invalid category"); | - |
2429 | } else { | - |
2430 | error("invalid category"); | - |
2431 | } | - |
2432 | return Tok_CharClass; | - |
2433 | } else { | - |
2434 | break; | - |
2435 | } | - |
2436 | - | |
2437 | - | |
2438 | case 'x': | - |
2439 | val = 0; | - |
2440 | for (i = 0; i < 4; i++) { | - |
2441 | low = QChar(yyCh).toLower().unicode(); | - |
2442 | if (low >= '0' && low <= '9') | - |
2443 | val = (val << 4) | (low - '0'); | - |
2444 | else if (low >= 'a' && low <= 'f') | - |
2445 | val = (val << 4) | (low - 'a' + 10); | - |
2446 | else | - |
2447 | break; | - |
2448 | yyCh = getChar(); | - |
2449 | } | - |
2450 | return Tok_Char | val; | - |
2451 | - | |
2452 | default: | - |
2453 | break; | - |
2454 | } | - |
2455 | if (prevCh >= '1' && prevCh <= '9') { | - |
2456 | - | |
2457 | val = prevCh - '0'; | - |
2458 | while (yyCh >= '0' && yyCh <= '9') { | - |
2459 | val = (val * 10) + (yyCh - '0'); | - |
2460 | yyCh = getChar(); | - |
2461 | } | - |
2462 | return Tok_BackRef | val; | - |
2463 | - | |
2464 | - | |
2465 | - | |
2466 | } | - |
2467 | return Tok_Char | prevCh; | - |
2468 | } | - |
2469 | - | |
2470 | - | |
2471 | int QRegExpEngine::getRep(int def) | - |
2472 | { | - |
2473 | if (yyCh >= '0' && yyCh <= '9') { | - |
2474 | int rep = 0; | - |
2475 | do { | - |
2476 | rep = 10 * rep + yyCh - '0'; | - |
2477 | if (rep >= InftyRep) { | - |
2478 | error("bad repetition syntax"); | - |
2479 | rep = def; | - |
2480 | } | - |
2481 | yyCh = getChar(); | - |
2482 | } while (yyCh >= '0' && yyCh <= '9'); | - |
2483 | return rep; | - |
2484 | } else { | - |
2485 | return def; | - |
2486 | } | - |
2487 | } | - |
2488 | - | |
2489 | - | |
2490 | - | |
2491 | void QRegExpEngine::skipChars(int n) | - |
2492 | { | - |
2493 | if (n > 0) { | - |
2494 | yyPos += n - 1; | - |
2495 | yyCh = getChar(); | - |
2496 | } | - |
2497 | } | - |
2498 | - | |
2499 | - | |
2500 | void QRegExpEngine::error(const char *msg) | - |
2501 | { | - |
2502 | if (yyError.isEmpty()) | - |
2503 | yyError = QLatin1String(msg); | - |
2504 | } | - |
2505 | - | |
2506 | void QRegExpEngine::startTokenizer(const QChar *rx, int len) | - |
2507 | { | - |
2508 | yyIn = rx; | - |
2509 | yyPos0 = 0; | - |
2510 | yyPos = 0; | - |
2511 | yyLen = len; | - |
2512 | yyCh = getChar(); | - |
2513 | yyCharClass.reset(new QRegExpCharClass); | - |
2514 | yyMinRep = 0; | - |
2515 | yyMaxRep = 0; | - |
2516 | yyError = QString(); | - |
2517 | } | - |
2518 | - | |
2519 | int QRegExpEngine::getToken() | - |
2520 | { | - |
2521 | - | |
2522 | ushort pendingCh = 0; | - |
2523 | bool charPending; | - |
2524 | bool rangePending; | - |
2525 | int tok; | - |
2526 | - | |
2527 | int prevCh = yyCh; | - |
2528 | - | |
2529 | yyPos0 = yyPos - 1; | - |
2530 | - | |
2531 | yyCharClass->clear(); | - |
2532 | - | |
2533 | yyMinRep = 0; | - |
2534 | yyMaxRep = 0; | - |
2535 | yyCh = getChar(); | - |
2536 | - | |
2537 | switch (prevCh) { | - |
2538 | case EOS: | - |
2539 | yyPos0 = yyPos; | - |
2540 | return Tok_Eos; | - |
2541 | case '$': | - |
2542 | return Tok_Dollar; | - |
2543 | case '(': | - |
2544 | if (yyCh == '?') { | - |
2545 | prevCh = getChar(); | - |
2546 | yyCh = getChar(); | - |
2547 | switch (prevCh) { | - |
2548 | - | |
2549 | case '!': | - |
2550 | return Tok_NegLookahead; | - |
2551 | case '=': | - |
2552 | return Tok_PosLookahead; | - |
2553 | - | |
2554 | case ':': | - |
2555 | return Tok_MagicLeftParen; | - |
2556 | case '<': | - |
2557 | error("lookbehinds not supported, see QTBUG-2371"); | - |
2558 | return Tok_MagicLeftParen; | - |
2559 | default: | - |
2560 | error("bad lookahead syntax"); | - |
2561 | return Tok_MagicLeftParen; | - |
2562 | } | - |
2563 | } else { | - |
2564 | return Tok_LeftParen; | - |
2565 | } | - |
2566 | case ')': | - |
2567 | return Tok_RightParen; | - |
2568 | case '*': | - |
2569 | yyMinRep = 0; | - |
2570 | yyMaxRep = InftyRep; | - |
2571 | return Tok_Quantifier; | - |
2572 | case '+': | - |
2573 | yyMinRep = 1; | - |
2574 | yyMaxRep = InftyRep; | - |
2575 | return Tok_Quantifier; | - |
2576 | case '.': | - |
2577 | - | |
2578 | yyCharClass->setNegative(true); | - |
2579 | - | |
2580 | return Tok_CharClass; | - |
2581 | case '?': | - |
2582 | yyMinRep = 0; | - |
2583 | yyMaxRep = 1; | - |
2584 | return Tok_Quantifier; | - |
2585 | case '[': | - |
2586 | - | |
2587 | if (yyCh == '^') { | - |
2588 | yyCharClass->setNegative(true); | - |
2589 | yyCh = getChar(); | - |
2590 | } | - |
2591 | charPending = false; | - |
2592 | rangePending = false; | - |
2593 | do { | - |
2594 | if (yyCh == '-' && charPending && !rangePending) { | - |
2595 | rangePending = true; | - |
2596 | yyCh = getChar(); | - |
2597 | } else { | - |
2598 | if (charPending && !rangePending) { | - |
2599 | yyCharClass->addSingleton(pendingCh); | - |
2600 | charPending = false; | - |
2601 | } | - |
2602 | if (yyCh == '\\') { | - |
2603 | yyCh = getChar(); | - |
2604 | tok = getEscape(); | - |
2605 | if (tok == Tok_Word) | - |
2606 | tok = '\b'; | - |
2607 | } else { | - |
2608 | tok = Tok_Char | yyCh; | - |
2609 | yyCh = getChar(); | - |
2610 | } | - |
2611 | if (tok == Tok_CharClass) { | - |
2612 | if (rangePending) { | - |
2613 | yyCharClass->addSingleton('-'); | - |
2614 | yyCharClass->addSingleton(pendingCh); | - |
2615 | charPending = false; | - |
2616 | rangePending = false; | - |
2617 | } | - |
2618 | } else if ((tok & Tok_Char) != 0) { | - |
2619 | if (rangePending) { | - |
2620 | yyCharClass->addRange(pendingCh, tok ^ Tok_Char); | - |
2621 | charPending = false; | - |
2622 | rangePending = false; | - |
2623 | } else { | - |
2624 | pendingCh = tok ^ Tok_Char; | - |
2625 | charPending = true; | - |
2626 | } | - |
2627 | } else { | - |
2628 | error("bad char class syntax"); | - |
2629 | } | - |
2630 | } | - |
2631 | } while (yyCh != ']' && yyCh != EOS); | - |
2632 | if (rangePending) | - |
2633 | yyCharClass->addSingleton('-'); | - |
2634 | if (charPending) | - |
2635 | yyCharClass->addSingleton(pendingCh); | - |
2636 | if (yyCh == EOS) | - |
2637 | error("unexpected end"); | - |
2638 | else | - |
2639 | yyCh = getChar(); | - |
2640 | return Tok_CharClass; | - |
2641 | - | |
2642 | - | |
2643 | - | |
2644 | - | |
2645 | case '\\': | - |
2646 | return getEscape(); | - |
2647 | case ']': | - |
2648 | error("missing left delim"); | - |
2649 | return Tok_Char | ']'; | - |
2650 | case '^': | - |
2651 | return Tok_Caret; | - |
2652 | case '{': | - |
2653 | - | |
2654 | yyMinRep = getRep(0); | - |
2655 | yyMaxRep = yyMinRep; | - |
2656 | if (yyCh == ',') { | - |
2657 | yyCh = getChar(); | - |
2658 | yyMaxRep = getRep(InftyRep); | - |
2659 | } | - |
2660 | if (yyMaxRep < yyMinRep) | - |
2661 | error("invalid interval"); | - |
2662 | if (yyCh != '}') | - |
2663 | error("bad repetition syntax"); | - |
2664 | yyCh = getChar(); | - |
2665 | return Tok_Quantifier; | - |
2666 | - | |
2667 | - | |
2668 | - | |
2669 | - | |
2670 | case '|': | - |
2671 | return Tok_Bar; | - |
2672 | case '}': | - |
2673 | error("missing left delim"); | - |
2674 | return Tok_Char | '}'; | - |
2675 | default: | - |
2676 | return Tok_Char | prevCh; | - |
2677 | } | - |
2678 | } | - |
2679 | - | |
2680 | int QRegExpEngine::parse(const QChar *pattern, int len) | - |
2681 | { | - |
2682 | valid = true; | - |
2683 | startTokenizer(pattern, len); | - |
2684 | yyTok = getToken(); | - |
2685 | - | |
2686 | yyMayCapture = true; | - |
2687 | - | |
2688 | - | |
2689 | - | |
2690 | - | |
2691 | - | |
2692 | int atom = startAtom(false); | - |
2693 | - | |
2694 | QRegExpCharClass anything; | - |
2695 | Box box(this); | - |
2696 | box.set(anything); | - |
2697 | Box rightBox(this); | - |
2698 | rightBox.set(anything); | - |
2699 | - | |
2700 | Box middleBox(this); | - |
2701 | parseExpression(&middleBox); | - |
2702 | - | |
2703 | finishAtom(atom, false); | - |
2704 | - | |
2705 | - | |
2706 | middleBox.setupHeuristics(); | - |
2707 | - | |
2708 | box.cat(middleBox); | - |
2709 | box.cat(rightBox); | - |
2710 | yyCharClass.reset(0); | - |
2711 | - | |
2712 | - | |
2713 | for (int i = 0; i < nf; ++i) { | - |
2714 | switch (f[i].capture) { | - |
2715 | case QRegExpAtom::NoCapture: | - |
2716 | break; | - |
2717 | case QRegExpAtom::OfficialCapture: | - |
2718 | f[i].capture = ncap; | - |
2719 | captureForOfficialCapture.append(ncap); | - |
2720 | ++ncap; | - |
2721 | ++officialncap; | - |
2722 | break; | - |
2723 | case QRegExpAtom::UnofficialCapture: | - |
2724 | f[i].capture = greedyQuantifiers ? ncap++ : QRegExpAtom::NoCapture; | - |
2725 | } | - |
2726 | } | - |
2727 | - | |
2728 | - | |
2729 | - | |
2730 | if (officialncap == 0 && nbrefs == 0) { | - |
2731 | ncap = nf = 0; | - |
2732 | f.clear(); | - |
2733 | } | - |
2734 | - | |
2735 | - | |
2736 | - | |
2737 | for (int i = 0; i < nbrefs - officialncap; ++i) { | - |
2738 | captureForOfficialCapture.append(ncap); | - |
2739 | ++ncap; | - |
2740 | } | - |
2741 | - | |
2742 | - | |
2743 | - | |
2744 | if (!yyError.isEmpty()) | - |
2745 | return -1; | - |
2746 | - | |
2747 | - | |
2748 | const QRegExpAutomatonState &sinit = s.at(InitialState); | - |
2749 | caretAnchored = !sinit.anchors.isEmpty(); | - |
2750 | if (caretAnchored) { | - |
2751 | const QMap<int, int> &anchors = sinit.anchors; | - |
2752 | QMap<int, int>::const_iterator a; | - |
2753 | for (a = anchors.constBegin(); a != anchors.constEnd(); ++a) { | - |
2754 | if ( | - |
2755 | - | |
2756 | (*a & Anchor_Alternation) != 0 || | - |
2757 | - | |
2758 | (*a & Anchor_Caret) == 0) | - |
2759 | { | - |
2760 | caretAnchored = false; | - |
2761 | break; | - |
2762 | } | - |
2763 | } | - |
2764 | } | - |
2765 | - | |
2766 | - | |
2767 | - | |
2768 | int numStates = s.count(); | - |
2769 | for (int i = 0; i < numStates; ++i) { | - |
2770 | QRegExpAutomatonState &state = s[i]; | - |
2771 | if (!state.anchors.isEmpty()) { | - |
2772 | QMap<int, int>::iterator a = state.anchors.begin(); | - |
2773 | while (a != state.anchors.end()) { | - |
2774 | if (a.value() == 0) | - |
2775 | a = state.anchors.erase(a); | - |
2776 | else | - |
2777 | ++a; | - |
2778 | } | - |
2779 | } | - |
2780 | } | - |
2781 | - | |
2782 | return yyPos0; | - |
2783 | } | - |
2784 | - | |
2785 | void QRegExpEngine::parseAtom(Box *box) | - |
2786 | { | - |
2787 | - | |
2788 | QRegExpEngine *eng = 0; | - |
2789 | bool neg; | - |
2790 | int len; | - |
2791 | - | |
2792 | - | |
2793 | if ((yyTok & Tok_Char) != 0) { | - |
2794 | box->set(QChar(yyTok ^ Tok_Char)); | - |
2795 | } else { | - |
2796 | - | |
2797 | trivial = false; | - |
2798 | - | |
2799 | switch (yyTok) { | - |
2800 | case Tok_Dollar: | - |
2801 | box->catAnchor(Anchor_Dollar); | - |
2802 | break; | - |
2803 | case Tok_Caret: | - |
2804 | box->catAnchor(Anchor_Caret); | - |
2805 | break; | - |
2806 | - | |
2807 | case Tok_PosLookahead: | - |
2808 | case Tok_NegLookahead: | - |
2809 | neg = (yyTok == Tok_NegLookahead); | - |
2810 | eng = new QRegExpEngine(cs, greedyQuantifiers); | - |
2811 | len = eng->parse(yyIn + yyPos - 1, yyLen - yyPos + 1); | - |
2812 | if (len >= 0) | - |
2813 | skipChars(len); | - |
2814 | else | - |
2815 | error("bad lookahead syntax"); | - |
2816 | box->catAnchor(addLookahead(eng, neg)); | - |
2817 | yyTok = getToken(); | - |
2818 | if (yyTok != Tok_RightParen) | - |
2819 | error("bad lookahead syntax"); | - |
2820 | break; | - |
2821 | - | |
2822 | - | |
2823 | case Tok_Word: | - |
2824 | box->catAnchor(Anchor_Word); | - |
2825 | break; | - |
2826 | case Tok_NonWord: | - |
2827 | box->catAnchor(Anchor_NonWord); | - |
2828 | break; | - |
2829 | - | |
2830 | case Tok_LeftParen: | - |
2831 | case Tok_MagicLeftParen: | - |
2832 | yyTok = getToken(); | - |
2833 | parseExpression(box); | - |
2834 | if (yyTok != Tok_RightParen) | - |
2835 | error("unexpected end"); | - |
2836 | break; | - |
2837 | case Tok_CharClass: | - |
2838 | box->set(*yyCharClass); | - |
2839 | break; | - |
2840 | case Tok_Quantifier: | - |
2841 | error("bad repetition syntax"); | - |
2842 | break; | - |
2843 | default: | - |
2844 | - | |
2845 | if ((yyTok & Tok_BackRef) != 0) | - |
2846 | box->set(yyTok ^ Tok_BackRef); | - |
2847 | else | - |
2848 | - | |
2849 | error("disabled feature used"); | - |
2850 | } | - |
2851 | } | - |
2852 | yyTok = getToken(); | - |
2853 | } | - |
2854 | - | |
2855 | void QRegExpEngine::parseFactor(Box *box) | - |
2856 | { | - |
2857 | - | |
2858 | int outerAtom = greedyQuantifiers ? startAtom(false) : -1; | - |
2859 | int innerAtom = startAtom(yyMayCapture && yyTok == Tok_LeftParen); | - |
2860 | bool magicLeftParen = (yyTok == Tok_MagicLeftParen); | - |
2861 | const QChar *in = yyIn; | - |
2862 | int pos0 = yyPos0; | - |
2863 | int pos = yyPos; | - |
2864 | int len = yyLen; | - |
2865 | int ch = yyCh; | - |
2866 | QRegExpCharClass charClass; | - |
2867 | if (yyTok == Tok_CharClass) | - |
2868 | charClass = *yyCharClass; | - |
2869 | int tok = yyTok; | - |
2870 | bool mayCapture = yyMayCapture; | - |
2871 | - | |
2872 | - | |
2873 | parseAtom(box); | - |
2874 | - | |
2875 | finishAtom(innerAtom, magicLeftParen); | - |
2876 | - | |
2877 | - | |
2878 | bool hasQuantifier = (yyTok == Tok_Quantifier); | - |
2879 | if (hasQuantifier) { | - |
2880 | - | |
2881 | trivial = false; | - |
2882 | - | |
2883 | if (yyMaxRep == InftyRep) { | - |
2884 | box->plus(innerAtom); | - |
2885 | - | |
2886 | } else if (yyMaxRep == 0) { | - |
2887 | box->clear(); | - |
2888 | - | |
2889 | } | - |
2890 | if (yyMinRep == 0) | - |
2891 | box->opt(); | - |
2892 | - | |
2893 | - | |
2894 | yyMayCapture = false; | - |
2895 | int alpha = (yyMinRep == 0) ? 0 : yyMinRep - 1; | - |
2896 | int beta = (yyMaxRep == InftyRep) ? 0 : yyMaxRep - (alpha + 1); | - |
2897 | - | |
2898 | Box rightBox(this); | - |
2899 | int i; | - |
2900 | - | |
2901 | for (i = 0; i < beta; i++) { | - |
2902 | yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok; | - |
2903 | Box leftBox(this); | - |
2904 | parseAtom(&leftBox); | - |
2905 | leftBox.cat(rightBox); | - |
2906 | leftBox.opt(); | - |
2907 | rightBox = leftBox; | - |
2908 | } | - |
2909 | for (i = 0; i < alpha; i++) { | - |
2910 | yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok; | - |
2911 | Box leftBox(this); | - |
2912 | parseAtom(&leftBox); | - |
2913 | leftBox.cat(rightBox); | - |
2914 | rightBox = leftBox; | - |
2915 | } | - |
2916 | rightBox.cat(*box); | - |
2917 | *box = rightBox; | - |
2918 | - | |
2919 | yyTok = getToken(); | - |
2920 | - | |
2921 | yyMayCapture = mayCapture; | - |
2922 | - | |
2923 | } | - |
2924 | - | |
2925 | - | |
2926 | if (greedyQuantifiers) | - |
2927 | finishAtom(outerAtom, hasQuantifier); | - |
2928 | - | |
2929 | } | - |
2930 | - | |
2931 | void QRegExpEngine::parseTerm(Box *box) | - |
2932 | { | - |
2933 | - | |
2934 | if (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar) | - |
2935 | parseFactor(box); | - |
2936 | - | |
2937 | while (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar) { | - |
2938 | Box rightBox(this); | - |
2939 | parseFactor(&rightBox); | - |
2940 | box->cat(rightBox); | - |
2941 | } | - |
2942 | } | - |
2943 | - | |
2944 | void QRegExpEngine::parseExpression(Box *box) | - |
2945 | { | - |
2946 | parseTerm(box); | - |
2947 | while (yyTok == Tok_Bar) { | - |
2948 | - | |
2949 | trivial = false; | - |
2950 | - | |
2951 | Box rightBox(this); | - |
2952 | yyTok = getToken(); | - |
2953 | parseTerm(&rightBox); | - |
2954 | box->orx(rightBox); | - |
2955 | } | - |
2956 | } | - |
2957 | - | |
2958 | - | |
2959 | - | |
2960 | - | |
2961 | - | |
2962 | - | |
2963 | - | |
2964 | struct QRegExpPrivate | - |
2965 | { | - |
2966 | QRegExpEngine *eng; | - |
2967 | QRegExpEngineKey engineKey; | - |
2968 | bool minimal; | - |
2969 | - | |
2970 | QString t; | - |
2971 | QStringList capturedCache; | - |
2972 | - | |
2973 | QRegExpMatchState matchState; | - |
2974 | - | |
2975 | inline QRegExpPrivate() | - |
2976 | : eng(0), engineKey(QString(), QRegExp::RegExp, Qt::CaseSensitive), minimal(false) { } | - |
2977 | inline QRegExpPrivate(const QRegExpEngineKey &key) | - |
2978 | : eng(0), engineKey(key), minimal(false) {} | - |
2979 | }; | - |
2980 | - | |
2981 | - | |
2982 | typedef QCache<QRegExpEngineKey, QRegExpEngine> EngineCache; | - |
2983 | namespace { namespace Q_QGS_globalEngineCache { typedef EngineCache Type; QBasicAtomicInt guard = { QtGlobalStatic::Uninitialized }; __attribute__((visibility("hidden"))) inline Type *innerFunction() { struct HolderBase { ~HolderBase() noexcept { if (guard.load() == QtGlobalStatic::Initialized) guard.store(QtGlobalStatic::Destroyed); } }; static struct Holder : public HolderBase { Type value; Holder() noexcept(noexcept(Type ())) : value () { guard.store(QtGlobalStatic::Initialized); } } holder; return &holder.value; } } } static QGlobalStatic<EngineCache, Q_QGS_globalEngineCache::innerFunction, Q_QGS_globalEngineCache::guard> globalEngineCache; | - |
2984 | static QBasicMutex globalEngineCacheMutex; | - |
2985 | - | |
2986 | - | |
2987 | static void derefEngine(QRegExpEngine *eng, const QRegExpEngineKey &key) | - |
2988 | { | - |
2989 | if (!eng->ref.deref()) { | - |
2990 | - | |
2991 | if (globalEngineCache()) { | - |
2992 | QMutexLocker locker(&globalEngineCacheMutex); | - |
2993 | try { | - |
2994 | globalEngineCache()->insert(key, eng, 4 + key.pattern.length() / 4); | - |
2995 | } catch (const std::bad_alloc &) { | - |
2996 | - | |
2997 | delete eng; | - |
2998 | } | - |
2999 | } else { | - |
3000 | delete eng; | - |
3001 | } | - |
3002 | - | |
3003 | - | |
3004 | - | |
3005 | - | |
3006 | } | - |
3007 | } | - |
3008 | - | |
3009 | static void prepareEngine_helper(QRegExpPrivate *priv) | - |
3010 | { | - |
3011 | bool initMatchState = !priv->eng; | - |
3012 | - | |
3013 | if (!priv->eng && globalEngineCache()) { | - |
3014 | QMutexLocker locker(&globalEngineCacheMutex); | - |
3015 | priv->eng = globalEngineCache()->take(priv->engineKey); | - |
3016 | if (priv->eng != 0) | - |
3017 | priv->eng->ref.ref(); | - |
3018 | } | - |
3019 | - | |
3020 | - | |
3021 | if (!priv->eng) | - |
3022 | priv->eng = new QRegExpEngine(priv->engineKey); | - |
3023 | - | |
3024 | if (initMatchState) | - |
3025 | priv->matchState.prepareForMatch(priv->eng); | - |
3026 | } | - |
3027 | - | |
3028 | inline static void prepareEngine(QRegExpPrivate *priv) | - |
3029 | { | - |
3030 | if (priv->eng) | - |
3031 | return; | - |
3032 | prepareEngine_helper(priv); | - |
3033 | } | - |
3034 | - | |
3035 | static void prepareEngineForMatch(QRegExpPrivate *priv, const QString &str) | - |
3036 | { | - |
3037 | prepareEngine(priv); | - |
3038 | priv->matchState.prepareForMatch(priv->eng); | - |
3039 | - | |
3040 | priv->t = str; | - |
3041 | priv->capturedCache.clear(); | - |
3042 | - | |
3043 | - | |
3044 | - | |
3045 | } | - |
3046 | - | |
3047 | static void invalidateEngine(QRegExpPrivate *priv) | - |
3048 | { | - |
3049 | if (priv->eng != 0) { | - |
3050 | derefEngine(priv->eng, priv->engineKey); | - |
3051 | priv->eng = 0; | - |
3052 | priv->matchState.drain(); | - |
3053 | } | - |
3054 | } | - |
3055 | QRegExp::QRegExp() | - |
3056 | { | - |
3057 | priv = new QRegExpPrivate; | - |
3058 | prepareEngine(priv); | - |
3059 | } | - |
3060 | QRegExp::QRegExp(const QString &pattern, Qt::CaseSensitivity cs, PatternSyntax syntax) | - |
3061 | { | - |
3062 | priv = new QRegExpPrivate(QRegExpEngineKey(pattern, syntax, cs)); | - |
3063 | prepareEngine(priv); | - |
3064 | } | - |
3065 | - | |
3066 | - | |
3067 | - | |
3068 | - | |
3069 | - | |
3070 | - | |
3071 | QRegExp::QRegExp(const QRegExp &rx) | - |
3072 | { | - |
3073 | priv = new QRegExpPrivate; | - |
3074 | operator=(rx); | - |
3075 | } | - |
3076 | - | |
3077 | - | |
3078 | - | |
3079 | - | |
3080 | QRegExp::~QRegExp() | - |
3081 | { | - |
3082 | invalidateEngine(priv); | - |
3083 | delete priv; | - |
3084 | } | - |
3085 | - | |
3086 | - | |
3087 | - | |
3088 | - | |
3089 | - | |
3090 | - | |
3091 | QRegExp &QRegExp::operator=(const QRegExp &rx) | - |
3092 | { | - |
3093 | prepareEngine(rx.priv); | - |
3094 | QRegExpEngine *otherEng = rx.priv->eng; | - |
3095 | if (otherEng) | - |
3096 | otherEng->ref.ref(); | - |
3097 | invalidateEngine(priv); | - |
3098 | priv->eng = otherEng; | - |
3099 | priv->engineKey = rx.priv->engineKey; | - |
3100 | priv->minimal = rx.priv->minimal; | - |
3101 | - | |
3102 | priv->t = rx.priv->t; | - |
3103 | priv->capturedCache = rx.priv->capturedCache; | - |
3104 | - | |
3105 | if (priv->eng) | - |
3106 | priv->matchState.prepareForMatch(priv->eng); | - |
3107 | priv->matchState.captured = rx.priv->matchState.captured; | - |
3108 | return *this; | - |
3109 | } | - |
3110 | bool QRegExp::operator==(const QRegExp &rx) const | - |
3111 | { | - |
3112 | return priv->engineKey == rx.priv->engineKey && priv->minimal == rx.priv->minimal; | - |
3113 | } | - |
3114 | uint qHash(const QRegExp &key, uint seed) noexcept | - |
3115 | { | - |
3116 | QtPrivate::QHashCombine hash; | - |
3117 | seed = hash(seed, key.priv->engineKey); | - |
3118 | seed = hash(seed, key.priv->minimal); | - |
3119 | return executed 2048 times by 1 test: seed;return seed; Executed by:
executed 2048 times by 1 test: return seed; Executed by:
| 2048 |
3120 | } | - |
3121 | bool QRegExp::isEmpty() const | - |
3122 | { | - |
3123 | return priv->engineKey.pattern.isEmpty(); | - |
3124 | } | - |
3125 | bool QRegExp::isValid() const | - |
3126 | { | - |
3127 | if (priv->engineKey.pattern.isEmpty()) { | - |
3128 | return true; | - |
3129 | } else { | - |
3130 | prepareEngine(priv); | - |
3131 | return priv->eng->isValid(); | - |
3132 | } | - |
3133 | } | - |
3134 | QString QRegExp::pattern() const | - |
3135 | { | - |
3136 | return priv->engineKey.pattern; | - |
3137 | } | - |
3138 | - | |
3139 | - | |
3140 | - | |
3141 | - | |
3142 | - | |
3143 | - | |
3144 | - | |
3145 | void QRegExp::setPattern(const QString &pattern) | - |
3146 | { | - |
3147 | if (priv->engineKey.pattern != pattern) { | - |
3148 | invalidateEngine(priv); | - |
3149 | priv->engineKey.pattern = pattern; | - |
3150 | } | - |
3151 | } | - |
3152 | - | |
3153 | - | |
3154 | - | |
3155 | - | |
3156 | - | |
3157 | - | |
3158 | - | |
3159 | Qt::CaseSensitivity QRegExp::caseSensitivity() const | - |
3160 | { | - |
3161 | return priv->engineKey.cs; | - |
3162 | } | - |
3163 | void QRegExp::setCaseSensitivity(Qt::CaseSensitivity cs) | - |
3164 | { | - |
3165 | if ((bool)cs != (bool)priv->engineKey.cs) { | - |
3166 | invalidateEngine(priv); | - |
3167 | priv->engineKey.cs = cs; | - |
3168 | } | - |
3169 | } | - |
3170 | - | |
3171 | - | |
3172 | - | |
3173 | - | |
3174 | - | |
3175 | - | |
3176 | - | |
3177 | QRegExp::PatternSyntax QRegExp::patternSyntax() const | - |
3178 | { | - |
3179 | return priv->engineKey.patternSyntax; | - |
3180 | } | - |
3181 | void QRegExp::setPatternSyntax(PatternSyntax syntax) | - |
3182 | { | - |
3183 | if (syntax != priv->engineKey.patternSyntax) { | - |
3184 | invalidateEngine(priv); | - |
3185 | priv->engineKey.patternSyntax = syntax; | - |
3186 | } | - |
3187 | } | - |
3188 | - | |
3189 | - | |
3190 | - | |
3191 | - | |
3192 | - | |
3193 | - | |
3194 | - | |
3195 | bool QRegExp::isMinimal() const | - |
3196 | { | - |
3197 | return priv->minimal; | - |
3198 | } | - |
3199 | void QRegExp::setMinimal(bool minimal) | - |
3200 | { | - |
3201 | priv->minimal = minimal; | - |
3202 | } | - |
3203 | bool QRegExp::exactMatch(const QString &str) const | - |
3204 | { | - |
3205 | prepareEngineForMatch(priv, str); | - |
3206 | priv->matchState.match(str.unicode(), str.length(), 0, priv->minimal, true, 0); | - |
3207 | if (priv->matchState.captured[1] == str.length()) { | - |
3208 | return true; | - |
3209 | } else { | - |
3210 | priv->matchState.captured[0] = 0; | - |
3211 | priv->matchState.captured[1] = priv->matchState.oneTestMatchedLen; | - |
3212 | return false; | - |
3213 | } | - |
3214 | } | - |
3215 | int QRegExp::indexIn(const QString &str, int offset, CaretMode caretMode) const | - |
3216 | { | - |
3217 | prepareEngineForMatch(priv, str); | - |
3218 | if (offset < 0) | - |
3219 | offset += str.length(); | - |
3220 | priv->matchState.match(str.unicode(), str.length(), offset, | - |
3221 | priv->minimal, false, caretIndex(offset, caretMode)); | - |
3222 | return priv->matchState.captured[0]; | - |
3223 | } | - |
3224 | int QRegExp::lastIndexIn(const QString &str, int offset, CaretMode caretMode) const | - |
3225 | { | - |
3226 | prepareEngineForMatch(priv, str); | - |
3227 | if (offset < 0) | - |
3228 | offset += str.length(); | - |
3229 | if (offset < 0 || offset > str.length()) { | - |
3230 | memset(priv->matchState.captured, -1, priv->matchState.capturedSize*sizeof(int)); | - |
3231 | return -1; | - |
3232 | } | - |
3233 | - | |
3234 | while (offset >= 0) { | - |
3235 | priv->matchState.match(str.unicode(), str.length(), offset, | - |
3236 | priv->minimal, true, caretIndex(offset, caretMode)); | - |
3237 | if (priv->matchState.captured[0] == offset) | - |
3238 | return offset; | - |
3239 | --offset; | - |
3240 | } | - |
3241 | return -1; | - |
3242 | } | - |
3243 | - | |
3244 | - | |
3245 | - | |
3246 | - | |
3247 | - | |
3248 | - | |
3249 | - | |
3250 | int QRegExp::matchedLength() const | - |
3251 | { | - |
3252 | return priv->matchState.captured[1]; | - |
3253 | } | - |
3254 | - | |
3255 | - | |
3256 | - | |
3257 | - | |
3258 | - | |
3259 | - | |
3260 | - | |
3261 | int QRegExp::captureCount() const | - |
3262 | { | - |
3263 | prepareEngine(priv); | - |
3264 | return priv->eng->captureCount(); | - |
3265 | } | - |
3266 | QStringList QRegExp::capturedTexts() const | - |
3267 | { | - |
3268 | if (priv->capturedCache.isEmpty()) { | - |
3269 | prepareEngine(priv); | - |
3270 | const int *captured = priv->matchState.captured; | - |
3271 | int n = priv->matchState.capturedSize; | - |
3272 | - | |
3273 | for (int i = 0; i < n; i += 2) { | - |
3274 | QString m; | - |
3275 | if (captured[i + 1] == 0) | - |
3276 | m = QLatin1String(""); | - |
3277 | else if (captured[i] >= 0) | - |
3278 | m = priv->t.mid(captured[i], captured[i + 1]); | - |
3279 | priv->capturedCache.append(m); | - |
3280 | } | - |
3281 | priv->t.clear(); | - |
3282 | } | - |
3283 | return priv->capturedCache; | - |
3284 | } | - |
3285 | - | |
3286 | - | |
3287 | - | |
3288 | - | |
3289 | QStringList QRegExp::capturedTexts() | - |
3290 | { | - |
3291 | return const_cast<const QRegExp *>(this)->capturedTexts(); | - |
3292 | } | - |
3293 | QString QRegExp::cap(int nth) const | - |
3294 | { | - |
3295 | return capturedTexts().value(nth); | - |
3296 | } | - |
3297 | - | |
3298 | - | |
3299 | - | |
3300 | - | |
3301 | QString QRegExp::cap(int nth) | - |
3302 | { | - |
3303 | return const_cast<const QRegExp *>(this)->cap(nth); | - |
3304 | } | - |
3305 | int QRegExp::pos(int nth) const | - |
3306 | { | - |
3307 | if (nth < 0 || nth >= priv->matchState.capturedSize / 2) | - |
3308 | return -1; | - |
3309 | else | - |
3310 | return priv->matchState.captured[2 * nth]; | - |
3311 | } | - |
3312 | - | |
3313 | - | |
3314 | - | |
3315 | - | |
3316 | int QRegExp::pos(int nth) | - |
3317 | { | - |
3318 | return const_cast<const QRegExp *>(this)->pos(nth); | - |
3319 | } | - |
3320 | - | |
3321 | - | |
3322 | - | |
3323 | - | |
3324 | - | |
3325 | - | |
3326 | - | |
3327 | QString QRegExp::errorString() const | - |
3328 | { | - |
3329 | if (isValid()) { | - |
3330 | return QString::fromLatin1("no error occurred"); | - |
3331 | } else { | - |
3332 | return priv->eng->errorString(); | - |
3333 | } | - |
3334 | } | - |
3335 | - | |
3336 | - | |
3337 | - | |
3338 | - | |
3339 | QString QRegExp::errorString() | - |
3340 | { | - |
3341 | return const_cast<const QRegExp *>(this)->errorString(); | - |
3342 | } | - |
3343 | QString QRegExp::escape(const QString &str) | - |
3344 | { | - |
3345 | QString quoted; | - |
3346 | const int count = str.count(); | - |
3347 | quoted.reserve(count * 2); | - |
3348 | const QLatin1Char backslash('\\'); | - |
3349 | for (int i = 0; i < count; i++) { | - |
3350 | switch (str.at(i).toLatin1()) { | - |
3351 | case '$': | - |
3352 | case '(': | - |
3353 | case ')': | - |
3354 | case '*': | - |
3355 | case '+': | - |
3356 | case '.': | - |
3357 | case '?': | - |
3358 | case '[': | - |
3359 | case '\\': | - |
3360 | case ']': | - |
3361 | case '^': | - |
3362 | case '{': | - |
3363 | case '|': | - |
3364 | case '}': | - |
3365 | quoted.append(backslash); | - |
3366 | } | - |
3367 | quoted.append(str.at(i)); | - |
3368 | } | - |
3369 | return quoted; | - |
3370 | } | - |
3371 | QDataStream &operator<<(QDataStream &out, const QRegExp ®Exp) | - |
3372 | { | - |
3373 | return out << regExp.pattern() << (quint8)regExp.caseSensitivity() | - |
3374 | << (quint8)regExp.patternSyntax() | - |
3375 | << (quint8)!!regExp.isMinimal(); | - |
3376 | } | - |
3377 | QDataStream &operator>>(QDataStream &in, QRegExp ®Exp) | - |
3378 | { | - |
3379 | QString pattern; | - |
3380 | quint8 cs; | - |
3381 | quint8 patternSyntax; | - |
3382 | quint8 isMinimal; | - |
3383 | - | |
3384 | in >> pattern >> cs >> patternSyntax >> isMinimal; | - |
3385 | - | |
3386 | QRegExp newRegExp(pattern, Qt::CaseSensitivity(cs), | - |
3387 | QRegExp::PatternSyntax(patternSyntax)); | - |
3388 | - | |
3389 | newRegExp.setMinimal(isMinimal); | - |
3390 | regExp = newRegExp; | - |
3391 | return in; | - |
3392 | } | - |
3393 | - | |
3394 | - | |
3395 | - | |
3396 | QDebug operator<<(QDebug dbg, const QRegExp &r) | - |
3397 | { | - |
3398 | QDebugStateSaver saver(dbg); | - |
3399 | dbg.nospace() << "QRegExp(patternSyntax=" << r.patternSyntax() | - |
3400 | << ", pattern='"<< r.pattern() << "')"; | - |
3401 | return dbg; | - |
3402 | } | - |
3403 | - | |
3404 | - | |
3405 | - | |
Switch to Source code | Preprocessed file |