| Absolute File Name: | /home/qt/qt5_coco/qt5/qtbase/src/corelib/tools/qregexp.cpp |
| Source code | Switch to Preprocessed file |
| Line | Source | Count |
|---|---|---|
| 1 | /**************************************************************************** | - |
| 2 | ** | - |
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. | - |
| 4 | ** Contact: https://www.qt.io/licensing/ | - |
| 5 | ** | - |
| 6 | ** This file is part of the QtCore module of the Qt Toolkit. | - |
| 7 | ** | - |
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ | - |
| 9 | ** Commercial License Usage | - |
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in | - |
| 11 | ** accordance with the commercial license agreement provided with the | - |
| 12 | ** Software or, alternatively, in accordance with the terms contained in | - |
| 13 | ** a written agreement between you and The Qt Company. For licensing terms | - |
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further | - |
| 15 | ** information use the contact form at https://www.qt.io/contact-us. | - |
| 16 | ** | - |
| 17 | ** GNU Lesser General Public License Usage | - |
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser | - |
| 19 | ** General Public License version 3 as published by the Free Software | - |
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the | - |
| 21 | ** packaging of this file. Please review the following information to | - |
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements | - |
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. | - |
| 24 | ** | - |
| 25 | ** GNU General Public License Usage | - |
| 26 | ** Alternatively, this file may be used under the terms of the GNU | - |
| 27 | ** General Public License version 2.0 or (at your option) the GNU General | - |
| 28 | ** Public license version 3 or any later version approved by the KDE Free | - |
| 29 | ** Qt Foundation. The licenses are as published by the Free Software | - |
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 | - |
| 31 | ** included in the packaging of this file. Please review the following | - |
| 32 | ** information to ensure the GNU General Public License requirements will | - |
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and | - |
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. | - |
| 35 | ** | - |
| 36 | ** $QT_END_LICENSE$ | - |
| 37 | ** | - |
| 38 | ****************************************************************************/ | - |
| 39 | - | |
| 40 | #include "qregexp.h" | - |
| 41 | - | |
| 42 | #include "qalgorithms.h" | - |
| 43 | #include "qbitarray.h" | - |
| 44 | #include "qcache.h" | - |
| 45 | #include "qdatastream.h" | - |
| 46 | #include "qdebug.h" | - |
| 47 | #include "qhashfunctions.h" | - |
| 48 | #include "qlist.h" | - |
| 49 | #include "qmap.h" | - |
| 50 | #include "qmutex.h" | - |
| 51 | #include "qstring.h" | - |
| 52 | #include "qstringlist.h" | - |
| 53 | #include "qstringmatcher.h" | - |
| 54 | #include "qvector.h" | - |
| 55 | - | |
| 56 | #include <limits.h> | - |
| 57 | #include <algorithm> | - |
| 58 | - | |
| 59 | QT_BEGIN_NAMESPACE | - |
| 60 | - | |
| 61 | int qFindString(const QChar *haystack, int haystackLen, int from, | - |
| 62 | const QChar *needle, int needleLen, Qt::CaseSensitivity cs); | - |
| 63 | - | |
| 64 | // error strings for the regexp parser; each message is wrapped in QT_TRANSLATE_NOOP with the "QRegExp" context | - |
| 65 | #define RXERR_OK QT_TRANSLATE_NOOP("QRegExp", "no error occurred") | - |
| 66 | #define RXERR_DISABLED QT_TRANSLATE_NOOP("QRegExp", "disabled feature used") | - |
| 67 | #define RXERR_CHARCLASS QT_TRANSLATE_NOOP("QRegExp", "bad char class syntax") | - |
| 68 | #define RXERR_LOOKAHEAD QT_TRANSLATE_NOOP("QRegExp", "bad lookahead syntax") | - |
| 69 | #define RXERR_LOOKBEHIND QT_TRANSLATE_NOOP("QRegExp", "lookbehinds not supported, see QTBUG-2371") | - |
| 70 | #define RXERR_REPETITION QT_TRANSLATE_NOOP("QRegExp", "bad repetition syntax") | - |
| 71 | #define RXERR_OCTAL QT_TRANSLATE_NOOP("QRegExp", "invalid octal value") | - |
| 72 | #define RXERR_LEFTDELIM QT_TRANSLATE_NOOP("QRegExp", "missing left delim") | - |
| 73 | #define RXERR_END QT_TRANSLATE_NOOP("QRegExp", "unexpected end") | - |
| 74 | #define RXERR_LIMIT QT_TRANSLATE_NOOP("QRegExp", "met internal limit") | - |
| 75 | #define RXERR_INTERVAL QT_TRANSLATE_NOOP("QRegExp", "invalid interval") | - |
| 76 | #define RXERR_CATEGORY QT_TRANSLATE_NOOP("QRegExp", "invalid category") | - |
| 77 | - | |
| 78 | /*! | - |
| 79 | \class QRegExp | - |
| 80 | \inmodule QtCore | - |
| 81 | \reentrant | - |
| 82 | \brief The QRegExp class provides pattern matching using regular expressions. | - |
| 83 | - | |
| 84 | \ingroup tools | - |
| 85 | \ingroup shared | - |
| 86 | - | |
| 87 | \keyword regular expression | - |
| 88 | - | |
| 89 | A regular expression, or "regexp", is a pattern for matching | - |
| 90 | substrings in a text. This is useful in many contexts, e.g., | - |
| 91 | - | |
| 92 | \table | - |
| 93 | \row \li Validation | - |
| 94 | \li A regexp can test whether a substring meets some criteria, | - |
| 95 | e.g. is an integer or contains no whitespace. | - |
| 96 | \row \li Searching | - |
| 97 | \li A regexp provides more powerful pattern matching than | - |
| 98 | simple substring matching, e.g., match one of the words | - |
| 99 | \e{mail}, \e{letter} or \e{correspondence}, but none of the | - |
| 100 | words \e{email}, \e{mailman}, \e{mailer}, \e{letterbox}, etc. | - |
| 101 | \row \li Search and Replace | - |
| 102 | \li A regexp can replace all occurrences of a substring with a | - |
| 103 | different substring, e.g., replace all occurrences of \e{&} | - |
| 104 | with \e{\&amp;} except where the \e{&} is already followed by | - |
| 105 | an \e{amp;}. | - |
| 106 | \row \li String Splitting | - |
| 107 | \li A regexp can be used to identify where a string should be | - |
| 108 | split apart, e.g. splitting tab-delimited strings. | - |
| 109 | \endtable | - |
| 110 | - | |
| 111 | This documentation presents a brief introduction to regexps, a | - |
| 112 | description of Qt's regexp language, some examples, and the function | - |
| 113 | documentation itself. QRegExp is modeled on Perl's regexp | - |
| 114 | language. It fully supports Unicode. QRegExp can also be used in a | - |
| 115 | simpler, \e{wildcard mode} that is similar to the functionality | - |
| 116 | found in command shells. The syntax rules used by QRegExp can be | - |
| 117 | changed with setPatternSyntax(). In particular, the pattern syntax | - |
| 118 | can be set to QRegExp::FixedString, which means the pattern to be | - |
| 119 | matched is interpreted as a plain string, i.e., special characters | - |
| 120 | (e.g., backslash) are not escaped. | - |
| 121 | - | |
| 122 | A good text on regexps is \e {Mastering Regular Expressions} | - |
| 123 | (Third Edition) by Jeffrey E. F. Friedl, ISBN 0-596-52812-4. | - |
| 124 | - | |
| 125 | \note In Qt 5, the new QRegularExpression class provides a Perl | - |
| 126 | compatible implementation of regular expressions and is recommended | - |
| 127 | in place of QRegExp. | - |
| 128 | - | |
| 129 | \tableofcontents | - |
| 130 | - | |
| 131 | \section1 Introduction | - |
| 132 | - | |
| 133 | Regexps are built up from expressions, quantifiers, and | - |
| 134 | assertions. The simplest expression is a character, e.g. \b{x} | - |
| 135 | or \b{5}. An expression can also be a set of characters | - |
| 136 | enclosed in square brackets. \b{[ABCD]} will match an \b{A} | - |
| 137 | or a \b{B} or a \b{C} or a \b{D}. We can write this same | - |
| 138 | expression as \b{[A-D]}, and an expression to match any | - |
| 139 | capital letter in the English alphabet is written as | - |
| 140 | \b{[A-Z]}. | - |
| 141 | - | |
| 142 | A quantifier specifies the number of occurrences of an expression | - |
| 143 | that must be matched. \b{x{1,1}} means match one and only one | - |
| 144 | \b{x}. \b{x{1,5}} means match a sequence of \b{x} | - |
| 145 | characters that contains at least one \b{x} but no more than | - |
| 146 | five. | - |
| 147 | - | |
| 148 | Note that in general regexps cannot be used to check for balanced | - |
| 149 | brackets or tags. For example, a regexp can be written to match an | - |
| 150 | opening html \c{<b>} and its closing \c{</b>}, if the \c{<b>} tags | - |
| 151 | are not nested, but if the \c{<b>} tags are nested, that same | - |
| 152 | regexp will match an opening \c{<b>} tag with the wrong closing | - |
| 153 | \c{</b>}. For the fragment \c{<b>bold <b>bolder</b></b>}, the | - |
| 154 | first \c{<b>} would be matched with the first \c{</b>}, which is | - |
| 155 | not correct. However, it is possible to write a regexp that will | - |
| 156 | match nested brackets or tags correctly, but only if the number of | - |
| 157 | nesting levels is fixed and known. If the number of nesting levels | - |
| 158 | is not fixed and known, it is impossible to write a regexp that | - |
| 159 | will not fail. | - |
| 160 | - | |
| 161 | Suppose we want a regexp to match integers in the range 0 to 99. | - |
| 162 | At least one digit is required, so we start with the expression | - |
| 163 | \b{[0-9]{1,1}}, which matches a single digit exactly once. This | - |
| 164 | regexp matches integers in the range 0 to 9. To match integers up | - |
| 165 | to 99, increase the maximum number of occurrences to 2, so the | - |
| 166 | regexp becomes \b{[0-9]{1,2}}. This regexp satisfies the | - |
| 167 | original requirement to match integers from 0 to 99, but it will | - |
| 168 | also match integers that occur in the middle of strings. If we | - |
| 169 | want the matched integer to be the whole string, we must use the | - |
| 170 | anchor assertions, \b{^} (caret) and \b{$} (dollar). When | - |
| 171 | \b{^} is the first character in a regexp, it means the regexp | - |
| 172 | must match from the beginning of the string. When \b{$} is the | - |
| 173 | last character of the regexp, it means the regexp must match to | - |
| 174 | the end of the string. The regexp becomes \b{^[0-9]{1,2}$}. | - |
| 175 | Note that assertions, e.g. \b{^} and \b{$}, do not match | - |
| 176 | characters but locations in the string. | - |
| 177 | - | |
| 178 | If you have seen regexps described elsewhere, they may have looked | - |
| 179 | different from the ones shown here. This is because some sets of | - |
| 180 | characters and some quantifiers are so common that they have been | - |
| 181 | given special symbols to represent them. \b{[0-9]} can be | - |
| 182 | replaced with the symbol \b{\\d}. The quantifier to match | - |
| 183 | exactly one occurrence, \b{{1,1}}, can be replaced with the | - |
| 184 | expression itself, i.e. \b{x{1,1}} is the same as \b{x}. So | - |
| 185 | our 0 to 99 matcher could be written as \b{^\\d{1,2}$}. It can | - |
| 186 | also be written \b{^\\d\\d{0,1}$}, i.e. \e{From the start of | - |
| 187 | the string, match a digit, followed immediately by 0 or 1 digits}. | - |
| 188 | In practice, it would be written as \b{^\\d\\d?$}. The \b{?} | - |
| 189 | is shorthand for the quantifier \b{{0,1}}, i.e. 0 or 1 | - |
| 190 | occurrences. \b{?} makes an expression optional. The regexp | - |
| 191 | \b{^\\d\\d?$} means \e{From the beginning of the string, match | - |
| 192 | one digit, followed immediately by 0 or 1 more digits, followed | - |
| 193 | immediately by end of string}. | - |
| 194 | - | |
| 195 | To write a regexp that matches one of the words 'mail' \e or | - |
| 196 | 'letter' \e or 'correspondence' but does not match words that | - |
| 197 | contain these words, e.g., 'email', 'mailman', 'mailer', and | - |
| 198 | 'letterbox', start with a regexp that matches 'mail'. Expressed | - |
| 199 | fully, the regexp is \b{m{1,1}a{1,1}i{1,1}l{1,1}}, but because | - |
| 200 | a character expression is automatically quantified by | - |
| 201 | \b{{1,1}}, we can simplify the regexp to \b{mail}, i.e., an | - |
| 202 | 'm' followed by an 'a' followed by an 'i' followed by an 'l'. Now | - |
| 203 | we can use the vertical bar \b{|}, which means \b{or}, to | - |
| 204 | include the other two words, so our regexp for matching any of the | - |
| 205 | three words becomes \b{mail|letter|correspondence}. Match | - |
| 206 | 'mail' \b{or} 'letter' \b{or} 'correspondence'. While this | - |
| 207 | regexp will match one of the three words we want to match, it will | - |
| 208 | also match words we don't want to match, e.g., 'email'. To | - |
| 209 | prevent the regexp from matching unwanted words, we must tell it | - |
| 210 | to begin and end the match at word boundaries. First we enclose | - |
| 211 | our regexp in parentheses, \b{(mail|letter|correspondence)}. | - |
| 212 | Parentheses group expressions together, and they identify a part | - |
| 213 | of the regexp that we wish to \l{capturing text}{capture}. | - |
| 214 | Enclosing the expression in parentheses allows us to use it as a | - |
| 215 | component in more complex regexps. It also allows us to examine | - |
| 216 | which of the three words was actually matched. To force the match | - |
| 217 | to begin and end on word boundaries, we enclose the regexp in | - |
| 218 | \b{\\b} \e{word boundary} assertions: | - |
| 219 | \b{\\b(mail|letter|correspondence)\\b}. Now the regexp means: | - |
| 220 | \e{Match a word boundary, followed by the regexp in parentheses, | - |
| 221 | followed by a word boundary}. The \b{\\b} assertion matches a | - |
| 222 | \e position in the string, not a \e character. A word boundary is | - |
| 223 | the position between a word character and a non-word character | - |
| 224 | (e.g., a space or newline), or the beginning or ending of a string. | - |
| 225 | - | |
| 226 | If we want to replace ampersand characters with the HTML entity | - |
| 227 | \b{\&amp;}, the regexp to match is simply \b{\&}. But this | - |
| 228 | regexp will also match ampersands that have already been converted | - |
| 229 | to HTML entities. We want to replace only ampersands that are not | - |
| 230 | already followed by \b{amp;}. For this, we need the negative | - |
| 231 | lookahead assertion, \b{(?!}__\b{)}. The regexp can then be | - |
| 232 | written as \b{\&(?!amp;)}, i.e. \e{Match an ampersand that is} | - |
| 233 | \b{not} \e{followed by} \b{amp;}. | - |
| 234 | - | |
| 235 | If we want to count all the occurrences of 'Eric' and 'Eirik' in a | - |
| 236 | string, two valid solutions are \b{\\b(Eric|Eirik)\\b} and | - |
| 237 | \b{\\bEi?ri[ck]\\b}. The word boundary assertion '\\b' is | - |
| 238 | required to avoid matching words that contain either name, | - |
| 239 | e.g. 'Ericsson'. Note that the second regexp matches more | - |
| 240 | spellings than we want: 'Eric', 'Erik', 'Eiric' and 'Eirik'. | - |
| 241 | - | |
| 242 | Some of the examples discussed above are implemented in the | - |
| 243 | \l{#code-examples}{code examples} section. | - |
| 244 | - | |
| 245 | \target characters-and-abbreviations-for-sets-of-characters | - |
| 246 | \section1 Characters and Abbreviations for Sets of Characters | - |
| 247 | - | |
| 248 | \table | - |
| 249 | \header \li Element \li Meaning | - |
| 250 | \row \li \b{c} | - |
| 251 | \li A character represents itself unless it has a special | - |
| 252 | regexp meaning. e.g. \b{c} matches the character \e c. | - |
| 253 | \row \li \b{\\c} | - |
| 254 | \li A character that follows a backslash matches the character | - |
| 255 | itself, except as specified below. e.g., To match a literal | - |
| 256 | caret at the beginning of a string, write \b{\\^}. | - |
| 257 | \row \li \b{\\a} | - |
| 258 | \li Matches the ASCII bell (BEL, 0x07). | - |
| 259 | \row \li \b{\\f} | - |
| 260 | \li Matches the ASCII form feed (FF, 0x0C). | - |
| 261 | \row \li \b{\\n} | - |
| 262 | \li Matches the ASCII line feed (LF, 0x0A, Unix newline). | - |
| 263 | \row \li \b{\\r} | - |
| 264 | \li Matches the ASCII carriage return (CR, 0x0D). | - |
| 265 | \row \li \b{\\t} | - |
| 266 | \li Matches the ASCII horizontal tab (HT, 0x09). | - |
| 267 | \row \li \b{\\v} | - |
| 268 | \li Matches the ASCII vertical tab (VT, 0x0B). | - |
| 269 | \row \li \b{\\x\e{hhhh}} | - |
| 270 | \li Matches the Unicode character corresponding to the | - |
| 271 | hexadecimal number \e{hhhh} (between 0x0000 and 0xFFFF). | - |
| 272 | \row \li \b{\\0\e{ooo}} (i.e., \\zero \e{ooo}) | - |
| 273 | \li Matches the ASCII/Latin1 character for the octal number | - |
| 274 | \e{ooo} (between 0 and 0377). | - |
| 275 | \row \li \b{. (dot)} | - |
| 276 | \li Matches any character (including newline). | - |
| 277 | \row \li \b{\\d} | - |
| 278 | \li Matches a digit (QChar::isDigit()). | - |
| 279 | \row \li \b{\\D} | - |
| 280 | \li Matches a non-digit. | - |
| 281 | \row \li \b{\\s} | - |
| 282 | \li Matches a whitespace character (QChar::isSpace()). | - |
| 283 | \row \li \b{\\S} | - |
| 284 | \li Matches a non-whitespace character. | - |
| 285 | \row \li \b{\\w} | - |
| 286 | \li Matches a word character (QChar::isLetterOrNumber(), QChar::isMark(), or '_'). | - |
| 287 | \row \li \b{\\W} | - |
| 288 | \li Matches a non-word character. | - |
| 289 | \row \li \b{\\\e{n}} | - |
| 290 | \li The \e{n}-th backreference, e.g. \\1, \\2, etc. | - |
| 291 | \endtable | - |
| 292 | - | |
| 293 | \b{Note:} The C++ compiler transforms backslashes in strings. | - |
| 294 | To include a \b{\\} in a regexp, enter it twice, i.e. \c{\\}. | - |
| 295 | To match the backslash character itself, enter it four times, i.e. | - |
| 296 | \c{\\\\}. | - |
| 297 | - | |
| 298 | \target sets-of-characters | - |
| 299 | \section1 Sets of Characters | - |
| 300 | - | |
| 301 | Square brackets mean match any character contained in the square | - |
| 302 | brackets. The character set abbreviations described above can | - |
| 303 | appear in a character set in square brackets. Except for the | - |
| 304 | character set abbreviations and the following two exceptions, | - |
| 305 | characters do not have special meanings in square brackets. | - |
| 306 | - | |
| 307 | \table | - |
| 308 | \row \li \b{^} | - |
| 309 | - | |
| 310 | \li The caret negates the character set if it occurs as the | - |
| 311 | first character (i.e. immediately after the opening square | - |
| 312 | bracket). \b{[abc]} matches 'a' or 'b' or 'c', but | - |
| 313 | \b{[^abc]} matches anything \e but 'a' or 'b' or 'c'. | - |
| 314 | - | |
| 315 | \row \li \b{-} | - |
| 316 | - | |
| 317 | \li The dash indicates a range of characters. \b{[W-Z]} | - |
| 318 | matches 'W' or 'X' or 'Y' or 'Z'. | - |
| 319 | - | |
| 320 | \endtable | - |
| 321 | - | |
| 322 | Using the predefined character set abbreviations is more portable | - |
| 323 | than using character ranges across platforms and languages. For | - |
| 324 | example, \b{[0-9]} matches a digit in Western alphabets but | - |
| 325 | \b{\\d} matches a digit in \e any alphabet. | - |
| 326 | - | |
| 327 | Note: In other regexp documentation, sets of characters are often | - |
| 328 | called "character classes". | - |
| 329 | - | |
| 330 | \target quantifiers | - |
| 331 | \section1 Quantifiers | - |
| 332 | - | |
| 333 | By default, an expression is automatically quantified by | - |
| 334 | \b{{1,1}}, i.e. it should occur exactly once. In the following | - |
| 335 | list, \b{\e {E}} stands for expression. An expression is a | - |
| 336 | character, or an abbreviation for a set of characters, or a set of | - |
| 337 | characters in square brackets, or an expression in parentheses. | - |
| 338 | - | |
| 339 | \table | - |
| 340 | \row \li \b{\e {E}?} | - |
| 341 | - | |
| 342 | \li Matches zero or one occurrences of \e E. This quantifier | - |
| 343 | means \e{The previous expression is optional}, because it | - |
| 344 | will match whether or not the expression is found. \b{\e | - |
| 345 | {E}?} is the same as \b{\e {E}{0,1}}. e.g., \b{dents?} | - |
| 346 | matches 'dent' or 'dents'. | - |
| 347 | - | |
| 348 | \row \li \b{\e {E}+} | - |
| 349 | - | |
| 350 | \li Matches one or more occurrences of \e E. \b{\e {E}+} is | - |
| 351 | the same as \b{\e {E}{1,}}. e.g., \b{0+} matches '0', | - |
| 352 | '00', '000', etc. | - |
| 353 | - | |
| 354 | \row \li \b{\e {E}*} | - |
| 355 | - | |
| 356 | \li Matches zero or more occurrences of \e E. It is the same | - |
| 357 | as \b{\e {E}{0,}}. The \b{*} quantifier is often used | - |
| 358 | in error where \b{+} should be used. For example, if | - |
| 359 | \b{\\s*$} is used in an expression to match strings that | - |
| 360 | end in whitespace, it will match every string because | - |
| 361 | \b{\\s*$} means \e{Match zero or more whitespaces followed | - |
| 362 | by end of string}. The correct regexp to match strings that | - |
| 363 | have at least one trailing whitespace character is | - |
| 364 | \b{\\s+$}. | - |
| 365 | - | |
| 366 | \row \li \b{\e {E}{n}} | - |
| 367 | - | |
| 368 | \li Matches exactly \e n occurrences of \e E. \b{\e {E}{n}} | - |
| 369 | is the same as repeating \e E \e n times. For example, | - |
| 370 | \b{x{5}} is the same as \b{xxxxx}. It is also the same | - |
| 371 | as \b{\e {E}{n,n}}, e.g. \b{x{5,5}}. | - |
| 372 | - | |
| 373 | \row \li \b{\e {E}{n,}} | - |
| 374 | \li Matches at least \e n occurrences of \e E. | - |
| 375 | - | |
| 376 | \row \li \b{\e {E}{,m}} | - |
| 377 | \li Matches at most \e m occurrences of \e E. \b{\e {E}{,m}} | - |
| 378 | is the same as \b{\e {E}{0,m}}. | - |
| 379 | - | |
| 380 | \row \li \b{\e {E}{n,m}} | - |
| 381 | \li Matches at least \e n and at most \e m occurrences of \e E. | - |
| 382 | \endtable | - |
| 383 | - | |
| 384 | To apply a quantifier to more than just the preceding character, | - |
| 385 | use parentheses to group characters together in an expression. For | - |
| 386 | example, \b{tag+} matches a 't' followed by an 'a' followed by | - |
| 387 | at least one 'g', whereas \b{(tag)+} matches at least one | - |
| 388 | occurrence of 'tag'. | - |
| 389 | - | |
| 390 | Note: Quantifiers are normally "greedy". They always match as much | - |
| 391 | text as they can. For example, \b{0+} matches the first zero it | - |
| 392 | finds and all the consecutive zeros after the first zero. Applied | - |
| 393 | to '20005', it matches '2\underline{000}5'. Quantifiers can be made | - |
| 394 | non-greedy, see setMinimal(). | - |
| 395 | - | |
| 396 | \target capturing parentheses | - |
| 397 | \target backreferences | - |
| 398 | \section1 Capturing Text | - |
| 399 | - | |
| 400 | Parentheses allow us to group elements together so that we can | - |
| 401 | quantify and capture them. For example if we have the expression | - |
| 402 | \b{mail|letter|correspondence} that matches a string we know | - |
| 403 | that \e one of the words matched but not which one. Using | - |
| 404 | parentheses allows us to "capture" whatever is matched within | - |
| 405 | their bounds, so if we used \b{(mail|letter|correspondence)} | - |
| 406 | and matched this regexp against the string "I sent you some email" | - |
| 407 | we can use the cap() or capturedTexts() functions to extract the | - |
| 408 | matched characters, in this case 'mail'. | - |
| 409 | - | |
| 410 | We can use captured text within the regexp itself. To refer to the | - |
| 411 | captured text we use \e backreferences which are indexed from 1, | - |
| 412 | the same as for cap(). For example we could search for duplicate | - |
| 413 | words in a string using \b{\\b(\\w+)\\W+\\1\\b} which means match a | - |
| 414 | word boundary followed by one or more word characters followed by | - |
| 415 | one or more non-word characters followed by the same text as the | - |
| 416 | first parenthesized expression followed by a word boundary. | - |
| 417 | - | |
| 418 | If we want to use parentheses purely for grouping and not for | - |
| 419 | capturing we can use the non-capturing syntax, e.g. | - |
| 420 | \b{(?:green|blue)}. Non-capturing parentheses begin '(?:' and | - |
| 421 | end ')'. In this example we match either 'green' or 'blue' but we | - |
| 422 | do not capture the match so we only know whether or not we matched | - |
| 423 | but not which color we actually found. Using non-capturing | - |
| 424 | parentheses is more efficient than using capturing parentheses | - |
| 425 | since the regexp engine has to do less book-keeping. | - |
| 426 | - | |
| 427 | Both capturing and non-capturing parentheses may be nested. | - |
| 428 | - | |
| 429 | \target greedy quantifiers | - |
| 430 | - | |
| 431 | For historical reasons, quantifiers (e.g. \b{*}) that apply to | - |
| 432 | capturing parentheses are more "greedy" than other quantifiers. | - |
| 433 | For example, \b{a*(a*)} will match "aaa" with cap(1) == "aaa". | - |
| 434 | This behavior is different from what other regexp engines do | - |
| 435 | (notably, Perl). To obtain a more intuitive capturing behavior, | - |
| 436 | specify QRegExp::RegExp2 to the QRegExp constructor or call | - |
| 437 | setPatternSyntax(QRegExp::RegExp2). | - |
| 438 | - | |
| 439 | \target cap_in_a_loop | - |
| 440 | - | |
| 441 | When the number of matches cannot be determined in advance, a | - |
| 442 | common idiom is to use cap() in a loop. For example: | - |
| 443 | - | |
| 444 | \snippet code/src_corelib_tools_qregexp.cpp 0 | - |
| 445 | - | |
| 446 | \target assertions | - |
| 447 | \section1 Assertions | - |
| 448 | - | |
| 449 | Assertions make some statement about the text at the point where | - |
| 450 | they occur in the regexp but they do not match any characters. In | - |
| 451 | the following list \b{\e {E}} stands for any expression. | - |
| 452 | - | |
| 453 | \table | - |
| 454 | \row \li \b{^} | - |
| 455 | \li The caret signifies the beginning of the string. If you | - |
| 456 | wish to match a literal \c{^} you must escape it by | - |
| 457 | writing \c{\\^}. For example, \b{^#include} will only | - |
| 458 | match strings which \e begin with the characters '#include'. | - |
| 459 | (When the caret is the first character of a character set it | - |
| 460 | has a special meaning, see \l{#sets-of-characters}{Sets of Characters}.) | - |
| 461 | - | |
| 462 | \row \li \b{$} | - |
| 463 | \li The dollar signifies the end of the string. For example | - |
| 464 | \b{\\d\\s*$} will match strings which end with a digit | - |
| 465 | optionally followed by whitespace. If you wish to match a | - |
| 466 | literal \c{$} you must escape it by writing | - |
| 467 | \c{\\$}. | - |
| 468 | - | |
| 469 | \row \li \b{\\b} | - |
| 470 | \li A word boundary. For example the regexp | - |
| 471 | \b{\\bOK\\b} means match immediately after a word | - |
| 472 | boundary (e.g. start of string or whitespace) the letter 'O' | - |
| 473 | then the letter 'K' immediately before another word boundary | - |
| 474 | (e.g. end of string or whitespace). But note that the | - |
| 475 | assertion does not actually match any whitespace so if we | - |
| 476 | write \b{(\\bOK\\b)} and we have a match it will only | - |
| 477 | contain 'OK' even if the string is "It's \underline{OK} now". | - |
| 478 | - | |
| 479 | \row \li \b{\\B} | - |
| 480 | \li A non-word boundary. This assertion is true wherever | - |
| 481 | \b{\\b} is false. For example if we searched for | - |
| 482 | \b{\\Bon\\B} in "Left on" the match would fail (space | - |
| 483 | and end of string aren't non-word boundaries), but it would | - |
| 484 | match in "t\underline{on}ne". | - |
| 485 | - | |
| 486 | \row \li \b{(?=\e E)} | - |
| 487 | \li Positive lookahead. This assertion is true if the | - |
| 488 | expression matches at this point in the regexp. For example, | - |
| 489 | \b{const(?=\\s+char)} matches 'const' whenever it is | - |
| 490 | followed by 'char', as in 'static \underline{const} char *'. | - |
| 491 | (Compare with \b{const\\s+char}, which matches 'static | - |
| 492 | \underline{const char} *'.) | - |
| 493 | - | |
| 494 | \row \li \b{(?!\e E)} | - |
| 495 | \li Negative lookahead. This assertion is true if the | - |
| 496 | expression does not match at this point in the regexp. For | - |
| 497 | example, \b{const(?!\\s+char)} matches 'const' \e except | - |
| 498 | when it is followed by 'char'. | - |
| 499 | \endtable | - |
| 500 | - | |
| 501 | \target QRegExp wildcard matching | - |
| 502 | \section1 Wildcard Matching | - |
| 503 | - | |
| 504 | Most command shells such as \e bash or \e cmd.exe support "file | - |
| 505 | globbing", the ability to identify a group of files by using | - |
| 506 | wildcards. The setPatternSyntax() function is used to switch | - |
| 507 | between regexp and wildcard mode. Wildcard matching is much | - |
| 508 | simpler than full regexps and has only four features: | - |
| 509 | - | |
| 510 | \table | - |
| 511 | \row \li \b{c} | - |
| 512 | \li Any character represents itself apart from those mentioned | - |
| 513 | below. Thus \b{c} matches the character \e c. | - |
| 514 | \row \li \b{?} | - |
| 515 | \li Matches any single character. It is the same as | - |
| 516 | \b{.} in full regexps. | - |
| 517 | \row \li \b{*} | - |
| 518 | \li Matches zero or more of any characters. It is the | - |
| 519 | same as \b{.*} in full regexps. | - |
| 520 | \row \li \b{[...]} | - |
| 521 | \li Sets of characters can be represented in square brackets, | - |
| 522 | similar to full regexps. Within the character class, like | - |
| 523 | outside, backslash has no special meaning. | - |
| 524 | \endtable | - |
| 525 | - | |
| 526 | In Wildcard mode, the wildcard characters cannot be | - |
| 527 | escaped. In WildcardUnix mode, the character '\\' escapes the | - |
| 528 | wildcard. | - |
| 529 | - | |
| 530 | For example if we are in wildcard mode and have strings which | - |
| 531 | contain filenames we could identify HTML files with \b{*.html}. | - |
| 532 | This will match zero or more characters followed by a dot followed | - |
| 533 | by 'h', 't', 'm' and 'l'. | - |
| 534 | - | |
| 535 | To test a string against a wildcard expression, use exactMatch(). | - |
| 536 | For example: | - |
| 537 | - | |
| 538 | \snippet code/src_corelib_tools_qregexp.cpp 1 | - |
| 539 | - | |
| 540 | \target perl-users | - |
| 541 | \section1 Notes for Perl Users | - |
| 542 | - | |
| 543 | Most of the character class abbreviations supported by Perl are | - |
| 544 | supported by QRegExp, see \l{#characters-and-abbreviations-for-sets-of-characters} | - |
| 545 | {characters and abbreviations for sets of characters}. | - |
| 546 | - | |
| 547 | In QRegExp, apart from within character classes, \c{^} always | - |
| 548 | signifies the start of the string, so carets must always be | - |
| 549 | escaped unless used for that purpose. In Perl the meaning of caret | - |
| 550 | varies automagically depending on where it occurs so escaping it | - |
| 551 | is rarely necessary. The same applies to \c{$} which in | - |
| 552 | QRegExp always signifies the end of the string. | - |
| 553 | - | |
| 554 | QRegExp's quantifiers are the same as Perl's greedy quantifiers | - |
| 555 | (but see the \l{greedy quantifiers}{note above}). Non-greedy | - |
| 556 | matching cannot be applied to individual quantifiers, but can be | - |
| 557 | applied to all the quantifiers in the pattern. For example, to | - |
| 558 | match the Perl regexp \b{ro+?m} requires: | - |
| 559 | - | |
| 560 | \snippet code/src_corelib_tools_qregexp.cpp 2 | - |
| 561 | - | |
| 562 | The equivalent of Perl's \c{/i} option is | - |
| 563 | setCaseSensitivity(Qt::CaseInsensitive). | - |
| 564 | - | |
| 565 | Perl's \c{/g} option can be emulated using a \l{#cap_in_a_loop}{loop}. | - |
| 566 | - | |
| 567 | In QRegExp \b{.} matches any character, therefore all QRegExp | - |
| 568 | regexps have the equivalent of Perl's \c{/s} option. QRegExp | - |
| 569 | does not have an equivalent to Perl's \c{/m} option, but this | - |
| 570 | can be emulated in various ways, for example by splitting the input | - |
| 571 | into lines or by looping with a regexp that searches for newlines. | - |
| 572 | - | |
| 573 | Because QRegExp is string oriented, there are no \\A, \\Z, or \\z | - |
| 574 | assertions. The \\G assertion is not supported but can be emulated | - |
| 575 | in a loop. | - |
| 576 | - | |
| 577 | Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp | - |
| 578 | equivalents for $`, $' or $+. Perl's capturing variables, $1, $2, | - |
| 579 | ... correspond to cap(1) or capturedTexts()[1], cap(2) or | - |
| 580 | capturedTexts()[2], etc. | - |
| 581 | - | |
| 582 | To substitute a pattern use QString::replace(). | - |
| 583 | - | |
| 584 | Perl's extended \c{/x} syntax is not supported, nor are | - |
| 585 | directives, e.g. (?i), or regexp comments, e.g. (?#comment). On | - |
| 586 | the other hand, C++'s rules for literal strings can be used to | - |
| 587 | achieve the same: | - |
| 588 | - | |
| 589 | \snippet code/src_corelib_tools_qregexp.cpp 3 | - |
| 590 | - | |
| 591 | Both zero-width positive and zero-width negative lookahead | - |
| 592 | assertions (?=pattern) and (?!pattern) are supported with the same | - |
| 593 | syntax as Perl. Perl's lookbehind assertions, "independent" | - |
| 594 | subexpressions and conditional expressions are not supported. | - |
| 595 | - | |
| 596 | Non-capturing parentheses are also supported, with the same | - |
| 597 | (?:pattern) syntax. | - |
| 598 | - | |
| 599 | See QString::split() and QStringList::join() for equivalents | - |
| 600 | to Perl's split and join functions. | - |
| 601 | - | |
| 602 | Note: because C++ transforms \\'s they must be written \e twice in | - |
| 603 | code, e.g. \b{\\b} must be written \b{\\\\b}. | - |
| 604 | - | |
| 605 | \target code-examples | - |
| 606 | \section1 Code Examples | - |
| 607 | - | |
| 608 | \snippet code/src_corelib_tools_qregexp.cpp 4 | - |
| 609 | - | |
| 610 | The third string matches '\underline{6}'. This is a simple validation | - |
| 611 | regexp for integers in the range 0 to 99. | - |
| 612 | - | |
| 613 | \snippet code/src_corelib_tools_qregexp.cpp 5 | - |
| 614 | - | |
| 615 | The second string matches '\underline{This_is-OK}'. We've used the | - |
| 616 | character set abbreviation '\\S' (non-whitespace) and the anchors | - |
| 617 | to match strings which contain no whitespace. | - |
| 618 | - | |
| 619 | In the following example we match strings containing 'mail' or | - |
| 620 | 'letter' or 'correspondence' but only match whole words i.e. not | - |
| 621 | 'email' | - |
| 622 | - | |
| 623 | \snippet code/src_corelib_tools_qregexp.cpp 6 | - |
| 624 | - | |
| 625 | The second string matches "Please write the \underline{letter}". The | - |
| 626 | word 'letter' is also captured (because of the parentheses). We | - |
| 627 | can see what text we've captured like this: | - |
| 628 | - | |
| 629 | \snippet code/src_corelib_tools_qregexp.cpp 7 | - |
| 630 | - | |
| 631 | This will capture the text from the first set of capturing | - |
| 632 | parentheses (counting capturing left parentheses from left to | - |
| 633 | right). The parentheses are counted from 1 since cap(0) is the | - |
| 634 | whole matched regexp (equivalent to '&' in most regexp engines). | - |
| 635 | - | |
| 636 | \snippet code/src_corelib_tools_qregexp.cpp 8 | - |
| 637 | - | |
| 638 | Here we've passed the QRegExp to QString's replace() function to | - |
| 639 | replace the matched text with new text. | - |
| 640 | - | |
| 641 | \snippet code/src_corelib_tools_qregexp.cpp 9 | - |
| 642 | - | |
| 643 | We've used the indexIn() function to repeatedly match the regexp in | - |
| 644 | the string. Note that instead of moving forward by one character | - |
| 645 | at a time \c pos++ we could have written \c {pos += | - |
| 646 | rx.matchedLength()} to skip over the already matched string. The | - |
| 647 | count will equal 3, matching 'One \underline{Eric} another | - |
| 648 | \underline{Eirik}, and an Ericsson. How many Eiriks, \underline{Eric}?'; it | - |
| 649 | doesn't match 'Ericsson' or 'Eiriks' because they are not bounded | - |
| 650 | by word boundaries. | - |
| 651 | - | |
| 652 | One common use of regexps is to split lines of delimited data into | - |
| 653 | their component fields. | - |
| 654 | - | |
| 655 | \snippet code/src_corelib_tools_qregexp.cpp 10 | - |
| 656 | - | |
| 657 | In this example our input lines have the format company name, web | - |
| 658 | address and country. Unfortunately the regexp is rather long and | - |
| 659 | not very versatile -- the code will break if we add any more | - |
| 660 | fields. A simpler and better solution is to look for the | - |
| 661 | separator, '\\t' in this case, and take the surrounding text. The | - |
| 662 | QString::split() function can take a separator string or regexp | - |
| 663 | as an argument and split a string accordingly. | - |
| 664 | - | |
| 665 | \snippet code/src_corelib_tools_qregexp.cpp 11 | - |
| 666 | - | |
| 667 | Here field[0] is the company, field[1] the web address and so on. | - |
| 668 | - | |
| 669 | To imitate the matching of a shell we can use wildcard mode. | - |
| 670 | - | |
| 671 | \snippet code/src_corelib_tools_qregexp.cpp 12 | - |
| 672 | - | |
| 673 | Wildcard matching can be convenient because of its simplicity, but | - |
| 674 | any wildcard regexp can be defined using full regexps, e.g. | - |
| 675 | \b{.*\\.html$}. Notice that we can't match both \c .html and \c | - |
| 676 | .htm files with a wildcard unless we use \b{*.htm*} which will | - |
| 677 | also match 'test.html.bak'. A full regexp gives us the precision | - |
| 678 | we need, \b{.*\\.html?$}. | - |
| 679 | - | |
| 680 | QRegExp can match case insensitively using setCaseSensitivity(), | - |
| 681 | and can use non-greedy matching, see setMinimal(). By | - |
| 682 | default QRegExp uses full regexps but this can be changed with | - |
| 683 | setPatternSyntax(). Searching can be done forward with indexIn() or backward | - |
| 684 | with lastIndexIn(). Captured text can be accessed using | - |
| 685 | capturedTexts() which returns a string list of all captured | - |
| 686 | strings, or using cap() which returns the captured string for the | - |
| 687 | given index. The pos() function takes a match index and returns | - |
| 688 | the position in the string where the match was made (or -1 if | - |
| 689 | there was no match). | - |
| 690 | - | |
| 691 | \sa QString, QStringList, QRegExpValidator, QSortFilterProxyModel, | - |
| 692 | {tools/regexp}{Regular Expression Example} | - |
| 693 | */ | - |
| 694 | - | |
| 695 | #if defined(Q_OS_VXWORKS) && defined(EOS) // drop a pre-existing EOS macro so the constant below can be declared | - |
| 696 | # undef EOS | - |
| 697 | #endif | - |
| 698 | - | |
| 699 | const int NumBadChars = 64; // modulus used by BadChar() to fold a character into a table index | - |
| 700 | #define BadChar(ch) ((ch).unicode() % NumBadChars) | - |
| 701 | - | |
| 702 | const int NoOccurrence = INT_MAX; | - |
| 703 | const int EmptyCapture = INT_MAX; | - |
| 704 | const int InftyLen = INT_MAX; // Box::maxl is documented as "possibly InftyLen" for an unbounded length | - |
| 705 | const int InftyRep = 1025; | - |
| 706 | const int EOS = -1; | - |
| 707 | - | |
| 708 | static bool isWord(QChar ch) // true for '_', marks, letters and numbers | - |
| 709 | { | - |
| 710 | return ch == QLatin1Char('_') || ch.isMark() || ch.isLetterOrNumber(); | - |
| 711 | } | - |
| 712 | - | |
| 713 | /* | - |
| 714 | Stores in *a the union of *a and b. Both vectors are walked in step, | - |
| 715 | so they are presumably sorted in ascending order -- confirm with callers. | - |
| 716 | */ | - |
| 717 | static void mergeInto(QVector<int> *a, const QVector<int> &b) | - |
| 718 | { | - |
| 719 | const int lenA = a->size(); | - |
| 720 | const int lenB = b.size(); | - |
| 721 | if (lenA == 0) { | - |
| 722 | *a = b; | - |
| 723 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 724 | } else if (lenB == 1 && b.at(0) > a->at(lenA - 1)) { | - |
| 725 | // b's only element is greater than the last element of *a: just append it | - |
| 726 | a->append(b.at(0)); | - |
| 727 | #endif | - |
| 728 | } else if (lenB > 0) { | - |
| 729 | int total = lenA + lenB; // reduced by one for every value found in both | - |
| 730 | QVector<int> merged(total); | - |
| 731 | int ia = 0, ib = 0, out = 0; | - |
| 732 | while (ia < lenA && ib < lenB) { | - |
| 733 | const int x = a->at(ia), y = b.at(ib); | - |
| 734 | if (x == y) { | - |
| 735 | ++ia; // skip *a's copy; b's copy is written later | - |
| 736 | --total; | - |
| 737 | } else if (x < y) { | - |
| 738 | merged[out++] = x; | - |
| 739 | ++ia; | - |
| 740 | } else { | - |
| 741 | merged[out++] = y; | - |
| 742 | ++ib; | - |
| 743 | } | - |
| 744 | } | - |
| 745 | if (ia < lenA) // b is exhausted: bulk-copy the rest of *a | - |
| 746 | memcpy(merged.data() + out, a->constData() + ia, (lenA - ia) * sizeof(int)); | - |
| 747 | merged.resize(total); | - |
| 748 | if (ib < lenB) // *a is exhausted: bulk-copy the rest of b | - |
| 749 | memcpy(merged.data() + out, b.constData() + ib, (lenB - ib) * sizeof(int)); | - |
| 750 | *a = merged; | - |
| 751 | } | - |
| 752 | } | - |
| 753 | - | |
| 754 | #ifndef QT_NO_REGEXP_WILDCARD | - |
| 755 | /* | - |
| 756 | Translates a wildcard pattern to an equivalent regular expression | - |
| 757 | pattern (e.g., *.cpp to .*\.cpp) and returns the translated string. | - |
| 758 | - | |
| 759 | If enableEscaping is true, a backslash in front of a wildcard | - |
| 760 | character (*, ?, [ or ]) makes that character match literally. | - |
| 761 | */ | - |
| 762 | static QString wc2rx(const QString &wc_str, const bool enableEscaping) | - |
| 763 | { | - |
| 764 | const int wclen = wc_str.length(); | - |
| 765 | QString rx; | - |
| 766 | int i = 0; | - |
| 767 | bool isEscaping = false; // the previous character is '\' | - |
| 768 | const QChar *wc = wc_str.unicode(); | - |
| 769 | - | |
| 770 | while (i < wclen) { | - |
| 771 | const QChar c = wc[i++]; | - |
| 772 | switch (c.unicode()) { | - |
| 773 | case '\\': | - |
| 774 | if (enableEscaping) { | - |
| 775 | if (isEscaping) { | - |
| 776 | rx += QLatin1String("\\\\"); | - |
| 777 | } // we insert the \\ later if necessary | - |
| 778 | if (i == wclen) { // the end | - |
| 779 | rx += QLatin1String("\\\\"); | - |
| 780 | } | - |
| 781 | } else { | - |
| 782 | rx += QLatin1String("\\\\"); | - |
| 783 | } | - |
| 784 | isEscaping = true; // NOTE(review): also set when enableEscaping is false -- confirm this is intended | - |
| 785 | break; | - |
| 786 | case '*': | - |
| 787 | if (isEscaping) { | - |
| 788 | rx += QLatin1String("\\*"); | - |
| 789 | isEscaping = false; | - |
| 790 | } else { | - |
| 791 | rx += QLatin1String(".*"); | - |
| 792 | } | - |
| 793 | break; | - |
| 794 | case '?': | - |
| 795 | if (isEscaping) { | - |
| 796 | rx += QLatin1String("\\?"); | - |
| 797 | isEscaping = false; | - |
| 798 | } else { | - |
| 799 | rx += QLatin1Char('.'); | - |
| 800 | } | - |
| 801 | - | |
| 802 | break; | - |
| 803 | case '$': // regexp metacharacters with no wildcard meaning are always escaped | - |
| 804 | case '(': | - |
| 805 | case ')': | - |
| 806 | case '+': | - |
| 807 | case '.': | - |
| 808 | case '^': | - |
| 809 | case '{': | - |
| 810 | case '|': | - |
| 811 | case '}': | - |
| 812 | if (isEscaping) { | - |
| 813 | isEscaping = false; | - |
| 814 | rx += QLatin1String("\\\\"); | - |
| 815 | } | - |
| 816 | rx += QLatin1Char('\\'); | - |
| 817 | rx += c; | - |
| 818 | break; | - |
| 819 | case '[': | - |
| 820 | if (isEscaping) { | - |
| 821 | isEscaping = false; | - |
| 822 | rx += QLatin1String("\\["); | - |
| 823 | } else { | - |
| 824 | rx += c; | - |
| 825 | if (i < wclen && wc[i] == QLatin1Char('^')) // bounds-checked like the lookups below | - |
| 826 | rx += wc[i++]; | - |
| 827 | if (i < wclen) { | - |
| 828 | if (wc[i] == QLatin1Char(']')) // ']' right after '[' or '[^' is a set member; test the input, not rx | - |
| 829 | rx += wc[i++]; | - |
| 830 | while (i < wclen && wc[i] != QLatin1Char(']')) { // copy the set up to, not including, its closing ']' | - |
| 831 | if (wc[i] == QLatin1Char('\\')) | - |
| 832 | rx += QLatin1Char('\\'); | - |
| 833 | rx += wc[i++]; | - |
| 834 | } | - |
| 835 | } | - |
| 836 | } | - |
| 837 | break; | - |
| 838 | - | |
| 839 | case ']': | - |
| 840 | if(isEscaping){ | - |
| 841 | isEscaping = false; | - |
| 842 | rx += QLatin1String("\\"); | - |
| 843 | } | - |
| 844 | rx += c; | - |
| 845 | break; | - |
| 846 | - | |
| 847 | default: | - |
| 848 | if(isEscaping){ | - |
| 849 | isEscaping = false; | - |
| 850 | rx += QLatin1String("\\\\"); | - |
| 851 | } | - |
| 852 | rx += c; | - |
| 853 | } | - |
| 854 | } | - |
| 855 | return rx; | - |
| 856 | } | - |
| 857 | #endif | - |
| 858 | - | |
| 859 | static int caretIndex(int offset, QRegExp::CaretMode caretMode) | - |
| 860 | { | - |
| 861 | // Returns the index selected by caretMode: 0 for CaretAtZero, | - |
| 862 | // offset for CaretAtOffset, and -1 for anything else. | - |
| 863 | if (caretMode == QRegExp::CaretAtZero) | - |
| 864 | return 0; | - |
| 865 | if (caretMode == QRegExp::CaretAtOffset) | - |
| 866 | return offset; | - |
| 867 | return -1; // QRegExp::CaretWontMatch | - |
| 868 | } | - |
| 869 | - | |
| 870 | /* | - |
| 871 | The QRegExpEngineKey struct identifies an engine by its pattern, syntax and case sensitivity. | - |
| 872 | */ | - |
| 873 | struct QRegExpEngineKey | - |
| 874 | { | - |
| 875 | QString pattern; | - |
| 876 | QRegExp::PatternSyntax patternSyntax; | - |
| 877 | Qt::CaseSensitivity cs; | - |
| 878 | - | |
| 879 | QRegExpEngineKey(const QString &pattern, QRegExp::PatternSyntax patternSyntax, | - |
| 880 | Qt::CaseSensitivity cs) | - |
| 881 | : pattern(pattern), patternSyntax(patternSyntax), cs(cs) {} | - |
| 882 | - | |
| 883 | void clear() { // back to an empty, RegExp-syntax, case-sensitive key | - |
| 884 | cs = Qt::CaseSensitive; | - |
| 885 | patternSyntax = QRegExp::RegExp; | - |
| 886 | pattern.clear(); | - |
| 887 | } | - |
| 888 | }; | - |
| 889 | - | |
| 890 | static bool operator==(const QRegExpEngineKey &key1, const QRegExpEngineKey &key2) | - |
| 891 | { | - |
| 892 | // Keys are equal when all three fields agree; the two enums are compared before the string. | - |
| 893 | return key1.cs == key2.cs && key1.patternSyntax == key2.patternSyntax && key1.pattern == key2.pattern; | - |
| 894 | } | - |
| 895 | - | |
| 896 | static uint qHash(const QRegExpEngineKey &key, uint seed = 0) Q_DECL_NOTHROW | - |
| 897 | { | - |
| 898 | QtPrivate::QHashCombine hash; // folds each field of the key into the running seed | - |
| 899 | seed = hash(seed, key.pattern); | - |
| 900 | seed = hash(seed, key.patternSyntax); | - |
| 901 | seed = hash(seed, key.cs); | - |
| 902 | return seed; | 630297 |
| 903 | } | - |
| 904 | - | |
| 905 | class QRegExpEngine; | - |
| 906 | - | |
| 907 | //Q_DECLARE_TYPEINFO(QVector<int>, Q_MOVABLE_TYPE); | - |
| 908 | - | |
| 909 | /* | - |
| 910 | This is the engine state during matching. | - |
| 911 | */ | - |
| 912 | struct QRegExpMatchState | - |
| 913 | { | - |
| 914 | const QChar *in; // a pointer to the input string data | - |
| 915 | int pos; // the current position in the string | - |
| 916 | int caretPos; // presumably the caretIndex handed to match() -- confirm | - |
| 917 | int len; // the length of the input string | - |
| 918 | bool minimal; // minimal matching? | - |
| 919 | int *bigArray; // big array holding the data for the next pointers | - |
| 920 | int *inNextStack; // is state in nextStack? | - |
| 921 | int *curStack; // stack of current states | - |
| 922 | int *nextStack; // stack of next states | - |
| 923 | int *curCapBegin; // start of current states' captures | - |
| 924 | int *nextCapBegin; // start of next states' captures | - |
| 925 | int *curCapEnd; // end of current states' captures | - |
| 926 | int *nextCapEnd; // end of next states' captures | - |
| 927 | int *tempCapBegin; // start of temporary captures | - |
| 928 | int *tempCapEnd; // end of temporary captures | - |
| 929 | int *capBegin; // start of captures for a next state | - |
| 930 | int *capEnd; // end of captures for a next state | - |
| 931 | int *slideTab; // bump-along slide table for bad-character heuristic | - |
| 932 | int *captured; // what match() returned last | - |
| 933 | int slideTabSize; // size of slide table | - |
| 934 | int capturedSize; // presumably the number of ints behind captured -- confirm | - |
| 935 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 936 | QList<QVector<int> > sleeping; // list of back-reference sleepers | - |
| 937 | #endif | - |
| 938 | int matchLen; // length of match | - |
| 939 | int oneTestMatchedLen; // length of partial match | - |
| 940 | - | |
| 941 | const QRegExpEngine *eng; | - |
| 942 | - | |
| 943 | inline QRegExpMatchState() : bigArray(0), captured(0) {} // only these two members are initialised here | - |
| 944 | inline ~QRegExpMatchState() { free(bigArray); } // bigArray is released with free() | - |
| 945 | - | |
| 946 | void drain() { free(bigArray); bigArray = 0; captured = 0; } // to save memory | - |
| 947 | void prepareForMatch(QRegExpEngine *eng); | - |
| 948 | void match(const QChar *str, int len, int pos, bool minimal, | - |
| 949 | bool oneTest, int caretIndex); | - |
| 950 | bool matchHere(); | - |
| 951 | bool testAnchor(int i, int a, const int *capBegin); | - |
| 952 | }; | - |
| 953 | - | |
| 954 | /* | - |
| 955 | The struct QRegExpAutomatonState represents one state in a modified NFA. The | - |
| 956 | input characters matched are stored in the state instead of on | - |
| 957 | the transitions, something possible for an automaton | - |
| 958 | constructed from a regular expression. | - |
| 959 | */ | - |
| 960 | struct QRegExpAutomatonState | - |
| 961 | { | - |
| 962 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 963 | int atom; // which atom does this state belong to? | - |
| 964 | #endif | - |
| 965 | int match; // what does it match? (see CharClassBit and BackRefBit) | - |
| 966 | QVector<int> outs; // out-transitions | - |
| 967 | QMap<int, int> reenter; // atoms reentered when transiting out | - |
| 968 | QMap<int, int> anchors; // anchors met when transiting out | - |
| 969 | - | |
| 970 | inline QRegExpAutomatonState() { } // leaves the int members uninitialised | - |
| 971 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 972 | inline QRegExpAutomatonState(int a, int m) // a: owning atom, m: match code | - |
| 973 | : atom(a), match(m) { } | - |
| 974 | #else | - |
| 975 | inline QRegExpAutomatonState(int m) // without capture support only the match code is stored | - |
| 976 | : match(m) { } | - |
| 977 | #endif | - |
| 978 | }; | - |
| 979 | - | |
| 980 | Q_DECLARE_TYPEINFO(QRegExpAutomatonState, Q_MOVABLE_TYPE); | - |
| 981 | - | |
| 982 | /* | - |
| 983 | The struct QRegExpCharClassRange represents a range of characters (e.g., | - |
| 984 | [0-9] denotes range 48 to 57). | - |
| 985 | */ | - |
| 986 | struct QRegExpCharClassRange | - |
| 987 | { | - |
| 988 | ushort from; // first character code of the range (48 for [0-9]) | - |
| 989 | ushort len; // number of characters in the range (10 for [0-9]) | - |
| 990 | }; | - |
| 991 | - | |
| 992 | Q_DECLARE_TYPEINFO(QRegExpCharClassRange, Q_PRIMITIVE_TYPE); | - |
| 993 | - | |
| 994 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 995 | /* | - |
| 996 | The struct QRegExpAtom represents one node in the hierarchy of regular | - |
| 997 | expression atoms. | - |
| 998 | */ | - |
| 999 | struct QRegExpAtom | - |
| 1000 | { | - |
| 1001 | enum { NoCapture = -1, OfficialCapture = -2, UnofficialCapture = -3 }; // negative markers, presumably stored in capture -- confirm | - |
| 1002 | - | |
| 1003 | int parent; // index of parent in array of atoms | - |
| 1004 | int capture; // index of capture, from 1 to ncap - 1 | - |
| 1005 | }; | - |
| 1006 | - | |
| 1007 | Q_DECLARE_TYPEINFO(QRegExpAtom, Q_PRIMITIVE_TYPE); | - |
| 1008 | #endif | - |
| 1009 | - | |
| 1010 | struct QRegExpLookahead; | - |
| 1011 | - | |
| 1012 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | - |
| 1013 | /* | - |
| 1014 | The struct QRegExpAnchorAlternation represents a pair of anchors with | - |
| 1015 | OR semantics: either member being satisfied is enough. | - |
| 1016 | */ | - |
| 1017 | struct QRegExpAnchorAlternation | - |
| 1018 | { | - |
| 1019 | int a; // this anchor... | - |
| 1020 | int b; // ...or this one | - |
| 1021 | }; | - |
| 1022 | - | |
| 1023 | Q_DECLARE_TYPEINFO(QRegExpAnchorAlternation, Q_PRIMITIVE_TYPE); | - |
| 1024 | #endif | - |
| 1025 | - | |
| 1026 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 1027 | - | |
| 1028 | #define FLAG(x) (1 << (x)) | - |
| 1029 | /* | - |
| 1030 | The class QRegExpCharClass represents a set of characters, such as can | - |
| 1031 | be found in regular expressions (e.g., [a-z] denotes the set | - |
| 1032 | {a, b, ..., z}). | - |
| 1033 | */ | - |
| 1034 | class QRegExpCharClass | - |
| 1035 | { | - |
| 1036 | public: | - |
| 1037 | QRegExpCharClass(); | - |
| 1038 | - | |
| 1039 | void clear(); | - |
| 1040 | bool negative() const { return n; } | - |
| 1041 | void setNegative(bool negative); | - |
| 1042 | void addCategories(uint cats); | - |
| 1043 | void addRange(ushort from, ushort to); | - |
| 1044 | void addSingleton(ushort ch) { addRange(ch, ch); } // a singleton is the one-character range [ch, ch] | - |
| 1045 | - | |
| 1046 | bool in(QChar ch) const; | - |
| 1047 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1048 | const QVector<int> &firstOccurrence() const { return occ1; } | - |
| 1049 | #endif | - |
| 1050 | - | |
| 1051 | #if defined(QT_DEBUG) | - |
| 1052 | void dump() const; | - |
| 1053 | #endif | - |
| 1054 | - | |
| 1055 | private: | - |
| 1056 | QVector<QRegExpCharClassRange> r; // character ranges | - |
| 1057 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1058 | QVector<int> occ1; // first-occurrence array | - |
| 1059 | #endif | - |
| 1060 | uint c; // character classes | - |
| 1061 | bool n; // negative? | - |
| 1062 | }; | - |
| 1063 | #else // QT_NO_REGEXP_CCLASS: placeholder type that keeps only the first-occurrence accessor | - |
| 1064 | struct QRegExpCharClass | - |
| 1065 | { | - |
| 1066 | int dummy; | - |
| 1067 | - | |
| 1068 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1069 | QRegExpCharClass() { occ1.fill(0, NumBadChars); } // NumBadChars entries, all zero | - |
| 1070 | - | |
| 1071 | const QVector<int> &firstOccurrence() const { return occ1; } | - |
| 1072 | QVector<int> occ1; | - |
| 1073 | #endif | - |
| 1074 | }; | - |
| 1075 | #endif | - |
| 1076 | - | |
| 1077 | Q_DECLARE_TYPEINFO(QRegExpCharClass, Q_MOVABLE_TYPE); | - |
| 1078 | - | |
| 1079 | /* | - |
| 1080 | The QRegExpEngine class encapsulates a modified nondeterministic | - |
| 1081 | finite automaton (NFA). | - |
| 1082 | */ | - |
| 1083 | class QRegExpEngine | - |
| 1084 | { | - |
| 1085 | public: | - |
| 1086 | QRegExpEngine(Qt::CaseSensitivity cs, bool greedyQuantifiers) // no key here, so XML Schema extensions start disabled | - |
| 1087 | : cs(cs), greedyQuantifiers(greedyQuantifiers), xmlSchemaExtensions(false) { setup(); } | - |
| 1088 | - | |
| 1089 | QRegExpEngine(const QRegExpEngineKey &key); | - |
| 1090 | ~QRegExpEngine(); | - |
| 1091 | - | |
| 1092 | bool isValid() const { return valid; } | - |
| 1093 | const QString &errorString() const { return yyError; } | - |
| 1094 | int captureCount() const { return officialncap; } | - |
| 1095 | - | |
| 1096 | int createState(QChar ch); | - |
| 1097 | int createState(const QRegExpCharClass &cc); | - |
| 1098 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 1099 | int createState(int bref); | - |
| 1100 | #endif | - |
| 1101 | - | |
| 1102 | void addCatTransitions(const QVector<int> &from, const QVector<int> &to); | - |
| 1103 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1104 | void addPlusTransitions(const QVector<int> &from, const QVector<int> &to, int atom); | - |
| 1105 | #endif | - |
| 1106 | - | |
| 1107 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | - |
| 1108 | int anchorAlternation(int a, int b); | - |
| 1109 | int anchorConcatenation(int a, int b); | - |
| 1110 | #else | - |
| 1111 | int anchorAlternation(int a, int b) { return a & b; } | - |
| 1112 | int anchorConcatenation(int a, int b) { return a | b; } | - |
| 1113 | #endif | - |
| 1114 | void addAnchors(int from, int to, int a); | - |
| 1115 | - | |
| 1116 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1117 | void heuristicallyChooseHeuristic(); | - |
| 1118 | #endif | - |
| 1119 | - | |
| 1120 | #if defined(QT_DEBUG) | - |
| 1121 | void dump() const; | - |
| 1122 | #endif | - |
| 1123 | - | |
| 1124 | QAtomicInt ref; | - |
| 1125 | - | |
| 1126 | private: | - |
| 1127 | enum { CharClassBit = 0x10000, BackRefBit = 0x20000 }; | - |
| 1128 | enum { InitialState = 0, FinalState = 1 }; | - |
| 1129 | - | |
| 1130 | void setup(); | - |
| 1131 | int setupState(int match); | - |
| 1132 | - | |
| 1133 | /* | - |
| 1134 | Let's hope that 13 lookaheads and 14 back-references are | - |
| 1135 | enough. | - |
| 1136 | */ | - |
| 1137 | enum { MaxLookaheads = 13, MaxBackRefs = 14 }; | - |
| 1138 | enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002, Anchor_Word = 0x00000004, | - |
| 1139 | Anchor_NonWord = 0x00000008, Anchor_FirstLookahead = 0x00000010, | - |
| 1140 | Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads, | - |
| 1141 | Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1, | - |
| 1142 | Anchor_Alternation = unsigned(Anchor_BackRef1Empty) << MaxBackRefs, | - |
| 1143 | - | |
| 1144 | Anchor_LookaheadMask = (Anchor_FirstLookahead - 1) ^ | - |
| 1145 | ((Anchor_FirstLookahead << MaxLookaheads) - 1) }; | - |
| 1146 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1147 | int startAtom(bool officialCapture); | - |
| 1148 | void finishAtom(int atom, bool needCapture); | - |
| 1149 | #endif | - |
| 1150 | - | |
| 1151 | #ifndef QT_NO_REGEXP_LOOKAHEAD | - |
| 1152 | int addLookahead(QRegExpEngine *eng, bool negative); | - |
| 1153 | #endif | - |
| 1154 | - | |
| 1155 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1156 | bool goodStringMatch(QRegExpMatchState &matchState) const; | - |
| 1157 | bool badCharMatch(QRegExpMatchState &matchState) const; | - |
| 1158 | #else | - |
| 1159 | bool bruteMatch(QRegExpMatchState &matchState) const; | - |
| 1160 | #endif | - |
| 1161 | - | |
| 1162 | QVector<QRegExpAutomatonState> s; // array of states | - |
| 1163 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1164 | QVector<QRegExpAtom> f; // atom hierarchy | - |
| 1165 | int nf; // number of atoms | - |
| 1166 | int cf; // current atom | - |
| 1167 | QVector<int> captureForOfficialCapture; | - |
| 1168 | #endif | - |
| 1169 | int officialncap; // number of captures, seen from the outside | - |
| 1170 | int ncap; // number of captures, seen from the inside | - |
| 1171 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 1172 | QVector<QRegExpCharClass> cl; // array of character classes | - |
| 1173 | #endif | - |
| 1174 | #ifndef QT_NO_REGEXP_LOOKAHEAD | - |
| 1175 | QVector<QRegExpLookahead *> ahead; // array of lookaheads | - |
| 1176 | #endif | - |
| 1177 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | - |
| 1178 | QVector<QRegExpAnchorAlternation> aa; // array of (a, b) pairs of anchors | - |
| 1179 | #endif | - |
| 1180 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1181 | bool caretAnchored; // does the regexp start with ^? | - |
| 1182 | bool trivial; // is the good-string all that needs to match? | - |
| 1183 | #endif | - |
| 1184 | bool valid; // is the regular expression valid? | - |
| 1185 | Qt::CaseSensitivity cs; // case sensitive? | - |
| 1186 | bool greedyQuantifiers; // RegExp2? | - |
| 1187 | bool xmlSchemaExtensions; // W3CXmlSchema11 syntax? Set by both constructors | - |
| 1188 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 1189 | int nbrefs; // number of back-references | - |
| 1190 | #endif | - |
| 1191 | - | |
| 1192 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1193 | bool useGoodStringHeuristic; // use goodStringMatch? otherwise badCharMatch | - |
| 1194 | - | |
| 1195 | int goodEarlyStart; // the index where goodStr can first occur in a match | - |
| 1196 | int goodLateStart; // the index where goodStr can last occur in a match | - |
| 1197 | QString goodStr; // the string that any match has to contain | - |
| 1198 | - | |
| 1199 | int minl; // the minimum length of a match | - |
| 1200 | QVector<int> occ1; // first-occurrence array | - |
| 1201 | #endif | - |
| 1202 | - | |
| 1203 | /* | - |
| 1204 | The class Box is an abstraction for a regular expression | - |
| 1205 | fragment. It can also be seen as one node in the syntax tree of | - |
| 1206 | a regular expression with synthetized attributes. | - |
| 1207 | - | |
| 1208 | Its interface is ugly for performance reasons. | - |
| 1209 | */ | - |
| 1210 | class Box | - |
| 1211 | { | - |
| 1212 | public: | - |
| 1213 | Box(QRegExpEngine *engine); | - |
| 1214 | Box(const Box &b) { operator=(b); } | - |
| 1215 | - | |
| 1216 | Box &operator=(const Box &b); | - |
| 1217 | - | |
| 1218 | void clear() { operator=(Box(eng)); } | - |
| 1219 | void set(QChar ch); | - |
| 1220 | void set(const QRegExpCharClass &cc); | - |
| 1221 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 1222 | void set(int bref); | - |
| 1223 | #endif | - |
| 1224 | - | |
| 1225 | void cat(const Box &b); | - |
| 1226 | void orx(const Box &b); | - |
| 1227 | void plus(int atom); | - |
| 1228 | void opt(); | - |
| 1229 | void catAnchor(int a); | - |
| 1230 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1231 | void setupHeuristics(); | - |
| 1232 | #endif | - |
| 1233 | - | |
| 1234 | #if defined(QT_DEBUG) | - |
| 1235 | void dump() const; | - |
| 1236 | #endif | - |
| 1237 | - | |
| 1238 | private: | - |
| 1239 | void addAnchorsToEngine(const Box &to) const; | - |
| 1240 | - | |
| 1241 | QRegExpEngine *eng; // the automaton under construction | - |
| 1242 | QVector<int> ls; // the left states (firstpos) | - |
| 1243 | QVector<int> rs; // the right states (lastpos) | - |
| 1244 | QMap<int, int> lanchors; // the left anchors | - |
| 1245 | QMap<int, int> ranchors; // the right anchors | - |
| 1246 | int skipanchors; // the anchors to match if the box is skipped | - |
| 1247 | - | |
| 1248 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1249 | int earlyStart; // the index where str can first occur | - |
| 1250 | int lateStart; // the index where str can last occur | - |
| 1251 | QString str; // a string that has to occur in any match | - |
| 1252 | QString leftStr; // a string occurring at the left of this box | - |
| 1253 | QString rightStr; // a string occurring at the right of this box | - |
| 1254 | int maxl; // the maximum length of this box (possibly InftyLen) | - |
| 1255 | #endif | - |
| 1256 | - | |
| 1257 | int minl; // the minimum length of this box | - |
| 1258 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1259 | QVector<int> occ1; // first-occurrence array | - |
| 1260 | #endif | - |
| 1261 | }; | - |
| 1262 | - | |
| 1263 | friend class Box; | - |
| 1264 | - | |
| 1265 | /* | - |
| 1266 | This is the lexical analyzer for regular expressions. | - |
| 1267 | */ | - |
| 1268 | enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen, Tok_PosLookahead, | - |
| 1269 | Tok_NegLookahead, Tok_RightParen, Tok_CharClass, Tok_Caret, Tok_Quantifier, Tok_Bar, | - |
| 1270 | Tok_Word, Tok_NonWord, Tok_Char = 0x10000, Tok_BackRef = 0x20000 }; | - |
| 1271 | int getChar(); | - |
| 1272 | int getEscape(); | - |
| 1273 | #ifndef QT_NO_REGEXP_INTERVAL | - |
| 1274 | int getRep(int def); | - |
| 1275 | #endif | - |
| 1276 | #ifndef QT_NO_REGEXP_LOOKAHEAD | - |
| 1277 | void skipChars(int n); | - |
| 1278 | #endif | - |
| 1279 | void error(const char *msg); | - |
| 1280 | void startTokenizer(const QChar *rx, int len); | - |
| 1281 | int getToken(); | - |
| 1282 | - | |
| 1283 | const QChar *yyIn; // a pointer to the input regular expression pattern | - |
| 1284 | int yyPos0; // the position of yyTok in the input pattern | - |
| 1285 | int yyPos; // the position of the next character to read | - |
| 1286 | int yyLen; // the length of yyIn | - |
| 1287 | int yyCh; // the last character read | - |
| 1288 | QScopedPointer<QRegExpCharClass> yyCharClass; // attribute for Tok_CharClass tokens | - |
| 1289 | int yyMinRep; // attribute for Tok_Quantifier | - |
| 1290 | int yyMaxRep; // ditto | - |
| 1291 | QString yyError; // syntax error or overflow during parsing? | - |
| 1292 | - | |
| 1293 | /* | - |
| 1294 | This is the syntactic analyzer for regular expressions. | - |
| 1295 | */ | - |
| 1296 | int parse(const QChar *rx, int len); | - |
| 1297 | void parseAtom(Box *box); | - |
| 1298 | void parseFactor(Box *box); | - |
| 1299 | void parseTerm(Box *box); | - |
| 1300 | void parseExpression(Box *box); | - |
| 1301 | - | |
| 1302 | int yyTok; // the last token read | - |
| 1303 | bool yyMayCapture; // set this to false to disable capturing | - |
| 1304 | - | |
| 1305 | friend struct QRegExpMatchState; | - |
| 1306 | }; | - |
| 1307 | - | |
| 1308 | #ifndef QT_NO_REGEXP_LOOKAHEAD | - |
| 1309 | /* | - |
| 1310 | A QRegExpLookahead pairs the engine built for a Perl-style lookahead, | - |
| 1311 | (?=foo) or (?!bar), with a flag telling the two forms apart. | - |
| 1312 | */ | - |
| 1313 | struct QRegExpLookahead | - |
| 1314 | { | - |
| 1315 | QRegExpEngine *eng; // NFA for the embedded expression; deleted by the destructor | - |
| 1316 | bool neg; // true for a negative lookahead | - |
| 1317 | - | |
| 1318 | QRegExpLookahead(QRegExpEngine *engine, bool negative) | - |
| 1319 | : eng(engine), neg(negative) { } | - |
| 1320 | ~QRegExpLookahead() { delete eng; } | - |
| 1321 | }; | - |
| 1322 | #endif | - |
| 1323 | - | |
| 1324 | /*! | - |
| 1325 | \internal | - |
| 1326 | Converts the pattern string to the RegExp syntax. | - |
| 1327 | - | |
| 1328 | This is also used by QScriptEngine::newRegExp to convert to a pattern that JavaScriptCore can understand. | - |
| 1329 | */ | - |
| 1330 | Q_CORE_EXPORT QString qt_regexp_toCanonical(const QString &pattern, QRegExp::PatternSyntax patternSyntax) | - |
| 1331 | { | - |
| 1332 | #ifndef QT_NO_REGEXP_WILDCARD | - |
| 1333 | // Both wildcard flavours are translated by wc2rx(); only WildcardUnix | - |
| 1334 | // passes true for its enableEscaping argument. | - |
| 1335 | if (patternSyntax == QRegExp::Wildcard) | - |
| 1336 | return wc2rx(pattern, false); | - |
| 1337 | if (patternSyntax == QRegExp::WildcardUnix) | - |
| 1338 | return wc2rx(pattern, true); | - |
| 1339 | #endif | - |
| 1340 | if (patternSyntax == QRegExp::FixedString) | - |
| 1341 | return QRegExp::escape(pattern); | - |
| 1342 | - | |
| 1343 | // Every other syntax, W3CXmlSchema11 included, is returned unchanged. | - |
| 1344 | return pattern; | - |
| 1345 | } | - |
| 1346 | - | |
| 1347 | QRegExpEngine::QRegExpEngine(const QRegExpEngineKey &key) | - |
| 1348 | : cs(key.cs), greedyQuantifiers(key.patternSyntax == QRegExp::RegExp2), | - |
| 1349 | xmlSchemaExtensions(key.patternSyntax == QRegExp::W3CXmlSchema11) | - |
| 1350 | { | - |
| 1351 | setup(); | - |
| 1352 | - | |
| 1353 | QString rx = qt_regexp_toCanonical(key.pattern, key.patternSyntax); // translate the pattern to RegExp syntax before parsing | - |
| 1354 | - | |
| 1355 | valid = (parse(rx.unicode(), rx.length()) == rx.length()); // valid only if parse() reports the full pattern length | - |
| 1356 | if (!valid) { | - |
| 1357 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1358 | trivial = false; // setup() initialised trivial to true | - |
| 1359 | #endif | - |
| 1360 | error(RXERR_LEFTDELIM); | - |
| 1361 | } | - |
| 1362 | } | - |
| 1363 | - | |
| 1364 | QRegExpEngine::~QRegExpEngine() | - |
| 1365 | { | - |
| 1366 | #ifndef QT_NO_REGEXP_LOOKAHEAD | - |
| 1367 | qDeleteAll(ahead); // the engine owns the QRegExpLookahead objects that addLookahead() allocated | - |
| 1368 | #endif | - |
| 1369 | } | - |
| 1370 | - | |
| 1371 | void QRegExpMatchState::prepareForMatch(QRegExpEngine *eng) | - |
| 1372 | { | - |
| 1373 | /* | - |
| 1374 | We use one realloc()'ed array of ints (bigArray) for all the big data used a lot in | - |
| 1375 | matchHere() and friends. It is carved up, in order, into: inNextStack, curStack, nextStack (ns ints each); curCapBegin, nextCapBegin, curCapEnd, nextCapEnd (ncap * ns ints each); tempCapBegin, tempCapEnd, capBegin, capEnd (ncap ints each); slideTab (slideTabSize ints); captured (capturedSize ints). | - |
| 1376 | */ | - |
| 1377 | int ns = eng->s.size(); // number of states | - |
| 1378 | int ncap = eng->ncap; | - |
| 1379 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1380 | int newSlideTabSize = qMax(eng->minl + 1, 16); | - |
| 1381 | #else | - |
| 1382 | int newSlideTabSize = 0; | - |
| 1383 | #endif | - |
| 1384 | int numCaptures = eng->captureCount(); | - |
| 1385 | int newCapturedSize = 2 + 2 * numCaptures; | - |
| 1386 | bigArray = q_check_ptr((int *)realloc(bigArray, ((3 + 4 * ncap) * ns + 4 * ncap + newSlideTabSize + newCapturedSize)*sizeof(int))); | - |
| 1387 | - | |
| 1388 | // set all internal variables only _after_ bigArray is realloc'ed | - |
| 1389 | // to prevent a broken regexp in oom case | - |
| 1390 | - | |
| 1391 | slideTabSize = newSlideTabSize; | - |
| 1392 | capturedSize = newCapturedSize; | - |
| 1393 | inNextStack = bigArray; | - |
| 1394 | memset(inNextStack, -1, ns * sizeof(int)); // matchHere() treats -1 as "state not yet in nextStack" | - |
| 1395 | curStack = inNextStack + ns; | - |
| 1396 | nextStack = inNextStack + 2 * ns; | - |
| 1397 | - | |
| 1398 | curCapBegin = inNextStack + 3 * ns; | - |
| 1399 | nextCapBegin = curCapBegin + ncap * ns; | - |
| 1400 | curCapEnd = curCapBegin + 2 * ncap * ns; | - |
| 1401 | nextCapEnd = curCapBegin + 3 * ncap * ns; | - |
| 1402 | - | |
| 1403 | tempCapBegin = curCapBegin + 4 * ncap * ns; | - |
| 1404 | tempCapEnd = tempCapBegin + ncap; | - |
| 1405 | capBegin = tempCapBegin + 2 * ncap; | - |
| 1406 | capEnd = tempCapBegin + 3 * ncap; | - |
| 1407 | - | |
| 1408 | slideTab = tempCapBegin + 4 * ncap; | - |
| 1409 | captured = slideTab + slideTabSize; | - |
| 1410 | memset(captured, -1, capturedSize*sizeof(int)); // start out as "no match": every pair is (-1, -1) | - |
| 1411 | this->eng = eng; | - |
| 1412 | } | - |
| 1413 | - | |
| 1414 | /* | - |
| 1415 | Tries to match in str0 and fills captured with (begin, length) pairs: first the pair | - |
| 1416 | for the whole match, then one pair per official capture. If there is no match, all pairs are (-1, -1). If oneTest is true, only position pos0 is tried. | - |
| 1417 | */ | - |
| 1418 | void QRegExpMatchState::match(const QChar *str0, int len0, int pos0, | - |
| 1419 | bool minimal0, bool oneTest, int caretIndex) | - |
| 1420 | { | - |
| 1421 | bool matched = false; | - |
| 1422 | QChar char_null; | - |
| 1423 | - | |
| 1424 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1425 | if (eng->trivial && !oneTest) { | - |
| 1426 | pos = qFindString(str0, len0, pos0, eng->goodStr.unicode(), eng->goodStr.length(), eng->cs); | - |
| 1427 | matchLen = eng->goodStr.length(); | - |
| 1428 | matched = (pos != -1); | - |
| 1429 | } else | - |
| 1430 | #endif | - |
| 1431 | { | - |
| 1432 | in = str0; | - |
| 1433 | if (in == 0) | - |
| 1434 | in = &char_null; | - |
| 1435 | pos = pos0; | - |
| 1436 | caretPos = caretIndex; | - |
| 1437 | len = len0; | - |
| 1438 | minimal = minimal0; | - |
| 1439 | matchLen = 0; | - |
| 1440 | oneTestMatchedLen = 0; | - |
| 1441 | - | |
| 1442 | if (eng->valid && pos >= 0 && pos <= len) { | - |
| 1443 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1444 | if (oneTest) { | - |
| 1445 | matched = matchHere(); | - |
| 1446 | } else { | - |
| 1447 | if (pos <= len - eng->minl) { | - |
| 1448 | if (eng->caretAnchored) { | - |
| 1449 | matched = matchHere(); | - |
| 1450 | } else if (eng->useGoodStringHeuristic) { | - |
| 1451 | matched = eng->goodStringMatch(*this); | - |
| 1452 | } else { | - |
| 1453 | matched = eng->badCharMatch(*this); | - |
| 1454 | } | - |
| 1455 | } | - |
| 1456 | } | - |
| 1457 | #else | - |
| 1458 | matched = oneTest ? matchHere() : eng->bruteMatch(*this); | - |
| 1459 | #endif | - |
| 1460 | } | - |
| 1461 | } | - |
| 1462 | - | |
| 1463 | if (matched) { | - |
| 1464 | int *c = captured; | - |
| 1465 | *c++ = pos; | - |
| 1466 | *c++ = matchLen; | - |
| 1467 | - | |
| 1468 | int numCaptures = (capturedSize - 2) >> 1; | - |
| 1469 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1470 | for (int i = 0; i < numCaptures; ++i) { | - |
| 1471 | int j = eng->captureForOfficialCapture.at(i); | - |
| 1472 | if (capBegin[j] != EmptyCapture) { | - |
| 1473 | int len = capEnd[j] - capBegin[j]; | - |
| 1474 | *c++ = (len > 0) ? pos + capBegin[j] : 0; // a capture of non-positive length is reported at position 0 | - |
| 1475 | *c++ = len; | - |
| 1476 | } else { | - |
| 1477 | *c++ = -1; | - |
| 1478 | *c++ = -1; | - |
| 1479 | } | - |
| 1480 | } | - |
| 1481 | #endif | - |
| 1482 | } else { | - |
| 1483 | // we rely on 2's complement here | - |
| 1484 | memset(captured, -1, capturedSize * sizeof(int)); | - |
| 1485 | } | - |
| 1486 | } | - |
| 1487 | - | |
| 1488 | /* | - |
| 1489 | The three following functions add one state to the automaton and | - |
| 1490 | return the number of the state. The new state matches, respectively, a single character, a character class, or a back-reference. | - |
| 1491 | */ | - |
| 1492 | - | |
| 1493 | int QRegExpEngine::createState(QChar ch) | - |
| 1494 | { | - |
| 1495 | return setupState(ch.unicode()); | - |
| 1496 | } | - |
| 1497 | - | |
| 1498 | int QRegExpEngine::createState(const QRegExpCharClass &cc) | - |
| 1499 | { | - |
| 1500 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 1501 | int n = cl.size(); | - |
| 1502 | cl += QRegExpCharClass(cc); // store a copy; its index n is encoded in the state's match value | - |
| 1503 | return setupState(CharClassBit | n); | - |
| 1504 | #else | - |
| 1505 | Q_UNUSED(cc); | - |
| 1506 | return setupState(CharClassBit); | - |
| 1507 | #endif | - |
| 1508 | } | - |
| 1509 | - | |
| 1510 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 1511 | int QRegExpEngine::createState(int bref) | - |
| 1512 | { | - |
| 1513 | if (bref > nbrefs) { | - |
| 1514 | nbrefs = bref; // remember the highest back-reference number seen so far | - |
| 1515 | if (nbrefs > MaxBackRefs) { | - |
| 1516 | error(RXERR_LIMIT); | - |
| 1517 | return 0; // no state is created when the limit is exceeded | - |
| 1518 | } | - |
| 1519 | } | - |
| 1520 | return setupState(BackRefBit | bref); | - |
| 1521 | } | - |
| 1522 | #endif | - |
| 1523 | - | |
| 1524 | /* | - |
| 1525 | The two following functions add a transition between all pairs of | - |
| 1526 | states (i, j) where i is found in from, and j is found in to. | - |
| 1527 | - | |
| 1528 | Cat-transitions are distinguished from plus-transitions for | - |
| 1529 | capturing: addPlusTransitions() additionally records, in the reenter map of each from state, the atom re-entered by a newly added transition when that atom captures. | - |
| 1530 | */ | - |
| 1531 | - | |
| 1532 | void QRegExpEngine::addCatTransitions(const QVector<int> &from, const QVector<int> &to) | - |
| 1533 | { | - |
| 1534 | for (int i = 0; i < from.size(); i++) | - |
| 1535 | mergeInto(&s[from.at(i)].outs, to); | - |
| 1536 | } | - |
| 1537 | - | |
| 1538 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1539 | void QRegExpEngine::addPlusTransitions(const QVector<int> &from, const QVector<int> &to, int atom) | - |
| 1540 | { | - |
| 1541 | for (int i = 0; i < from.size(); i++) { | - |
| 1542 | QRegExpAutomatonState &st = s[from.at(i)]; | - |
| 1543 | const QVector<int> oldOuts = st.outs; // snapshot taken before the merge, to tell new transitions from old ones | - |
| 1544 | mergeInto(&st.outs, to); | - |
| 1545 | if (f.at(atom).capture != QRegExpAtom::NoCapture) { | - |
| 1546 | for (int j = 0; j < to.size(); j++) { | - |
| 1547 | // ### st.reenter.contains(to.at(j)) check looks suspicious | - |
| 1548 | if (!st.reenter.contains(to.at(j)) && | - |
| 1549 | !std::binary_search(oldOuts.constBegin(), oldOuts.constEnd(), to.at(j))) // binary_search requires oldOuts to be sorted | - |
| 1550 | st.reenter.insert(to.at(j), atom); | - |
| 1551 | } | - |
| 1552 | } | - |
| 1553 | } | - |
| 1554 | } | - |
| 1555 | #endif | - |
| 1556 | - | |
| 1557 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | - |
| 1558 | /* | - |
| 1559 | Returns an anchor that means a OR b. When neither operand is an alternation and the bits of one are contained in the other, the result is simply a & b; otherwise an entry {a, b} is appended to aa (or, with optimizations enabled, the last entry is reused if it is identical) and its index is returned ORed with Anchor_Alternation. | - |
| 1560 | */ | - |
| 1561 | int QRegExpEngine::anchorAlternation(int a, int b) | - |
| 1562 | { | - |
| 1563 | if (((a & b) == a || (a & b) == b) && ((a | b) & Anchor_Alternation) == 0) | - |
| 1564 | return a & b; | - |
| 1565 | - | |
| 1566 | int n = aa.size(); | - |
| 1567 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1568 | if (n > 0 && aa.at(n - 1).a == a && aa.at(n - 1).b == b) | - |
| 1569 | return Anchor_Alternation | (n - 1); | - |
| 1570 | #endif | - |
| 1571 | - | |
| 1572 | QRegExpAnchorAlternation element = {a, b}; | - |
| 1573 | aa.append(element); | - |
| 1574 | return Anchor_Alternation | n; | - |
| 1575 | } | - |
| 1576 | - | |
| 1577 | /* | - |
| 1578 | Returns an anchor that means a AND b. Two plain anchors are combined with a bitwise OR; if either operand is an alternation, the other operand is concatenated with both of its branches recursively and the two results are alternated. | - |
| 1579 | */ | - |
| 1580 | int QRegExpEngine::anchorConcatenation(int a, int b) | - |
| 1581 | { | - |
| 1582 | if (((a | b) & Anchor_Alternation) == 0) | - |
| 1583 | return a | b; | - |
| 1584 | if ((b & Anchor_Alternation) != 0) | - |
| 1585 | qSwap(a, b); // from here on, a is known to be an alternation | - |
| 1586 | - | |
| 1587 | int aprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).a, b); | - |
| 1588 | int bprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).b, b); | - |
| 1589 | return anchorAlternation(aprime, bprime); | - |
| 1590 | } | - |
| 1591 | #endif | - |
| 1592 | - | |
| 1593 | /* | - |
| 1594 | Adds anchor a on a transition characterised by its from state and | - |
| 1595 | its to state. If the transition already carries anchors, the stored value becomes the alternation of the old anchors and a. | - |
| 1596 | */ | - |
| 1597 | void QRegExpEngine::addAnchors(int from, int to, int a) | - |
| 1598 | { | - |
| 1599 | QRegExpAutomatonState &st = s[from]; | - |
| 1600 | if (st.anchors.contains(to)) | - |
| 1601 | a = anchorAlternation(st.anchors.value(to), a); | - |
| 1602 | st.anchors.insert(to, a); | - |
| 1603 | } | - |
| 1604 | - | |
| 1605 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1606 | /* | - |
| 1607 | This function chooses between the good-string and the bad-character | - |
| 1608 | heuristics. It computes two scores and chooses the heuristic with | - |
| 1609 | the highest score. | - |
| 1610 | - | |
| 1611 | Here are some common-sense constraints on the scores that should be | - |
| 1612 | respected if the formulas are ever modified: (1) If goodStr is | - |
| 1613 | empty, the good-string heuristic scores 0. (2) If the regular | - |
| 1614 | expression is trivial, the good-string heuristic should be used. | - |
| 1615 | (3) If the search is case insensitive, the good-string heuristic | - |
| 1616 | should be used, unless it scores 0. (Case insensitivity turns all | - |
| 1617 | entries of occ1 to 0.) (4) If (goodLateStart - goodEarlyStart) is | - |
| 1618 | big, the good-string heuristic should score less. | - |
| 1619 | */ | - |
| 1620 | void QRegExpEngine::heuristicallyChooseHeuristic() | - |
| 1621 | { | - |
| 1622 | if (minl == 0) { | - |
| 1623 | useGoodStringHeuristic = false; | - |
| 1624 | } else if (trivial) { | - |
| 1625 | useGoodStringHeuristic = true; | - |
| 1626 | } else { | - |
| 1627 | /* | - |
| 1628 | Magic formula: The good string has to constitute a good | - |
| 1629 | proportion of the minimum-length string, and appear at a | - |
| 1630 | more-or-less known index. | - |
| 1631 | */ | - |
| 1632 | int goodStringScore = (64 * goodStr.length() / minl) - | - |
| 1633 | (goodLateStart - goodEarlyStart); | - |
| 1634 | /* | - |
| 1635 | Less magic formula: We sample entries of occ1 at a fixed stride (step), starting at index 1, and | - |
| 1636 | check whether they are good or bad. | - |
| 1637 | */ | - |
| 1638 | int badCharScore = 0; | - |
| 1639 | int step = qMax(1, NumBadChars / 32); | - |
| 1640 | for (int i = 1; i < NumBadChars; i += step) { | - |
| 1641 | if (occ1.at(i) == NoOccurrence) | - |
| 1642 | badCharScore += minl; | - |
| 1643 | else | - |
| 1644 | badCharScore += occ1.at(i); | - |
| 1645 | } | - |
| 1646 | badCharScore /= minl; // minl != 0 here: the minl == 0 case was handled by the first branch | - |
| 1647 | useGoodStringHeuristic = (goodStringScore > badCharScore); | - |
| 1648 | } | - |
| 1649 | } | - |
| 1650 | #endif | - |
| 1651 | - | |
| 1652 | #if defined(QT_DEBUG) | - |
| 1653 | void QRegExpEngine::dump() const // debug builds only: prints the states, their transitions, the atoms and the anchor alternations via qDebug() | - |
| 1654 | { | - |
| 1655 | int i, j; | - |
| 1656 | qDebug("Case %ssensitive engine", cs ? "" : "in"); | - |
| 1657 | qDebug(" States"); | - |
| 1658 | for (i = 0; i < s.size(); i++) { | - |
| 1659 | qDebug(" %d%s", i, i == InitialState ? " (initial)" : i == FinalState ? " (final)" : ""); | - |
| 1660 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1661 | if (nf > 0) | - |
| 1662 | qDebug(" in atom %d", s[i].atom); | - |
| 1663 | #endif | - |
| 1664 | int m = s[i].match; | - |
| 1665 | if ((m & CharClassBit) != 0) { | - |
| 1666 | qDebug(" match character class %d", m ^ CharClassBit); | - |
| 1667 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 1668 | cl[m ^ CharClassBit].dump(); | - |
| 1669 | #else | - |
| 1670 | qDebug(" negative character class"); | - |
| 1671 | #endif | - |
| 1672 | } else if ((m & BackRefBit) != 0) { | - |
| 1673 | qDebug(" match back-reference %d", m ^ BackRefBit); | - |
| 1674 | } else if (m >= 0x20 && m <= 0x7e) { // printable ASCII: show the character itself too | - |
| 1675 | qDebug(" match 0x%.4x (%c)", m, m); | - |
| 1676 | } else { | - |
| 1677 | qDebug(" match 0x%.4x", m); | - |
| 1678 | } | - |
| 1679 | for (j = 0; j < s[i].outs.size(); j++) { | - |
| 1680 | int next = s[i].outs[j]; | - |
| 1681 | qDebug(" -> %d", next); | - |
| 1682 | if (s[i].reenter.contains(next)) | - |
| 1683 | qDebug(" [reenter %d]", s[i].reenter[next]); | - |
| 1684 | if (s[i].anchors.value(next) != 0) | - |
| 1685 | qDebug(" [anchors 0x%.8x]", s[i].anchors[next]); | - |
| 1686 | } | - |
| 1687 | } | - |
| 1688 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1689 | if (nf > 0) { | - |
| 1690 | qDebug(" Atom Parent Capture"); | - |
| 1691 | for (i = 0; i < nf; i++) { | - |
| 1692 | if (f[i].capture == QRegExpAtom::NoCapture) { | - |
| 1693 | qDebug(" %6d %6d nil", i, f[i].parent); | - |
| 1694 | } else { | - |
| 1695 | int cap = f[i].capture; | - |
| 1696 | bool official = captureForOfficialCapture.contains(cap); | - |
| 1697 | qDebug(" %6d %6d %6d %s", i, f[i].parent, f[i].capture, | - |
| 1698 | official ? "official" : ""); | - |
| 1699 | } | - |
| 1700 | } | - |
| 1701 | } | - |
| 1702 | #endif | - |
| 1703 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | - |
| 1704 | for (i = 0; i < aa.size(); i++) | - |
| 1705 | qDebug(" Anchor alternation 0x%.8x: 0x%.8x 0x%.8x", i, aa[i].a, aa[i].b); // both branches are printed as 8 hex digits | - |
| 1706 | #endif | - |
| 1707 | } | - |
| 1708 | #endif | - |
| 1709 | - | |
| 1710 | void QRegExpEngine::setup() | - |
| 1711 | { | - |
| 1712 | ref.store(1); | - |
| 1713 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1714 | f.resize(32); // initial room for 32 atoms; startAtom() grows f when needed | - |
| 1715 | nf = 0; | - |
| 1716 | cf = -1; // no current atom yet | - |
| 1717 | #endif | - |
| 1718 | officialncap = 0; | - |
| 1719 | ncap = 0; | - |
| 1720 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1721 | caretAnchored = true; | - |
| 1722 | trivial = true; | - |
| 1723 | #endif | - |
| 1724 | valid = false; // the constructor sets this after parsing | - |
| 1725 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 1726 | nbrefs = 0; | - |
| 1727 | #endif | - |
| 1728 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1729 | useGoodStringHeuristic = true; | - |
| 1730 | minl = 0; | - |
| 1731 | occ1.fill(0, NumBadChars); // NumBadChars entries, all 0 | - |
| 1732 | #endif | - |
| 1733 | } | - |
| 1734 | - | |
| 1735 | int QRegExpEngine::setupState(int match) | - |
| 1736 | { | - |
| 1737 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1738 | s += QRegExpAutomatonState(cf, match); // the new state is assigned to the current atom cf | - |
| 1739 | #else | - |
| 1740 | s += QRegExpAutomatonState(match); | - |
| 1741 | #endif | - |
| 1742 | return s.size() - 1; // index of the state just appended | - |
| 1743 | } | - |
| 1744 | - | |
| 1745 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1746 | /* | - |
| 1747 | Functions startAtom() and finishAtom() should be called to delimit | - |
| 1748 | atoms. When a state is created, it is assigned to the current atom. | - |
| 1749 | The information is later used for capturing. startAtom() makes the new atom a child of the current one and returns its index. | - |
| 1750 | */ | - |
| 1751 | int QRegExpEngine::startAtom(bool officialCapture) | - |
| 1752 | { | - |
| 1753 | if ((nf & (nf + 1)) == 0 && nf + 1 >= f.size()) | - |
| 1754 | f.resize((nf + 1) << 1); // double the size; the test above fires only when nf + 1 is a power of two | - |
| 1755 | f[nf].parent = cf; | - |
| 1756 | cf = nf++; | - |
| 1757 | f[cf].capture = officialCapture ? QRegExpAtom::OfficialCapture : QRegExpAtom::NoCapture; | - |
| 1758 | return cf; | - |
| 1759 | } | - |
| 1760 | - | |
| 1761 | void QRegExpEngine::finishAtom(int atom, bool needCapture) | - |
| 1762 | { | - |
| 1763 | if (greedyQuantifiers && needCapture && f[atom].capture == QRegExpAtom::NoCapture) | - |
| 1764 | f[atom].capture = QRegExpAtom::UnofficialCapture; // never overrides an official capture | - |
| 1765 | cf = f.at(atom).parent; // the enclosing atom becomes the current atom again | - |
| 1766 | } | - |
| 1767 | #endif | - |
| 1768 | - | |
| 1769 | #ifndef QT_NO_REGEXP_LOOKAHEAD | - |
| 1770 | /* | - |
| 1771 | Creates a lookahead anchor. Returns the anchor bit of the new lookahead (Anchor_FirstLookahead shifted by its index in ahead), or 0 after reporting RXERR_LIMIT when MaxLookaheads lookaheads already exist. NOTE(review): on that error path eng is neither stored in ahead nor deleted here; confirm that the caller frees it. | - |
| 1772 | */ | - |
| 1773 | int QRegExpEngine::addLookahead(QRegExpEngine *eng, bool negative) | - |
| 1774 | { | - |
| 1775 | int n = ahead.size(); | - |
| 1776 | if (n == MaxLookaheads) { | - |
| 1777 | error(RXERR_LIMIT); | - |
| 1778 | return 0; | - |
| 1779 | } | - |
| 1780 | ahead += new QRegExpLookahead(eng, negative); | - |
| 1781 | return Anchor_FirstLookahead << n; | - |
| 1782 | } | - |
| 1783 | #endif | - |
| 1784 | - | |
| 1785 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1786 | /* | - |
| 1787 | We want the longest leftmost captures. Returns true if capture set 1 (begin1/end1) is better than set 2 (begin2/end2): the ncap captures are compared in index order and the first difference decides, an earlier begin winning first and a later end second. Returns false if the two sets are identical. | - |
| 1788 | */ | - |
| 1789 | static bool isBetterCapture(int ncap, const int *begin1, const int *end1, const int *begin2, | - |
| 1790 | const int *end2) | - |
| 1791 | { | - |
| 1792 | for (int i = 0; i < ncap; i++) { | - |
| 1793 | int delta = begin2[i] - begin1[i]; // it has to start early... | - |
| 1794 | if (delta == 0) | - |
| 1795 | delta = end1[i] - end2[i]; // ...and end late | - |
| 1796 | - | |
| 1797 | if (delta != 0) | - |
| 1798 | return delta > 0; | - |
| 1799 | } | - |
| 1800 | return false; | - |
| 1801 | } | - |
| 1802 | #endif | - |
| 1803 | - | |
| 1804 | /* | - |
| 1805 | Returns \c true if anchor a matches at position pos + i in the input | - |
| 1806 | string, otherwise false. All anchor bits set in a must hold, except that an alternation holds if either of its branches does. capBegin is the capture-begin array consulted for the Anchor_BackRef1Empty anchors. | - |
| 1807 | */ | - |
| 1808 | bool QRegExpMatchState::testAnchor(int i, int a, const int *capBegin) | - |
| 1809 | { | - |
| 1810 | int j; | - |
| 1811 | - | |
| 1812 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | - |
| 1813 | if ((a & QRegExpEngine::Anchor_Alternation) != 0) | - |
| 1814 | return testAnchor(i, eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation).a, capBegin) | - |
| 1815 | || testAnchor(i, eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation).b, capBegin); | - |
| 1816 | #endif | - |
| 1817 | - | |
| 1818 | if ((a & QRegExpEngine::Anchor_Caret) != 0) { | - |
| 1819 | if (pos + i != caretPos) | - |
| 1820 | return false; | - |
| 1821 | } | - |
| 1822 | if ((a & QRegExpEngine::Anchor_Dollar) != 0) { | - |
| 1823 | if (pos + i != len) | - |
| 1824 | return false; | - |
| 1825 | } | - |
| 1826 | #ifndef QT_NO_REGEXP_ESCAPE | - |
| 1827 | if ((a & (QRegExpEngine::Anchor_Word | QRegExpEngine::Anchor_NonWord)) != 0) { | - |
| 1828 | bool before = false; | - |
| 1829 | bool after = false; | - |
| 1830 | if (pos + i != 0) | - |
| 1831 | before = isWord(in[pos + i - 1]); | - |
| 1832 | if (pos + i != len) | - |
| 1833 | after = isWord(in[pos + i]); | - |
| 1834 | if ((a & QRegExpEngine::Anchor_Word) != 0 && (before == after)) // a word anchor needs a word/non-word change here | - |
| 1835 | return false; | - |
| 1836 | if ((a & QRegExpEngine::Anchor_NonWord) != 0 && (before != after)) | - |
| 1837 | return false; | - |
| 1838 | } | - |
| 1839 | #endif | - |
| 1840 | #ifndef QT_NO_REGEXP_LOOKAHEAD | - |
| 1841 | if ((a & QRegExpEngine::Anchor_LookaheadMask) != 0) { | - |
| 1842 | const QVector<QRegExpLookahead *> &ahead = eng->ahead; | - |
| 1843 | for (j = 0; j < ahead.size(); j++) { | - |
| 1844 | if ((a & (QRegExpEngine::Anchor_FirstLookahead << j)) != 0) { | - |
| 1845 | QRegExpMatchState matchState; | - |
| 1846 | matchState.prepareForMatch(ahead[j]->eng); | - |
| 1847 | matchState.match(in + pos + i, len - pos - i, 0, | - |
| 1848 | true, true, caretPos - pos - i); | - |
| 1849 | if ((matchState.captured[0] == 0) == ahead[j]->neg) // captured[0] == 0 means the lookahead matched at offset 0 | - |
| 1850 | return false; | - |
| 1851 | } | - |
| 1852 | } | - |
| 1853 | } | - |
| 1854 | #endif | - |
| 1855 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1856 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 1857 | for (j = 0; j < eng->nbrefs; j++) { | - |
| 1858 | if ((a & (QRegExpEngine::Anchor_BackRef1Empty << j)) != 0) { | - |
| 1859 | int i = eng->captureForOfficialCapture.at(j); // note: shadows the parameter i | - |
| 1860 | if (capBegin[i] != EmptyCapture) | - |
| 1861 | return false; | - |
| 1862 | } | - |
| 1863 | } | - |
| 1864 | #endif | - |
| 1865 | #endif | - |
| 1866 | return true; | - |
| 1867 | } | - |
| 1868 | - | |
| 1869 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 1870 | /* | - |
| 1871 | The three following functions are what Jeffrey Friedl would call | - |
| 1872 | transmissions (or bump-alongs). Using one or the other should make | - |
| 1873 | no difference except in performance. | - |
| 1874 | */ | - |
| 1875 | - | |
| 1876 | bool QRegExpEngine::goodStringMatch(QRegExpMatchState &matchState) const | - |
| 1877 | { | - |
| 1878 | int k = matchState.pos + goodEarlyStart; | - |
| 1879 | QStringMatcher matcher(goodStr.unicode(), goodStr.length(), cs); | - |
| 1880 | while ((k = matcher.indexIn(matchState.in, matchState.len, k)) != -1) { | - |
| 1881 | int from = k - goodLateStart; // start positions tried for this occurrence of goodStr lie in [from, to] | - |
| 1882 | int to = k - goodEarlyStart; | - |
| 1883 | if (from > matchState.pos) | - |
| 1884 | matchState.pos = from; // never move pos backwards | - |
| 1885 | - | |
| 1886 | while (matchState.pos <= to) { | - |
| 1887 | if (matchState.matchHere()) | - |
| 1888 | return true; | - |
| 1889 | ++matchState.pos; | - |
| 1890 | } | - |
| 1891 | ++k; // look for the next occurrence of goodStr | - |
| 1892 | } | - |
| 1893 | return false; | - |
| 1894 | } | - |
| 1895 | - | |
| 1896 | bool QRegExpEngine::badCharMatch(QRegExpMatchState &matchState) const | - |
| 1897 | { | - |
| 1898 | int slideHead = 0; | - |
| 1899 | int slideNext = 0; | - |
| 1900 | int i; | - |
| 1901 | int lastPos = matchState.len - minl; // the scan below stops once pos reaches lastPos | - |
| 1902 | memset(matchState.slideTab, 0, matchState.slideTabSize * sizeof(int)); | - |
| 1903 | - | |
| 1904 | /* | - |
| 1905 | Set up the slide table, used for the bad-character heuristic, | - |
| 1906 | using the table of first occurrence of each character. | - |
| 1907 | */ | - |
| 1908 | for (i = 0; i < minl; i++) { | - |
| 1909 | int sk = occ1[BadChar(matchState.in[matchState.pos + i])]; | - |
| 1910 | if (sk == NoOccurrence) | - |
| 1911 | sk = i + 1; | - |
| 1912 | if (sk > 0) { | - |
| 1913 | int k = i + 1 - sk; | - |
| 1914 | if (k < 0) { | - |
| 1915 | sk = i + 1; | - |
| 1916 | k = 0; | - |
| 1917 | } | - |
| 1918 | if (sk > matchState.slideTab[k]) | - |
| 1919 | matchState.slideTab[k] = sk; | - |
| 1920 | } | - |
| 1921 | } | - |
| 1922 | - | |
| 1923 | if (matchState.pos > lastPos) | - |
| 1924 | return false; | - |
| 1925 | - | |
| 1926 | for (;;) { | - |
| 1927 | if (++slideNext >= matchState.slideTabSize) | - |
| 1928 | slideNext = 0; // wrap around: slideTab is indexed circularly | - |
| 1929 | if (matchState.slideTab[slideHead] > 0) { | - |
| 1930 | if (matchState.slideTab[slideHead] - 1 > matchState.slideTab[slideNext]) | - |
| 1931 | matchState.slideTab[slideNext] = matchState.slideTab[slideHead] - 1; | - |
| 1932 | matchState.slideTab[slideHead] = 0; | - |
| 1933 | } else { | - |
| 1934 | if (matchState.matchHere()) // matchHere() is only attempted when the head entry is 0 | - |
| 1935 | return true; | - |
| 1936 | } | - |
| 1937 | - | |
| 1938 | if (matchState.pos == lastPos) | - |
| 1939 | break; | - |
| 1940 | - | |
| 1941 | /* | - |
| 1942 | Update the slide table. This code has much in common with | - |
| 1943 | the initialization code. | - |
| 1944 | */ | - |
| 1945 | int sk = occ1[BadChar(matchState.in[matchState.pos + minl])]; | - |
| 1946 | if (sk == NoOccurrence) { | - |
| 1947 | matchState.slideTab[slideNext] = minl; | - |
| 1948 | } else if (sk > 0) { | - |
| 1949 | int k = slideNext + minl - sk; | - |
| 1950 | if (k >= matchState.slideTabSize) | - |
| 1951 | k -= matchState.slideTabSize; | - |
| 1952 | if (sk > matchState.slideTab[k]) | - |
| 1953 | matchState.slideTab[k] = sk; | - |
| 1954 | } | - |
| 1955 | slideHead = slideNext; | - |
| 1956 | ++matchState.pos; | - |
| 1957 | } | - |
| 1958 | return false; | - |
| 1959 | } | - |
| 1960 | #else | - |
| 1961 | bool QRegExpEngine::bruteMatch(QRegExpMatchState &matchState) const | - |
| 1962 | { | - |
| 1963 | while (matchState.pos <= matchState.len) { // every position is tried in turn, including pos == len | - |
| 1964 | if (matchState.matchHere()) | - |
| 1965 | return true; | - |
| 1966 | ++matchState.pos; | - |
| 1967 | } | - |
| 1968 | return false; | - |
| 1969 | } | - |
| 1970 | #endif | - |
| 1971 | - | |
| 1972 | /* | - |
| 1973 | Here's the core of the engine. It tries to do a match here and now. | - |
| 1974 | */ | - |
| 1975 | bool QRegExpMatchState::matchHere() | - |
| 1976 | { | - |
| 1977 | int ncur = 1, nnext = 0; | - |
| 1978 | int i = 0, j, k, m; | - |
| 1979 | bool stop = false; | - |
| 1980 | - | |
| 1981 | matchLen = -1; | - |
| 1982 | oneTestMatchedLen = -1; | - |
| 1983 | curStack[0] = QRegExpEngine::InitialState; | - |
| 1984 | - | |
| 1985 | int ncap = eng->ncap; | - |
| 1986 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 1987 | if (ncap > 0) { | - |
| 1988 | for (j = 0; j < ncap; j++) { | - |
| 1989 | curCapBegin[j] = EmptyCapture; | - |
| 1990 | curCapEnd[j] = EmptyCapture; | - |
| 1991 | } | - |
| 1992 | } | - |
| 1993 | #endif | - |
| 1994 | - | |
| 1995 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 1996 | while ((ncur > 0 || !sleeping.isEmpty()) && i <= len - pos && !stop) | - |
| 1997 | #else | - |
| 1998 | while (ncur > 0 && i <= len - pos && !stop) | - |
| 1999 | #endif | - |
| 2000 | { | - |
| 2001 | int ch = (i < len - pos) ? in[pos + i].unicode() : 0; | - |
| 2002 | for (j = 0; j < ncur; j++) { | - |
| 2003 | int cur = curStack[j]; | - |
| 2004 | const QRegExpAutomatonState &scur = eng->s.at(cur); | - |
| 2005 | const QVector<int> &outs = scur.outs; | - |
| 2006 | for (k = 0; k < outs.size(); k++) { | - |
| 2007 | int next = outs.at(k); | - |
| 2008 | const QRegExpAutomatonState &snext = eng->s.at(next); | - |
| 2009 | bool inside = true; | - |
| 2010 | #if !defined(QT_NO_REGEXP_BACKREF) && !defined(QT_NO_REGEXP_CAPTURE) | - |
| 2011 | int needSomeSleep = 0; | - |
| 2012 | #endif | - |
| 2013 | - | |
| 2014 | /* | - |
| 2015 | First, check if the anchors are anchored properly. | - |
| 2016 | */ | - |
| 2017 | int a = scur.anchors.value(next); | - |
| 2018 | if (a != 0 && !testAnchor(i, a, curCapBegin + j * ncap)) | - |
| 2019 | inside = false; | - |
| 2020 | - | |
| 2021 | /* | - |
| 2022 | If indeed they are, check if the input character is | - |
| 2023 | correct for this transition. | - |
| 2024 | */ | - |
| 2025 | if (inside) { | - |
| 2026 | m = snext.match; | - |
| 2027 | if ((m & (QRegExpEngine::CharClassBit | QRegExpEngine::BackRefBit)) == 0) { | - |
| 2028 | if (eng->cs) | - |
| 2029 | inside = (m == ch); | - |
| 2030 | else | - |
| 2031 | inside = (QChar(m).toLower() == QChar(ch).toLower()); | - |
| 2032 | } else if (next == QRegExpEngine::FinalState) { | - |
| 2033 | matchLen = i; | - |
| 2034 | stop = minimal; | - |
| 2035 | inside = true; | - |
| 2036 | } else if ((m & QRegExpEngine::CharClassBit) != 0) { | - |
| 2037 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 2038 | const QRegExpCharClass &cc = eng->cl.at(m ^ QRegExpEngine::CharClassBit); | - |
| 2039 | if (eng->cs) | - |
| 2040 | inside = cc.in(ch); | - |
| 2041 | else if (cc.negative()) | - |
| 2042 | inside = cc.in(QChar(ch).toLower()) && | - |
| 2043 | cc.in(QChar(ch).toUpper()); | - |
| 2044 | else | - |
| 2045 | inside = cc.in(QChar(ch).toLower()) || | - |
| 2046 | cc.in(QChar(ch).toUpper()); | - |
| 2047 | #endif | - |
| 2048 | #if !defined(QT_NO_REGEXP_BACKREF) && !defined(QT_NO_REGEXP_CAPTURE) | - |
| 2049 | } else { /* ((m & QRegExpEngine::BackRefBit) != 0) */ | - |
| 2050 | int bref = m ^ QRegExpEngine::BackRefBit; | - |
| 2051 | int ell = j * ncap + eng->captureForOfficialCapture.at(bref - 1); | - |
| 2052 | - | |
| 2053 | inside = bref <= ncap && curCapBegin[ell] != EmptyCapture; | - |
| 2054 | if (inside) { | - |
| 2055 | if (eng->cs) | - |
| 2056 | inside = (in[pos + curCapBegin[ell]] == QChar(ch)); | - |
| 2057 | else | - |
| 2058 | inside = (in[pos + curCapBegin[ell]].toLower() | - |
| 2059 | == QChar(ch).toLower()); | - |
| 2060 | } | - |
| 2061 | - | |
| 2062 | if (inside) { | - |
| 2063 | int delta; | - |
| 2064 | if (curCapEnd[ell] == EmptyCapture) | - |
| 2065 | delta = i - curCapBegin[ell]; | - |
| 2066 | else | - |
| 2067 | delta = curCapEnd[ell] - curCapBegin[ell]; | - |
| 2068 | - | |
| 2069 | inside = (delta <= len - (pos + i)); | - |
| 2070 | if (inside && delta > 1) { | - |
| 2071 | int n = 1; | - |
| 2072 | if (eng->cs) { | - |
| 2073 | while (n < delta) { | - |
| 2074 | if (in[pos + curCapBegin[ell] + n] | - |
| 2075 | != in[pos + i + n]) | - |
| 2076 | break; | - |
| 2077 | ++n; | - |
| 2078 | } | - |
| 2079 | } else { | - |
| 2080 | while (n < delta) { | - |
| 2081 | QChar a = in[pos + curCapBegin[ell] + n]; | - |
| 2082 | QChar b = in[pos + i + n]; | - |
| 2083 | if (a.toLower() != b.toLower()) | - |
| 2084 | break; | - |
| 2085 | ++n; | - |
| 2086 | } | - |
| 2087 | } | - |
| 2088 | inside = (n == delta); | - |
| 2089 | if (inside) | - |
| 2090 | needSomeSleep = delta - 1; | - |
| 2091 | } | - |
| 2092 | } | - |
| 2093 | #endif | - |
| 2094 | } | - |
| 2095 | } | - |
| 2096 | - | |
| 2097 | /* | - |
| 2098 | We must now update our data structures. | - |
| 2099 | */ | - |
| 2100 | if (inside) { | - |
| 2101 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 2102 | int *capBegin, *capEnd; | - |
| 2103 | #endif | - |
| 2104 | /* | - |
| 2105 | If the next state was not encountered yet, all | - |
| 2106 | is fine. | - |
| 2107 | */ | - |
| 2108 | if ((m = inNextStack[next]) == -1) { | - |
| 2109 | m = nnext++; | - |
| 2110 | nextStack[m] = next; | - |
| 2111 | inNextStack[next] = m; | - |
| 2112 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 2113 | capBegin = nextCapBegin + m * ncap; | - |
| 2114 | capEnd = nextCapEnd + m * ncap; | - |
| 2115 | - | |
| 2116 | /* | - |
| 2117 | Otherwise, we'll first maintain captures in | - |
| 2118 | temporary arrays, and decide at the end whether | - |
| 2119 | it's best to keep the previous capture zones or | - |
| 2120 | the new ones. | - |
| 2121 | */ | - |
| 2122 | } else { | - |
| 2123 | capBegin = tempCapBegin; | - |
| 2124 | capEnd = tempCapEnd; | - |
| 2125 | #endif | - |
| 2126 | } | - |
| 2127 | - | |
| 2128 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 2129 | /* | - |
| 2130 | Updating the capture zones is much of a task. | - |
| 2131 | */ | - |
| 2132 | if (ncap > 0) { | - |
| 2133 | memcpy(capBegin, curCapBegin + j * ncap, ncap * sizeof(int)); | - |
| 2134 | memcpy(capEnd, curCapEnd + j * ncap, ncap * sizeof(int)); | - |
| 2135 | int c = scur.atom, n = snext.atom; | - |
| 2136 | int p = -1, q = -1; | - |
| 2137 | int cap; | - |
| 2138 | - | |
| 2139 | /* | - |
| 2140 | Lemma 1. For any x in the range [0..nf), we | - |
| 2141 | have f[x].parent < x. | - |
| 2142 | - | |
| 2143 | Proof. By looking at startAtom(), it is | - |
| 2144 | clear that cf < nf holds all the time, and | - |
| 2145 | thus that f[nf].parent < nf. | - |
| 2146 | */ | - |
| 2147 | - | |
| 2148 | /* | - |
| 2149 | If we are reentering an atom, we empty all | - |
| 2150 | capture zones inside it. | - |
| 2151 | */ | - |
| 2152 | if ((q = scur.reenter.value(next)) != 0) { | - |
| 2153 | QBitArray b(eng->nf, false); | - |
| 2154 | b.setBit(q, true); | - |
| 2155 | for (int ell = q + 1; ell < eng->nf; ell++) { | - |
| 2156 | if (b.testBit(eng->f.at(ell).parent)) { | - |
| 2157 | b.setBit(ell, true); | - |
| 2158 | cap = eng->f.at(ell).capture; | - |
| 2159 | if (cap >= 0) { | - |
| 2160 | capBegin[cap] = EmptyCapture; | - |
| 2161 | capEnd[cap] = EmptyCapture; | - |
| 2162 | } | - |
| 2163 | } | - |
| 2164 | } | - |
| 2165 | p = eng->f.at(q).parent; | - |
| 2166 | - | |
| 2167 | /* | - |
| 2168 | Otherwise, close the capture zones we are | - |
| 2169 | leaving. We are leaving f[c].capture, | - |
| 2170 | f[f[c].parent].capture, | - |
| 2171 | f[f[f[c].parent].parent].capture, ..., | - |
| 2172 | until f[x].capture, with x such that | - |
| 2173 | f[x].parent is the youngest common ancestor | - |
| 2174 | for c and n. | - |
| 2175 | - | |
| 2176 | We go up along c's and n's ancestry until | - |
| 2177 | we find x. | - |
| 2178 | */ | - |
| 2179 | } else { | - |
| 2180 | p = c; | - |
| 2181 | q = n; | - |
| 2182 | while (p != q) { | - |
| 2183 | if (p > q) { | - |
| 2184 | cap = eng->f.at(p).capture; | - |
| 2185 | if (cap >= 0) { | - |
| 2186 | if (capBegin[cap] == i) { | - |
| 2187 | capBegin[cap] = EmptyCapture; | - |
| 2188 | capEnd[cap] = EmptyCapture; | - |
| 2189 | } else { | - |
| 2190 | capEnd[cap] = i; | - |
| 2191 | } | - |
| 2192 | } | - |
| 2193 | p = eng->f.at(p).parent; | - |
| 2194 | } else { | - |
| 2195 | q = eng->f.at(q).parent; | - |
| 2196 | } | - |
| 2197 | } | - |
| 2198 | } | - |
| 2199 | - | |
| 2200 | /* | - |
| 2201 | In any case, we now open the capture zones | - |
| 2202 | we are entering. We work upwards from n | - |
| 2203 | until we reach p (the parent of the atom we | - |
| 2204 | reenter or the youngest common ancestor). | - |
| 2205 | */ | - |
| 2206 | while (n > p) { | - |
| 2207 | cap = eng->f.at(n).capture; | - |
| 2208 | if (cap >= 0) { | - |
| 2209 | capBegin[cap] = i; | - |
| 2210 | capEnd[cap] = EmptyCapture; | - |
| 2211 | } | - |
| 2212 | n = eng->f.at(n).parent; | - |
| 2213 | } | - |
| 2214 | /* | - |
| 2215 | If the next state was already in | - |
| 2216 | nextStack, we must choose carefully which | - |
| 2217 | capture zones we want to keep. | - |
| 2218 | */ | - |
| 2219 | if (capBegin == tempCapBegin && | - |
| 2220 | isBetterCapture(ncap, capBegin, capEnd, nextCapBegin + m * ncap, | - |
| 2221 | nextCapEnd + m * ncap)) { | - |
| 2222 | memcpy(nextCapBegin + m * ncap, capBegin, ncap * sizeof(int)); | - |
| 2223 | memcpy(nextCapEnd + m * ncap, capEnd, ncap * sizeof(int)); | - |
| 2224 | } | - |
| 2225 | } | - |
| 2226 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 2227 | /* | - |
| 2228 | We are done with updating the capture zones. | - |
| 2229 | It's now time to put the next state to sleep, | - |
| 2230 | if it needs to, and to remove it from | - |
| 2231 | nextStack. | - |
| 2232 | */ | - |
| 2233 | if (needSomeSleep > 0) { | - |
| 2234 | QVector<int> zzZ(2 + 2 * ncap); | - |
| 2235 | zzZ[0] = i + needSomeSleep; | - |
| 2236 | zzZ[1] = next; | - |
| 2237 | if (ncap > 0) { | - |
| 2238 | memcpy(zzZ.data() + 2, capBegin, ncap * sizeof(int)); | - |
| 2239 | memcpy(zzZ.data() + 2 + ncap, capEnd, ncap * sizeof(int)); | - |
| 2240 | } | - |
| 2241 | inNextStack[nextStack[--nnext]] = -1; | - |
| 2242 | sleeping.append(zzZ); | - |
| 2243 | } | - |
| 2244 | #endif | - |
| 2245 | #endif | - |
| 2246 | } | - |
| 2247 | } | - |
| 2248 | } | - |
| 2249 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 2250 | /* | - |
| 2251 | If we reached the final state, hurray! Copy the captured | - |
| 2252 | zone. | - |
| 2253 | */ | - |
| 2254 | if (ncap > 0 && (m = inNextStack[QRegExpEngine::FinalState]) != -1) { | - |
| 2255 | memcpy(capBegin, nextCapBegin + m * ncap, ncap * sizeof(int)); | - |
| 2256 | memcpy(capEnd, nextCapEnd + m * ncap, ncap * sizeof(int)); | - |
| 2257 | } | - |
| 2258 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 2259 | /* | - |
| 2260 | It's time to wake up the sleepers. | - |
| 2261 | */ | - |
| 2262 | j = 0; | - |
| 2263 | while (j < sleeping.count()) { | - |
| 2264 | if (sleeping.at(j)[0] == i) { | - |
| 2265 | const QVector<int> &zzZ = sleeping.at(j); | - |
| 2266 | int next = zzZ[1]; | - |
| 2267 | const int *capBegin = zzZ.data() + 2; | - |
| 2268 | const int *capEnd = zzZ.data() + 2 + ncap; | - |
| 2269 | bool copyOver = true; | - |
| 2270 | - | |
| 2271 | if ((m = inNextStack[next]) == -1) { | - |
| 2272 | m = nnext++; | - |
| 2273 | nextStack[m] = next; | - |
| 2274 | inNextStack[next] = m; | - |
| 2275 | } else { | - |
| 2276 | copyOver = isBetterCapture(ncap, nextCapBegin + m * ncap, nextCapEnd + m * ncap, | - |
| 2277 | capBegin, capEnd); | - |
| 2278 | } | - |
| 2279 | if (copyOver) { | - |
| 2280 | memcpy(nextCapBegin + m * ncap, capBegin, ncap * sizeof(int)); | - |
| 2281 | memcpy(nextCapEnd + m * ncap, capEnd, ncap * sizeof(int)); | - |
| 2282 | } | - |
| 2283 | - | |
| 2284 | sleeping.removeAt(j); | - |
| 2285 | } else { | - |
| 2286 | ++j; | - |
| 2287 | } | - |
| 2288 | } | - |
| 2289 | #endif | - |
| 2290 | #endif | - |
| 2291 | for (j = 0; j < nnext; j++) | - |
| 2292 | inNextStack[nextStack[j]] = -1; | - |
| 2293 | - | |
| 2294 | // avoid needless iteration that confuses oneTestMatchedLen | - |
| 2295 | if (nnext == 1 && nextStack[0] == QRegExpEngine::FinalState | - |
| 2296 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 2297 | && sleeping.isEmpty() | - |
| 2298 | #endif | - |
| 2299 | ) | - |
| 2300 | stop = true; | - |
| 2301 | - | |
| 2302 | qSwap(curStack, nextStack); | - |
| 2303 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 2304 | qSwap(curCapBegin, nextCapBegin); | - |
| 2305 | qSwap(curCapEnd, nextCapEnd); | - |
| 2306 | #endif | - |
| 2307 | ncur = nnext; | - |
| 2308 | nnext = 0; | - |
| 2309 | ++i; | - |
| 2310 | } | - |
| 2311 | - | |
| 2312 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 2313 | /* | - |
| 2314 | If minimal matching is enabled, we might have some sleepers | - |
| 2315 | left. | - |
| 2316 | */ | - |
| 2317 | if (!sleeping.isEmpty()) | - |
| 2318 | sleeping.clear(); | - |
| 2319 | #endif | - |
| 2320 | - | |
| 2321 | oneTestMatchedLen = i - 1; | - |
| 2322 | return (matchLen >= 0); | - |
| 2323 | } | - |
| 2324 | - | |
| 2325 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 2326 | - | |
| 2327 | /* Builds a class with no category bits (c = 0) that is not negated (n = false). */ QRegExpCharClass::QRegExpCharClass() | - |
| 2328 | : c(0), n(false) | - |
| 2329 | { | - |
| 2330 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2331 | occ1.fill(NoOccurrence, NumBadChars); /* all NumBadChars slots start as NoOccurrence */ | - |
| 2332 | #endif | - |
| 2333 | } | - |
| 2334 | - | |
| 2335 | /* Resets the class: drops all category bits and all ranges and turns negation off. occ1 is not modified here. */ void QRegExpCharClass::clear() | - |
| 2336 | { | - |
| 2337 | c = 0; | - |
| 2338 | r.resize(0); | - |
| 2339 | n = false; | - |
| 2340 | } | 34392 |
| 2341 | - | |
| 2342 | /* Stores the negation flag in n. With optimizations enabled, every occ1 slot is overwritten with 0. */ void QRegExpCharClass::setNegative(bool negative) | - |
| 2343 | { | - |
| 2344 | n = negative; | - |
| 2345 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2346 | occ1.fill(0, NumBadChars); /* NOTE(review): presumably 0 disables the NoOccurrence shortcut taken by in() - confirm */ | - |
| 2347 | #endif | - |
| 2348 | } | - |
| 2349 | - | |
| 2350 | /* Adds character categories to the class. cats is a bit mask in the same FLAG() encoding as all_cats below; bits outside all_cats are ignored. */ void QRegExpCharClass::addCategories(uint cats) | - |
| 2351 | { | - |
| 2352 | static const int all_cats = FLAG(QChar::Mark_NonSpacing) | | - |
| 2353 | FLAG(QChar::Mark_SpacingCombining) | | - |
| 2354 | FLAG(QChar::Mark_Enclosing) | | - |
| 2355 | FLAG(QChar::Number_DecimalDigit) | | - |
| 2356 | FLAG(QChar::Number_Letter) | | - |
| 2357 | FLAG(QChar::Number_Other) | | - |
| 2358 | FLAG(QChar::Separator_Space) | | - |
| 2359 | FLAG(QChar::Separator_Line) | | - |
| 2360 | FLAG(QChar::Separator_Paragraph) | | - |
| 2361 | FLAG(QChar::Other_Control) | | - |
| 2362 | FLAG(QChar::Other_Format) | | - |
| 2363 | FLAG(QChar::Other_Surrogate) | | - |
| 2364 | FLAG(QChar::Other_PrivateUse) | | - |
| 2365 | FLAG(QChar::Other_NotAssigned) | | - |
| 2366 | FLAG(QChar::Letter_Uppercase) | | - |
| 2367 | FLAG(QChar::Letter_Lowercase) | | - |
| 2368 | FLAG(QChar::Letter_Titlecase) | | - |
| 2369 | FLAG(QChar::Letter_Modifier) | | - |
| 2370 | FLAG(QChar::Letter_Other) | | - |
| 2371 | FLAG(QChar::Punctuation_Connector) | | - |
| 2372 | FLAG(QChar::Punctuation_Dash) | | - |
| 2373 | FLAG(QChar::Punctuation_Open) | | - |
| 2374 | FLAG(QChar::Punctuation_Close) | | - |
| 2375 | FLAG(QChar::Punctuation_InitialQuote) | | - |
| 2376 | FLAG(QChar::Punctuation_FinalQuote) | | - |
| 2377 | FLAG(QChar::Punctuation_Other) | | - |
| 2378 | FLAG(QChar::Symbol_Math) | | - |
| 2379 | FLAG(QChar::Symbol_Currency) | | - |
| 2380 | FLAG(QChar::Symbol_Modifier) | | - |
| 2381 | FLAG(QChar::Symbol_Other); | - |
| 2382 | c |= (all_cats & cats); /* keep only the recognised bits and accumulate them in c */ | - |
| 2383 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2384 | occ1.fill(0, NumBadChars); /* every occ1 slot is overwritten with 0 */ | - |
| 2385 | #endif | - |
| 2386 | } | - |
| 2387 | - | |
| 2388 | /* Appends the inclusive code unit range [from, to] to r; the bounds are swapped first if given in reverse order. */ void QRegExpCharClass::addRange(ushort from, ushort to) | - |
| 2389 | { | - |
| 2390 | if (from > to) | - |
| 2391 | qSwap(from, to); | - |
| 2392 | int m = r.size(); | - |
| 2393 | r.resize(m + 1); | - |
| 2394 | r[m].from = from; | - |
| 2395 | r[m].len = to - from + 1; /* the range is stored as start plus length */ | - |
| 2396 | - | |
| 2397 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2398 | int i; | - |
| 2399 | - | |
| 2400 | if (to - from < NumBadChars) { /* range spans fewer than NumBadChars values: zero only the slots it maps to */ | - |
| 2401 | if (from % NumBadChars <= to % NumBadChars) { | - |
| 2402 | for (i = from % NumBadChars; i <= to % NumBadChars; i++) | - |
| 2403 | occ1[i] = 0; | - |
| 2404 | } else { /* the slot indices wrap around the end of occ1: zero the head and the tail */ | - |
| 2405 | for (i = 0; i <= to % NumBadChars; i++) | - |
| 2406 | occ1[i] = 0; | - |
| 2407 | for (i = from % NumBadChars; i < NumBadChars; i++) | - |
| 2408 | occ1[i] = 0; | - |
| 2409 | } | - |
| 2410 | } else { /* range covers every slot */ | - |
| 2411 | occ1.fill(0, NumBadChars); | - |
| 2412 | } | - |
| 2413 | #endif | - |
| 2414 | } | - |
| 2415 | - | |
| 2416 | /* Tests whether ch belongs to the class. Returns !n when ch matches a stored category or range, and n otherwise, so the negation flag inverts the result. */ bool QRegExpCharClass::in(QChar ch) const | - |
| 2417 | { | - |
| 2418 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2419 | if (occ1.at(BadChar(ch)) == NoOccurrence) /* early exit: the occ1 slot for ch is still NoOccurrence */ | - |
| 2420 | return n; | - |
| 2421 | #endif | - |
| 2422 | - | |
| 2423 | if (c != 0 && (c & FLAG(ch.category())) != 0) /* category hit */ | - |
| 2424 | return !n; | - |
| 2425 | - | |
| 2426 | const int uc = ch.unicode(); | - |
| 2427 | int size = r.size(); | - |
| 2428 | - | |
| 2429 | for (int i = 0; i < size; ++i) { | - |
| 2430 | const QRegExpCharClassRange &range = r.at(i); | - |
| 2431 | if (uint(uc - range.from) < uint(r.at(i).len)) /* unsigned compare: a negative difference wraps to a huge value, so this holds only for from <= uc < from + len */ | - |
| 2432 | return !n; | - |
| 2433 | } | - |
| 2434 | return n; | - |
| 2435 | } | - |
| 2436 | - | |
| 2437 | #if defined(QT_DEBUG) | - |
| 2438 | /* Debug helper: prints whether the class is negated, its category mask if nonzero, and each stored range via qDebug(). */ void QRegExpCharClass::dump() const | - |
| 2439 | { | - |
| 2440 | int i; | - |
| 2441 | qDebug(" %stive character class", n ? "nega" : "posi"); | - |
| 2442 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 2443 | if (c != 0) | - |
| 2444 | qDebug(" categories 0x%.8x", c); | - |
| 2445 | #endif | - |
| 2446 | for (i = 0; i < r.size(); i++) | - |
| 2447 | qDebug(" 0x%.4x through 0x%.4x", r[i].from, r[i].from + r[i].len - 1); /* last value of the range is from + len - 1 */ | - |
| 2448 | } | - |
| 2449 | #endif | - |
| 2450 | #endif | - |
| 2451 | - | |
| 2452 | /* Builds an empty box bound to engine: no skip anchors, minl = 0 and, with optimizations enabled, zeroed earlyStart, lateStart and maxl. */ QRegExpEngine::Box::Box(QRegExpEngine *engine) | - |
| 2453 | : eng(engine), skipanchors(0) | - |
| 2454 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2455 | , earlyStart(0), lateStart(0), maxl(0) | - |
| 2456 | #endif | - |
| 2457 | { | - |
| 2458 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2459 | occ1.fill(NoOccurrence, NumBadChars); /* all NumBadChars slots start as NoOccurrence */ | - |
| 2460 | #endif | - |
| 2461 | minl = 0; | - |
| 2462 | } | - |
| 2463 | - | |
| 2464 | /* Copy assignment: copies each member of b into this box one by one and returns *this. */ QRegExpEngine::Box &QRegExpEngine::Box::operator=(const Box &b) | - |
| 2465 | { | - |
| 2466 | eng = b.eng; | - |
| 2467 | ls = b.ls; | - |
| 2468 | rs = b.rs; | - |
| 2469 | lanchors = b.lanchors; | - |
| 2470 | ranchors = b.ranchors; | - |
| 2471 | skipanchors = b.skipanchors; | - |
| 2472 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2473 | earlyStart = b.earlyStart; | - |
| 2474 | lateStart = b.lateStart; | - |
| 2475 | str = b.str; | - |
| 2476 | leftStr = b.leftStr; | - |
| 2477 | rightStr = b.rightStr; | - |
| 2478 | maxl = b.maxl; | - |
| 2479 | occ1 = b.occ1; | - |
| 2480 | #endif | - |
| 2481 | minl = b.minl; | - |
| 2482 | return *this; | - |
| 2483 | } | - |
| 2484 | - | |
| 2485 | /* Turns this box into one for the single character ch: one engine state created for ch serves as both the left and the right state, and minl becomes 1. */ void QRegExpEngine::Box::set(QChar ch) | - |
| 2486 | { | - |
| 2487 | ls.resize(1); | - |
| 2488 | ls[0] = eng->createState(ch); | - |
| 2489 | rs = ls; | - |
| 2490 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2491 | str = ch; | - |
| 2492 | leftStr = ch; | - |
| 2493 | rightStr = ch; | - |
| 2494 | maxl = 1; | - |
| 2495 | occ1[BadChar(ch)] = 0; /* only the slot that ch maps to is set to 0 */ | - |
| 2496 | #endif | - |
| 2497 | minl = 1; | - |
| 2498 | } | - |
| 2499 | - | |
| 2500 | /* Turns this box into one for the character class cc: one engine state created for cc serves as both the left and the right state, and minl becomes 1. */ void QRegExpEngine::Box::set(const QRegExpCharClass &cc) | - |
| 2501 | { | - |
| 2502 | ls.resize(1); | - |
| 2503 | ls[0] = eng->createState(cc); | - |
| 2504 | rs = ls; | - |
| 2505 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2506 | maxl = 1; | - |
| 2507 | occ1 = cc.firstOccurrence(); /* occurrence table is taken from the class itself */ | - |
| 2508 | #endif | - |
| 2509 | minl = 1; | - |
| 2510 | } | - |
| 2511 | - | |
| 2512 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 2513 | /* Turns this box into one for back-reference number bref: one engine state created for bref serves as both the left and the right state; minl is 0 and, with optimizations enabled, maxl is InftyLen. */ void QRegExpEngine::Box::set(int bref) | - |
| 2514 | { | - |
| 2515 | ls.resize(1); | - |
| 2516 | ls[0] = eng->createState(bref); | - |
| 2517 | rs = ls; | - |
| 2518 | if (bref >= 1 && bref <= MaxBackRefs) /* skipanchors is left unchanged for any other bref */ | - |
| 2519 | skipanchors = Anchor_BackRef0Empty << bref; | - |
| 2520 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2521 | maxl = InftyLen; | - |
| 2522 | #endif | - |
| 2523 | minl = 0; | - |
| 2524 | } | - |
| 2525 | #endif | - |
| 2526 | - | |
| 2527 | /* Concatenation: appends box b after this box. Adds transitions and anchors from this box's right states to b's left states, then updates the state sets, anchors, length bounds and optimization heuristics of this box. */ void QRegExpEngine::Box::cat(const Box &b) | - |
| 2528 | { | - |
| 2529 | eng->addCatTransitions(rs, b.ls); | - |
| 2530 | addAnchorsToEngine(b); | - |
| 2531 | if (minl == 0) { /* this box has minimum length 0: b's left states also become left states of the result */ | - |
| 2532 | lanchors.unite(b.lanchors); | - |
| 2533 | if (skipanchors != 0) { | - |
| 2534 | for (int i = 0; i < b.ls.size(); i++) { | - |
| 2535 | int a = eng->anchorConcatenation(lanchors.value(b.ls.at(i), 0), skipanchors); | - |
| 2536 | lanchors.insert(b.ls.at(i), a); | - |
| 2537 | } | - |
| 2538 | } | - |
| 2539 | mergeInto(&ls, b.ls); | - |
| 2540 | } | - |
| 2541 | if (b.minl == 0) { /* b has minimum length 0: the current right states stay and b's are merged in */ | - |
| 2542 | ranchors.unite(b.ranchors); | - |
| 2543 | if (b.skipanchors != 0) { | - |
| 2544 | for (int i = 0; i < rs.size(); i++) { | - |
| 2545 | int a = eng->anchorConcatenation(ranchors.value(rs.at(i), 0), b.skipanchors); | - |
| 2546 | ranchors.insert(rs.at(i), a); | - |
| 2547 | } | - |
| 2548 | } | - |
| 2549 | mergeInto(&rs, b.rs); | - |
| 2550 | } else { /* otherwise b's right states and right anchors replace the current ones */ | - |
| 2551 | ranchors = b.ranchors; | - |
| 2552 | rs = b.rs; | - |
| 2553 | } | - |
| 2554 | - | |
| 2555 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2556 | if (maxl != InftyLen) { /* pick the longer candidate for str: the junction rightStr + b.leftStr, or b.str */ | - |
| 2557 | if (rightStr.length() + b.leftStr.length() > | - |
| 2558 | qMax(str.length(), b.str.length())) { | - |
| 2559 | earlyStart = minl - rightStr.length(); | - |
| 2560 | lateStart = maxl - rightStr.length(); | - |
| 2561 | str = rightStr + b.leftStr; | - |
| 2562 | } else if (b.str.length() > str.length()) { | - |
| 2563 | earlyStart = minl + b.earlyStart; | - |
| 2564 | lateStart = maxl + b.lateStart; | - |
| 2565 | str = b.str; | - |
| 2566 | } | - |
| 2567 | } | - |
| 2568 | - | |
| 2569 | if (leftStr.length() == maxl) /* leftStr is as long as maxl, so it can be extended with b.leftStr */ | - |
| 2570 | leftStr += b.leftStr; | - |
| 2571 | - | |
| 2572 | if (b.rightStr.length() == b.maxl) { | - |
| 2573 | rightStr += b.rightStr; | - |
| 2574 | } else { | - |
| 2575 | rightStr = b.rightStr; | - |
| 2576 | } | - |
| 2577 | - | |
| 2578 | if (maxl == InftyLen || b.maxl == InftyLen) { /* InftyLen on either side makes the sum InftyLen */ | - |
| 2579 | maxl = InftyLen; | - |
| 2580 | } else { | - |
| 2581 | maxl += b.maxl; | - |
| 2582 | } | - |
| 2583 | - | |
| 2584 | for (int i = 0; i < NumBadChars; i++) { /* b's occurrences are shifted by this box's minl (read before minl is updated below); the smaller value wins */ | - |
| 2585 | if (b.occ1.at(i) != NoOccurrence && minl + b.occ1.at(i) < occ1.at(i)) | - |
| 2586 | occ1[i] = minl + b.occ1.at(i); | - |
| 2587 | } | - |
| 2588 | #endif | - |
| 2589 | - | |
| 2590 | minl += b.minl; | - |
| 2591 | if (minl == 0) /* skip anchors are kept only while the combined minimum length is still 0 */ | - |
| 2592 | skipanchors = eng->anchorConcatenation(skipanchors, b.skipanchors); | - |
| 2593 | else | - |
| 2594 | skipanchors = 0; | - |
| 2595 | } | - |
| 2596 | - | |
| 2597 | /* Alternation: merges box b into this box. Left and right states and anchors are united, minl becomes the smaller and maxl the larger of the two, and the string heuristics are cleared. */ void QRegExpEngine::Box::orx(const Box &b) | - |
| 2598 | { | - |
| 2599 | mergeInto(&ls, b.ls); | - |
| 2600 | lanchors.unite(b.lanchors); | - |
| 2601 | mergeInto(&rs, b.rs); | - |
| 2602 | ranchors.unite(b.ranchors); | - |
| 2603 | - | |
| 2604 | if (b.minl == 0) { /* skipanchors changes only when b has minimum length 0 */ | - |
| 2605 | if (minl == 0) | - |
| 2606 | skipanchors = eng->anchorAlternation(skipanchors, b.skipanchors); | - |
| 2607 | else | - |
| 2608 | skipanchors = b.skipanchors; | - |
| 2609 | } | - |
| 2610 | - | |
| 2611 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2612 | for (int i = 0; i < NumBadChars; i++) { /* keep the smaller occ1 value per slot */ | - |
| 2613 | if (occ1.at(i) > b.occ1.at(i)) | - |
| 2614 | occ1[i] = b.occ1.at(i); | - |
| 2615 | } | - |
| 2616 | earlyStart = 0; | - |
| 2617 | lateStart = 0; | - |
| 2618 | str = QString(); | - |
| 2619 | leftStr = QString(); | - |
| 2620 | rightStr = QString(); | - |
| 2621 | if (b.maxl > maxl) | - |
| 2622 | maxl = b.maxl; | - |
| 2623 | #endif | - |
| 2624 | if (b.minl < minl) | - |
| 2625 | minl = b.minl; | - |
| 2626 | } | - |
| 2627 | - | |
| 2628 | /* Adds transitions and anchors from this box's right states back to its own left states and, with optimizations enabled, sets maxl to InftyLen. NOTE(review): presumably implements the + quantifier - confirm against callers. */ void QRegExpEngine::Box::plus(int atom) | - |
| 2629 | { | - |
| 2630 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 2631 | eng->addPlusTransitions(rs, ls, atom); | - |
| 2632 | #else | - |
| 2633 | Q_UNUSED(atom); | - |
| 2634 | eng->addCatTransitions(rs, ls); | - |
| 2635 | #endif | - |
| 2636 | addAnchorsToEngine(*this); | - |
| 2637 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2638 | maxl = InftyLen; | - |
| 2639 | #endif | - |
| 2640 | } | - |
| 2641 | - | |
| 2642 | /* Lowers the minimum length of the box to 0, clears skipanchors and, with optimizations enabled, resets the start offsets and the str, leftStr and rightStr heuristics. */ void QRegExpEngine::Box::opt() | - |
| 2643 | { | - |
| 2644 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2645 | earlyStart = 0; | - |
| 2646 | lateStart = 0; | - |
| 2647 | str = QString(); | - |
| 2648 | leftStr = QString(); | - |
| 2649 | rightStr = QString(); | - |
| 2650 | #endif | - |
| 2651 | skipanchors = 0; | - |
| 2652 | minl = 0; | - |
| 2653 | } | - |
| 2654 | - | |
| 2655 | /* Appends anchor a after this box: for every right state the stored right anchor (default 0) is concatenated with a; when minl is 0 the skip anchors are concatenated with a as well. Does nothing when a is 0. */ void QRegExpEngine::Box::catAnchor(int a) | - |
| 2656 | { | - |
| 2657 | if (a != 0) { | - |
| 2658 | for (int i = 0; i < rs.size(); i++) { | - |
| 2659 | a = eng->anchorConcatenation(ranchors.value(rs.at(i), 0), a); /* NOTE(review): a is overwritten here, so later iterations and the skipanchors update below see the accumulated value, not the original argument - confirm this is intended */ | - |
| 2660 | ranchors.insert(rs.at(i), a); | - |
| 2661 | } | - |
| 2662 | if (minl == 0) | - |
| 2663 | skipanchors = eng->anchorConcatenation(skipanchors, a); | - |
| 2664 | } | - |
| 2665 | } | - |
| 2666 | - | |
| 2667 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 2668 | /* Copies this box's heuristic data (start offsets, str, minl, occ1) into the engine and then lets the engine choose its heuristic. */ void QRegExpEngine::Box::setupHeuristics() | - |
| 2669 | { | - |
| 2670 | eng->goodEarlyStart = earlyStart; | - |
| 2671 | eng->goodLateStart = lateStart; | - |
| 2672 | eng->goodStr = eng->cs ? str : str.toLower(); /* str is lower-cased when eng->cs is false (presumably the case-sensitivity flag - confirm) */ | - |
| 2673 | - | |
| 2674 | eng->minl = minl; | - |
| 2675 | if (eng->cs) { | - |
| 2676 | /* | - |
| 2677 | A regular expression such as 112|1 has occ1['2'] = 2 and minl = | - |
| 2678 | 1 at this point. An entry of occ1 has to be at most minl or | - |
| 2679 | infinity for the rest of the algorithm to go well. | - |
| 2680 | - | |
| 2681 | We waited until here before normalizing these cases (instead of | - |
| 2682 | doing it in Box::orx()) because sometimes things improve by | - |
| 2683 | themselves. Consider for example (112|1)34. | - |
| 2684 | */ | - |
| 2685 | for (int i = 0; i < NumBadChars; i++) { /* clamp every real occurrence to at most minl */ | - |
| 2686 | if (occ1.at(i) != NoOccurrence && occ1.at(i) >= minl) | - |
| 2687 | occ1[i] = minl; | - |
| 2688 | } | - |
| 2689 | eng->occ1 = occ1; | - |
| 2690 | } else { /* eng->cs is false: the engine's occ1 is filled with 0 instead of using this box's table */ | - |
| 2691 | eng->occ1.fill(0, NumBadChars); | - |
| 2692 | } | - |
| 2693 | - | |
| 2694 | eng->heuristicallyChooseHeuristic(); | - |
| 2695 | } | - |
| 2696 | #endif | - |
| 2697 | - | |
| 2698 | #if defined(QT_DEBUG) | - |
| 2699 | /* Debug helper: prints minl, the left and right states (with their anchors when nonzero) and the skip anchors via qDebug(). */ void QRegExpEngine::Box::dump() const | - |
| 2700 | { | - |
| 2701 | int i; | - |
| 2702 | qDebug("Box of at least %d character%s", minl, minl == 1 ? "" : "s"); | - |
| 2703 | qDebug(" Left states:"); | - |
| 2704 | for (i = 0; i < ls.size(); i++) { | - |
| 2705 | if (lanchors.value(ls[i], 0) == 0) /* no anchor recorded for this state */ | - |
| 2706 | qDebug(" %d", ls[i]); | - |
| 2707 | else | - |
| 2708 | qDebug(" %d [anchors 0x%.8x]", ls[i], lanchors[ls[i]]); | - |
| 2709 | } | - |
| 2710 | qDebug(" Right states:"); | - |
| 2711 | for (i = 0; i < rs.size(); i++) { | - |
| 2712 | if (ranchors.value(rs[i], 0) == 0) /* no anchor recorded for this state */ | - |
| 2713 | qDebug(" %d", rs[i]); | - |
| 2714 | else | - |
| 2715 | qDebug(" %d [anchors 0x%.8x]", rs[i], ranchors[rs[i]]); | - |
| 2716 | } | - |
| 2717 | qDebug(" Skip anchors: 0x%.8x", skipanchors); | - |
| 2718 | } | - |
| 2719 | #endif | - |
| 2720 | - | |
| 2721 | /* For every pair made of a right state of this box and a left state of box to, concatenates the two stored anchors (0 when none is stored) and registers the result with the engine for that transition. */ void QRegExpEngine::Box::addAnchorsToEngine(const Box &to) const | - |
| 2722 | { | - |
| 2723 | for (int i = 0; i < to.ls.size(); i++) { | - |
| 2724 | for (int j = 0; j < rs.size(); j++) { | - |
| 2725 | int a = eng->anchorConcatenation(ranchors.value(rs.at(j), 0), | - |
| 2726 | to.lanchors.value(to.ls.at(i), 0)); | - |
| 2727 | eng->addAnchors(rs[j], to.ls[i], a); | - |
| 2728 | } | - |
| 2729 | } | - |
| 2730 | } | - |
| 2731 | - | |
| 2732 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 2733 | // lookup table of named code point ranges for the xml schema extensions | - |
| 2734 | // kept sorted by name so that it can be binary-searched | - |
| 2735 | static const struct CategoriesRangeMapEntry { | - |
| 2736 | const char name[40]; | - |
| 2737 | uint first, second; | - |
| 2738 | } categoriesRangeMap[] = { | - |
| 2739 | { "AegeanNumbers", 0x10100, 0x1013F }, | - |
| 2740 | { "AlphabeticPresentationForms", 0xFB00, 0xFB4F }, | - |
| 2741 | { "AncientGreekMusicalNotation", 0x1D200, 0x1D24F }, | - |
| 2742 | { "AncientGreekNumbers", 0x10140, 0x1018F }, | - |
| 2743 | { "Arabic", 0x0600, 0x06FF }, | - |
| 2744 | { "ArabicPresentationForms-A", 0xFB50, 0xFDFF }, | - |
| 2745 | { "ArabicPresentationForms-B", 0xFE70, 0xFEFF }, | - |
| 2746 | { "ArabicSupplement", 0x0750, 0x077F }, | - |
| 2747 | { "Armenian", 0x0530, 0x058F }, | - |
| 2748 | { "Arrows", 0x2190, 0x21FF }, | - |
| 2749 | { "BasicLatin", 0x0000, 0x007F }, | - |
| 2750 | { "Bengali", 0x0980, 0x09FF }, | - |
| 2751 | { "BlockElements", 0x2580, 0x259F }, | - |
| 2752 | { "Bopomofo", 0x3100, 0x312F }, | - |
| 2753 | { "BopomofoExtended", 0x31A0, 0x31BF }, | - |
| 2754 | { "BoxDrawing", 0x2500, 0x257F }, | - |
| 2755 | { "BraillePatterns", 0x2800, 0x28FF }, | - |
| 2756 | { "Buginese", 0x1A00, 0x1A1F }, | - |
| 2757 | { "Buhid", 0x1740, 0x175F }, | - |
| 2758 | { "ByzantineMusicalSymbols", 0x1D000, 0x1D0FF }, | - |
| 2759 | { "CJKCompatibility", 0x3300, 0x33FF }, | - |
| 2760 | { "CJKCompatibilityForms", 0xFE30, 0xFE4F }, | - |
| 2761 | { "CJKCompatibilityIdeographs", 0xF900, 0xFAFF }, | - |
| 2762 | { "CJKCompatibilityIdeographsSupplement", 0x2F800, 0x2FA1F }, | - |
| 2763 | { "CJKRadicalsSupplement", 0x2E80, 0x2EFF }, | - |
| 2764 | { "CJKStrokes", 0x31C0, 0x31EF }, | - |
| 2765 | { "CJKSymbolsandPunctuation", 0x3000, 0x303F }, | - |
| 2766 | { "CJKUnifiedIdeographs", 0x4E00, 0x9FFF }, | - |
| 2767 | { "CJKUnifiedIdeographsExtensionA", 0x3400, 0x4DB5 }, | - |
| 2768 | { "CJKUnifiedIdeographsExtensionB", 0x20000, 0x2A6DF }, | - |
| 2769 | { "Cherokee", 0x13A0, 0x13FF }, | - |
| 2770 | { "CombiningDiacriticalMarks", 0x0300, 0x036F }, | - |
| 2771 | { "CombiningDiacriticalMarksSupplement", 0x1DC0, 0x1DFF }, | - |
| 2772 | { "CombiningHalfMarks", 0xFE20, 0xFE2F }, | - |
| 2773 | { "CombiningMarksforSymbols", 0x20D0, 0x20FF }, | - |
| 2774 | { "ControlPictures", 0x2400, 0x243F }, | - |
| 2775 | { "Coptic", 0x2C80, 0x2CFF }, | - |
| 2776 | { "CurrencySymbols", 0x20A0, 0x20CF }, | - |
| 2777 | { "CypriotSyllabary", 0x10800, 0x1083F }, | - |
| 2778 | { "Cyrillic", 0x0400, 0x04FF }, | - |
| 2779 | { "CyrillicSupplement", 0x0500, 0x052F }, | - |
| 2780 | { "Deseret", 0x10400, 0x1044F }, | - |
| 2781 | { "Devanagari", 0x0900, 0x097F }, | - |
| 2782 | { "Dingbats", 0x2700, 0x27BF }, | - |
| 2783 | { "EnclosedAlphanumerics", 0x2460, 0x24FF }, | - |
| 2784 | { "EnclosedCJKLettersandMonths", 0x3200, 0x32FF }, | - |
| 2785 | { "Ethiopic", 0x1200, 0x137F }, | - |
| 2786 | { "EthiopicExtended", 0x2D80, 0x2DDF }, | - |
| 2787 | { "EthiopicSupplement", 0x1380, 0x139F }, | - |
| 2788 | { "GeneralPunctuation", 0x2000, 0x206F }, | - |
| 2789 | { "GeometricShapes", 0x25A0, 0x25FF }, | - |
| 2790 | { "Georgian", 0x10A0, 0x10FF }, | - |
| 2791 | { "GeorgianSupplement", 0x2D00, 0x2D2F }, | - |
| 2792 | { "Glagolitic", 0x2C00, 0x2C5F }, | - |
| 2793 | { "Gothic", 0x10330, 0x1034F }, | - |
| 2794 | { "Greek", 0x0370, 0x03FF }, | - |
| 2795 | { "GreekExtended", 0x1F00, 0x1FFF }, | - |
| 2796 | { "Gujarati", 0x0A80, 0x0AFF }, | - |
| 2797 | { "Gurmukhi", 0x0A00, 0x0A7F }, | - |
| 2798 | { "HalfwidthandFullwidthForms", 0xFF00, 0xFFEF }, | - |
| 2799 | { "HangulCompatibilityJamo", 0x3130, 0x318F }, | - |
| 2800 | { "HangulJamo", 0x1100, 0x11FF }, | - |
| 2801 | { "HangulSyllables", 0xAC00, 0xD7A3 }, | - |
| 2802 | { "Hanunoo", 0x1720, 0x173F }, | - |
| 2803 | { "Hebrew", 0x0590, 0x05FF }, | - |
| 2804 | { "Hiragana", 0x3040, 0x309F }, | - |
| 2805 | { "IPAExtensions", 0x0250, 0x02AF }, | - |
| 2806 | { "IdeographicDescriptionCharacters", 0x2FF0, 0x2FFF }, | - |
| 2807 | { "Kanbun", 0x3190, 0x319F }, | - |
| 2808 | { "KangxiRadicals", 0x2F00, 0x2FDF }, | - |
| 2809 | { "Kannada", 0x0C80, 0x0CFF }, | - |
| 2810 | { "Katakana", 0x30A0, 0x30FF }, | - |
| 2811 | { "KatakanaPhoneticExtensions", 0x31F0, 0x31FF }, | - |
| 2812 | { "Kharoshthi", 0x10A00, 0x10A5F }, | - |
| 2813 | { "Khmer", 0x1780, 0x17FF }, | - |
| 2814 | { "KhmerSymbols", 0x19E0, 0x19FF }, | - |
| 2815 | { "Lao", 0x0E80, 0x0EFF }, | - |
| 2816 | { "Latin-1Supplement", 0x0080, 0x00FF }, | - |
| 2817 | { "LatinExtended-A", 0x0100, 0x017F }, | - |
| 2818 | { "LatinExtended-B", 0x0180, 0x024F }, | - |
| 2819 | { "LatinExtendedAdditional", 0x1E00, 0x1EFF }, | - |
| 2820 | { "LetterlikeSymbols", 0x2100, 0x214F }, | - |
| 2821 | { "Limbu", 0x1900, 0x194F }, | - |
| 2822 | { "LinearBIdeograms", 0x10080, 0x100FF }, | - |
| 2823 | { "LinearBSyllabary", 0x10000, 0x1007F }, | - |
| 2824 | { "Malayalam", 0x0D00, 0x0D7F }, | - |
| 2825 | { "MathematicalAlphanumericSymbols", 0x1D400, 0x1D7FF }, | - |
| 2826 | { "MathematicalOperators", 0x2200, 0x22FF }, | - |
| 2827 | { "MiscellaneousMathematicalSymbols-A", 0x27C0, 0x27EF }, | - |
| 2828 | { "MiscellaneousMathematicalSymbols-B", 0x2980, 0x29FF }, | - |
| 2829 | { "MiscellaneousSymbols", 0x2600, 0x26FF }, | - |
| 2830 | { "MiscellaneousSymbolsandArrows", 0x2B00, 0x2BFF }, | - |
| 2831 | { "MiscellaneousTechnical", 0x2300, 0x23FF }, | - |
| 2832 | { "ModifierToneLetters", 0xA700, 0xA71F }, | - |
| 2833 | { "Mongolian", 0x1800, 0x18AF }, | - |
| 2834 | { "MusicalSymbols", 0x1D100, 0x1D1FF }, | - |
| 2835 | { "Myanmar", 0x1000, 0x109F }, | - |
| 2836 | { "NewTaiLue", 0x1980, 0x19DF }, | - |
| 2837 | { "NumberForms", 0x2150, 0x218F }, | - |
| 2838 | { "Ogham", 0x1680, 0x169F }, | - |
| 2839 | { "OldItalic", 0x10300, 0x1032F }, | - |
| 2840 | { "OldPersian", 0x103A0, 0x103DF }, | - |
| 2841 | { "OpticalCharacterRecognition", 0x2440, 0x245F }, | - |
| 2842 | { "Oriya", 0x0B00, 0x0B7F }, | - |
| 2843 | { "Osmanya", 0x10480, 0x104AF }, | - |
| 2844 | { "PhoneticExtensions", 0x1D00, 0x1D7F }, | - |
| 2845 | { "PhoneticExtensionsSupplement", 0x1D80, 0x1DBF }, | - |
| 2846 | { "PrivateUse", 0xE000, 0xF8FF }, | - |
| 2847 | { "Runic", 0x16A0, 0x16FF }, | - |
| 2848 | { "Shavian", 0x10450, 0x1047F }, | - |
| 2849 | { "Sinhala", 0x0D80, 0x0DFF }, | - |
| 2850 | { "SmallFormVariants", 0xFE50, 0xFE6F }, | - |
| 2851 | { "SpacingModifierLetters", 0x02B0, 0x02FF }, | - |
| 2852 | { "Specials", 0xFFF0, 0xFFFF }, | - |
| 2853 | { "SuperscriptsandSubscripts", 0x2070, 0x209F }, | - |
| 2854 | { "SupplementalArrows-A", 0x27F0, 0x27FF }, | - |
| 2855 | { "SupplementalArrows-B", 0x2900, 0x297F }, | - |
| 2856 | { "SupplementalMathematicalOperators", 0x2A00, 0x2AFF }, | - |
| 2857 | { "SupplementalPunctuation", 0x2E00, 0x2E7F }, | - |
| 2858 | { "SupplementaryPrivateUseArea-A", 0xF0000, 0xFFFFF }, | - |
| 2859 | { "SupplementaryPrivateUseArea-B", 0x100000, 0x10FFFF }, | - |
| 2860 | { "SylotiNagri", 0xA800, 0xA82F }, | - |
| 2861 | { "Syriac", 0x0700, 0x074F }, | - |
| 2862 | { "Tagalog", 0x1700, 0x171F }, | - |
| 2863 | { "Tagbanwa", 0x1760, 0x177F }, | - |
| 2864 | { "Tags", 0xE0000, 0xE007F }, | - |
| 2865 | { "TaiLe", 0x1950, 0x197F }, | - |
| 2866 | { "TaiXuanJingSymbols", 0x1D300, 0x1D35F }, | - |
| 2867 | { "Tamil", 0x0B80, 0x0BFF }, | - |
| 2868 | { "Telugu", 0x0C00, 0x0C7F }, | - |
| 2869 | { "Thaana", 0x0780, 0x07BF }, | - |
| 2870 | { "Thai", 0x0E00, 0x0E7F }, | - |
| 2871 | { "Tibetan", 0x0F00, 0x0FFF }, | - |
| 2872 | { "Tifinagh", 0x2D30, 0x2D7F }, | - |
| 2873 | { "Ugaritic", 0x10380, 0x1039F }, | - |
| 2874 | { "UnifiedCanadianAboriginalSyllabics", 0x1400, 0x167F }, | - |
| 2875 | { "VariationSelectors", 0xFE00, 0xFE0F }, | - |
| 2876 | { "VariationSelectorsSupplement", 0xE0100, 0xE01EF }, | - |
| 2877 | { "VerticalForms", 0xFE10, 0xFE1F }, | - |
| 2878 | { "YiRadicals", 0xA490, 0xA4CF }, | - |
| 2879 | { "YiSyllables", 0xA000, 0xA48F }, | - |
| 2880 | { "YijingHexagramSymbols", 0x4DC0, 0x4DFF } | - |
| 2881 | }; | - |
| 2882 | - | |
| 2883 | /* Orders two table entries by their name, compared with qstrcmp(). */ inline bool operator<(const CategoriesRangeMapEntry &entry1, const CategoriesRangeMapEntry &entry2) | - |
| 2884 | { return qstrcmp(entry1.name, entry2.name) < 0; } | - |
| 2885 | /* Same ordering with a plain C string on the left-hand side. */ inline bool operator<(const char *name, const CategoriesRangeMapEntry &entry) | - |
| 2886 | { return qstrcmp(name, entry.name) < 0; } | - |
| 2887 | /* Same ordering with a plain C string on the right-hand side. */ inline bool operator<(const CategoriesRangeMapEntry &entry, const char *name) | - |
| 2888 | { return qstrcmp(entry.name, name) < 0; } | - |
| 2889 | #endif // QT_NO_REGEXP_CCLASS | - |
| 2890 | - | |
| 2891 | /* Returns EOS when yyPos has reached yyLen; otherwise returns the code unit at yyIn[yyPos] and advances yyPos by one. */ int QRegExpEngine::getChar() | - |
| 2892 | { | - |
| 2893 | return (yyPos == yyLen) ? EOS : yyIn[yyPos++].unicode(); | - |
| 2894 | } | - |
| 2895 | - | |
| 2896 | int QRegExpEngine::getEscape() | - |
| 2897 | { | - |
| 2898 | #ifndef QT_NO_REGEXP_ESCAPE | - |
| 2899 | const char tab[] = "afnrtv"; // no b, as \b means word boundary | - |
| 2900 | const char backTab[] = "\a\f\n\r\t\v"; | - |
| 2901 | ushort low; | - |
| 2902 | int i; | - |
| 2903 | #endif | - |
| 2904 | ushort val; | - |
| 2905 | int prevCh = yyCh; | - |
| 2906 | - | |
| 2907 | if (prevCh == EOS) { | - |
| 2908 | error(RXERR_END); | - |
| 2909 | return Tok_Char | '\\'; | - |
| 2910 | } | - |
| 2911 | yyCh = getChar(); | - |
| 2912 | #ifndef QT_NO_REGEXP_ESCAPE | - |
| 2913 | if ((prevCh & ~0xff) == 0) { | - |
| 2914 | const char *p = strchr(tab, prevCh); | - |
| 2915 | if (p != 0) | - |
| 2916 | return Tok_Char | backTab[p - tab]; | - |
| 2917 | } | - |
| 2918 | #endif | - |
| 2919 | - | |
| 2920 | switch (prevCh) { | - |
| 2921 | #ifndef QT_NO_REGEXP_ESCAPE | - |
| 2922 | case '0': | - |
| 2923 | val = 0; | - |
| 2924 | for (i = 0; i < 3; i++) { | - |
| 2925 | if (yyCh >= '0' && yyCh <= '7') | - |
| 2926 | val = (val << 3) | (yyCh - '0'); | - |
| 2927 | else | - |
| 2928 | break; | - |
| 2929 | yyCh = getChar(); | - |
| 2930 | } | - |
| 2931 | if ((val & ~0377) != 0) | - |
| 2932 | error(RXERR_OCTAL); | - |
| 2933 | return Tok_Char | val; | - |
| 2934 | #endif | - |
| 2935 | #ifndef QT_NO_REGEXP_ESCAPE | - |
| 2936 | case 'B': | - |
| 2937 | return Tok_NonWord; | - |
| 2938 | #endif | - |
| 2939 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 2940 | case 'D': | - |
| 2941 | // see QChar::isDigit() | - |
| 2942 | yyCharClass->addCategories(uint(-1) ^ FLAG(QChar::Number_DecimalDigit)); | - |
| 2943 | return Tok_CharClass; | - |
| 2944 | case 'S': | - |
| 2945 | // see QChar::isSpace() | - |
| 2946 | yyCharClass->addCategories(uint(-1) ^ (FLAG(QChar::Separator_Space) | | - |
| 2947 | FLAG(QChar::Separator_Line) | | - |
| 2948 | FLAG(QChar::Separator_Paragraph) | | - |
| 2949 | FLAG(QChar::Other_Control))); | - |
| 2950 | yyCharClass->addRange(0x0000, 0x0008); | - |
| 2951 | yyCharClass->addRange(0x000e, 0x001f); | - |
| 2952 | yyCharClass->addRange(0x007f, 0x0084); | - |
| 2953 | yyCharClass->addRange(0x0086, 0x009f); | - |
| 2954 | return Tok_CharClass; | - |
| 2955 | case 'W': | - |
| 2956 | // see QChar::isLetterOrNumber() and QChar::isMark() | - |
| 2957 | yyCharClass->addCategories(uint(-1) ^ (FLAG(QChar::Mark_NonSpacing) | | - |
| 2958 | FLAG(QChar::Mark_SpacingCombining) | | - |
| 2959 | FLAG(QChar::Mark_Enclosing) | | - |
| 2960 | FLAG(QChar::Number_DecimalDigit) | | - |
| 2961 | FLAG(QChar::Number_Letter) | | - |
| 2962 | FLAG(QChar::Number_Other) | | - |
| 2963 | FLAG(QChar::Letter_Uppercase) | | - |
| 2964 | FLAG(QChar::Letter_Lowercase) | | - |
| 2965 | FLAG(QChar::Letter_Titlecase) | | - |
| 2966 | FLAG(QChar::Letter_Modifier) | | - |
| 2967 | FLAG(QChar::Letter_Other) | | - |
| 2968 | FLAG(QChar::Punctuation_Connector))); | - |
| 2969 | yyCharClass->addRange(0x203f, 0x2040); | - |
| 2970 | yyCharClass->addSingleton(0x2040); | - |
| 2971 | yyCharClass->addSingleton(0x2054); | - |
| 2972 | yyCharClass->addSingleton(0x30fb); | - |
| 2973 | yyCharClass->addRange(0xfe33, 0xfe34); | - |
| 2974 | yyCharClass->addRange(0xfe4d, 0xfe4f); | - |
| 2975 | yyCharClass->addSingleton(0xff3f); | - |
| 2976 | yyCharClass->addSingleton(0xff65); | - |
| 2977 | return Tok_CharClass; | - |
| 2978 | #endif | - |
| 2979 | #ifndef QT_NO_REGEXP_ESCAPE | - |
| 2980 | case 'b': | - |
| 2981 | return Tok_Word; | - |
| 2982 | #endif | - |
| 2983 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 2984 | case 'd': | - |
| 2985 | // see QChar::isDigit() | - |
| 2986 | yyCharClass->addCategories(FLAG(QChar::Number_DecimalDigit)); | - |
| 2987 | return Tok_CharClass; | - |
| 2988 | case 's': | - |
| 2989 | // see QChar::isSpace() | - |
| 2990 | yyCharClass->addCategories(FLAG(QChar::Separator_Space) | | - |
| 2991 | FLAG(QChar::Separator_Line) | | - |
| 2992 | FLAG(QChar::Separator_Paragraph)); | - |
| 2993 | yyCharClass->addRange(0x0009, 0x000d); | - |
| 2994 | yyCharClass->addSingleton(0x0085); | - |
| 2995 | return Tok_CharClass; | - |
| 2996 | case 'w': | - |
| 2997 | // see QChar::isLetterOrNumber() and QChar::isMark() | - |
| 2998 | yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) | | - |
| 2999 | FLAG(QChar::Mark_SpacingCombining) | | - |
| 3000 | FLAG(QChar::Mark_Enclosing) | | - |
| 3001 | FLAG(QChar::Number_DecimalDigit) | | - |
| 3002 | FLAG(QChar::Number_Letter) | | - |
| 3003 | FLAG(QChar::Number_Other) | | - |
| 3004 | FLAG(QChar::Letter_Uppercase) | | - |
| 3005 | FLAG(QChar::Letter_Lowercase) | | - |
| 3006 | FLAG(QChar::Letter_Titlecase) | | - |
| 3007 | FLAG(QChar::Letter_Modifier) | | - |
| 3008 | FLAG(QChar::Letter_Other)); | - |
| 3009 | yyCharClass->addSingleton(0x005f); // '_' | - |
| 3010 | return Tok_CharClass; | - |
| 3011 | case 'I': | - |
| 3012 | if (xmlSchemaExtensions) { | - |
| 3013 | yyCharClass->setNegative(!yyCharClass->negative()); | - |
| 3014 | // fall through | - |
| 3015 | } else { | - |
| 3016 | break; | - |
| 3017 | } | - |
| 3018 | case 'i': | - |
| 3019 | if (xmlSchemaExtensions) { | - |
| 3020 | yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) | | - |
| 3021 | FLAG(QChar::Mark_SpacingCombining) | | - |
| 3022 | FLAG(QChar::Mark_Enclosing) | | - |
| 3023 | FLAG(QChar::Number_DecimalDigit) | | - |
| 3024 | FLAG(QChar::Number_Letter) | | - |
| 3025 | FLAG(QChar::Number_Other) | | - |
| 3026 | FLAG(QChar::Letter_Uppercase) | | - |
| 3027 | FLAG(QChar::Letter_Lowercase) | | - |
| 3028 | FLAG(QChar::Letter_Titlecase) | | - |
| 3029 | FLAG(QChar::Letter_Modifier) | | - |
| 3030 | FLAG(QChar::Letter_Other)); | - |
| 3031 | yyCharClass->addSingleton(0x003a); // ':' | - |
| 3032 | yyCharClass->addSingleton(0x005f); // '_' | - |
| 3033 | yyCharClass->addRange(0x0041, 0x005a); // [A-Z] | - |
| 3034 | yyCharClass->addRange(0x0061, 0x007a); // [a-z] | - |
| 3035 | yyCharClass->addRange(0xc0, 0xd6); | - |
| 3036 | yyCharClass->addRange(0xd8, 0xf6); | - |
| 3037 | yyCharClass->addRange(0xf8, 0x2ff); | - |
| 3038 | yyCharClass->addRange(0x370, 0x37d); | - |
| 3039 | yyCharClass->addRange(0x37f, 0x1fff); | - |
| 3040 | yyCharClass->addRange(0x200c, 0x200d); | - |
| 3041 | yyCharClass->addRange(0x2070, 0x218f); | - |
| 3042 | yyCharClass->addRange(0x2c00, 0x2fef); | - |
| 3043 | yyCharClass->addRange(0x3001, 0xd7ff); | - |
| 3044 | yyCharClass->addRange(0xf900, 0xfdcf); | - |
| 3045 | yyCharClass->addRange(0xfdf0, 0xfffd); | - |
| 3046 | yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff); | - |
| 3047 | return Tok_CharClass; | - |
| 3048 | } else { | - |
| 3049 | break; | - |
| 3050 | } | - |
| 3051 | case 'C': | - |
| 3052 | if (xmlSchemaExtensions) { | - |
| 3053 | yyCharClass->setNegative(!yyCharClass->negative()); | - |
| 3054 | // fall through | - |
| 3055 | } else { | - |
| 3056 | break; | - |
| 3057 | } | - |
| 3058 | case 'c': | - |
| 3059 | if (xmlSchemaExtensions) { | - |
| 3060 | yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) | | - |
| 3061 | FLAG(QChar::Mark_SpacingCombining) | | - |
| 3062 | FLAG(QChar::Mark_Enclosing) | | - |
| 3063 | FLAG(QChar::Number_DecimalDigit) | | - |
| 3064 | FLAG(QChar::Number_Letter) | | - |
| 3065 | FLAG(QChar::Number_Other) | | - |
| 3066 | FLAG(QChar::Letter_Uppercase) | | - |
| 3067 | FLAG(QChar::Letter_Lowercase) | | - |
| 3068 | FLAG(QChar::Letter_Titlecase) | | - |
| 3069 | FLAG(QChar::Letter_Modifier) | | - |
| 3070 | FLAG(QChar::Letter_Other)); | - |
| 3071 | yyCharClass->addSingleton(0x002d); // '-' | - |
| 3072 | yyCharClass->addSingleton(0x002e); // '.' | - |
| 3073 | yyCharClass->addSingleton(0x003a); // ':' | - |
| 3074 | yyCharClass->addSingleton(0x005f); // '_' | - |
| 3075 | yyCharClass->addSingleton(0xb7); | - |
| 3076 | yyCharClass->addRange(0x0030, 0x0039); // [0-9] | - |
| 3077 | yyCharClass->addRange(0x0041, 0x005a); // [A-Z] | - |
| 3078 | yyCharClass->addRange(0x0061, 0x007a); // [a-z] | - |
| 3079 | yyCharClass->addRange(0xc0, 0xd6); | - |
| 3080 | yyCharClass->addRange(0xd8, 0xf6); | - |
| 3081 | yyCharClass->addRange(0xf8, 0x2ff); | - |
| 3082 | yyCharClass->addRange(0x370, 0x37d); | - |
| 3083 | yyCharClass->addRange(0x37f, 0x1fff); | - |
| 3084 | yyCharClass->addRange(0x200c, 0x200d); | - |
| 3085 | yyCharClass->addRange(0x2070, 0x218f); | - |
| 3086 | yyCharClass->addRange(0x2c00, 0x2fef); | - |
| 3087 | yyCharClass->addRange(0x3001, 0xd7ff); | - |
| 3088 | yyCharClass->addRange(0xf900, 0xfdcf); | - |
| 3089 | yyCharClass->addRange(0xfdf0, 0xfffd); | - |
| 3090 | yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff); | - |
| 3091 | yyCharClass->addRange(0x0300, 0x036f); | - |
| 3092 | yyCharClass->addRange(0x203f, 0x2040); | - |
| 3093 | return Tok_CharClass; | - |
| 3094 | } else { | - |
| 3095 | break; | - |
| 3096 | } | - |
| 3097 | case 'P': | - |
| 3098 | if (xmlSchemaExtensions) { | - |
| 3099 | yyCharClass->setNegative(!yyCharClass->negative()); | - |
| 3100 | // fall through | - |
| 3101 | } else { | - |
| 3102 | break; | - |
| 3103 | } | - |
| 3104 | case 'p': | - |
| 3105 | if (xmlSchemaExtensions) { | - |
| 3106 | if (yyCh != '{') { | - |
| 3107 | error(RXERR_CHARCLASS); | - |
| 3108 | return Tok_CharClass; | - |
| 3109 | } | - |
| 3110 | - | |
| 3111 | QByteArray category; | - |
| 3112 | yyCh = getChar(); | - |
| 3113 | while (yyCh != '}') { | - |
| 3114 | if (yyCh == EOS) { | - |
| 3115 | error(RXERR_END); | - |
| 3116 | return Tok_CharClass; | - |
| 3117 | } | - |
| 3118 | category.append(yyCh); | - |
| 3119 | yyCh = getChar(); | - |
| 3120 | } | - |
| 3121 | yyCh = getChar(); // skip closing '}' | - |
| 3122 | - | |
| 3123 | int catlen = category.length(); | - |
| 3124 | if (catlen == 1 || catlen == 2) { | - |
| 3125 | switch (category.at(0)) { | - |
| 3126 | case 'M': | - |
| 3127 | if (catlen == 1) { | - |
| 3128 | yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) | | - |
| 3129 | FLAG(QChar::Mark_SpacingCombining) | | - |
| 3130 | FLAG(QChar::Mark_Enclosing)); | - |
| 3131 | } else { | - |
| 3132 | switch (category.at(1)) { | - |
| 3133 | case 'n': yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing)); break; // Mn | - |
| 3134 | case 'c': yyCharClass->addCategories(FLAG(QChar::Mark_SpacingCombining)); break; // Mc | - |
| 3135 | case 'e': yyCharClass->addCategories(FLAG(QChar::Mark_Enclosing)); break; // Me | - |
| 3136 | default: error(RXERR_CATEGORY); break; | - |
| 3137 | } | - |
| 3138 | } | - |
| 3139 | break; | - |
| 3140 | case 'N': | - |
| 3141 | if (catlen == 1) { | - |
| 3142 | yyCharClass->addCategories(FLAG(QChar::Number_DecimalDigit) | | - |
| 3143 | FLAG(QChar::Number_Letter) | | - |
| 3144 | FLAG(QChar::Number_Other)); | - |
| 3145 | } else { | - |
| 3146 | switch (category.at(1)) { | - |
| 3147 | case 'd': yyCharClass->addCategories(FLAG(QChar::Number_DecimalDigit)); break; // Nd | - |
| 3148 | case 'l': yyCharClass->addCategories(FLAG(QChar::Number_Letter)); break; // Hl | - |
| 3149 | case 'o': yyCharClass->addCategories(FLAG(QChar::Number_Other)); break; // No | - |
| 3150 | default: error(RXERR_CATEGORY); break; | - |
| 3151 | } | - |
| 3152 | } | - |
| 3153 | break; | - |
| 3154 | case 'Z': | - |
| 3155 | if (catlen == 1) { | - |
| 3156 | yyCharClass->addCategories(FLAG(QChar::Separator_Space) | | - |
| 3157 | FLAG(QChar::Separator_Line) | | - |
| 3158 | FLAG(QChar::Separator_Paragraph)); | - |
| 3159 | } else { | - |
| 3160 | switch (category.at(1)) { | - |
| 3161 | case 's': yyCharClass->addCategories(FLAG(QChar::Separator_Space)); break; // Zs | - |
| 3162 | case 'l': yyCharClass->addCategories(FLAG(QChar::Separator_Line)); break; // Zl | - |
| 3163 | case 'p': yyCharClass->addCategories(FLAG(QChar::Separator_Paragraph)); break; // Zp | - |
| 3164 | default: error(RXERR_CATEGORY); break; | - |
| 3165 | } | - |
| 3166 | } | - |
| 3167 | break; | - |
| 3168 | case 'C': | - |
| 3169 | if (catlen == 1) { | - |
| 3170 | yyCharClass->addCategories(FLAG(QChar::Other_Control) | | - |
| 3171 | FLAG(QChar::Other_Format) | | - |
| 3172 | FLAG(QChar::Other_Surrogate) | | - |
| 3173 | FLAG(QChar::Other_PrivateUse) | | - |
| 3174 | FLAG(QChar::Other_NotAssigned)); | - |
| 3175 | } else { | - |
| 3176 | switch (category.at(1)) { | - |
| 3177 | case 'c': yyCharClass->addCategories(FLAG(QChar::Other_Control)); break; // Cc | - |
| 3178 | case 'f': yyCharClass->addCategories(FLAG(QChar::Other_Format)); break; // Cf | - |
| 3179 | case 's': yyCharClass->addCategories(FLAG(QChar::Other_Surrogate)); break; // Cs | - |
| 3180 | case 'o': yyCharClass->addCategories(FLAG(QChar::Other_PrivateUse)); break; // Co | - |
| 3181 | case 'n': yyCharClass->addCategories(FLAG(QChar::Other_NotAssigned)); break; // Cn | - |
| 3182 | default: error(RXERR_CATEGORY); break; | - |
| 3183 | } | - |
| 3184 | } | - |
| 3185 | break; | - |
| 3186 | case 'L': | - |
| 3187 | if (catlen == 1) { | - |
| 3188 | yyCharClass->addCategories(FLAG(QChar::Letter_Uppercase) | | - |
| 3189 | FLAG(QChar::Letter_Lowercase) | | - |
| 3190 | FLAG(QChar::Letter_Titlecase) | | - |
| 3191 | FLAG(QChar::Letter_Modifier) | | - |
| 3192 | FLAG(QChar::Letter_Other)); | - |
| 3193 | } else { | - |
| 3194 | switch (category.at(1)) { | - |
| 3195 | case 'u': yyCharClass->addCategories(FLAG(QChar::Letter_Uppercase)); break; // Lu | - |
| 3196 | case 'l': yyCharClass->addCategories(FLAG(QChar::Letter_Lowercase)); break; // Ll | - |
| 3197 | case 't': yyCharClass->addCategories(FLAG(QChar::Letter_Titlecase)); break; // Lt | - |
| 3198 | case 'm': yyCharClass->addCategories(FLAG(QChar::Letter_Modifier)); break; // Lm | - |
| 3199 | case 'o': yyCharClass->addCategories(FLAG(QChar::Letter_Other)); break; // Lo | - |
| 3200 | default: error(RXERR_CATEGORY); break; | - |
| 3201 | } | - |
| 3202 | } | - |
| 3203 | break; | - |
| 3204 | case 'P': | - |
| 3205 | if (catlen == 1) { | - |
| 3206 | yyCharClass->addCategories(FLAG(QChar::Punctuation_Connector) | | - |
| 3207 | FLAG(QChar::Punctuation_Dash) | | - |
| 3208 | FLAG(QChar::Punctuation_Open) | | - |
| 3209 | FLAG(QChar::Punctuation_Close) | | - |
| 3210 | FLAG(QChar::Punctuation_InitialQuote) | | - |
| 3211 | FLAG(QChar::Punctuation_FinalQuote) | | - |
| 3212 | FLAG(QChar::Punctuation_Other)); | - |
| 3213 | } else { | - |
| 3214 | switch (category.at(1)) { | - |
| 3215 | case 'c': yyCharClass->addCategories(FLAG(QChar::Punctuation_Connector)); break; // Pc | - |
| 3216 | case 'd': yyCharClass->addCategories(FLAG(QChar::Punctuation_Dash)); break; // Pd | - |
| 3217 | case 's': yyCharClass->addCategories(FLAG(QChar::Punctuation_Open)); break; // Ps | - |
| 3218 | case 'e': yyCharClass->addCategories(FLAG(QChar::Punctuation_Close)); break; // Pe | - |
| 3219 | case 'i': yyCharClass->addCategories(FLAG(QChar::Punctuation_InitialQuote)); break; // Pi | - |
| 3220 | case 'f': yyCharClass->addCategories(FLAG(QChar::Punctuation_FinalQuote)); break; // Pf | - |
| 3221 | case 'o': yyCharClass->addCategories(FLAG(QChar::Punctuation_Other)); break; // Po | - |
| 3222 | default: error(RXERR_CATEGORY); break; | - |
| 3223 | } | - |
| 3224 | } | - |
| 3225 | break; | - |
| 3226 | case 'S': | - |
| 3227 | if (catlen == 1) { | - |
| 3228 | yyCharClass->addCategories(FLAG(QChar::Symbol_Math) | | - |
| 3229 | FLAG(QChar::Symbol_Currency) | | - |
| 3230 | FLAG(QChar::Symbol_Modifier) | | - |
| 3231 | FLAG(QChar::Symbol_Other)); | - |
| 3232 | } else { | - |
| 3233 | switch (category.at(1)) { | - |
| 3234 | case 'm': yyCharClass->addCategories(FLAG(QChar::Symbol_Math)); break; // Sm | - |
| 3235 | case 'c': yyCharClass->addCategories(FLAG(QChar::Symbol_Currency)); break; // Sc | - |
| 3236 | case 'k': yyCharClass->addCategories(FLAG(QChar::Symbol_Modifier)); break; // Sk | - |
| 3237 | case 'o': yyCharClass->addCategories(FLAG(QChar::Symbol_Other)); break; // So | - |
| 3238 | default: error(RXERR_CATEGORY); break; | - |
| 3239 | } | - |
| 3240 | } | - |
| 3241 | break; | - |
| 3242 | default: | - |
| 3243 | error(RXERR_CATEGORY); | - |
| 3244 | break; | - |
| 3245 | } | - |
| 3246 | } else if (catlen > 2 && category.at(0) == 'I' && category.at(1) == 's') { | - |
| 3247 | static const int N = sizeof(categoriesRangeMap) / sizeof(categoriesRangeMap[0]); | - |
| 3248 | const char * const categoryFamily = category.constData() + 2; | - |
| 3249 | const CategoriesRangeMapEntry *r = std::lower_bound(categoriesRangeMap, categoriesRangeMap + N, categoryFamily); | - |
| 3250 | if (r != categoriesRangeMap + N && qstrcmp(r->name, categoryFamily) == 0) | - |
| 3251 | yyCharClass->addRange(r->first, r->second); | - |
| 3252 | else | - |
| 3253 | error(RXERR_CATEGORY); | - |
| 3254 | } else { | - |
| 3255 | error(RXERR_CATEGORY); | - |
| 3256 | } | - |
| 3257 | return Tok_CharClass; | - |
| 3258 | } else { | - |
| 3259 | break; | - |
| 3260 | } | - |
| 3261 | #endif | - |
| 3262 | #ifndef QT_NO_REGEXP_ESCAPE | - |
| 3263 | case 'x': | - |
| 3264 | val = 0; | - |
| 3265 | for (i = 0; i < 4; i++) { | - |
| 3266 | low = QChar(yyCh).toLower().unicode(); | - |
| 3267 | if (low >= '0' && low <= '9') | - |
| 3268 | val = (val << 4) | (low - '0'); | - |
| 3269 | else if (low >= 'a' && low <= 'f') | - |
| 3270 | val = (val << 4) | (low - 'a' + 10); | - |
| 3271 | else | - |
| 3272 | break; | - |
| 3273 | yyCh = getChar(); | - |
| 3274 | } | - |
| 3275 | return Tok_Char | val; | - |
| 3276 | #endif | - |
| 3277 | default: | - |
| 3278 | break; | - |
| 3279 | } | - |
| 3280 | if (prevCh >= '1' && prevCh <= '9') { | - |
| 3281 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 3282 | val = prevCh - '0'; | - |
| 3283 | while (yyCh >= '0' && yyCh <= '9') { | - |
| 3284 | val = (val * 10) + (yyCh - '0'); | - |
| 3285 | yyCh = getChar(); | - |
| 3286 | } | - |
| 3287 | return Tok_BackRef | val; | - |
| 3288 | #else | - |
| 3289 | error(RXERR_DISABLED); | - |
| 3290 | #endif | - |
| 3291 | } | - |
| 3292 | return Tok_Char | prevCh; | - |
| 3293 | } | - |
| 3294 | - | |
| 3295 | #ifndef QT_NO_REGEXP_INTERVAL | - |
| 3296 | int QRegExpEngine::getRep(int def) | - |
| 3297 | { | - |
| 3298 | if (yyCh >= '0' && yyCh <= '9') { | - |
| 3299 | int rep = 0; | - |
| 3300 | do { | - |
| 3301 | rep = 10 * rep + yyCh - '0'; | - |
| 3302 | if (rep >= InftyRep) { | - |
| 3303 | error(RXERR_REPETITION); | - |
| 3304 | rep = def; | - |
| 3305 | } | - |
| 3306 | yyCh = getChar(); | - |
| 3307 | } while (yyCh >= '0' && yyCh <= '9'); | - |
| 3308 | return rep; | - |
| 3309 | } else { | - |
| 3310 | return def; | - |
| 3311 | } | - |
| 3312 | } | - |
| 3313 | #endif | - |
| 3314 | - | |
| 3315 | #ifndef QT_NO_REGEXP_LOOKAHEAD | - |
| 3316 | void QRegExpEngine::skipChars(int n) | - |
| 3317 | { | - |
| 3318 | if (n > 0) { | - |
| 3319 | yyPos += n - 1; | - |
| 3320 | yyCh = getChar(); | - |
| 3321 | } | - |
| 3322 | } | - |
| 3323 | #endif | - |
| 3324 | - | |
| 3325 | void QRegExpEngine::error(const char *msg) | - |
| 3326 | { | - |
| 3327 | if (yyError.isEmpty()) | - |
| 3328 | yyError = QLatin1String(msg); | - |
| 3329 | } | - |
| 3330 | - | |
| 3331 | void QRegExpEngine::startTokenizer(const QChar *rx, int len) | - |
| 3332 | { | - |
| 3333 | yyIn = rx; | - |
| 3334 | yyPos0 = 0; | - |
| 3335 | yyPos = 0; | - |
| 3336 | yyLen = len; | - |
| 3337 | yyCh = getChar(); | - |
| 3338 | yyCharClass.reset(new QRegExpCharClass); | - |
| 3339 | yyMinRep = 0; | - |
| 3340 | yyMaxRep = 0; | - |
| 3341 | yyError = QString(); | - |
| 3342 | } | - |
| 3343 | - | |
| 3344 | int QRegExpEngine::getToken() | - |
| 3345 | { | - |
| 3346 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 3347 | ushort pendingCh = 0; | - |
| 3348 | bool charPending; | - |
| 3349 | bool rangePending; | - |
| 3350 | int tok; | - |
| 3351 | #endif | - |
| 3352 | int prevCh = yyCh; | - |
| 3353 | - | |
| 3354 | yyPos0 = yyPos - 1; | - |
| 3355 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 3356 | yyCharClass->clear(); | - |
| 3357 | #endif | - |
| 3358 | yyMinRep = 0; | - |
| 3359 | yyMaxRep = 0; | - |
| 3360 | yyCh = getChar(); | - |
| 3361 | - | |
| 3362 | switch (prevCh) { | - |
| 3363 | case EOS: | - |
| 3364 | yyPos0 = yyPos; | - |
| 3365 | return Tok_Eos; | - |
| 3366 | case '$': | - |
| 3367 | return Tok_Dollar; | - |
| 3368 | case '(': | - |
| 3369 | if (yyCh == '?') { | - |
| 3370 | prevCh = getChar(); | - |
| 3371 | yyCh = getChar(); | - |
| 3372 | switch (prevCh) { | - |
| 3373 | #ifndef QT_NO_REGEXP_LOOKAHEAD | - |
| 3374 | case '!': | - |
| 3375 | return Tok_NegLookahead; | - |
| 3376 | case '=': | - |
| 3377 | return Tok_PosLookahead; | - |
| 3378 | #endif | - |
| 3379 | case ':': | - |
| 3380 | return Tok_MagicLeftParen; | - |
| 3381 | case '<': | - |
| 3382 | error(RXERR_LOOKBEHIND); | - |
| 3383 | return Tok_MagicLeftParen; | - |
| 3384 | default: | - |
| 3385 | error(RXERR_LOOKAHEAD); | - |
| 3386 | return Tok_MagicLeftParen; | - |
| 3387 | } | - |
| 3388 | } else { | - |
| 3389 | return Tok_LeftParen; | - |
| 3390 | } | - |
| 3391 | case ')': | - |
| 3392 | return Tok_RightParen; | - |
| 3393 | case '*': | - |
| 3394 | yyMinRep = 0; | - |
| 3395 | yyMaxRep = InftyRep; | - |
| 3396 | return Tok_Quantifier; | - |
| 3397 | case '+': | - |
| 3398 | yyMinRep = 1; | - |
| 3399 | yyMaxRep = InftyRep; | - |
| 3400 | return Tok_Quantifier; | - |
| 3401 | case '.': | - |
| 3402 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 3403 | yyCharClass->setNegative(true); | - |
| 3404 | #endif | - |
| 3405 | return Tok_CharClass; | - |
| 3406 | case '?': | - |
| 3407 | yyMinRep = 0; | - |
| 3408 | yyMaxRep = 1; | - |
| 3409 | return Tok_Quantifier; | - |
| 3410 | case '[': | - |
| 3411 | #ifndef QT_NO_REGEXP_CCLASS | - |
| 3412 | if (yyCh == '^') { | - |
| 3413 | yyCharClass->setNegative(true); | - |
| 3414 | yyCh = getChar(); | - |
| 3415 | } | - |
| 3416 | charPending = false; | - |
| 3417 | rangePending = false; | - |
| 3418 | do { | - |
| 3419 | if (yyCh == '-' && charPending && !rangePending) { | - |
| 3420 | rangePending = true; | - |
| 3421 | yyCh = getChar(); | - |
| 3422 | } else { | - |
| 3423 | if (charPending && !rangePending) { | - |
| 3424 | yyCharClass->addSingleton(pendingCh); | - |
| 3425 | charPending = false; | - |
| 3426 | } | - |
| 3427 | if (yyCh == '\\') { | - |
| 3428 | yyCh = getChar(); | - |
| 3429 | tok = getEscape(); | - |
| 3430 | if (tok == Tok_Word) | - |
| 3431 | tok = '\b'; | - |
| 3432 | } else { | - |
| 3433 | tok = Tok_Char | yyCh; | - |
| 3434 | yyCh = getChar(); | - |
| 3435 | } | - |
| 3436 | if (tok == Tok_CharClass) { | - |
| 3437 | if (rangePending) { | - |
| 3438 | yyCharClass->addSingleton('-'); | - |
| 3439 | yyCharClass->addSingleton(pendingCh); | - |
| 3440 | charPending = false; | - |
| 3441 | rangePending = false; | - |
| 3442 | } | - |
| 3443 | } else if ((tok & Tok_Char) != 0) { | - |
| 3444 | if (rangePending) { | - |
| 3445 | yyCharClass->addRange(pendingCh, tok ^ Tok_Char); | - |
| 3446 | charPending = false; | - |
| 3447 | rangePending = false; | - |
| 3448 | } else { | - |
| 3449 | pendingCh = tok ^ Tok_Char; | - |
| 3450 | charPending = true; | - |
| 3451 | } | - |
| 3452 | } else { | - |
| 3453 | error(RXERR_CHARCLASS); | - |
| 3454 | } | - |
| 3455 | } | - |
| 3456 | } while (yyCh != ']' && yyCh != EOS); | - |
| 3457 | if (rangePending) | - |
| 3458 | yyCharClass->addSingleton('-'); | - |
| 3459 | if (charPending) | - |
| 3460 | yyCharClass->addSingleton(pendingCh); | - |
| 3461 | if (yyCh == EOS) | - |
| 3462 | error(RXERR_END); | - |
| 3463 | else | - |
| 3464 | yyCh = getChar(); | - |
| 3465 | return Tok_CharClass; | - |
| 3466 | #else | - |
| 3467 | error(RXERR_END); | - |
| 3468 | return Tok_Char | '['; | - |
| 3469 | #endif | - |
| 3470 | case '\\': | - |
| 3471 | return getEscape(); | - |
| 3472 | case ']': | - |
| 3473 | error(RXERR_LEFTDELIM); | - |
| 3474 | return Tok_Char | ']'; | - |
| 3475 | case '^': | - |
| 3476 | return Tok_Caret; | - |
| 3477 | case '{': | - |
| 3478 | #ifndef QT_NO_REGEXP_INTERVAL | - |
| 3479 | yyMinRep = getRep(0); | - |
| 3480 | yyMaxRep = yyMinRep; | - |
| 3481 | if (yyCh == ',') { | - |
| 3482 | yyCh = getChar(); | - |
| 3483 | yyMaxRep = getRep(InftyRep); | - |
| 3484 | } | - |
| 3485 | if (yyMaxRep < yyMinRep) | - |
| 3486 | error(RXERR_INTERVAL); | - |
| 3487 | if (yyCh != '}') | - |
| 3488 | error(RXERR_REPETITION); | - |
| 3489 | yyCh = getChar(); | - |
| 3490 | return Tok_Quantifier; | - |
| 3491 | #else | - |
| 3492 | error(RXERR_DISABLED); | - |
| 3493 | return Tok_Char | '{'; | - |
| 3494 | #endif | - |
| 3495 | case '|': | - |
| 3496 | return Tok_Bar; | - |
| 3497 | case '}': | - |
| 3498 | error(RXERR_LEFTDELIM); | - |
| 3499 | return Tok_Char | '}'; | - |
| 3500 | default: | - |
| 3501 | return Tok_Char | prevCh; | - |
| 3502 | } | - |
| 3503 | } | - |
| 3504 | - | |
| 3505 | int QRegExpEngine::parse(const QChar *pattern, int len) | - |
| 3506 | { | - |
| 3507 | valid = true; | - |
| 3508 | startTokenizer(pattern, len); | - |
| 3509 | yyTok = getToken(); | - |
| 3510 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 3511 | yyMayCapture = true; | - |
| 3512 | #else | - |
| 3513 | yyMayCapture = false; | - |
| 3514 | #endif | - |
| 3515 | - | |
| 3516 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 3517 | int atom = startAtom(false); | - |
| 3518 | #endif | - |
| 3519 | QRegExpCharClass anything; | - |
| 3520 | Box box(this); // create InitialState | - |
| 3521 | box.set(anything); | - |
| 3522 | Box rightBox(this); // create FinalState | - |
| 3523 | rightBox.set(anything); | - |
| 3524 | - | |
| 3525 | Box middleBox(this); | - |
| 3526 | parseExpression(&middleBox); | - |
| 3527 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 3528 | finishAtom(atom, false); | - |
| 3529 | #endif | - |
| 3530 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 3531 | middleBox.setupHeuristics(); | - |
| 3532 | #endif | - |
| 3533 | box.cat(middleBox); | - |
| 3534 | box.cat(rightBox); | - |
| 3535 | yyCharClass.reset(0); | - |
| 3536 | - | |
| 3537 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 3538 | for (int i = 0; i < nf; ++i) { | - |
| 3539 | switch (f[i].capture) { | - |
| 3540 | case QRegExpAtom::NoCapture: | - |
| 3541 | break; | - |
| 3542 | case QRegExpAtom::OfficialCapture: | - |
| 3543 | f[i].capture = ncap; | - |
| 3544 | captureForOfficialCapture.append(ncap); | - |
| 3545 | ++ncap; | - |
| 3546 | ++officialncap; | - |
| 3547 | break; | - |
| 3548 | case QRegExpAtom::UnofficialCapture: | - |
| 3549 | f[i].capture = greedyQuantifiers ? ncap++ : QRegExpAtom::NoCapture; | - |
| 3550 | } | - |
| 3551 | } | - |
| 3552 | - | |
| 3553 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 3554 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 3555 | if (officialncap == 0 && nbrefs == 0) { | - |
| 3556 | ncap = nf = 0; | - |
| 3557 | f.clear(); | - |
| 3558 | } | - |
| 3559 | #endif | - |
| 3560 | // handle the case where there's a \5 with no corresponding capture | - |
| 3561 | // (captureForOfficialCapture.size() != officialncap) | - |
| 3562 | for (int i = 0; i < nbrefs - officialncap; ++i) { | - |
| 3563 | captureForOfficialCapture.append(ncap); | - |
| 3564 | ++ncap; | - |
| 3565 | } | - |
| 3566 | #endif | - |
| 3567 | #endif | - |
| 3568 | - | |
| 3569 | if (!yyError.isEmpty()) | - |
| 3570 | return -1; | - |
| 3571 | - | |
| 3572 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 3573 | const QRegExpAutomatonState &sinit = s.at(InitialState); | - |
| 3574 | caretAnchored = !sinit.anchors.isEmpty(); | - |
| 3575 | if (caretAnchored) { | - |
| 3576 | const QMap<int, int> &anchors = sinit.anchors; | - |
| 3577 | QMap<int, int>::const_iterator a; | - |
| 3578 | for (a = anchors.constBegin(); a != anchors.constEnd(); ++a) { | - |
| 3579 | if ( | - |
| 3580 | #ifndef QT_NO_REGEXP_ANCHOR_ALT | - |
| 3581 | (*a & Anchor_Alternation) != 0 || | - |
| 3582 | #endif | - |
| 3583 | (*a & Anchor_Caret) == 0) | - |
| 3584 | { | - |
| 3585 | caretAnchored = false; | - |
| 3586 | break; | - |
| 3587 | } | - |
| 3588 | } | - |
| 3589 | } | - |
| 3590 | #endif | - |
| 3591 | - | |
| 3592 | // cleanup anchors | - |
| 3593 | int numStates = s.count(); | - |
| 3594 | for (int i = 0; i < numStates; ++i) { | - |
| 3595 | QRegExpAutomatonState &state = s[i]; | - |
| 3596 | if (!state.anchors.isEmpty()) { | - |
| 3597 | QMap<int, int>::iterator a = state.anchors.begin(); | - |
| 3598 | while (a != state.anchors.end()) { | - |
| 3599 | if (a.value() == 0) | - |
| 3600 | a = state.anchors.erase(a); | - |
| 3601 | else | - |
| 3602 | ++a; | - |
| 3603 | } | - |
| 3604 | } | - |
| 3605 | } | - |
| 3606 | - | |
| 3607 | return yyPos0; | - |
| 3608 | } | - |
| 3609 | - | |
| 3610 | void QRegExpEngine::parseAtom(Box *box) | - |
| 3611 | { | - |
| 3612 | #ifndef QT_NO_REGEXP_LOOKAHEAD | - |
| 3613 | QRegExpEngine *eng = 0; | - |
| 3614 | bool neg; | - |
| 3615 | int len; | - |
| 3616 | #endif | - |
| 3617 | - | |
| 3618 | if ((yyTok & Tok_Char) != 0) { | - |
| 3619 | box->set(QChar(yyTok ^ Tok_Char)); | - |
| 3620 | } else { | - |
| 3621 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 3622 | trivial = false; | - |
| 3623 | #endif | - |
| 3624 | switch (yyTok) { | - |
| 3625 | case Tok_Dollar: | - |
| 3626 | box->catAnchor(Anchor_Dollar); | - |
| 3627 | break; | - |
| 3628 | case Tok_Caret: | - |
| 3629 | box->catAnchor(Anchor_Caret); | - |
| 3630 | break; | - |
| 3631 | #ifndef QT_NO_REGEXP_LOOKAHEAD | - |
| 3632 | case Tok_PosLookahead: | - |
| 3633 | case Tok_NegLookahead: | - |
| 3634 | neg = (yyTok == Tok_NegLookahead); | - |
| 3635 | eng = new QRegExpEngine(cs, greedyQuantifiers); | - |
| 3636 | len = eng->parse(yyIn + yyPos - 1, yyLen - yyPos + 1); | - |
| 3637 | if (len >= 0) | - |
| 3638 | skipChars(len); | - |
| 3639 | else | - |
| 3640 | error(RXERR_LOOKAHEAD); | - |
| 3641 | box->catAnchor(addLookahead(eng, neg)); | - |
| 3642 | yyTok = getToken(); | - |
| 3643 | if (yyTok != Tok_RightParen) | - |
| 3644 | error(RXERR_LOOKAHEAD); | - |
| 3645 | break; | - |
| 3646 | #endif | - |
| 3647 | #ifndef QT_NO_REGEXP_ESCAPE | - |
| 3648 | case Tok_Word: | - |
| 3649 | box->catAnchor(Anchor_Word); | - |
| 3650 | break; | - |
| 3651 | case Tok_NonWord: | - |
| 3652 | box->catAnchor(Anchor_NonWord); | - |
| 3653 | break; | - |
| 3654 | #endif | - |
| 3655 | case Tok_LeftParen: | - |
| 3656 | case Tok_MagicLeftParen: | - |
| 3657 | yyTok = getToken(); | - |
| 3658 | parseExpression(box); | - |
| 3659 | if (yyTok != Tok_RightParen) | - |
| 3660 | error(RXERR_END); | - |
| 3661 | break; | - |
| 3662 | case Tok_CharClass: | - |
| 3663 | box->set(*yyCharClass); | - |
| 3664 | break; | - |
| 3665 | case Tok_Quantifier: | - |
| 3666 | error(RXERR_REPETITION); | - |
| 3667 | break; | - |
| 3668 | default: | - |
| 3669 | #ifndef QT_NO_REGEXP_BACKREF | - |
| 3670 | if ((yyTok & Tok_BackRef) != 0) | - |
| 3671 | box->set(yyTok ^ Tok_BackRef); | - |
| 3672 | else | - |
| 3673 | #endif | - |
| 3674 | error(RXERR_DISABLED); | - |
| 3675 | } | - |
| 3676 | } | - |
| 3677 | yyTok = getToken(); | - |
| 3678 | } | - |
| 3679 | - | |
| 3680 | void QRegExpEngine::parseFactor(Box *box) | - |
| 3681 | { | - |
| 3682 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 3683 | int outerAtom = greedyQuantifiers ? startAtom(false) : -1; | - |
| 3684 | int innerAtom = startAtom(yyMayCapture && yyTok == Tok_LeftParen); | - |
| 3685 | bool magicLeftParen = (yyTok == Tok_MagicLeftParen); | - |
| 3686 | #else | - |
| 3687 | const int innerAtom = -1; | - |
| 3688 | #endif | - |
| 3689 | - | |
| 3690 | #ifndef QT_NO_REGEXP_INTERVAL | - |
| 3691 | #define YYREDO() \ | - |
| 3692 | yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \ | - |
| 3693 | *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok | - |
| 3694 | - | |
| 3695 | const QChar *in = yyIn; | - |
| 3696 | int pos0 = yyPos0; | - |
| 3697 | int pos = yyPos; | - |
| 3698 | int len = yyLen; | - |
| 3699 | int ch = yyCh; | - |
| 3700 | QRegExpCharClass charClass; | - |
| 3701 | if (yyTok == Tok_CharClass) | - |
| 3702 | charClass = *yyCharClass; | - |
| 3703 | int tok = yyTok; | - |
| 3704 | bool mayCapture = yyMayCapture; | - |
| 3705 | #endif | - |
| 3706 | - | |
| 3707 | parseAtom(box); | - |
| 3708 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 3709 | finishAtom(innerAtom, magicLeftParen); | - |
| 3710 | #endif | - |
| 3711 | - | |
| 3712 | bool hasQuantifier = (yyTok == Tok_Quantifier); | - |
| 3713 | if (hasQuantifier) { | - |
| 3714 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 3715 | trivial = false; | - |
| 3716 | #endif | - |
| 3717 | if (yyMaxRep == InftyRep) { | - |
| 3718 | box->plus(innerAtom); | - |
| 3719 | #ifndef QT_NO_REGEXP_INTERVAL | - |
| 3720 | } else if (yyMaxRep == 0) { | - |
| 3721 | box->clear(); | - |
| 3722 | #endif | - |
| 3723 | } | - |
| 3724 | if (yyMinRep == 0) | - |
| 3725 | box->opt(); | - |
| 3726 | - | |
| 3727 | #ifndef QT_NO_REGEXP_INTERVAL | - |
| 3728 | yyMayCapture = false; | - |
| 3729 | int alpha = (yyMinRep == 0) ? 0 : yyMinRep - 1; | - |
| 3730 | int beta = (yyMaxRep == InftyRep) ? 0 : yyMaxRep - (alpha + 1); | - |
| 3731 | - | |
| 3732 | Box rightBox(this); | - |
| 3733 | int i; | - |
| 3734 | - | |
| 3735 | for (i = 0; i < beta; i++) { | - |
| 3736 | YYREDO(); | - |
| 3737 | Box leftBox(this); | - |
| 3738 | parseAtom(&leftBox); | - |
| 3739 | leftBox.cat(rightBox); | - |
| 3740 | leftBox.opt(); | - |
| 3741 | rightBox = leftBox; | - |
| 3742 | } | - |
| 3743 | for (i = 0; i < alpha; i++) { | - |
| 3744 | YYREDO(); | - |
| 3745 | Box leftBox(this); | - |
| 3746 | parseAtom(&leftBox); | - |
| 3747 | leftBox.cat(rightBox); | - |
| 3748 | rightBox = leftBox; | - |
| 3749 | } | - |
| 3750 | rightBox.cat(*box); | - |
| 3751 | *box = rightBox; | - |
| 3752 | #endif | - |
| 3753 | yyTok = getToken(); | - |
| 3754 | #ifndef QT_NO_REGEXP_INTERVAL | - |
| 3755 | yyMayCapture = mayCapture; | - |
| 3756 | #endif | - |
| 3757 | } | - |
| 3758 | #undef YYREDO | - |
| 3759 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 3760 | if (greedyQuantifiers) | - |
| 3761 | finishAtom(outerAtom, hasQuantifier); | - |
| 3762 | #endif | - |
| 3763 | } | - |
| 3764 | - | |
| 3765 | void QRegExpEngine::parseTerm(Box *box) | - |
| 3766 | { | - |
| 3767 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 3768 | if (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar) | - |
| 3769 | parseFactor(box); | - |
| 3770 | #endif | - |
| 3771 | while (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar) { | - |
| 3772 | Box rightBox(this); | - |
| 3773 | parseFactor(&rightBox); | - |
| 3774 | box->cat(rightBox); | - |
| 3775 | } | - |
| 3776 | } | - |
| 3777 | - | |
| 3778 | void QRegExpEngine::parseExpression(Box *box) | - |
| 3779 | { | - |
| 3780 | parseTerm(box); | - |
| 3781 | while (yyTok == Tok_Bar) { | - |
| 3782 | #ifndef QT_NO_REGEXP_OPTIM | - |
| 3783 | trivial = false; | - |
| 3784 | #endif | - |
| 3785 | Box rightBox(this); | - |
| 3786 | yyTok = getToken(); | - |
| 3787 | parseTerm(&rightBox); | - |
| 3788 | box->orx(rightBox); | - |
| 3789 | } | - |
| 3790 | } | - |
| 3791 | - | |
| 3792 | /* | - |
| 3793 | The struct QRegExpPrivate contains the private data of a regular | - |
| 3794 | expression other than the automaton. It makes it possible for many | - |
| 3795 | QRegExp objects to use the same QRegExpEngine object with different | - |
| 3796 | QRegExpPrivate objects. | - |
| 3797 | */ | - |
| 3798 | struct QRegExpPrivate | - |
| 3799 | { | - |
| 3800 | QRegExpEngine *eng; | - |
| 3801 | QRegExpEngineKey engineKey; | - |
| 3802 | bool minimal; | - |
| 3803 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 3804 | QString t; // last string passed to QRegExp::indexIn() or lastIndexIn() | - |
| 3805 | QStringList capturedCache; // what QRegExp::capturedTexts() returned last | - |
| 3806 | #endif | - |
| 3807 | QRegExpMatchState matchState; | - |
| 3808 | - | |
| 3809 | inline QRegExpPrivate() | - |
| 3810 | : eng(0), engineKey(QString(), QRegExp::RegExp, Qt::CaseSensitive), minimal(false) { } | - |
| 3811 | inline QRegExpPrivate(const QRegExpEngineKey &key) | - |
| 3812 | : eng(0), engineKey(key), minimal(false) {} | - |
| 3813 | }; | - |
| 3814 | - | |
| 3815 | #if !defined(QT_NO_REGEXP_OPTIM) | - |
| 3816 | typedef QCache<QRegExpEngineKey, QRegExpEngine> EngineCache; | - |
| 3817 | Q_GLOBAL_STATIC(EngineCache, globalEngineCache) | - |
| 3818 | static QBasicMutex globalEngineCacheMutex; | - |
| 3819 | #endif // QT_NO_REGEXP_OPTIM | - |
| 3820 | - | |
| 3821 | static void derefEngine(QRegExpEngine *eng, const QRegExpEngineKey &key) | - |
| 3822 | { | - |
| 3823 | if (!eng->ref.deref()) { | - |
| 3824 | #if !defined(QT_NO_REGEXP_OPTIM) | - |
| 3825 | if (globalEngineCache()) { | - |
| 3826 | QMutexLocker locker(&globalEngineCacheMutex); | - |
| 3827 | QT_TRY { | - |
| 3828 | globalEngineCache()->insert(key, eng, 4 + key.pattern.length() / 4); | - |
| 3829 | } QT_CATCH(const std::bad_alloc &) { | - |
| 3830 | // in case of an exception (e.g. oom), just delete the engine | - |
| 3831 | delete eng; | - |
| 3832 | } | - |
| 3833 | } else { | - |
| 3834 | delete eng; | - |
| 3835 | } | - |
| 3836 | #else | - |
| 3837 | Q_UNUSED(key); | - |
| 3838 | delete eng; | - |
| 3839 | #endif | - |
| 3840 | } | - |
| 3841 | } | - |
| 3842 | - | |
| 3843 | static void prepareEngine_helper(QRegExpPrivate *priv) | - |
| 3844 | { | - |
| 3845 | bool initMatchState = !priv->eng; | - |
| 3846 | #if !defined(QT_NO_REGEXP_OPTIM) | - |
| 3847 | if (!priv->eng && globalEngineCache()) { | - |
| 3848 | QMutexLocker locker(&globalEngineCacheMutex); | - |
| 3849 | priv->eng = globalEngineCache()->take(priv->engineKey); | - |
| 3850 | if (priv->eng != 0) | - |
| 3851 | priv->eng->ref.ref(); | - |
| 3852 | } | - |
| 3853 | #endif // QT_NO_REGEXP_OPTIM | - |
| 3854 | - | |
| 3855 | if (!priv->eng) | - |
| 3856 | priv->eng = new QRegExpEngine(priv->engineKey); | - |
| 3857 | - | |
| 3858 | if (initMatchState) | - |
| 3859 | priv->matchState.prepareForMatch(priv->eng); | - |
| 3860 | } | - |
| 3861 | - | |
| 3862 | inline static void prepareEngine(QRegExpPrivate *priv) | - |
| 3863 | { | - |
| 3864 | if (priv->eng) | - |
| 3865 | return; | - |
| 3866 | prepareEngine_helper(priv); | - |
| 3867 | } | - |
| 3868 | - | |
| 3869 | static void prepareEngineForMatch(QRegExpPrivate *priv, const QString &str) | - |
| 3870 | { | - |
| 3871 | prepareEngine(priv); | - |
| 3872 | priv->matchState.prepareForMatch(priv->eng); | - |
| 3873 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 3874 | priv->t = str; | - |
| 3875 | priv->capturedCache.clear(); | - |
| 3876 | #else | - |
| 3877 | Q_UNUSED(str); | - |
| 3878 | #endif | - |
| 3879 | } | - |
| 3880 | - | |
| 3881 | static void invalidateEngine(QRegExpPrivate *priv) | - |
| 3882 | { | - |
| 3883 | if (priv->eng != 0) { | - |
| 3884 | derefEngine(priv->eng, priv->engineKey); | - |
| 3885 | priv->eng = 0; | - |
| 3886 | priv->matchState.drain(); | - |
| 3887 | } | - |
| 3888 | } | - |
| 3889 | - | |
| 3890 | /*! | - |
| 3891 | \enum QRegExp::CaretMode | - |
| 3892 | - | |
| 3893 | The CaretMode enum defines the different meanings of the caret | - |
| 3894 | (\b{^}) in a regular expression. The possible values are: | - |
| 3895 | - | |
| 3896 | \value CaretAtZero | - |
| 3897 | The caret corresponds to index 0 in the searched string. | - |
| 3898 | - | |
| 3899 | \value CaretAtOffset | - |
| 3900 | The caret corresponds to the start offset of the search. | - |
| 3901 | - | |
| 3902 | \value CaretWontMatch | - |
| 3903 | The caret never matches. | - |
| 3904 | */ | - |
| 3905 | - | |
| 3906 | /*! | - |
| 3907 | \enum QRegExp::PatternSyntax | - |
| 3908 | - | |
| 3909 | The syntax used to interpret the meaning of the pattern. | - |
| 3910 | - | |
| 3911 | \value RegExp A rich Perl-like pattern matching syntax. This is | - |
| 3912 | the default. | - |
| 3913 | - | |
| 3914 | \value RegExp2 Like RegExp, but with \l{greedy quantifiers}. | - |
| 3915 | (Introduced in Qt 4.2.) | - |
| 3916 | - | |
| 3917 | \value Wildcard This provides a simple pattern matching syntax | - |
| 3918 | similar to that used by shells (command interpreters) for "file | - |
| 3919 | globbing". See \l{QRegExp wildcard matching}. | - |
| 3920 | - | |
| 3921 | \value WildcardUnix This is similar to Wildcard but with the | - |
| 3922 | behavior of a Unix shell. The wildcard characters can be escaped | - |
| 3923 | with the character "\\". | - |
| 3924 | - | |
| 3925 | \value FixedString The pattern is a fixed string. This is | - |
| 3926 | equivalent to using the RegExp pattern on a string in | - |
| 3927 | which all metacharacters are escaped using escape(). | - |
| 3928 | - | |
| 3929 | \value W3CXmlSchema11 The pattern is a regular expression as | - |
| 3930 | defined by the W3C XML Schema 1.1 specification. | - |
| 3931 | - | |
| 3932 | \sa setPatternSyntax() | - |
| 3933 | */ | - |
| 3934 | - | |
| 3935 | /*! | - |
| 3936 | Constructs an empty regexp. | - |
| 3937 | - | |
| 3938 | \sa isValid(), errorString() | - |
| 3939 | */ | - |
| 3940 | QRegExp::QRegExp() | - |
| 3941 | { | - |
| 3942 | priv = new QRegExpPrivate; | - |
| 3943 | prepareEngine(priv); | - |
| 3944 | } | - |
| 3945 | - | |
| 3946 | /*! | - |
| 3947 | Constructs a regular expression object for the given \a pattern | - |
| 3948 | string. The pattern must be given using wildcard notation if \a | - |
| 3949 | syntax is \l Wildcard; the default is \l RegExp. The pattern is | - |
| 3950 | case sensitive, unless \a cs is Qt::CaseInsensitive. Matching is | - |
| 3951 | greedy (maximal), but can be changed by calling | - |
| 3952 | setMinimal(). | - |
| 3953 | - | |
| 3954 | \sa setPattern(), setCaseSensitivity(), setPatternSyntax() | - |
| 3955 | */ | - |
| 3956 | QRegExp::QRegExp(const QString &pattern, Qt::CaseSensitivity cs, PatternSyntax syntax) | - |
| 3957 | { | - |
| 3958 | priv = new QRegExpPrivate(QRegExpEngineKey(pattern, syntax, cs)); | - |
| 3959 | prepareEngine(priv); | - |
| 3960 | } | - |
| 3961 | - | |
| 3962 | /*! | - |
| 3963 | Constructs a regular expression as a copy of \a rx. | - |
| 3964 | - | |
| 3965 | \sa operator=() | - |
| 3966 | */ | - |
| 3967 | QRegExp::QRegExp(const QRegExp &rx) | - |
| 3968 | { | - |
| 3969 | priv = new QRegExpPrivate; | - |
| 3970 | operator=(rx); | - |
| 3971 | } | - |
| 3972 | - | |
| 3973 | /*! | - |
| 3974 | Destroys the regular expression and cleans up its internal data. | - |
| 3975 | */ | - |
| 3976 | QRegExp::~QRegExp() | - |
| 3977 | { | - |
| 3978 | invalidateEngine(priv); | - |
| 3979 | delete priv; | - |
| 3980 | } | - |
| 3981 | - | |
| 3982 | /*! | - |
| 3983 | Copies the regular expression \a rx and returns a reference to the | - |
| 3984 | copy. The case sensitivity, wildcard, and minimal matching options | - |
| 3985 | are also copied. | - |
| 3986 | */ | - |
| 3987 | QRegExp &QRegExp::operator=(const QRegExp &rx) | - |
| 3988 | { | - |
| 3989 | prepareEngine(rx.priv); // to allow sharing | - |
| 3990 | QRegExpEngine *otherEng = rx.priv->eng; | - |
| 3991 | if (otherEng) | - |
| 3992 | otherEng->ref.ref(); | - |
| 3993 | invalidateEngine(priv); | - |
| 3994 | priv->eng = otherEng; | - |
| 3995 | priv->engineKey = rx.priv->engineKey; | - |
| 3996 | priv->minimal = rx.priv->minimal; | - |
| 3997 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 3998 | priv->t = rx.priv->t; | - |
| 3999 | priv->capturedCache = rx.priv->capturedCache; | - |
| 4000 | #endif | - |
| 4001 | if (priv->eng) | - |
| 4002 | priv->matchState.prepareForMatch(priv->eng); | - |
| 4003 | priv->matchState.captured = rx.priv->matchState.captured; | - |
| 4004 | return *this; | - |
| 4005 | } | - |
| 4006 | - | |
| 4007 | /*! | - |
| 4008 | \fn QRegExp &QRegExp::operator=(QRegExp &&other) | - |
| 4009 | - | |
| 4010 | Move-assigns \a other to this QRegExp instance. | - |
| 4011 | - | |
| 4012 | \since 5.2 | - |
| 4013 | */ | - |
| 4014 | - | |
| 4015 | /*! | - |
| 4016 | \fn void QRegExp::swap(QRegExp &other) | - |
| 4017 | \since 4.8 | - |
| 4018 | - | |
| 4019 | Swaps regular expression \a other with this regular | - |
| 4020 | expression. This operation is very fast and never fails. | - |
| 4021 | */ | - |
| 4022 | - | |
| 4023 | /*! | - |
| 4024 | Returns \c true if this regular expression is equal to \a rx; | - |
| 4025 | otherwise returns \c false. | - |
| 4026 | - | |
| 4027 | Two QRegExp objects are equal if they have the same pattern | - |
| 4028 | strings and the same settings for case sensitivity, wildcard and | - |
| 4029 | minimal matching. | - |
| 4030 | */ | - |
| 4031 | bool QRegExp::operator==(const QRegExp &rx) const | - |
| 4032 | { | - |
| 4033 | return priv->engineKey == rx.priv->engineKey && priv->minimal == rx.priv->minimal; | - |
| 4034 | } | - |
| 4035 | - | |
| 4036 | /*! | - |
| 4037 | \since 5.6 | - |
| 4038 | \relates QRegExp | - |
| 4039 | - | |
| 4040 | Returns the hash value for \a key, using | - |
| 4041 | \a seed to seed the calculation. | - |
| 4042 | */ | - |
| 4043 | uint qHash(const QRegExp &key, uint seed) Q_DECL_NOTHROW | - |
| 4044 | { | - |
| 4045 | QtPrivate::QHashCombine hash; | - |
| 4046 | seed = hash(seed, key.priv->engineKey); | - |
| 4047 | seed = hash(seed, key.priv->minimal); | - |
| 4048 | return seed; executed 2048 times by 1 test: return seed;Executed by:
| 2048 |
| 4049 | } | - |
| 4050 | - | |
| 4051 | /*! | - |
| 4052 | \fn bool QRegExp::operator!=(const QRegExp &rx) const | - |
| 4053 | - | |
| 4054 | Returns \c true if this regular expression is not equal to \a rx; | - |
| 4055 | otherwise returns \c false. | - |
| 4056 | - | |
| 4057 | \sa operator==() | - |
| 4058 | */ | - |
| 4059 | - | |
| 4060 | /*! | - |
| 4061 | Returns \c true if the pattern string is empty; otherwise returns | - |
| 4062 | false. | - |
| 4063 | - | |
| 4064 | If you call exactMatch() with an empty pattern on an empty string | - |
| 4065 | it will return true; otherwise it returns \c false since it operates | - |
| 4066 | over the whole string. If you call indexIn() with an empty pattern | - |
| 4067 | on \e any string it will return the start offset (0 by default) | - |
| 4068 | because the empty pattern matches the 'emptiness' at the start of | - |
| 4069 | the string. In this case the length of the match returned by | - |
| 4070 | matchedLength() will be 0. | - |
| 4071 | - | |
| 4072 | See QString::isEmpty(). | - |
| 4073 | */ | - |
| 4074 | - | |
| 4075 | bool QRegExp::isEmpty() const | - |
| 4076 | { | - |
| 4077 | return priv->engineKey.pattern.isEmpty(); | - |
| 4078 | } | - |
| 4079 | - | |
| 4080 | /*! | - |
| 4081 | Returns \c true if the regular expression is valid; otherwise returns | - |
| 4082 | false. An invalid regular expression never matches. | - |
| 4083 | - | |
| 4084 | The pattern \b{[a-z} is an example of an invalid pattern, since | - |
| 4085 | it lacks a closing square bracket. | - |
| 4086 | - | |
| 4087 | Note that the validity of a regexp may also depend on the setting | - |
| 4088 | of the wildcard flag, for example \b{*.html} is a valid | - |
| 4089 | wildcard regexp but an invalid full regexp. | - |
| 4090 | - | |
| 4091 | \sa errorString() | - |
| 4092 | */ | - |
| 4093 | bool QRegExp::isValid() const | - |
| 4094 | { | - |
| 4095 | if (priv->engineKey.pattern.isEmpty()) { | - |
| 4096 | return true; | - |
| 4097 | } else { | - |
| 4098 | prepareEngine(priv); | - |
| 4099 | return priv->eng->isValid(); | - |
| 4100 | } | - |
| 4101 | } | - |
| 4102 | - | |
| 4103 | /*! | - |
| 4104 | Returns the pattern string of the regular expression. The pattern | - |
| 4105 | has either regular expression syntax or wildcard syntax, depending | - |
| 4106 | on patternSyntax(). | - |
| 4107 | - | |
| 4108 | \sa patternSyntax(), caseSensitivity() | - |
| 4109 | */ | - |
| 4110 | QString QRegExp::pattern() const | - |
| 4111 | { | - |
| 4112 | return priv->engineKey.pattern; | - |
| 4113 | } | - |
| 4114 | - | |
| 4115 | /*! | - |
| 4116 | Sets the pattern string to \a pattern. The case sensitivity, | - |
| 4117 | wildcard, and minimal matching options are not changed. | - |
| 4118 | - | |
| 4119 | \sa setPatternSyntax(), setCaseSensitivity() | - |
| 4120 | */ | - |
| 4121 | void QRegExp::setPattern(const QString &pattern) | - |
| 4122 | { | - |
| 4123 | if (priv->engineKey.pattern != pattern) { | - |
| 4124 | invalidateEngine(priv); | - |
| 4125 | priv->engineKey.pattern = pattern; | - |
| 4126 | } | - |
| 4127 | } | - |
| 4128 | - | |
| 4129 | /*! | - |
| 4130 | Returns Qt::CaseSensitive if the regexp is matched case | - |
| 4131 | sensitively; otherwise returns Qt::CaseInsensitive. | - |
| 4132 | - | |
| 4133 | \sa patternSyntax(), pattern(), isMinimal() | - |
| 4134 | */ | - |
| 4135 | Qt::CaseSensitivity QRegExp::caseSensitivity() const | - |
| 4136 | { | - |
| 4137 | return priv->engineKey.cs; | - |
| 4138 | } | - |
| 4139 | - | |
| 4140 | /*! | - |
| 4141 | Sets case sensitive matching to \a cs. | - |
| 4142 | - | |
| 4143 | If \a cs is Qt::CaseSensitive, \b{\\.txt$} matches | - |
| 4144 | \c{readme.txt} but not \c{README.TXT}. | - |
| 4145 | - | |
| 4146 | \sa setPatternSyntax(), setPattern(), setMinimal() | - |
| 4147 | */ | - |
| 4148 | void QRegExp::setCaseSensitivity(Qt::CaseSensitivity cs) | - |
| 4149 | { | - |
| 4150 | if ((bool)cs != (bool)priv->engineKey.cs) { | - |
| 4151 | invalidateEngine(priv); | - |
| 4152 | priv->engineKey.cs = cs; | - |
| 4153 | } | - |
| 4154 | } | - |
| 4155 | - | |
| 4156 | /*! | - |
| 4157 | Returns the syntax used by the regular expression. The default is | - |
| 4158 | QRegExp::RegExp. | - |
| 4159 | - | |
| 4160 | \sa pattern(), caseSensitivity() | - |
| 4161 | */ | - |
| 4162 | QRegExp::PatternSyntax QRegExp::patternSyntax() const | - |
| 4163 | { | - |
| 4164 | return priv->engineKey.patternSyntax; | - |
| 4165 | } | - |
| 4166 | - | |
| 4167 | /*! | - |
| 4168 | Sets the syntax mode for the regular expression. The default is | - |
| 4169 | QRegExp::RegExp. | - |
| 4170 | - | |
| 4171 | Setting \a syntax to QRegExp::Wildcard enables simple shell-like | - |
| 4172 | \l{QRegExp wildcard matching}. For example, \b{r*.txt} matches the | - |
| 4173 | string \c{readme.txt} in wildcard mode, but does not match | - |
| 4174 | \c{readme}. | - |
| 4175 | - | |
| 4176 | Setting \a syntax to QRegExp::FixedString means that the pattern | - |
| 4177 | is interpreted as a plain string. Special characters (e.g., | - |
| 4178 | backslash) don't need to be escaped then. | - |
| 4179 | - | |
| 4180 | \sa setPattern(), setCaseSensitivity(), escape() | - |
| 4181 | */ | - |
| 4182 | void QRegExp::setPatternSyntax(PatternSyntax syntax) | - |
| 4183 | { | - |
| 4184 | if (syntax != priv->engineKey.patternSyntax) { | - |
| 4185 | invalidateEngine(priv); | - |
| 4186 | priv->engineKey.patternSyntax = syntax; | - |
| 4187 | } | - |
| 4188 | } | - |
| 4189 | - | |
| 4190 | /*! | - |
| 4191 | Returns \c true if minimal (non-greedy) matching is enabled; | - |
| 4192 | otherwise returns \c false. | - |
| 4193 | - | |
| 4194 | \sa caseSensitivity(), setMinimal() | - |
| 4195 | */ | - |
| 4196 | bool QRegExp::isMinimal() const | - |
| 4197 | { | - |
| 4198 | return priv->minimal; | - |
| 4199 | } | - |
| 4200 | - | |
| 4201 | /*! | - |
| 4202 | Enables or disables minimal matching. If \a minimal is false, | - |
| 4203 | matching is greedy (maximal) which is the default. | - |
| 4204 | - | |
| 4205 | For example, suppose we have the input string "We must be | - |
| 4206 | <b>bold</b>, very <b>bold</b>!" and the pattern | - |
| 4207 | \b{<b>.*</b>}. With the default greedy (maximal) matching, | - |
| 4208 | the match is "We must be \underline{<b>bold</b>, very | - |
| 4209 | <b>bold</b>}!". But with minimal (non-greedy) matching, the | - |
| 4210 | first match is: "We must be \underline{<b>bold</b>}, very | - |
| 4211 | <b>bold</b>!" and the second match is "We must be <b>bold</b>, | - |
| 4212 | very \underline{<b>bold</b>}!". In practice we might use the pattern | - |
| 4213 | \b{<b>[^<]*\</b>} instead, although this will still fail for | - |
| 4214 | nested tags. | - |
| 4215 | - | |
| 4216 | \sa setCaseSensitivity() | - |
| 4217 | */ | - |
| 4218 | void QRegExp::setMinimal(bool minimal) | - |
| 4219 | { | - |
| 4220 | priv->minimal = minimal; | - |
| 4221 | } | - |
| 4222 | - | |
| 4223 | // ### Qt 5: make non-const | - |
| 4224 | /*! | - |
| 4225 | Returns \c true if \a str is matched exactly by this regular | - |
| 4226 | expression; otherwise returns \c false. You can determine how much of | - |
| 4227 | the string was matched by calling matchedLength(). | - |
| 4228 | - | |
| 4229 | For a given regexp string R, exactMatch("R") is the equivalent of | - |
| 4230 | indexIn("^R$") since exactMatch() effectively encloses the regexp | - |
| 4231 | in the start of string and end of string anchors, except that it | - |
| 4232 | sets matchedLength() differently. | - |
| 4233 | - | |
| 4234 | For example, if the regular expression is \b{blue}, then | - |
| 4235 | exactMatch() returns \c true only for input \c blue. For inputs \c | - |
| 4236 | bluebell, \c blutak and \c lightblue, exactMatch() returns \c false | - |
| 4237 | and matchedLength() will return 4, 3 and 0 respectively. | - |
| 4238 | - | |
| 4239 | Although const, this function sets matchedLength(), | - |
| 4240 | capturedTexts(), and pos(). | - |
| 4241 | - | |
| 4242 | \sa indexIn(), lastIndexIn() | - |
| 4243 | */ | - |
| 4244 | bool QRegExp::exactMatch(const QString &str) const | - |
| 4245 | { | - |
| 4246 | prepareEngineForMatch(priv, str); | - |
| 4247 | priv->matchState.match(str.unicode(), str.length(), 0, priv->minimal, true, 0); | - |
| 4248 | if (priv->matchState.captured[1] == str.length()) { | - |
| 4249 | return true; | - |
| 4250 | } else { | - |
| 4251 | priv->matchState.captured[0] = 0; | - |
| 4252 | priv->matchState.captured[1] = priv->matchState.oneTestMatchedLen; | - |
| 4253 | return false; | - |
| 4254 | } | - |
| 4255 | } | - |
| 4256 | - | |
| 4257 | // ### Qt 5: make non-const | - |
| 4258 | /*! | - |
| 4259 | Attempts to find a match in \a str from position \a offset (0 by | - |
| 4260 | default). If \a offset is -1, the search starts at the last | - |
| 4261 | character; if -2, at the next to last character; etc. | - |
| 4262 | - | |
| 4263 | Returns the position of the first match, or -1 if there was no | - |
| 4264 | match. | - |
| 4265 | - | |
| 4266 | The \a caretMode parameter can be used to instruct whether \b{^} | - |
| 4267 | should match at index 0 or at \a offset. | - |
| 4268 | - | |
| 4269 | You might prefer to use QString::indexOf(), QString::contains(), | - |
| 4270 | or even QStringList::filter(). To replace matches use | - |
| 4271 | QString::replace(). | - |
| 4272 | - | |
| 4273 | Example: | - |
| 4274 | \snippet code/src_corelib_tools_qregexp.cpp 13 | - |
| 4275 | - | |
| 4276 | Although const, this function sets matchedLength(), | - |
| 4277 | capturedTexts() and pos(). | - |
| 4278 | - | |
| 4279 | If the QRegExp is a wildcard expression (see setPatternSyntax()) | - |
| 4280 | and you want to test a string against the whole wildcard expression, | - |
| 4281 | use exactMatch() instead of this function. | - |
| 4282 | - | |
| 4283 | \sa lastIndexIn(), exactMatch() | - |
| 4284 | */ | - |
| 4285 | - | |
| 4286 | int QRegExp::indexIn(const QString &str, int offset, CaretMode caretMode) const | - |
| 4287 | { | - |
| 4288 | prepareEngineForMatch(priv, str); | - |
| 4289 | if (offset < 0) | - |
| 4290 | offset += str.length(); | - |
| 4291 | priv->matchState.match(str.unicode(), str.length(), offset, | - |
| 4292 | priv->minimal, false, caretIndex(offset, caretMode)); | - |
| 4293 | return priv->matchState.captured[0]; | - |
| 4294 | } | - |
| 4295 | - | |
| 4296 | // ### Qt 5: make non-const | - |
| 4297 | /*! | - |
| 4298 | Attempts to find a match backwards in \a str from position \a | - |
| 4299 | offset. If \a offset is -1 (the default), the search starts at the | - |
| 4300 | last character; if -2, at the next to last character; etc. | - |
| 4301 | - | |
| 4302 | Returns the position of the first match, or -1 if there was no | - |
| 4303 | match. | - |
| 4304 | - | |
| 4305 | The \a caretMode parameter can be used to instruct whether \b{^} | - |
| 4306 | should match at index 0 or at \a offset. | - |
| 4307 | - | |
| 4308 | Although const, this function sets matchedLength(), | - |
| 4309 | capturedTexts() and pos(). | - |
| 4310 | - | |
| 4311 | \warning Searching backwards is much slower than searching | - |
| 4312 | forwards. | - |
| 4313 | - | |
| 4314 | \sa indexIn(), exactMatch() | - |
| 4315 | */ | - |
| 4316 | - | |
| 4317 | int QRegExp::lastIndexIn(const QString &str, int offset, CaretMode caretMode) const | - |
| 4318 | { | - |
| 4319 | prepareEngineForMatch(priv, str); | - |
| 4320 | if (offset < 0) | - |
| 4321 | offset += str.length(); | - |
| 4322 | if (offset < 0 || offset > str.length()) { | - |
| 4323 | memset(priv->matchState.captured, -1, priv->matchState.capturedSize*sizeof(int)); | - |
| 4324 | return -1; | - |
| 4325 | } | - |
| 4326 | - | |
| 4327 | while (offset >= 0) { | - |
| 4328 | priv->matchState.match(str.unicode(), str.length(), offset, | - |
| 4329 | priv->minimal, true, caretIndex(offset, caretMode)); | - |
| 4330 | if (priv->matchState.captured[0] == offset) | - |
| 4331 | return offset; | - |
| 4332 | --offset; | - |
| 4333 | } | - |
| 4334 | return -1; | - |
| 4335 | } | - |
| 4336 | - | |
| 4337 | /*! | - |
| 4338 | Returns the length of the last matched string, or -1 if there was | - |
| 4339 | no match. | - |
| 4340 | - | |
| 4341 | \sa exactMatch(), indexIn(), lastIndexIn() | - |
| 4342 | */ | - |
| 4343 | int QRegExp::matchedLength() const | - |
| 4344 | { | - |
| 4345 | return priv->matchState.captured[1]; | - |
| 4346 | } | - |
| 4347 | - | |
| 4348 | #ifndef QT_NO_REGEXP_CAPTURE | - |
| 4349 | - | |
| 4350 | /*! | - |
| 4351 | \since 4.6 | - |
| 4352 | Returns the number of captures contained in the regular expression. | - |
| 4353 | */ | - |
| 4354 | int QRegExp::captureCount() const | - |
| 4355 | { | - |
| 4356 | prepareEngine(priv); | - |
| 4357 | return priv->eng->captureCount(); | - |
| 4358 | } | - |
| 4359 | - | |
| 4360 | /*! | - |
| 4361 | Returns a list of the captured text strings. | - |
| 4362 | - | |
| 4363 | The first string in the list is the entire matched string. Each | - |
| 4364 | subsequent list element contains a string that matched a | - |
| 4365 | (capturing) subexpression of the regexp. | - |
| 4366 | - | |
| 4367 | For example: | - |
| 4368 | \snippet code/src_corelib_tools_qregexp.cpp 14 | - |
| 4369 | - | |
| 4370 | The above example also captures elements that may be present but | - |
| 4371 | which we have no interest in. This problem can be solved by using | - |
| 4372 | non-capturing parentheses: | - |
| 4373 | - | |
| 4374 | \snippet code/src_corelib_tools_qregexp.cpp 15 | - |
| 4375 | - | |
| 4376 | Note that if you want to iterate over the list, you should iterate | - |
| 4377 | over a copy, e.g. | - |
| 4378 | \snippet code/src_corelib_tools_qregexp.cpp 16 | - |
| 4379 | - | |
| 4380 | Some regexps can match an indeterminate number of times. For | - |
| 4381 | example if the input string is "Offsets: 12 14 99 231 7" and the | - |
| 4382 | regexp, \c{rx}, is \b{(\\d+)+}, we would hope to get a list of | - |
| 4383 | all the numbers matched. However, after calling | - |
| 4384 | \c{rx.indexIn(str)}, capturedTexts() will return the list ("12", | - |
| 4385 | "12"), i.e. the entire match was "12" and the first subexpression | - |
| 4386 | matched was "12". The correct approach is to use cap() in a | - |
| 4387 | \l{QRegExp#cap_in_a_loop}{loop}. | - |
| 4388 | - | |
| 4389 | The order of elements in the string list is as follows. The first | - |
| 4390 | element is the entire matching string. Each subsequent element | - |
| 4391 | corresponds to the next capturing open left parentheses. Thus | - |
| 4392 | capturedTexts()[1] is the text of the first capturing parentheses, | - |
| 4393 | capturedTexts()[2] is the text of the second and so on | - |
| 4394 | (corresponding to $1, $2, etc., in some other regexp languages). | - |
| 4395 | - | |
| 4396 | \sa cap(), pos() | - |
| 4397 | */ | - |
| 4398 | QStringList QRegExp::capturedTexts() const | - |
| 4399 | { | - |
| 4400 | if (priv->capturedCache.isEmpty()) { | - |
| 4401 | prepareEngine(priv); | - |
| 4402 | const int *captured = priv->matchState.captured; | - |
| 4403 | int n = priv->matchState.capturedSize; | - |
| 4404 | - | |
| 4405 | for (int i = 0; i < n; i += 2) { | - |
| 4406 | QString m; | - |
| 4407 | if (captured[i + 1] == 0) | - |
| 4408 | m = QLatin1String(""); // ### Qt 5: don't distinguish between null and empty | - |
| 4409 | else if (captured[i] >= 0) | - |
| 4410 | m = priv->t.mid(captured[i], captured[i + 1]); | - |
| 4411 | priv->capturedCache.append(m); | - |
| 4412 | } | - |
| 4413 | priv->t.clear(); | - |
| 4414 | } | - |
| 4415 | return priv->capturedCache; | - |
| 4416 | } | - |
| 4417 | - | |
| 4418 | /*! | - |
| 4419 | \internal | - |
| 4420 | */ | - |
| 4421 | QStringList QRegExp::capturedTexts() | - |
| 4422 | { | - |
| 4423 | return const_cast<const QRegExp *>(this)->capturedTexts(); | - |
| 4424 | } | - |
| 4425 | - | |
| 4426 | /*! | - |
| 4427 | Returns the text captured by the \a nth subexpression. The entire | - |
| 4428 | match has index 0 and the parenthesized subexpressions have | - |
| 4429 | indexes starting from 1 (excluding non-capturing parentheses). | - |
| 4430 | - | |
| 4431 | \snippet code/src_corelib_tools_qregexp.cpp 17 | - |
| 4432 | - | |
| 4433 | The order of elements matched by cap() is as follows. The first | - |
| 4434 | element, cap(0), is the entire matching string. Each subsequent | - |
| 4435 | element corresponds to the next capturing open left parentheses. | - |
| 4436 | Thus cap(1) is the text of the first capturing parentheses, cap(2) | - |
| 4437 | is the text of the second, and so on. | - |
| 4438 | - | |
| 4439 | \sa capturedTexts(), pos() | - |
| 4440 | */ | - |
| 4441 | QString QRegExp::cap(int nth) const | - |
| 4442 | { | - |
| 4443 | return capturedTexts().value(nth); | - |
| 4444 | } | - |
| 4445 | - | |
| 4446 | /*! | - |
| 4447 | \internal | - |
| 4448 | */ | - |
| 4449 | QString QRegExp::cap(int nth) | - |
| 4450 | { | - |
| 4451 | return const_cast<const QRegExp *>(this)->cap(nth); | - |
| 4452 | } | - |
| 4453 | - | |
| 4454 | /*! | - |
| 4455 | Returns the position of the \a nth captured text in the searched | - |
| 4456 | string. If \a nth is 0 (the default), pos() returns the position | - |
| 4457 | of the whole match. | - |
| 4458 | - | |
| 4459 | Example: | - |
| 4460 | \snippet code/src_corelib_tools_qregexp.cpp 18 | - |
| 4461 | - | |
| 4462 | For zero-length matches, pos() always returns -1. (For example, if | - |
| 4463 | cap(4) would return an empty string, pos(4) returns -1.) This is | - |
| 4464 | a feature of the implementation. | - |
| 4465 | - | |
| 4466 | \sa cap(), capturedTexts() | - |
| 4467 | */ | - |
| 4468 | int QRegExp::pos(int nth) const | - |
| 4469 | { | - |
| 4470 | if (nth < 0 || nth >= priv->matchState.capturedSize / 2) | - |
| 4471 | return -1; | - |
| 4472 | else | - |
| 4473 | return priv->matchState.captured[2 * nth]; | - |
| 4474 | } | - |
| 4475 | - | |
| 4476 | /*! | - |
| 4477 | \internal | - |
| 4478 | */ | - |
| 4479 | int QRegExp::pos(int nth) | - |
| 4480 | { | - |
| 4481 | return const_cast<const QRegExp *>(this)->pos(nth); | - |
| 4482 | } | - |
| 4483 | - | |
| 4484 | /*! | - |
| 4485 | Returns a text string that explains why a regexp pattern is | - |
| 4486 | invalid, if that is the case; otherwise returns "no error occurred". | - |
| 4487 | - | |
| 4488 | \sa isValid() | - |
| 4489 | */ | - |
| 4490 | QString QRegExp::errorString() const | - |
| 4491 | { | - |
| 4492 | if (isValid()) { | - |
| 4493 | return QString::fromLatin1(RXERR_OK); | - |
| 4494 | } else { | - |
| 4495 | return priv->eng->errorString(); | - |
| 4496 | } | - |
| 4497 | } | - |
| 4498 | - | |
| 4499 | /*! | - |
| 4500 | \internal | - |
| 4501 | */ | - |
| 4502 | QString QRegExp::errorString() | - |
| 4503 | { | - |
| 4504 | return const_cast<const QRegExp *>(this)->errorString(); | - |
| 4505 | } | - |
| 4506 | #endif | - |
| 4507 | - | |
| 4508 | /*! | - |
| 4509 | Returns the string \a str with every regexp special character | - |
| 4510 | escaped with a backslash. The special characters are $, (,), *, +, | - |
| 4511 | ., ?, [, \,], ^, {, | and }. | - |
| 4512 | - | |
| 4513 | Example: | - |
| 4514 | - | |
| 4515 | \snippet code/src_corelib_tools_qregexp.cpp 19 | - |
| 4516 | - | |
| 4517 | This function is useful to construct regexp patterns dynamically: | - |
| 4518 | - | |
| 4519 | \snippet code/src_corelib_tools_qregexp.cpp 20 | - |
| 4520 | - | |
| 4521 | \sa setPatternSyntax() | - |
| 4522 | */ | - |
| 4523 | QString QRegExp::escape(const QString &str) | - |
| 4524 | { | - |
| 4525 | QString quoted; | - |
| 4526 | const int count = str.count(); | - |
| 4527 | quoted.reserve(count * 2); | - |
| 4528 | const QLatin1Char backslash('\\'); | - |
| 4529 | for (int i = 0; i < count; i++) { | - |
| 4530 | switch (str.at(i).toLatin1()) { | - |
| 4531 | case '$': | - |
| 4532 | case '(': | - |
| 4533 | case ')': | - |
| 4534 | case '*': | - |
| 4535 | case '+': | - |
| 4536 | case '.': | - |
| 4537 | case '?': | - |
| 4538 | case '[': | - |
| 4539 | case '\\': | - |
| 4540 | case ']': | - |
| 4541 | case '^': | - |
| 4542 | case '{': | - |
| 4543 | case '|': | - |
| 4544 | case '}': | - |
| 4545 | quoted.append(backslash); | - |
| 4546 | } | - |
| 4547 | quoted.append(str.at(i)); | - |
| 4548 | } | - |
| 4549 | return quoted; | - |
| 4550 | } | - |
| 4551 | - | |
| 4552 | - | |
| 4553 | #ifndef QT_NO_DATASTREAM | - |
| 4554 | /*! | - |
| 4555 | \relates QRegExp | - |
| 4556 | - | |
| 4557 | Writes the regular expression \a regExp to stream \a out. | - |
| 4558 | - | |
| 4559 | \sa {Serializing Qt Data Types} | - |
| 4560 | */ | - |
| 4561 | QDataStream &operator<<(QDataStream &out, const QRegExp ®Exp) | - |
| 4562 | { | - |
| 4563 | return out << regExp.pattern() << (quint8)regExp.caseSensitivity() | - |
| 4564 | << (quint8)regExp.patternSyntax() | - |
| 4565 | << (quint8)!!regExp.isMinimal(); | - |
| 4566 | } | - |
| 4567 | - | |
| 4568 | /*! | - |
| 4569 | \relates QRegExp | - |
| 4570 | - | |
| 4571 | Reads a regular expression from stream \a in into \a regExp. | - |
| 4572 | - | |
| 4573 | \sa {Serializing Qt Data Types} | - |
| 4574 | */ | - |
| 4575 | QDataStream &operator>>(QDataStream &in, QRegExp ®Exp) | - |
| 4576 | { | - |
| 4577 | QString pattern; | - |
| 4578 | quint8 cs; | - |
| 4579 | quint8 patternSyntax; | - |
| 4580 | quint8 isMinimal; | - |
| 4581 | - | |
| 4582 | in >> pattern >> cs >> patternSyntax >> isMinimal; | - |
| 4583 | - | |
| 4584 | QRegExp newRegExp(pattern, Qt::CaseSensitivity(cs), | - |
| 4585 | QRegExp::PatternSyntax(patternSyntax)); | - |
| 4586 | - | |
| 4587 | newRegExp.setMinimal(isMinimal); | - |
| 4588 | regExp = newRegExp; | - |
| 4589 | return in; | - |
| 4590 | } | - |
| 4591 | #endif // QT_NO_DATASTREAM | - |
| 4592 | - | |
| 4593 | #ifndef QT_NO_DEBUG_STREAM | - |
| 4594 | QDebug operator<<(QDebug dbg, const QRegExp &r) | - |
| 4595 | { | - |
| 4596 | QDebugStateSaver saver(dbg); | - |
| 4597 | dbg.nospace() << "QRegExp(patternSyntax=" << r.patternSyntax() | - |
| 4598 | << ", pattern='"<< r.pattern() << "')"; | - |
| 4599 | return dbg; | - |
| 4600 | } | - |
| 4601 | #endif | - |
| 4602 | - | |
| 4603 | QT_END_NAMESPACE | - |
| Source code | Switch to Preprocessed file |