Line | Source Code | Coverage |
---|
1 | /************************************************* | - |
2 | * Perl-Compatible Regular Expressions * | - |
3 | *************************************************/ | - |
4 | | - |
5 | /* PCRE is a library of functions to support regular expressions whose syntax | - |
6 | and semantics are as close as possible to those of the Perl 5 language. | - |
7 | | - |
8 | Written by Philip Hazel | - |
9 | Copyright (c) 1997-2012 University of Cambridge | - |
10 | | - |
11 | ----------------------------------------------------------------------------- | - |
12 | Redistribution and use in source and binary forms, with or without | - |
13 | modification, are permitted provided that the following conditions are met: | - |
14 | | - |
15 | * Redistributions of source code must retain the above copyright notice, | - |
16 | this list of conditions and the following disclaimer. | - |
17 | | - |
18 | * Redistributions in binary form must reproduce the above copyright | - |
19 | notice, this list of conditions and the following disclaimer in the | - |
20 | documentation and/or other materials provided with the distribution. | - |
21 | | - |
22 | * Neither the name of the University of Cambridge nor the names of its | - |
23 | contributors may be used to endorse or promote products derived from | - |
24 | this software without specific prior written permission. | - |
25 | | - |
26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | - |
27 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | - |
28 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | - |
29 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | - |
30 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | - |
31 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | - |
32 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | - |
33 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | - |
34 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | - |
35 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | - |
36 | POSSIBILITY OF SUCH DAMAGE. | - |
37 | ----------------------------------------------------------------------------- | - |
38 | */ | - |
39 | | - |
40 | | - |
41 | /* This module contains the external function pcre_study(), along with local | - |
42 | supporting functions. */ | - |
43 | | - |
44 | | - |
45 | #ifdef PCRE_HAVE_CONFIG_H | - |
46 | #include "config.h" | - |
47 | #endif | - |
48 | | - |
49 | #include "pcre_internal.h" | - |
50 | | - |
51 | #define SET_BIT(c) start_bits[c/8] |= (1 << (c&7)) | - |
52 | | - |
53 | /* Returns from set_start_bits() */ | - |
54 | | - |
55 | enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN }; | - |
56 | | - |
57 | | - |
58 | | - |
59 | /************************************************* | - |
60 | * Find the minimum subject length for a group * | - |
61 | *************************************************/ | - |
62 | | - |
63 | /* Scan a parenthesized group and compute the minimum length of subject that | - |
64 | is needed to match it. This is a lower bound; it does not mean there is a | - |
65 | string of that length that matches. In UTF8 mode, the result is in characters | - |
66 | rather than bytes. | - |
67 | | - |
68 | Arguments: | - |
69 | code pointer to start of group (the bracket) | - |
70 | startcode pointer to start of the whole pattern | - |
71 | options the compiling options | - |
72 | recurse_depth RECURSE depth | - |
73 | | - |
74 | Returns: the minimum length | - |
75 | -1 if \C in UTF-8 mode or (*ACCEPT) was encountered | - |
76 | -2 internal error (missing capturing bracket) | - |
77 | -3 internal error (opcode not listed) | - |
78 | */ | - |
79 | | - |
80 | static int | - |
81 | find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options, | - |
82 | int recurse_depth) | - |
83 | { | - |
84 | int length = -1; executed (the execution status of this line is deduced): int length = -1; | - |
85 | /* PCRE_UTF16 has the same value as PCRE_UTF8. */ | - |
86 | BOOL utf = (options & PCRE_UTF8) != 0; executed (the execution status of this line is deduced): BOOL utf = (options & 0x00000800) != 0; | - |
87 | BOOL had_recurse = FALSE; executed (the execution status of this line is deduced): BOOL had_recurse = 0; | - |
88 | register int branchlength = 0; executed (the execution status of this line is deduced): register int branchlength = 0; | - |
89 | register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE; executed (the execution status of this line is deduced): register pcre_uchar *cc = (pcre_uchar *)code + 1 + 1; | - |
90 | | - |
91 | if (*code == OP_CBRA || *code == OP_SCBRA || partially evaluated: *code == OP_CBRA no Evaluation Count:0 | yes Evaluation Count:15 |
partially evaluated: *code == OP_SCBRA no Evaluation Count:0 | yes Evaluation Count:15 |
| 0-15 |
92 | *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE; never executed: cc += 1; partially evaluated: *code == OP_CBRAPOS no Evaluation Count:0 | yes Evaluation Count:15 |
partially evaluated: *code == OP_SCBRAPOS no Evaluation Count:0 | yes Evaluation Count:15 |
| 0-15 |
93 | | - |
94 | /* Scan along the opcodes for this branch. If we get to the end of the | - |
95 | branch, check the length against that of the other branches. */ | - |
96 | | - |
97 | for (;;) executed (the execution status of this line is deduced): for (;;) | - |
98 | { | - |
99 | int d, min; executed (the execution status of this line is deduced): int d, min; | - |
100 | pcre_uchar *cs, *ce; executed (the execution status of this line is deduced): pcre_uchar *cs, *ce; | - |
101 | register int op = *cc; executed (the execution status of this line is deduced): register int op = *cc; | - |
102 | | - |
103 | switch (op) | - |
104 | { | - |
105 | case OP_COND: | - |
106 | case OP_SCOND: | - |
107 | | - |
108 | /* If there is only one branch in a condition, the implied branch has zero | - |
109 | length, so we don't add anything. This covers the DEFINE "condition" | - |
110 | automatically. */ | - |
111 | | - |
112 | cs = cc + GET(cc, 1); never executed (the execution status of this line is deduced): cs = cc + (cc[1]); | - |
113 | if (*cs != OP_ALT) never evaluated: *cs != OP_ALT | 0 |
114 | { | - |
115 | cc = cs + 1 + LINK_SIZE; never executed (the execution status of this line is deduced): cc = cs + 1 + 1; | - |
116 | break; | 0 |
117 | } | - |
118 | | - |
119 | /* Otherwise we can fall through and treat it the same as any other | - |
120 | subpattern. */ | - |
121 | | - |
122 | case OP_CBRA: code before this statement never executed: case OP_CBRA: | 0 |
123 | case OP_SCBRA: | - |
124 | case OP_BRA: | - |
125 | case OP_SBRA: | - |
126 | case OP_CBRAPOS: | - |
127 | case OP_SCBRAPOS: | - |
128 | case OP_BRAPOS: | - |
129 | case OP_SBRAPOS: | - |
130 | case OP_ONCE: | - |
131 | case OP_ONCE_NC: | - |
132 | d = find_minlength(cc, startcode, options, recurse_depth); never executed (the execution status of this line is deduced): d = find_minlength(cc, startcode, options, recurse_depth); | - |
133 | if (d < 0) return d; never executed: return d; never evaluated: d < 0 | 0 |
134 | branchlength += d; never executed (the execution status of this line is deduced): branchlength += d; | - |
135 | do cc += GET(cc, 1); while (*cc == OP_ALT); never executed: cc += (cc[1]); never evaluated: *cc == OP_ALT | 0 |
136 | cc += 1 + LINK_SIZE; never executed (the execution status of this line is deduced): cc += 1 + 1; | - |
137 | break; | 0 |
138 | | - |
139 | /* ACCEPT makes things far too complicated; we have to give up. */ | - |
140 | | - |
141 | case OP_ACCEPT: | - |
142 | case OP_ASSERT_ACCEPT: | - |
143 | return -1; never executed: return -1; | 0 |
144 | | - |
145 | /* Reached end of a branch; if it's a ket it is the end of a nested | - |
146 | call. If it's ALT it is an alternation in a nested call. If it is END it's | - |
147 | the end of the outer call. All can be handled by the same code: keep the | - |
148 | shortest branch length seen so far, ignoring any branch that contained a | - |
149 | recursion unless no length has been recorded yet. (ACCEPT returns -1 above.) */ | - |
150 | | - |
151 | case OP_ALT: | - |
152 | case OP_KET: | - |
153 | case OP_KETRMAX: | - |
154 | case OP_KETRMIN: | - |
155 | case OP_KETRPOS: | - |
156 | case OP_END: | - |
157 | if (length < 0 || (!had_recurse && branchlength < length)) evaluated: length < 0 yes Evaluation Count:15 | yes Evaluation Count:1 |
partially evaluated: !had_recurse yes Evaluation Count:1 | no Evaluation Count:0 |
partially evaluated: branchlength < length no Evaluation Count:0 | yes Evaluation Count:1 |
| 0-15 |
158 | length = branchlength; executed: length = branchlength; Execution Count:15 | 15 |
159 | if (op != OP_ALT) return length; executed: return length; Execution Count:15 evaluated: op != OP_ALT yes Evaluation Count:15 | yes Evaluation Count:1 |
| 1-15 |
160 | cc += 1 + LINK_SIZE; executed (the execution status of this line is deduced): cc += 1 + 1; | - |
161 | branchlength = 0; executed (the execution status of this line is deduced): branchlength = 0; | - |
162 | had_recurse = FALSE; executed (the execution status of this line is deduced): had_recurse = 0; | - |
163 | break; executed: break; Execution Count:1 | 1 |
164 | | - |
165 | /* Skip over assertive subpatterns */ | - |
166 | | - |
167 | case OP_ASSERT: | - |
168 | case OP_ASSERT_NOT: | - |
169 | case OP_ASSERTBACK: | - |
170 | case OP_ASSERTBACK_NOT: | - |
171 | do cc += GET(cc, 1); while (*cc == OP_ALT); never executed: cc += (cc[1]); never evaluated: *cc == OP_ALT | 0 |
172 | /* Fall through */ | - |
173 | | - |
174 | /* Skip over things that don't match chars */ | - |
175 | | - |
176 | case OP_REVERSE: code before this statement never executed: case OP_REVERSE: | 0 |
177 | case OP_CREF: | - |
178 | case OP_NCREF: | - |
179 | case OP_RREF: | - |
180 | case OP_NRREF: | - |
181 | case OP_DEF: | - |
182 | case OP_CALLOUT: | - |
183 | case OP_SOD: | - |
184 | case OP_SOM: | - |
185 | case OP_EOD: | - |
186 | case OP_EODN: | - |
187 | case OP_CIRC: | - |
188 | case OP_CIRCM: | - |
189 | case OP_DOLL: | - |
190 | case OP_DOLLM: | - |
191 | case OP_NOT_WORD_BOUNDARY: | - |
192 | case OP_WORD_BOUNDARY: | - |
193 | cc += PRIV(OP_lengths)[*cc]; executed (the execution status of this line is deduced): cc += _pcre16_OP_lengths[*cc]; | - |
194 | break; executed: break; Execution Count:1 | 1 |
195 | | - |
196 | /* Skip over a subpattern that has a {0} or {0,x} quantifier */ | - |
197 | | - |
198 | case OP_BRAZERO: | - |
199 | case OP_BRAMINZERO: | - |
200 | case OP_BRAPOSZERO: | - |
201 | case OP_SKIPZERO: | - |
202 | cc += PRIV(OP_lengths)[*cc]; never executed (the execution status of this line is deduced): cc += _pcre16_OP_lengths[*cc]; | - |
203 | do cc += GET(cc, 1); while (*cc == OP_ALT); never executed: cc += (cc[1]); never evaluated: *cc == OP_ALT | 0 |
204 | cc += 1 + LINK_SIZE; never executed (the execution status of this line is deduced): cc += 1 + 1; | - |
205 | break; | 0 |
206 | | - |
207 | /* Handle literal characters and + repetitions */ | - |
208 | | - |
209 | case OP_CHAR: | - |
210 | case OP_CHARI: | - |
211 | case OP_NOT: | - |
212 | case OP_NOTI: | - |
213 | case OP_PLUS: | - |
214 | case OP_PLUSI: | - |
215 | case OP_MINPLUS: | - |
216 | case OP_MINPLUSI: | - |
217 | case OP_POSPLUS: | - |
218 | case OP_POSPLUSI: | - |
219 | case OP_NOTPLUS: | - |
220 | case OP_NOTPLUSI: | - |
221 | case OP_NOTMINPLUS: | - |
222 | case OP_NOTMINPLUSI: | - |
223 | case OP_NOTPOSPLUS: | - |
224 | case OP_NOTPOSPLUSI: | - |
225 | branchlength++; executed (the execution status of this line is deduced): branchlength++; | - |
226 | cc += 2; executed (the execution status of this line is deduced): cc += 2; | - |
227 | #ifdef SUPPORT_UTF | - |
228 | if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); never executed: cc += 1; partially evaluated: utf yes Evaluation Count:7 | no Evaluation Count:0 |
partially evaluated: (((cc[-1]) & 0xfc00) == 0xd800) no Evaluation Count:0 | yes Evaluation Count:7 |
| 0-7 |
229 | #endif | - |
230 | break; executed: break; Execution Count:7 | 7 |
231 | | - |
232 | case OP_TYPEPLUS: | - |
233 | case OP_TYPEMINPLUS: | - |
234 | case OP_TYPEPOSPLUS: | - |
235 | branchlength++; never executed (the execution status of this line is deduced): branchlength++; | - |
236 | cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2; never evaluated: cc[1] == OP_PROP never evaluated: cc[1] == OP_NOTPROP | 0 |
237 | break; | 0 |
238 | | - |
239 | /* Handle exact repetitions. The count is already in characters, but we | - |
240 | need to skip over a multibyte character in UTF8 mode. */ | - |
241 | | - |
242 | case OP_EXACT: | - |
243 | case OP_EXACTI: | - |
244 | case OP_NOTEXACT: | - |
245 | case OP_NOTEXACTI: | - |
246 | branchlength += GET2(cc,1); never executed (the execution status of this line is deduced): branchlength += cc[1]; | - |
247 | cc += 2 + IMM2_SIZE; never executed (the execution status of this line is deduced): cc += 2 + 1; | - |
248 | #ifdef SUPPORT_UTF | - |
249 | if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); never executed: cc += 1; never evaluated: utf never evaluated: (((cc[-1]) & 0xfc00) == 0xd800) | 0 |
250 | #endif | - |
251 | break; | 0 |
252 | | - |
253 | case OP_TYPEEXACT: | - |
254 | branchlength += GET2(cc,1); never executed (the execution status of this line is deduced): branchlength += cc[1]; | - |
255 | cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP never evaluated: cc[1 + 1] == OP_PROP | 0 |
256 | || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0); never evaluated: cc[1 + 1] == OP_NOTPROP | 0 |
257 | break; | 0 |
258 | | - |
259 | /* Handle single-char non-literal matchers */ | - |
260 | | - |
261 | case OP_PROP: | - |
262 | case OP_NOTPROP: | - |
263 | cc += 2; never executed (the execution status of this line is deduced): cc += 2; | - |
264 | /* Fall through */ | - |
265 | | - |
266 | case OP_NOT_DIGIT: code before this statement never executed: case OP_NOT_DIGIT: | 0 |
267 | case OP_DIGIT: | - |
268 | case OP_NOT_WHITESPACE: | - |
269 | case OP_WHITESPACE: | - |
270 | case OP_NOT_WORDCHAR: | - |
271 | case OP_WORDCHAR: | - |
272 | case OP_ANY: | - |
273 | case OP_ALLANY: | - |
274 | case OP_EXTUNI: | - |
275 | case OP_HSPACE: | - |
276 | case OP_NOT_HSPACE: | - |
277 | case OP_VSPACE: | - |
278 | case OP_NOT_VSPACE: | - |
279 | branchlength++; never executed (the execution status of this line is deduced): branchlength++; | - |
280 | cc++; never executed (the execution status of this line is deduced): cc++; | - |
281 | break; | 0 |
282 | | - |
283 | /* "Any newline" might match two characters, but it also might match just | - |
284 | one. */ | - |
285 | | - |
286 | case OP_ANYNL: | - |
287 | branchlength += 1; never executed (the execution status of this line is deduced): branchlength += 1; | - |
288 | cc++; never executed (the execution status of this line is deduced): cc++; | - |
289 | break; | 0 |
290 | | - |
291 | /* The single-byte matcher means we can't proceed in UTF-8 mode. (In | - |
292 | non-UTF-8 mode \C will actually be turned into OP_ALLANY, so won't ever | - |
293 | appear, but leave the code, just in case.) */ | - |
294 | | - |
295 | case OP_ANYBYTE: | - |
296 | #ifdef SUPPORT_UTF | - |
297 | if (utf) return -1; never executed: return -1; never evaluated: utf | 0 |
298 | #endif | - |
299 | branchlength++; never executed (the execution status of this line is deduced): branchlength++; | - |
300 | cc++; never executed (the execution status of this line is deduced): cc++; | - |
301 | break; | 0 |
302 | | - |
303 | /* For repeated character types, we have to test for \p and \P, which have | - |
304 | an extra two bytes of parameters. */ | - |
305 | | - |
306 | case OP_TYPESTAR: | - |
307 | case OP_TYPEMINSTAR: | - |
308 | case OP_TYPEQUERY: | - |
309 | case OP_TYPEMINQUERY: | - |
310 | case OP_TYPEPOSSTAR: | - |
311 | case OP_TYPEPOSQUERY: | - |
312 | if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2; never executed: cc += 2; partially evaluated: cc[1] == OP_PROP no Evaluation Count:0 | yes Evaluation Count:5 |
partially evaluated: cc[1] == OP_NOTPROP no Evaluation Count:0 | yes Evaluation Count:5 |
| 0-5 |
313 | cc += PRIV(OP_lengths)[op]; executed (the execution status of this line is deduced): cc += _pcre16_OP_lengths[op]; | - |
314 | break; executed: break; Execution Count:5 | 5 |
315 | | - |
316 | case OP_TYPEUPTO: | - |
317 | case OP_TYPEMINUPTO: | - |
318 | case OP_TYPEPOSUPTO: | - |
319 | if (cc[1 + IMM2_SIZE] == OP_PROP never evaluated: cc[1 + 1] == OP_PROP | 0 |
320 | || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2; never executed: cc += 2; never evaluated: cc[1 + 1] == OP_NOTPROP | 0 |
321 | cc += PRIV(OP_lengths)[op]; never executed (the execution status of this line is deduced): cc += _pcre16_OP_lengths[op]; | - |
322 | break; | 0 |
323 | | - |
324 | /* Check a class for variable quantification */ | - |
325 | | - |
326 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 | - |
327 | case OP_XCLASS: | - |
328 | cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS]; executed (the execution status of this line is deduced): cc += (cc[1]) - _pcre16_OP_lengths[OP_CLASS]; | - |
329 | /* Fall through */ | - |
330 | #endif | - |
331 | | - |
332 | case OP_CLASS: code before this statement executed: case OP_CLASS: Execution Count:1 | 1 |
333 | case OP_NCLASS: | - |
334 | cc += PRIV(OP_lengths)[OP_CLASS]; executed (the execution status of this line is deduced): cc += _pcre16_OP_lengths[OP_CLASS]; | - |
335 | | - |
336 | switch (*cc) | - |
337 | { | - |
338 | case OP_CRPLUS: | - |
339 | case OP_CRMINPLUS: | - |
340 | branchlength++; never executed (the execution status of this line is deduced): branchlength++; | - |
341 | /* Fall through */ | - |
342 | | - |
343 | case OP_CRSTAR: code before this statement never executed: case OP_CRSTAR: | 0 |
344 | case OP_CRMINSTAR: | - |
345 | case OP_CRQUERY: | - |
346 | case OP_CRMINQUERY: | - |
347 | cc++; executed (the execution status of this line is deduced): cc++; | - |
348 | break; executed: break; Execution Count:1 | 1 |
349 | | - |
350 | case OP_CRRANGE: | - |
351 | case OP_CRMINRANGE: | - |
352 | branchlength += GET2(cc,1); never executed (the execution status of this line is deduced): branchlength += cc[1]; | - |
353 | cc += 1 + 2 * IMM2_SIZE; never executed (the execution status of this line is deduced): cc += 1 + 2 * 1; | - |
354 | break; | 0 |
355 | | - |
356 | default: | - |
357 | branchlength++; never executed (the execution status of this line is deduced): branchlength++; | - |
358 | break; | 0 |
359 | } | - |
360 | break; executed: break; Execution Count:1 | 1 |
361 | | - |
362 | /* Backreferences and subroutine calls are treated in the same way: we find | - |
363 | the minimum length for the subpattern. A recursion, however, causes | - |
364 | a flag to be set that causes the length of this branch to be ignored. The | - |
365 | logic is that a recursion can only make sense if there is another | - |
366 | alternation that stops the recursing. That will provide the minimum length | - |
367 | (when no recursion happens). A backreference within the group that it is | - |
368 | referencing behaves in the same way. | - |
369 | | - |
370 | If PCRE_JAVASCRIPT_COMPAT is set, a backreference to an unset bracket | - |
371 | matches an empty string (by default it causes a matching failure), so in | - |
372 | that case we must set the minimum length to zero. */ | - |
373 | | - |
374 | case OP_REF: | - |
375 | case OP_REFI: | - |
376 | if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) never evaluated: (options & 0x02000000) == 0 | 0 |
377 | { | - |
378 | ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1)); never executed (the execution status of this line is deduced): ce = cs = (pcre_uchar *)_pcre16_find_bracket(startcode, utf, cc[1]); | - |
379 | if (cs == NULL) return -2; never executed: return -2; never evaluated: cs == ((void *)0) | 0 |
380 | do ce += GET(ce, 1); while (*ce == OP_ALT); never executed: ce += (ce[1]); never evaluated: *ce == OP_ALT | 0 |
381 | if (cc > cs && cc < ce) never evaluated: cc > cs never evaluated: cc < ce | 0 |
382 | { | - |
383 | d = 0; never executed (the execution status of this line is deduced): d = 0; | - |
384 | had_recurse = TRUE; never executed (the execution status of this line is deduced): had_recurse = 1; | - |
385 | } | 0 |
386 | else | - |
387 | { | - |
388 | d = find_minlength(cs, startcode, options, recurse_depth); never executed (the execution status of this line is deduced): d = find_minlength(cs, startcode, options, recurse_depth); | - |
389 | } | 0 |
390 | } | - |
391 | else d = 0; | 0 |
392 | cc += 1 + IMM2_SIZE; never executed (the execution status of this line is deduced): cc += 1 + 1; | - |
393 | | - |
394 | /* Handle repeated back references */ | - |
395 | | - |
396 | switch (*cc) | - |
397 | { | - |
398 | case OP_CRSTAR: | - |
399 | case OP_CRMINSTAR: | - |
400 | case OP_CRQUERY: | - |
401 | case OP_CRMINQUERY: | - |
402 | min = 0; never executed (the execution status of this line is deduced): min = 0; | - |
403 | cc++; never executed (the execution status of this line is deduced): cc++; | - |
404 | break; | 0 |
405 | | - |
406 | case OP_CRPLUS: | - |
407 | case OP_CRMINPLUS: | - |
408 | min = 1; never executed (the execution status of this line is deduced): min = 1; | - |
409 | cc++; never executed (the execution status of this line is deduced): cc++; | - |
410 | break; | 0 |
411 | | - |
412 | case OP_CRRANGE: | - |
413 | case OP_CRMINRANGE: | - |
414 | min = GET2(cc, 1); never executed (the execution status of this line is deduced): min = cc[1]; | - |
415 | cc += 1 + 2 * IMM2_SIZE; never executed (the execution status of this line is deduced): cc += 1 + 2 * 1; | - |
416 | break; | 0 |
417 | | - |
418 | default: | - |
419 | min = 1; never executed (the execution status of this line is deduced): min = 1; | - |
420 | break; | 0 |
421 | } | - |
422 | | - |
423 | branchlength += min * d; never executed (the execution status of this line is deduced): branchlength += min * d; | - |
424 | break; | 0 |
425 | | - |
426 | /* We can easily detect direct recursion, but not mutual recursion. This is | - |
427 | caught by a recursion depth count. */ | - |
428 | | - |
429 | case OP_RECURSE: | - |
430 | cs = ce = (pcre_uchar *)startcode + GET(cc, 1); never executed (the execution status of this line is deduced): cs = ce = (pcre_uchar *)startcode + (cc[1]); | - |
431 | do ce += GET(ce, 1); while (*ce == OP_ALT); never executed: ce += (ce[1]); never evaluated: *ce == OP_ALT | 0 |
432 | if ((cc > cs && cc < ce) || recurse_depth > 10) never evaluated: cc > cs never evaluated: cc < ce never evaluated: recurse_depth > 10 | 0 |
433 | had_recurse = TRUE; never executed: had_recurse = 1; | 0 |
434 | else | - |
435 | { | - |
436 | branchlength += find_minlength(cs, startcode, options, recurse_depth + 1); never executed (the execution status of this line is deduced): branchlength += find_minlength(cs, startcode, options, recurse_depth + 1); | - |
437 | } | 0 |
438 | cc += 1 + LINK_SIZE; never executed (the execution status of this line is deduced): cc += 1 + 1; | - |
439 | break; | 0 |
440 | | - |
441 | /* Anything else does not or need not match a character. We can get the | - |
442 | item's length from the table, but for those that can match zero occurrences | - |
443 | of a character, we must take special action for UTF-8 characters. As it | - |
444 | happens, the "NOT" versions of these opcodes are used at present only for | - |
445 | ASCII characters, so they could be omitted from this list. However, in | - |
446 | future that may change, so we include them here so as not to leave a | - |
447 | gotcha for a future maintainer. */ | - |
448 | | - |
449 | case OP_UPTO: | - |
450 | case OP_UPTOI: | - |
451 | case OP_NOTUPTO: | - |
452 | case OP_NOTUPTOI: | - |
453 | case OP_MINUPTO: | - |
454 | case OP_MINUPTOI: | - |
455 | case OP_NOTMINUPTO: | - |
456 | case OP_NOTMINUPTOI: | - |
457 | case OP_POSUPTO: | - |
458 | case OP_POSUPTOI: | - |
459 | case OP_NOTPOSUPTO: | - |
460 | case OP_NOTPOSUPTOI: | - |
461 | | - |
462 | case OP_STAR: | - |
463 | case OP_STARI: | - |
464 | case OP_NOTSTAR: | - |
465 | case OP_NOTSTARI: | - |
466 | case OP_MINSTAR: | - |
467 | case OP_MINSTARI: | - |
468 | case OP_NOTMINSTAR: | - |
469 | case OP_NOTMINSTARI: | - |
470 | case OP_POSSTAR: | - |
471 | case OP_POSSTARI: | - |
472 | case OP_NOTPOSSTAR: | - |
473 | case OP_NOTPOSSTARI: | - |
474 | | - |
475 | case OP_QUERY: | - |
476 | case OP_QUERYI: | - |
477 | case OP_NOTQUERY: | - |
478 | case OP_NOTQUERYI: | - |
479 | case OP_MINQUERY: | - |
480 | case OP_MINQUERYI: | - |
481 | case OP_NOTMINQUERY: | - |
482 | case OP_NOTMINQUERYI: | - |
483 | case OP_POSQUERY: | - |
484 | case OP_POSQUERYI: | - |
485 | case OP_NOTPOSQUERY: | - |
486 | case OP_NOTPOSQUERYI: | - |
487 | | - |
488 | cc += PRIV(OP_lengths)[op]; executed (the execution status of this line is deduced): cc += _pcre16_OP_lengths[op]; | - |
489 | #ifdef SUPPORT_UTF | - |
490 | if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); never executed: cc += 1; partially evaluated: utf yes Evaluation Count:2 | no Evaluation Count:0 |
partially evaluated: (((cc[-1]) & 0xfc00) == 0xd800) no Evaluation Count:0 | yes Evaluation Count:2 |
| 0-2 |
491 | #endif | - |
492 | break; executed: break; Execution Count:2 | 2 |
493 | | - |
494 | /* Skip these, but we need to add in the name length. */ | - |
495 | | - |
496 | case OP_MARK: | - |
497 | case OP_PRUNE_ARG: | - |
498 | case OP_SKIP_ARG: | - |
499 | case OP_THEN_ARG: | - |
500 | cc += PRIV(OP_lengths)[op] + cc[1]; never executed (the execution status of this line is deduced): cc += _pcre16_OP_lengths[op] + cc[1]; | - |
501 | break; | 0 |
502 | | - |
503 | /* The remaining opcodes are just skipped over. */ | - |
504 | | - |
505 | case OP_CLOSE: | - |
506 | case OP_COMMIT: | - |
507 | case OP_FAIL: | - |
508 | case OP_PRUNE: | - |
509 | case OP_SET_SOM: | - |
510 | case OP_SKIP: | - |
511 | case OP_THEN: | - |
512 | cc += PRIV(OP_lengths)[op]; never executed (the execution status of this line is deduced): cc += _pcre16_OP_lengths[op]; | - |
513 | break; | 0 |
514 | | - |
515 | /* This should not occur: we list all opcodes explicitly so that when | - |
516 | new ones get added they are properly considered. */ | - |
517 | | - |
518 | default: | - |
519 | return -3; never executed: return -3; | 0 |
520 | } | - |
521 | } executed: } Execution Count:17 | 17 |
522 | /* Control never gets here */ | - |
523 | } | 0 |
524 | | - |
525 | | - |
526 | | - |
527 | /************************************************* | - |
528 | * Set a bit and maybe its alternate case * | - |
529 | *************************************************/ | - |
530 | | - |
531 | /* Given a character, set its first byte's bit in the table, and also the | - |
532 | corresponding bit for the other version of a letter if we are caseless. In | - |
533 | UTF-8 mode, for characters greater than 127, we can only do the caseless thing | - |
534 | when Unicode property support is available. | - |
535 | | - |
536 | Arguments: | - |
537 | start_bits points to the bit map | - |
538 | p points to the character | - |
539 | caseless the caseless flag | - |
540 | cd the block with char table pointers | - |
541 | utf TRUE for UTF-8 / UTF-16 mode | - |
542 | | - |
543 | Returns: pointer after the character | - |
544 | */ | - |
545 | | - |
546 | static const pcre_uchar * | - |
547 | set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless, | - |
548 | compile_data *cd, BOOL utf) | - |
549 | { | - |
550 | unsigned int c = *p; executed (the execution status of this line is deduced): unsigned int c = *p; | - |
551 | | - |
552 | #ifdef COMPILE_PCRE8 | - |
553 | SET_BIT(c); | - |
554 | | - |
555 | #ifdef SUPPORT_UTF | - |
556 | if (utf && c > 127) | - |
557 | { | - |
558 | GETCHARINC(c, p); | - |
559 | #ifdef SUPPORT_UCP | - |
560 | if (caseless) | - |
561 | { | - |
562 | pcre_uchar buff[6]; | - |
563 | c = UCD_OTHERCASE(c); | - |
564 | (void)PRIV(ord2utf)(c, buff); | - |
565 | SET_BIT(buff[0]); | - |
566 | } | - |
567 | #endif | - |
568 | return p; | - |
569 | } | - |
570 | #endif | - |
571 | | - |
572 | /* Not UTF-8 mode, or character is less than 128. */ | - |
573 | | - |
574 | if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); | - |
575 | return p + 1; | - |
576 | #endif | - |
577 | | - |
578 | #ifdef COMPILE_PCRE16 | - |
579 | if (c > 0xff) partially evaluated: c > 0xff no Evaluation Count:0 | yes Evaluation Count:3 |
| 0-3 |
580 | { | - |
581 | c = 0xff; never executed (the execution status of this line is deduced): c = 0xff; | - |
582 | caseless = FALSE; never executed (the execution status of this line is deduced): caseless = 0; | - |
583 | } | 0 |
584 | SET_BIT(c); executed (the execution status of this line is deduced): start_bits[c/8] |= (1 << (c&7)); | - |
585 | | - |
586 | #ifdef SUPPORT_UTF | - |
587 | if (utf && c > 127) partially evaluated: utf yes Evaluation Count:3 | no Evaluation Count:0 |
partially evaluated: c > 127 no Evaluation Count:0 | yes Evaluation Count:3 |
| 0-3 |
588 | { | - |
589 | GETCHARINC(c, p); never executed: } never evaluated: (c & 0xfc00) == 0xd800 | 0 |
590 | #ifdef SUPPORT_UCP | - |
591 | if (caseless) never evaluated: caseless | 0 |
592 | { | - |
593 | c = UCD_OTHERCASE(c); never executed (the execution status of this line is deduced): c = (c + (_pcre16_ucd_records + _pcre16_ucd_stage2[_pcre16_ucd_stage1[(c) / 128] * 128 + (c) % 128])->other_case); | - |
594 | if (c > 0xff) never evaluated: c > 0xff | 0 |
595 | c = 0xff; never executed: c = 0xff; | 0 |
596 | SET_BIT(c); never executed (the execution status of this line is deduced): start_bits[c/8] |= (1 << (c&7)); | - |
597 | } | 0 |
598 | #endif | - |
599 | return p; never executed: return p; | 0 |
600 | } | - |
601 | #endif | - |
602 | | - |
603 | if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); never executed: start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7)); partially evaluated: caseless no Evaluation Count:0 | yes Evaluation Count:3 |
never evaluated: (cd->ctypes[c] & 0x02) != 0 | 0-3 |
604 | return p + 1; executed: return p + 1; Execution Count:3 | 3 |
605 | #endif | - |
606 | } | - |
607 | | - |
608 | | - |
609 | | - |
610 | /************************************************* | - |
611 | * Set bits for a positive character type * | - |
612 | *************************************************/ | - |
613 | | - |
614 | /* This function sets starting bits for a character type. In UTF-8 mode, we can | - |
615 | only do a direct setting for bytes less than 128, as otherwise there can be | - |
616 | confusion with bytes in the middle of UTF-8 characters. In a "traditional" | - |
617 | environment, the tables will only recognize ASCII characters anyway, but in at | - |
618 | least one Windows environment, bits for some higher bytes were set in the tables. | - |
619 | So we deal with that case by considering the UTF-8 encoding. | - |
620 | | - |
621 | Arguments: | - |
622 | start_bits the starting bitmap | - |
623 | cbit_type the type of character wanted | - |
624 | table_limit 32 for non-UTF-8; 16 for UTF-8 | - |
625 | cd the block with char table pointers | - |
626 | | - |
627 | Returns: nothing | - |
628 | */ | - |
629 | | - |
630 | static void | - |
631 | set_type_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit, | - |
632 | compile_data *cd) | - |
633 | { | - |
634 | register int c; never executed (the execution status of this line is deduced): register int c; | - |
635 | for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; never executed: start_bits[c] |= cd->cbits[c+cbit_type]; never evaluated: c < table_limit | 0 |
636 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 | - |
637 | if (table_limit == 32) return; | - |
638 | for (c = 128; c < 256; c++) | - |
639 | { | - |
640 | if ((cd->cbits[c/8] & (1 << (c&7))) != 0) | - |
641 | { | - |
642 | pcre_uchar buff[6]; | - |
643 | (void)PRIV(ord2utf)(c, buff); | - |
644 | SET_BIT(buff[0]); | - |
645 | } | - |
646 | } | - |
647 | #endif | - |
648 | } | 0 |
649 | | - |
650 | | - |
651 | /************************************************* | - |
652 | * Set bits for a negative character type * | - |
653 | *************************************************/ | - |
654 | | - |
655 | /* This function sets starting bits for a negative character type such as \D. | - |
656 | In UTF-8 mode, we can only do a direct setting for bytes less than 128, as | - |
657 | otherwise there can be confusion with bytes in the middle of UTF-8 characters. | - |
658 | Unlike in the positive case, where we can set appropriate starting bits for | - |
659 | specific high-valued UTF-8 characters, in this case we have to set the bits for | - |
660 | all high-valued characters. The lowest is 0xc2, but we overkill by starting at | - |
661 | 0xc0 (192) for simplicity. | - |
662 | | - |
663 | Arguments: | - |
664 | start_bits the starting bitmap | - |
665 | cbit type the type of character wanted | - |
666 | table_limit 32 for non-UTF-8; 16 for UTF-8 | - |
667 | cd the block with char table pointers | - |
668 | | - |
669 | Returns: nothing | - |
670 | */ | - |
671 | | - |
672 | static void | - |
673 | set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit, | - |
674 | compile_data *cd) | - |
675 | { | - |
676 | register int c; never executed (the execution status of this line is deduced): register int c; | - |
677 | for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type]; never executed: start_bits[c] |= ~cd->cbits[c+cbit_type]; never evaluated: c < table_limit | 0 |
678 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 | - |
679 | if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; | - |
680 | #endif | - |
681 | } | 0 |
682 | | - |
683 | | - |
684 | | - |
685 | /************************************************* | - |
686 | * Create bitmap of starting bytes * | - |
687 | *************************************************/ | - |
688 | | - |
689 | /* This function scans a compiled unanchored expression recursively and | - |
690 | attempts to build a bitmap of the set of possible starting bytes. As time goes | - |
691 | by, we may be able to get more clever at doing this. The SSB_CONTINUE return is | - |
692 | useful for parenthesized groups in patterns such as (a*)b where the group | - |
693 | provides some optional starting bytes but scanning must continue at the outer | - |
694 | level to find at least one mandatory byte. At the outermost level, this | - |
695 | function fails unless the result is SSB_DONE. | - |
696 | | - |
697 | Arguments: | - |
698 | code points to an expression | - |
699 | start_bits points to a 32-byte table, initialized to 0 | - |
700 | utf TRUE if in UTF-8 / UTF-16 mode | - |
701 | cd the block with char table pointers | - |
702 | | - |
703 | Returns: SSB_FAIL => Failed to find any starting bytes | - |
704 | SSB_DONE => Found mandatory starting bytes | - |
705 | SSB_CONTINUE => Found optional starting bytes | - |
706 | SSB_UNKNOWN => Hit an unrecognized opcode | - |
707 | */ | - |
708 | | - |
709 | static int | - |
710 | set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf, | - |
711 | compile_data *cd) | - |
712 | { | - |
713 | register int c; executed (the execution status of this line is deduced): register int c; | - |
714 | int yield = SSB_DONE; executed (the execution status of this line is deduced): int yield = SSB_DONE; | - |
715 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 | - |
716 | int table_limit = utf? 16:32; | - |
717 | #else | - |
718 | int table_limit = 32; executed (the execution status of this line is deduced): int table_limit = 32; | - |
719 | #endif | - |
720 | | - |
721 | #if 0 | - |
722 | /* ========================================================================= */ | - |
723 | /* The following comment and code was inserted in January 1999. In May 2006, | - |
724 | when it was observed to cause compiler warnings about unused values, I took it | - |
725 | out again. If anybody is still using OS/2, they will have to put it back | - |
726 | manually. */ | - |
727 | | - |
728 | /* This next statement and the later reference to dummy are here in order to | - |
729 | trick the optimizer of the IBM C compiler for OS/2 into generating correct | - |
730 | code. Apparently IBM isn't going to fix the problem, and we would rather not | - |
731 | disable optimization (in this module it actually makes a big difference, and | - |
732 | the pcre module can use all the optimization it can get). */ | - |
733 | | - |
734 | volatile int dummy; | - |
735 | /* ========================================================================= */ | - |
736 | #endif | - |
737 | | - |
738 | do | - |
739 | { | - |
740 | BOOL try_next = TRUE; executed (the execution status of this line is deduced): BOOL try_next = 1; | - |
741 | const pcre_uchar *tcode = code + 1 + LINK_SIZE; executed (the execution status of this line is deduced): const pcre_uchar *tcode = code + 1 + 1; | - |
742 | | - |
743 | if (*code == OP_CBRA || *code == OP_SCBRA || partially evaluated: *code == OP_CBRA no Evaluation Count:0 | yes Evaluation Count:5 |
partially evaluated: *code == OP_SCBRA no Evaluation Count:0 | yes Evaluation Count:5 |
| 0-5 |
744 | *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE; never executed: tcode += 1; partially evaluated: *code == OP_CBRAPOS no Evaluation Count:0 | yes Evaluation Count:5 |
partially evaluated: *code == OP_SCBRAPOS no Evaluation Count:0 | yes Evaluation Count:5 |
| 0-5 |
745 | | - |
746 | while (try_next) /* Loop for items in this branch */ evaluated: try_next yes Evaluation Count:8 | yes Evaluation Count:2 |
| 2-8 |
747 | { | - |
748 | int rc; executed (the execution status of this line is deduced): int rc; | - |
749 | | - |
750 | switch(*tcode) | - |
751 | { | - |
752 | /* If we reach something we don't understand, it means a new opcode has | - |
753 | been created that hasn't been added to this code. Hopefully this problem | - |
754 | will be discovered during testing. */ | - |
755 | | - |
756 | default: | - |
757 | return SSB_UNKNOWN; never executed: return SSB_UNKNOWN; | 0 |
758 | | - |
759 | /* Fail for a valid opcode that implies no starting bits. */ | - |
760 | | - |
761 | case OP_ACCEPT: | - |
762 | case OP_ASSERT_ACCEPT: | - |
763 | case OP_ALLANY: | - |
764 | case OP_ANY: | - |
765 | case OP_ANYBYTE: | - |
766 | case OP_CIRC: | - |
767 | case OP_CIRCM: | - |
768 | case OP_CLOSE: | - |
769 | case OP_COMMIT: | - |
770 | case OP_COND: | - |
771 | case OP_CREF: | - |
772 | case OP_DEF: | - |
773 | case OP_DOLL: | - |
774 | case OP_DOLLM: | - |
775 | case OP_END: | - |
776 | case OP_EOD: | - |
777 | case OP_EODN: | - |
778 | case OP_EXTUNI: | - |
779 | case OP_FAIL: | - |
780 | case OP_MARK: | - |
781 | case OP_NCREF: | - |
782 | case OP_NOT: | - |
783 | case OP_NOTEXACT: | - |
784 | case OP_NOTEXACTI: | - |
785 | case OP_NOTI: | - |
786 | case OP_NOTMINPLUS: | - |
787 | case OP_NOTMINPLUSI: | - |
788 | case OP_NOTMINQUERY: | - |
789 | case OP_NOTMINQUERYI: | - |
790 | case OP_NOTMINSTAR: | - |
791 | case OP_NOTMINSTARI: | - |
792 | case OP_NOTMINUPTO: | - |
793 | case OP_NOTMINUPTOI: | - |
794 | case OP_NOTPLUS: | - |
795 | case OP_NOTPLUSI: | - |
796 | case OP_NOTPOSPLUS: | - |
797 | case OP_NOTPOSPLUSI: | - |
798 | case OP_NOTPOSQUERY: | - |
799 | case OP_NOTPOSQUERYI: | - |
800 | case OP_NOTPOSSTAR: | - |
801 | case OP_NOTPOSSTARI: | - |
802 | case OP_NOTPOSUPTO: | - |
803 | case OP_NOTPOSUPTOI: | - |
804 | case OP_NOTPROP: | - |
805 | case OP_NOTQUERY: | - |
806 | case OP_NOTQUERYI: | - |
807 | case OP_NOTSTAR: | - |
808 | case OP_NOTSTARI: | - |
809 | case OP_NOTUPTO: | - |
810 | case OP_NOTUPTOI: | - |
811 | case OP_NOT_HSPACE: | - |
812 | case OP_NOT_VSPACE: | - |
813 | case OP_NRREF: | - |
814 | case OP_PROP: | - |
815 | case OP_PRUNE: | - |
816 | case OP_PRUNE_ARG: | - |
817 | case OP_RECURSE: | - |
818 | case OP_REF: | - |
819 | case OP_REFI: | - |
820 | case OP_REVERSE: | - |
821 | case OP_RREF: | - |
822 | case OP_SCOND: | - |
823 | case OP_SET_SOM: | - |
824 | case OP_SKIP: | - |
825 | case OP_SKIP_ARG: | - |
826 | case OP_SOD: | - |
827 | case OP_SOM: | - |
828 | case OP_THEN: | - |
829 | case OP_THEN_ARG: | - |
830 | #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 | - |
831 | case OP_XCLASS: | - |
832 | #endif | - |
833 | return SSB_FAIL; executed: return SSB_FAIL; Execution Count:1 | 1 |
834 | | - |
835 | /* We can ignore word boundary tests. */ | - |
836 | | - |
837 | case OP_WORD_BOUNDARY: | - |
838 | case OP_NOT_WORD_BOUNDARY: | - |
839 | tcode++; executed (the execution status of this line is deduced): tcode++; | - |
840 | break; executed: break; Execution Count:1 | 1 |
841 | | - |
842 | /* If we hit a bracket or a positive lookahead assertion, recurse to set | - |
843 | bits from within the subpattern. If it can't find anything, we have to | - |
844 | give up. If it finds some mandatory character(s), we are done for this | - |
845 | branch. Otherwise, carry on scanning after the subpattern. */ | - |
846 | | - |
847 | case OP_BRA: | - |
848 | case OP_SBRA: | - |
849 | case OP_CBRA: | - |
850 | case OP_SCBRA: | - |
851 | case OP_BRAPOS: | - |
852 | case OP_SBRAPOS: | - |
853 | case OP_CBRAPOS: | - |
854 | case OP_SCBRAPOS: | - |
855 | case OP_ONCE: | - |
856 | case OP_ONCE_NC: | - |
857 | case OP_ASSERT: | - |
858 | rc = set_start_bits(tcode, start_bits, utf, cd); never executed (the execution status of this line is deduced): rc = set_start_bits(tcode, start_bits, utf, cd); | - |
859 | if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; never executed: return rc; never evaluated: rc == SSB_FAIL never evaluated: rc == SSB_UNKNOWN | 0 |
860 | if (rc == SSB_DONE) try_next = FALSE; else never executed: try_next = 0; never evaluated: rc == SSB_DONE | 0 |
861 | { | - |
862 | do tcode += GET(tcode, 1); while (*tcode == OP_ALT); never executed: tcode += (tcode[1]); never evaluated: *tcode == OP_ALT | 0 |
863 | tcode += 1 + LINK_SIZE; never executed (the execution status of this line is deduced): tcode += 1 + 1; | - |
864 | } | 0 |
865 | break; | 0 |
866 | | - |
867 | /* If we hit ALT or KET, it means we haven't found anything mandatory in | - |
868 | this branch, though we might have found something optional. For ALT, we | - |
869 | continue with the next alternative, but we have to arrange that the final | - |
870 | result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET, | - |
871 | return SSB_CONTINUE: if this is the top level, that indicates failure, | - |
872 | but after a nested subpattern, it causes scanning to continue. */ | - |
873 | | - |
874 | case OP_ALT: | - |
875 | yield = SSB_CONTINUE; executed (the execution status of this line is deduced): yield = SSB_CONTINUE; | - |
876 | try_next = FALSE; executed (the execution status of this line is deduced): try_next = 0; | - |
877 | break; executed: break; Execution Count:1 | 1 |
878 | | - |
879 | case OP_KET: | - |
880 | case OP_KETRMAX: | - |
881 | case OP_KETRMIN: | - |
882 | case OP_KETRPOS: | - |
883 | return SSB_CONTINUE; executed: return SSB_CONTINUE; Execution Count:2 | 2 |
884 | | - |
885 | /* Skip over callout */ | - |
886 | | - |
887 | case OP_CALLOUT: | - |
888 | tcode += 2 + 2*LINK_SIZE; never executed (the execution status of this line is deduced): tcode += 2 + 2*1; | - |
889 | break; | 0 |
890 | | - |
891 | /* Skip over lookbehind and negative lookahead assertions */ | - |
892 | | - |
893 | case OP_ASSERT_NOT: | - |
894 | case OP_ASSERTBACK: | - |
895 | case OP_ASSERTBACK_NOT: | - |
896 | do tcode += GET(tcode, 1); while (*tcode == OP_ALT); never executed: tcode += (tcode[1]); never evaluated: *tcode == OP_ALT | 0 |
897 | tcode += 1 + LINK_SIZE; never executed (the execution status of this line is deduced): tcode += 1 + 1; | - |
898 | break; | 0 |
899 | | - |
900 | /* BRAZERO does the bracket, but carries on. */ | - |
901 | | - |
902 | case OP_BRAZERO: | - |
903 | case OP_BRAMINZERO: | - |
904 | case OP_BRAPOSZERO: | - |
905 | rc = set_start_bits(++tcode, start_bits, utf, cd); never executed (the execution status of this line is deduced): rc = set_start_bits(++tcode, start_bits, utf, cd); | - |
906 | if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; never executed: return rc; never evaluated: rc == SSB_FAIL never evaluated: rc == SSB_UNKNOWN | 0 |
907 | /* ========================================================================= | - |
908 | See the comment at the head of this function concerning the next line, | - |
909 | which was an old fudge for the benefit of OS/2. | - |
910 | dummy = 1; | - |
911 | ========================================================================= */ | - |
912 | do tcode += GET(tcode,1); while (*tcode == OP_ALT); never executed: tcode += (tcode[1]); never evaluated: *tcode == OP_ALT | 0 |
913 | tcode += 1 + LINK_SIZE; never executed (the execution status of this line is deduced): tcode += 1 + 1; | - |
914 | break; | 0 |
915 | | - |
916 | /* SKIPZERO skips the bracket. */ | - |
917 | | - |
918 | case OP_SKIPZERO: | - |
919 | tcode++; never executed (the execution status of this line is deduced): tcode++; | - |
920 | do tcode += GET(tcode,1); while (*tcode == OP_ALT); never executed: tcode += (tcode[1]); never evaluated: *tcode == OP_ALT | 0 |
921 | tcode += 1 + LINK_SIZE; never executed (the execution status of this line is deduced): tcode += 1 + 1; | - |
922 | break; | 0 |
923 | | - |
924 | /* Single-char * or ? sets the bit and tries the next item */ | - |
925 | | - |
926 | case OP_STAR: | - |
927 | case OP_MINSTAR: | - |
928 | case OP_POSSTAR: | - |
929 | case OP_QUERY: | - |
930 | case OP_MINQUERY: | - |
931 | case OP_POSQUERY: | - |
932 | tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf); executed (the execution status of this line is deduced): tcode = set_table_bit(start_bits, tcode + 1, 0, cd, utf); | - |
933 | break; executed: break; Execution Count:2 | 2 |
934 | | - |
935 | case OP_STARI: | - |
936 | case OP_MINSTARI: | - |
937 | case OP_POSSTARI: | - |
938 | case OP_QUERYI: | - |
939 | case OP_MINQUERYI: | - |
940 | case OP_POSQUERYI: | - |
941 | tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); never executed (the execution status of this line is deduced): tcode = set_table_bit(start_bits, tcode + 1, 1, cd, utf); | - |
942 | break; | 0 |
943 | | - |
944 | /* Single-char upto sets the bit and tries the next */ | - |
945 | | - |
946 | case OP_UPTO: | - |
947 | case OP_MINUPTO: | - |
948 | case OP_POSUPTO: | - |
949 | tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf); never executed (the execution status of this line is deduced): tcode = set_table_bit(start_bits, tcode + 1 + 1, 0, cd, utf); | - |
950 | break; | 0 |
951 | | - |
952 | case OP_UPTOI: | - |
953 | case OP_MINUPTOI: | - |
954 | case OP_POSUPTOI: | - |
955 | tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf); never executed (the execution status of this line is deduced): tcode = set_table_bit(start_bits, tcode + 1 + 1, 1, cd, utf); | - |
956 | break; | 0 |
957 | | - |
958 | /* At least one single char sets the bit and stops */ | - |
959 | | - |
960 | case OP_EXACT: | - |
961 | tcode += IMM2_SIZE; never executed (the execution status of this line is deduced): tcode += 1; | - |
962 | /* Fall through */ | - |
963 | case OP_CHAR: code before this statement never executed: case OP_CHAR: | 0 |
964 | case OP_PLUS: | - |
965 | case OP_MINPLUS: | - |
966 | case OP_POSPLUS: | - |
967 | (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf); executed (the execution status of this line is deduced): (void)set_table_bit(start_bits, tcode + 1, 0, cd, utf); | - |
968 | try_next = FALSE; executed (the execution status of this line is deduced): try_next = 0; | - |
969 | break; executed: break; Execution Count:1 | 1 |
970 | | - |
971 | case OP_EXACTI: | - |
972 | tcode += IMM2_SIZE; never executed (the execution status of this line is deduced): tcode += 1; | - |
973 | /* Fall through */ | - |
974 | case OP_CHARI: code before this statement never executed: case OP_CHARI: | 0 |
975 | case OP_PLUSI: | - |
976 | case OP_MINPLUSI: | - |
977 | case OP_POSPLUSI: | - |
978 | (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); never executed (the execution status of this line is deduced): (void)set_table_bit(start_bits, tcode + 1, 1, cd, utf); | - |
979 | try_next = FALSE; never executed (the execution status of this line is deduced): try_next = 0; | - |
980 | break; | 0 |
981 | | - |
982 | /* Special spacing and line-terminating items. These recognize specific | - |
983 | lists of characters. The difference between VSPACE and ANYNL is that the | - |
984 | latter can match the two-character CRLF sequence, but that is not | - |
985 | relevant for finding the first character, so their code here is | - |
986 | identical. */ | - |
987 | | - |
988 | case OP_HSPACE: | - |
989 | SET_BIT(0x09); never executed (the execution status of this line is deduced): start_bits[0x09/8] |= (1 << (0x09&7)); | - |
990 | SET_BIT(0x20); never executed (the execution status of this line is deduced): start_bits[0x20/8] |= (1 << (0x20&7)); | - |
991 | #ifdef SUPPORT_UTF | - |
992 | if (utf) | 0 |
993 | { | - |
994 | #ifdef COMPILE_PCRE8 | - |
995 | SET_BIT(0xC2); /* For U+00A0 */ | - |
996 | SET_BIT(0xE1); /* For U+1680, U+180E */ | - |
997 | SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ | - |
998 | SET_BIT(0xE3); /* For U+3000 */ | - |
999 | #endif | - |
1000 | #ifdef COMPILE_PCRE16 | - |
1001 | SET_BIT(0xA0); never executed (the execution status of this line is deduced): start_bits[0xA0/8] |= (1 << (0xA0&7)); | - |
1002 | SET_BIT(0xFF); /* For characters > 255 */ never executed (the execution status of this line is deduced): start_bits[0xFF/8] |= (1 << (0xFF&7)); | - |
1003 | #endif | - |
1004 | } | 0 |
1005 | else | - |
1006 | #endif /* SUPPORT_UTF */ | - |
1007 | { | - |
1008 | SET_BIT(0xA0); never executed (the execution status of this line is deduced): start_bits[0xA0/8] |= (1 << (0xA0&7)); | - |
1009 | #ifdef COMPILE_PCRE16 | - |
1010 | SET_BIT(0xFF); /* For characters > 255 */ never executed (the execution status of this line is deduced): start_bits[0xFF/8] |= (1 << (0xFF&7)); | - |
1011 | #endif | - |
1012 | } | 0 |
1013 | try_next = FALSE; never executed (the execution status of this line is deduced): try_next = 0; | - |
1014 | break; | 0 |
1015 | | - |
1016 | case OP_ANYNL: | - |
1017 | case OP_VSPACE: | - |
1018 | SET_BIT(0x0A); never executed (the execution status of this line is deduced): start_bits[0x0A/8] |= (1 << (0x0A&7)); | - |
1019 | SET_BIT(0x0B); never executed (the execution status of this line is deduced): start_bits[0x0B/8] |= (1 << (0x0B&7)); | - |
1020 | SET_BIT(0x0C); never executed (the execution status of this line is deduced): start_bits[0x0C/8] |= (1 << (0x0C&7)); | - |
1021 | SET_BIT(0x0D); never executed (the execution status of this line is deduced): start_bits[0x0D/8] |= (1 << (0x0D&7)); | - |
1022 | #ifdef SUPPORT_UTF | - |
1023 | if (utf) | 0 |
1024 | { | - |
1025 | #ifdef COMPILE_PCRE8 | - |
1026 | SET_BIT(0xC2); /* For U+0085 */ | - |
1027 | SET_BIT(0xE2); /* For U+2028, U+2029 */ | - |
1028 | #endif | - |
1029 | #ifdef COMPILE_PCRE16 | - |
1030 | SET_BIT(0x85); never executed (the execution status of this line is deduced): start_bits[0x85/8] |= (1 << (0x85&7)); | - |
1031 | SET_BIT(0xFF); /* For characters > 255 */ never executed (the execution status of this line is deduced): start_bits[0xFF/8] |= (1 << (0xFF&7)); | - |
1032 | #endif | - |
1033 | } | 0 |
1034 | else | - |
1035 | #endif /* SUPPORT_UTF */ | - |
1036 | { | - |
1037 | SET_BIT(0x85); never executed (the execution status of this line is deduced): start_bits[0x85/8] |= (1 << (0x85&7)); | - |
1038 | #ifdef COMPILE_PCRE16 | - |
1039 | SET_BIT(0xFF); /* For characters > 255 */ never executed (the execution status of this line is deduced): start_bits[0xFF/8] |= (1 << (0xFF&7)); | - |
1040 | #endif | - |
1041 | } | 0 |
1042 | try_next = FALSE; never executed (the execution status of this line is deduced): try_next = 0; | - |
1043 | break; | 0 |
1044 | | - |
1045 | /* Single character types set the bits and stop. Note that if PCRE_UCP | - |
1046 | is set, we do not see these op codes because \d etc are converted to | - |
1047 | properties. Therefore, these apply in the case when only characters less | - |
1048 | than 256 are recognized to match the types. */ | - |
1049 | | - |
1050 | case OP_NOT_DIGIT: | - |
1051 | set_nottype_bits(start_bits, cbit_digit, table_limit, cd); never executed (the execution status of this line is deduced): set_nottype_bits(start_bits, 64, table_limit, cd); | - |
1052 | try_next = FALSE; never executed (the execution status of this line is deduced): try_next = 0; | - |
1053 | break; | 0 |
1054 | | - |
1055 | case OP_DIGIT: | - |
1056 | set_type_bits(start_bits, cbit_digit, table_limit, cd); never executed (the execution status of this line is deduced): set_type_bits(start_bits, 64, table_limit, cd); | - |
1057 | try_next = FALSE; never executed (the execution status of this line is deduced): try_next = 0; | - |
1058 | break; | 0 |
1059 | | - |
1060 | /* The cbit_space table has vertical tab as whitespace; we have to | - |
1061 | ensure it is set as not whitespace. */ | - |
1062 | | - |
1063 | case OP_NOT_WHITESPACE: | - |
1064 | set_nottype_bits(start_bits, cbit_space, table_limit, cd); never executed (the execution status of this line is deduced): set_nottype_bits(start_bits, 0, table_limit, cd); | - |
1065 | start_bits[1] |= 0x08; never executed (the execution status of this line is deduced): start_bits[1] |= 0x08; | - |
1066 | try_next = FALSE; never executed (the execution status of this line is deduced): try_next = 0; | - |
1067 | break; | 0 |
1068 | | - |
1069 | /* The cbit_space table has vertical tab as whitespace; we have to | - |
1070 | not set it from the table. */ | - |
1071 | | - |
1072 | case OP_WHITESPACE: | - |
1073 | c = start_bits[1]; /* Save in case it was already set */ never executed (the execution status of this line is deduced): c = start_bits[1]; | - |
1074 | set_type_bits(start_bits, cbit_space, table_limit, cd); never executed (the execution status of this line is deduced): set_type_bits(start_bits, 0, table_limit, cd); | - |
1075 | start_bits[1] = (start_bits[1] & ~0x08) | c; never executed (the execution status of this line is deduced): start_bits[1] = (start_bits[1] & ~0x08) | c; | - |
1076 | try_next = FALSE; never executed (the execution status of this line is deduced): try_next = 0; | - |
1077 | break; | 0 |
1078 | | - |
1079 | case OP_NOT_WORDCHAR: | - |
1080 | set_nottype_bits(start_bits, cbit_word, table_limit, cd); never executed (the execution status of this line is deduced): set_nottype_bits(start_bits, 160, table_limit, cd); | - |
1081 | try_next = FALSE; never executed (the execution status of this line is deduced): try_next = 0; | - |
1082 | break; | 0 |
1083 | | - |
1084 | case OP_WORDCHAR: | - |
1085 | set_type_bits(start_bits, cbit_word, table_limit, cd); never executed (the execution status of this line is deduced): set_type_bits(start_bits, 160, table_limit, cd); | - |
1086 | try_next = FALSE; never executed (the execution status of this line is deduced): try_next = 0; | - |
1087 | break; | 0 |
1088 | | - |
1089 | /* One or more character type fudges the pointer and restarts, knowing | - |
1090 | it will hit a single character type and stop there. */ | - |
1091 | | - |
1092 | case OP_TYPEPLUS: | - |
1093 | case OP_TYPEMINPLUS: | - |
1094 | case OP_TYPEPOSPLUS: | - |
1095 | tcode++; never executed (the execution status of this line is deduced): tcode++; | - |
1096 | break; | 0 |
1097 | | - |
1098 | case OP_TYPEEXACT: | - |
1099 | tcode += 1 + IMM2_SIZE; never executed (the execution status of this line is deduced): tcode += 1 + 1; | - |
1100 | break; | 0 |
1101 | | - |
1102 | /* Zero or more repeats of character types set the bits and then | - |
1103 | try again. */ | - |
1104 | | - |
1105 | case OP_TYPEUPTO: | - |
1106 | case OP_TYPEMINUPTO: | - |
1107 | case OP_TYPEPOSUPTO: | - |
1108 | tcode += IMM2_SIZE; /* Fall through */ never executed (the execution status of this line is deduced): tcode += 1; | - |
1109 | | - |
1110 | case OP_TYPESTAR: code before this statement never executed: case OP_TYPESTAR: | 0 |
1111 | case OP_TYPEMINSTAR: | - |
1112 | case OP_TYPEPOSSTAR: | - |
1113 | case OP_TYPEQUERY: | - |
1114 | case OP_TYPEMINQUERY: | - |
1115 | case OP_TYPEPOSQUERY: | - |
1116 | switch(tcode[1]) | - |
1117 | { | - |
1118 | default: | - |
1119 | case OP_ANY: | - |
1120 | case OP_ALLANY: | - |
1121 | return SSB_FAIL; never executed: return SSB_FAIL; | 0 |
1122 | | - |
1123 | case OP_HSPACE: | - |
1124 | SET_BIT(0x09); never executed (the execution status of this line is deduced): start_bits[0x09/8] |= (1 << (0x09&7)); | - |
1125 | SET_BIT(0x20); never executed (the execution status of this line is deduced): start_bits[0x20/8] |= (1 << (0x20&7)); | - |
1126 | #ifdef COMPILE_PCRE8 | - |
1127 | if (utf) | - |
1128 | { | - |
1129 | #ifdef COMPILE_PCRE8 | - |
1130 | SET_BIT(0xC2); /* For U+00A0 */ | - |
1131 | SET_BIT(0xE1); /* For U+1680, U+180E */ | - |
1132 | SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ | - |
1133 | SET_BIT(0xE3); /* For U+3000 */ | - |
1134 | #endif | - |
1135 | #ifdef COMPILE_PCRE16 | - |
1136 | SET_BIT(0xA0); | - |
1137 | SET_BIT(0xFF); /* For characters > 255 */ | - |
1138 | #endif | - |
1139 | } | - |
1140 | else | - |
1141 | #endif /* SUPPORT_UTF */ | - |
1142 | SET_BIT(0xA0); never executed (the execution status of this line is deduced): start_bits[0xA0/8] |= (1 << (0xA0&7)); | - |
1143 | break; | 0 |
1144 | | - |
1145 | case OP_ANYNL: | - |
1146 | case OP_VSPACE: | - |
1147 | SET_BIT(0x0A); never executed (the execution status of this line is deduced): start_bits[0x0A/8] |= (1 << (0x0A&7)); | - |
1148 | SET_BIT(0x0B); never executed (the execution status of this line is deduced): start_bits[0x0B/8] |= (1 << (0x0B&7)); | - |
1149 | SET_BIT(0x0C); never executed (the execution status of this line is deduced): start_bits[0x0C/8] |= (1 << (0x0C&7)); | - |
1150 | SET_BIT(0x0D); never executed (the execution status of this line is deduced): start_bits[0x0D/8] |= (1 << (0x0D&7)); | - |
1151 | #ifdef COMPILE_PCRE8 | - |
1152 | if (utf) | - |
1153 | { | - |
1154 | #ifdef COMPILE_PCRE8 | - |
1155 | SET_BIT(0xC2); /* For U+0085 */ | - |
1156 | SET_BIT(0xE2); /* For U+2028, U+2029 */ | - |
1157 | #endif | - |
1158 | #ifdef COMPILE_PCRE16 | - |
1159 | SET_BIT(0x85); | - |
1160 | SET_BIT(0xFF); /* For characters > 255 */ | - |
1161 | #endif | - |
1162 | } | - |
1163 | else | - |
1164 | #endif /* SUPPORT_UTF */ | - |
1165 | SET_BIT(0x85); never executed (the execution status of this line is deduced): start_bits[0x85/8] |= (1 << (0x85&7)); | - |
1166 | break; | 0 |
1167 | | - |
1168 | case OP_NOT_DIGIT: | - |
1169 | set_nottype_bits(start_bits, cbit_digit, table_limit, cd); never executed (the execution status of this line is deduced): set_nottype_bits(start_bits, 64, table_limit, cd); | - |
1170 | break; | 0 |
1171 | | - |
1172 | case OP_DIGIT: | - |
1173 | set_type_bits(start_bits, cbit_digit, table_limit, cd); never executed (the execution status of this line is deduced): set_type_bits(start_bits, 64, table_limit, cd); | - |
1174 | break; | 0 |
1175 | | - |
1176 | /* The cbit_space table has vertical tab as whitespace; we have to | - |
1177 | ensure it gets set as not whitespace. */ | - |
1178 | | - |
1179 | case OP_NOT_WHITESPACE: | - |
1180 | set_nottype_bits(start_bits, cbit_space, table_limit, cd); never executed (the execution status of this line is deduced): set_nottype_bits(start_bits, 0, table_limit, cd); | - |
1181 | start_bits[1] |= 0x08; never executed (the execution status of this line is deduced): start_bits[1] |= 0x08; | - |
1182 | break; | 0 |
1183 | | - |
1184 | /* The cbit_space table has vertical tab as whitespace; we have to | - |
1185 | avoid setting it. */ | - |
1186 | | - |
1187 | case OP_WHITESPACE: | - |
1188 | c = start_bits[1]; /* Save in case it was already set */ never executed (the execution status of this line is deduced): c = start_bits[1]; | - |
1189 | set_type_bits(start_bits, cbit_space, table_limit, cd); never executed (the execution status of this line is deduced): set_type_bits(start_bits, 0, table_limit, cd); | - |
1190 | start_bits[1] = (start_bits[1] & ~0x08) | c; never executed (the execution status of this line is deduced): start_bits[1] = (start_bits[1] & ~0x08) | c; | - |
1191 | break; | 0 |
1192 | | - |
1193 | case OP_NOT_WORDCHAR: | - |
1194 | set_nottype_bits(start_bits, cbit_word, table_limit, cd); never executed (the execution status of this line is deduced): set_nottype_bits(start_bits, 160, table_limit, cd); | - |
1195 | break; | 0 |
1196 | | - |
1197 | case OP_WORDCHAR: | - |
1198 | set_type_bits(start_bits, cbit_word, table_limit, cd); never executed (the execution status of this line is deduced): set_type_bits(start_bits, 160, table_limit, cd); | - |
1199 | break; | 0 |
1200 | } | - |
1201 | | - |
1202 | tcode += 2; never executed (the execution status of this line is deduced): tcode += 2; | - |
1203 | break; | 0 |
1204 | | - |
1205 | /* Character class where all the information is in a bit map: set the | - |
1206 | bits and either carry on or not, according to the repeat count. If it was | - |
1207 | a negative class, and we are operating with UTF-8 characters, any byte | - |
1208 | with a value >= 0xc4 is a potentially valid starter because it starts a | - |
1209 | character with a value > 255. */ | - |
1210 | | - |
1211 | case OP_NCLASS: | - |
1212 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 | - |
1213 | if (utf) | - |
1214 | { | - |
1215 | start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ | - |
1216 | memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ | - |
1217 | } | - |
1218 | #endif | - |
1219 | #ifdef COMPILE_PCRE16 | - |
1220 | SET_BIT(0xFF); /* For characters > 255 */ never executed (the execution status of this line is deduced): start_bits[0xFF/8] |= (1 << (0xFF&7)); | - |
1221 | #endif | - |
1222 | /* Fall through */ | - |
1223 | | - |
1224 | case OP_CLASS: code before this statement never executed: case OP_CLASS: | 0 |
1225 | { | - |
1226 | pcre_uint8 *map; never executed (the execution status of this line is deduced): pcre_uint8 *map; | - |
1227 | tcode++; never executed (the execution status of this line is deduced): tcode++; | - |
1228 | map = (pcre_uint8 *)tcode; never executed (the execution status of this line is deduced): map = (pcre_uint8 *)tcode; | - |
1229 | | - |
1230 | /* In UTF-8 mode, the bits in a bit map correspond to character | - |
1231 | values, not to byte values. However, the bit map we are constructing is | - |
1232 | for byte values. So we have to do a conversion for characters whose | - |
1233 | value is > 127. In fact, there are only two possible starting bytes for | - |
1234 | characters in the range 128 - 255. */ | - |
1235 | | - |
1236 | #if defined SUPPORT_UTF && defined COMPILE_PCRE8 | - |
1237 | if (utf) | - |
1238 | { | - |
1239 | for (c = 0; c < 16; c++) start_bits[c] |= map[c]; | - |
1240 | for (c = 128; c < 256; c++) | - |
1241 | { | - |
1242 | if ((map[c/8] && (1 << (c&7))) != 0) | - |
1243 | { | - |
1244 | int d = (c >> 6) | 0xc0; /* Set bit for this starter */ | - |
1245 | start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ | - |
1246 | c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */ | - |
1247 | } | - |
1248 | } | - |
1249 | } | - |
1250 | else | - |
1251 | #endif | - |
1252 | { | - |
1253 | /* In non-UTF-8 mode, the two bit maps are completely compatible. */ | - |
1254 | for (c = 0; c < 32; c++) start_bits[c] |= map[c]; never executed: start_bits[c] |= map[c]; never evaluated: c < 32 | 0 |
1255 | } | - |
1256 | | - |
1257 | /* Advance past the bit map, and act on what follows. For a zero | - |
1258 | minimum repeat, continue; otherwise stop processing. */ | - |
1259 | | - |
1260 | tcode += 32 / sizeof(pcre_uchar); never executed (the execution status of this line is deduced): tcode += 32 / sizeof(pcre_uchar); | - |
1261 | switch (*tcode) | - |
1262 | { | - |
1263 | case OP_CRSTAR: | - |
1264 | case OP_CRMINSTAR: | - |
1265 | case OP_CRQUERY: | - |
1266 | case OP_CRMINQUERY: | - |
1267 | tcode++; never executed (the execution status of this line is deduced): tcode++; | - |
1268 | break; | 0 |
1269 | | - |
1270 | case OP_CRRANGE: | - |
1271 | case OP_CRMINRANGE: | - |
1272 | if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE; never executed: tcode += 1 + 2 * 1; never evaluated: tcode[1] == 0 | 0 |
1273 | else try_next = FALSE; never executed: try_next = 0; | 0 |
1274 | break; | 0 |
1275 | | - |
1276 | default: | - |
1277 | try_next = FALSE; never executed (the execution status of this line is deduced): try_next = 0; | - |
1278 | break; | 0 |
1279 | } | - |
1280 | } | - |
1281 | break; /* End of bitmap class handling */ | 0 |
1282 | | - |
1283 | } /* End of switch */ | - |
1284 | } /* End of try_next loop */ executed: } Execution Count:5 | 5 |
1285 | | - |
1286 | code += GET(code, 1); /* Advance to next branch */ executed (the execution status of this line is deduced): code += (code[1]); | - |
1287 | } executed: } Execution Count:2 | 2 |
1288 | while (*code == OP_ALT); evaluated: *code == OP_ALT yes Evaluation Count:1 | yes Evaluation Count:1 |
| 1 |
1289 | return yield; executed: return yield; Execution Count:1 | 1 |
1290 | } | - |
1291 | | - |
1292 | | - |
1293 | | - |
1294 | | - |
1295 | | - |
1296 | /************************************************* | - |
1297 | * Study a compiled expression * | - |
1298 | *************************************************/ | - |
1299 | | - |
1300 | /* This function is handed a compiled expression that it must study to produce | - |
1301 | information that will speed up the matching. It returns a pcre[16]_extra block | - |
1302 | which then gets handed back to pcre_exec(). | - |
1303 | | - |
1304 | Arguments: | - |
1305 | re points to the compiled expression | - |
1306 | options contains option bits | - |
1307 | errorptr points to where to place error messages; | - |
1308 | set NULL unless error | - |
1309 | | - |
1310 | Returns: pointer to a pcre[16]_extra block, with study_data filled in and | - |
1311 | the appropriate flags set; | - |
1312 | NULL on error or if no optimization possible | - |
1313 | */ | - |
1314 | | - |
1315 | #ifdef COMPILE_PCRE8 | - |
1316 | PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION | - |
1317 | pcre_study(const pcre *external_re, int options, const char **errorptr) | - |
1318 | #else | - |
1319 | PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION | - |
1320 | pcre16_study(const pcre16 *external_re, int options, const char **errorptr) | - |
1321 | #endif | - |
1322 | { | - |
1323 | int min; executed (the execution status of this line is deduced): int min; | - |
1324 | BOOL bits_set = FALSE; executed (the execution status of this line is deduced): BOOL bits_set = 0; | - |
1325 | pcre_uint8 start_bits[32]; executed (the execution status of this line is deduced): pcre_uint8 start_bits[32]; | - |
1326 | PUBL(extra) *extra = NULL; executed (the execution status of this line is deduced): pcre16_extra *extra = ((void *)0); | - |
1327 | pcre_study_data *study; executed (the execution status of this line is deduced): pcre_study_data *study; | - |
1328 | const pcre_uint8 *tables; executed (the execution status of this line is deduced): const pcre_uint8 *tables; | - |
1329 | pcre_uchar *code; executed (the execution status of this line is deduced): pcre_uchar *code; | - |
1330 | compile_data compile_block; executed (the execution status of this line is deduced): compile_data compile_block; | - |
1331 | const REAL_PCRE *re = (const REAL_PCRE *)external_re; executed (the execution status of this line is deduced): const real_pcre16 *re = (const real_pcre16 *)external_re; | - |
1332 | | - |
1333 | *errorptr = NULL; executed (the execution status of this line is deduced): *errorptr = ((void *)0); | - |
1334 | | - |
1335 | if (re == NULL || re->magic_number != MAGIC_NUMBER) partially evaluated: re == ((void *)0) no Evaluation Count:0 | yes Evaluation Count:15 |
partially evaluated: re->magic_number != 0x50435245UL no Evaluation Count:0 | yes Evaluation Count:15 |
| 0-15 |
1336 | { | - |
1337 | *errorptr = "argument is not a compiled regular expression"; never executed (the execution status of this line is deduced): *errorptr = "argument is not a compiled regular expression"; | - |
1338 | return NULL; never executed: return ((void *)0); | 0 |
1339 | } | - |
1340 | | - |
1341 | if ((re->flags & PCRE_MODE) == 0) partially evaluated: (re->flags & 0x0002) == 0 no Evaluation Count:0 | yes Evaluation Count:15 |
| 0-15 |
1342 | { | - |
1343 | #ifdef COMPILE_PCRE8 | - |
1344 | *errorptr = "argument is compiled in 16 bit mode"; | - |
1345 | #else | - |
1346 | *errorptr = "argument is compiled in 8 bit mode"; never executed (the execution status of this line is deduced): *errorptr = "argument is compiled in 8 bit mode"; | - |
1347 | #endif | - |
1348 | return NULL; never executed: return ((void *)0); | 0 |
1349 | } | - |
1350 | | - |
1351 | if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) partially evaluated: (options & ~0x0001) != 0 no Evaluation Count:0 | yes Evaluation Count:15 |
| 0-15 |
1352 | { | - |
1353 | *errorptr = "unknown or incorrect option bit(s) set"; never executed (the execution status of this line is deduced): *errorptr = "unknown or incorrect option bit(s) set"; | - |
1354 | return NULL; never executed: return ((void *)0); | 0 |
1355 | } | - |
1356 | | - |
1357 | code = (pcre_uchar *)re + re->name_table_offset + executed (the execution status of this line is deduced): code = (pcre_uchar *)re + re->name_table_offset + | - |
1358 | (re->name_count * re->name_entry_size); executed (the execution status of this line is deduced): (re->name_count * re->name_entry_size); | - |
1359 | | - |
1360 | /* For an anchored pattern, or an unanchored pattern that has a first char, or | - |
1361 | a multiline pattern that matches only at "line starts", there is no point in | - |
1362 | seeking a list of starting bytes. */ | - |
1363 | | - |
1364 | if ((re->options & PCRE_ANCHORED) == 0 && partially evaluated: (re->options & 0x00000010) == 0 yes Evaluation Count:15 | no Evaluation Count:0 |
| 0-15 |
1365 | (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0) evaluated: (re->flags & (0x0010|0x0100)) == 0 yes Evaluation Count:4 | yes Evaluation Count:11 |
| 4-11 |
1366 | { | - |
1367 | int rc; executed (the execution status of this line is deduced): int rc; | - |
1368 | | - |
1369 | /* Set the character tables in the block that is passed around */ | - |
1370 | | - |
1371 | tables = re->tables; executed (the execution status of this line is deduced): tables = re->tables; | - |
1372 | | - |
1373 | #ifdef COMPILE_PCRE8 | - |
1374 | if (tables == NULL) | - |
1375 | (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, | - |
1376 | (void *)(&tables)); | - |
1377 | #else | - |
1378 | if (tables == NULL) partially evaluated: tables == ((void *)0) yes Evaluation Count:4 | no Evaluation Count:0 |
| 0-4 |
1379 | (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, executed: (void)pcre16_fullinfo(external_re, ((void *)0), 11, (void *)(&tables)); Execution Count:4 | 4 |
1380 | (void *)(&tables)); executed: (void)pcre16_fullinfo(external_re, ((void *)0), 11, (void *)(&tables)); Execution Count:4 | 4 |
1381 | #endif | - |
1382 | | - |
1383 | compile_block.lcc = tables + lcc_offset; executed (the execution status of this line is deduced): compile_block.lcc = tables + 0; | - |
1384 | compile_block.fcc = tables + fcc_offset; executed (the execution status of this line is deduced): compile_block.fcc = tables + 256; | - |
1385 | compile_block.cbits = tables + cbits_offset; executed (the execution status of this line is deduced): compile_block.cbits = tables + 512; | - |
1386 | compile_block.ctypes = tables + ctypes_offset; executed (the execution status of this line is deduced): compile_block.ctypes = tables + (512 + 320); | - |
1387 | | - |
1388 | /* See if we can find a fixed set of initial characters for the pattern. */ | - |
1389 | | - |
1390 | memset(start_bits, 0, 32 * sizeof(pcre_uint8)); executed (the execution status of this line is deduced): memset(start_bits, 0, 32 * sizeof(pcre_uint8)); | - |
1391 | rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, executed (the execution status of this line is deduced): rc = set_start_bits(code, start_bits, (re->options & 0x00000800) != 0, | - |
1392 | &compile_block); executed (the execution status of this line is deduced): &compile_block); | - |
1393 | bits_set = rc == SSB_DONE; executed (the execution status of this line is deduced): bits_set = rc == SSB_DONE; | - |
1394 | if (rc == SSB_UNKNOWN) partially evaluated: rc == SSB_UNKNOWN no Evaluation Count:0 | yes Evaluation Count:4 |
| 0-4 |
1395 | { | - |
1396 | *errorptr = "internal error: opcode not recognized"; never executed (the execution status of this line is deduced): *errorptr = "internal error: opcode not recognized"; | - |
1397 | return NULL; never executed: return ((void *)0); | 0 |
1398 | } | - |
1399 | } executed: } Execution Count:4 | 4 |
1400 | | - |
1401 | /* Find the minimum length of subject string. */ | - |
1402 | | - |
1403 | switch(min = find_minlength(code, code, re->options, 0)) | - |
1404 | { | - |
1405 | case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; never executed: return ((void *)0); | 0 |
1406 | case -3: *errorptr = "internal error: opcode not recognized"; return NULL; never executed: return ((void *)0); | 0 |
1407 | default: break; executed: break; Execution Count:15 | 15 |
1408 | } | - |
1409 | | - |
1410 | /* If a set of starting bytes has been identified, or if the minimum length is | - |
1411 | greater than zero, or if JIT optimization has been requested, get a | - |
1412 | pcre[16]_extra block and a pcre_study_data block. The study data is put in the | - |
1413 | latter, which is pointed to by the former, which may also get additional data | - |
1414 | set later by the calling program. At the moment, the size of pcre_study_data | - |
1415 | is fixed. We nevertheless save it in a field for returning via the | - |
1416 | pcre_fullinfo() function so that if it becomes variable in the future, | - |
1417 | we don't have to change that code. */ | - |
1418 | | - |
1419 | if (bits_set || min > 0 partially evaluated: bits_set no Evaluation Count:0 | yes Evaluation Count:15 |
evaluated: min > 0 yes Evaluation Count:6 | yes Evaluation Count:9 |
| 0-15 |
1420 | #ifdef SUPPORT_JIT executed (the execution status of this line is deduced):
| - |
1421 | || (options & PCRE_STUDY_JIT_COMPILE) != 0 partially evaluated: (options & 0x0001) != 0 yes Evaluation Count:9 | no Evaluation Count:0 |
| 0-9 |
1422 | #endif | - |
1423 | ) | - |
1424 | { | - |
1425 | extra = (PUBL(extra) *)(PUBL(malloc)) executed (the execution status of this line is deduced): extra = (pcre16_extra *)(pcre16_malloc) | - |
1426 | (sizeof(PUBL(extra)) + sizeof(pcre_study_data)); executed (the execution status of this line is deduced): (sizeof(pcre16_extra) + sizeof(pcre_study_data)); | - |
1427 | if (extra == NULL) partially evaluated: extra == ((void *)0) no Evaluation Count:0 | yes Evaluation Count:15 |
| 0-15 |
1428 | { | - |
1429 | *errorptr = "failed to get memory"; never executed (the execution status of this line is deduced): *errorptr = "failed to get memory"; | - |
1430 | return NULL; never executed: return ((void *)0); | 0 |
1431 | } | - |
1432 | | - |
1433 | study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra))); executed (the execution status of this line is deduced): study = (pcre_study_data *)((char *)extra + sizeof(pcre16_extra)); | - |
1434 | extra->flags = PCRE_EXTRA_STUDY_DATA; executed (the execution status of this line is deduced): extra->flags = 0x0001; | - |
1435 | extra->study_data = study; executed (the execution status of this line is deduced): extra->study_data = study; | - |
1436 | | - |
1437 | study->size = sizeof(pcre_study_data); executed (the execution status of this line is deduced): study->size = sizeof(pcre_study_data); | - |
1438 | study->flags = 0; executed (the execution status of this line is deduced): study->flags = 0; | - |
1439 | | - |
1440 | /* Set the start bits always, to avoid unset memory errors if the | - |
1441 | study data is written to a file, but set the flag only if any of the bits | - |
1442 | are set, to save time looking when none are. */ | - |
1443 | | - |
1444 | if (bits_set) partially evaluated: bits_set no Evaluation Count:0 | yes Evaluation Count:15 |
| 0-15 |
1445 | { | - |
1446 | study->flags |= PCRE_STUDY_MAPPED; never executed (the execution status of this line is deduced): study->flags |= 0x0001; | - |
1447 | memcpy(study->start_bits, start_bits, sizeof(start_bits)); never executed (the execution status of this line is deduced): memcpy(study->start_bits, start_bits, sizeof(start_bits)); | - |
1448 | } | 0 |
1449 | else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8)); executed: memset(study->start_bits, 0, 32 * sizeof(pcre_uint8)); Execution Count:15 | 15 |
1450 | | - |
1451 | #ifdef PCRE_DEBUG | - |
1452 | if (bits_set) | - |
1453 | { | - |
1454 | pcre_uint8 *ptr = start_bits; | - |
1455 | int i; | - |
1456 | | - |
1457 | printf("Start bits:\n"); | - |
1458 | for (i = 0; i < 32; i++) | - |
1459 | printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n"); | - |
1460 | } | - |
1461 | #endif | - |
1462 | | - |
1463 | /* Always set the minlength value in the block, because the JIT compiler | - |
1464 | makes use of it. However, don't set the bit unless the length is greater than | - |
1465 | zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time | - |
1466 | checking the zero case. */ | - |
1467 | | - |
1468 | if (min > 0) evaluated: min > 0 yes Evaluation Count:6 | yes Evaluation Count:9 |
| 6-9 |
1469 | { | - |
1470 | study->flags |= PCRE_STUDY_MINLEN; executed (the execution status of this line is deduced): study->flags |= 0x0002; | - |
1471 | study->minlength = min; executed (the execution status of this line is deduced): study->minlength = min; | - |
1472 | } executed: } Execution Count:6 | 6 |
1473 | else study->minlength = 0; executed: study->minlength = 0; Execution Count:9 | 9 |
1474 | | - |
1475 | /* If JIT support was compiled and requested, attempt the JIT compilation. | - |
1476 | If no starting bytes were found, and the minimum length is zero, and JIT | - |
1477 | compilation fails, abandon the extra block and return NULL. */ | - |
1478 | | - |
1479 | #ifdef SUPPORT_JIT | - |
1480 | extra->executable_jit = NULL; executed (the execution status of this line is deduced): extra->executable_jit = ((void *)0); | - |
1481 | if ((options & PCRE_STUDY_JIT_COMPILE) != 0) PRIV(jit_compile)(re, extra); executed: _pcre16_jit_compile(re, extra); Execution Count:15 partially evaluated: (options & 0x0001) != 0 yes Evaluation Count:15 | no Evaluation Count:0 |
| 0-15 |
1482 | if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0) evaluated: study->flags == 0 yes Evaluation Count:9 | yes Evaluation Count:6 |
partially evaluated: (extra->flags & 0x0040) == 0 no Evaluation Count:0 | yes Evaluation Count:9 |
| 0-9 |
1483 | { | - |
1484 | #ifdef COMPILE_PCRE8 | - |
1485 | pcre_free_study(extra); | - |
1486 | #endif | - |
1487 | #ifdef COMPILE_PCRE16 | - |
1488 | pcre16_free_study(extra); never executed (the execution status of this line is deduced): pcre16_free_study(extra); | - |
1489 | #endif | - |
1490 | extra = NULL; never executed (the execution status of this line is deduced): extra = ((void *)0); | - |
1491 | } | 0 |
1492 | #endif | - |
1493 | } executed: } Execution Count:15 | 15 |
1494 | | - |
1495 | return extra; executed: return extra; Execution Count:15 | 15 |
1496 | } | - |
1497 | | - |
1498 | | - |
1499 | /************************************************* | - |
1500 | * Free the study data * | - |
1501 | *************************************************/ | - |
1502 | | - |
1503 | /* This function frees the memory that was obtained by pcre_study(). | - |
1504 | | - |
1505 | Argument: a pointer to the pcre[16]_extra block | - |
1506 | Returns: nothing | - |
1507 | */ | - |
1508 | | - |
1509 | #ifdef COMPILE_PCRE8 | - |
1510 | PCRE_EXP_DEFN void | - |
1511 | pcre_free_study(pcre_extra *extra) | - |
1512 | #else | - |
1513 | PCRE_EXP_DEFN void | - |
1514 | pcre16_free_study(pcre16_extra *extra) | - |
1515 | #endif | - |
1516 | { | - |
1517 | if (extra == NULL) evaluated: extra == ((void *)0) yes Evaluation Count:937 | yes Evaluation Count:15 |
| 15-937 |
1518 | return; executed: return; Execution Count:937 | 937 |
1519 | #ifdef SUPPORT_JIT | - |
1520 | if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && partially evaluated: (extra->flags & 0x0040) != 0 yes Evaluation Count:15 | no Evaluation Count:0 |
| 0-15 |
1521 | extra->executable_jit != NULL) partially evaluated: extra->executable_jit != ((void *)0) yes Evaluation Count:15 | no Evaluation Count:0 |
| 0-15 |
1522 | PRIV(jit_free)(extra->executable_jit); executed: _pcre16_jit_free(extra->executable_jit); Execution Count:15 | 15 |
1523 | #endif | - |
1524 | PUBL(free)(extra); executed (the execution status of this line is deduced): pcre16_free(extra); | - |
1525 | } executed: } Execution Count:15 | 15 |
1526 | | - |
1527 | /* End of pcre_study.c */ | - |
1528 | | - |
| | |