Line | Source Code | Coverage |
---|
1 | /************************************************* | - |
2 | * Perl-Compatible Regular Expressions * | - |
3 | *************************************************/ | - |
4 | | - |
5 | /* PCRE is a library of functions to support regular expressions whose syntax | - |
6 | and semantics are as close as possible to those of the Perl 5 language. | - |
7 | | - |
8 | Written by Philip Hazel | - |
9 | Copyright (c) 1997-2012 University of Cambridge | - |
10 | | - |
11 | ----------------------------------------------------------------------------- | - |
12 | Redistribution and use in source and binary forms, with or without | - |
13 | modification, are permitted provided that the following conditions are met: | - |
14 | | - |
15 | * Redistributions of source code must retain the above copyright notice, | - |
16 | this list of conditions and the following disclaimer. | - |
17 | | - |
18 | * Redistributions in binary form must reproduce the above copyright | - |
19 | notice, this list of conditions and the following disclaimer in the | - |
20 | documentation and/or other materials provided with the distribution. | - |
21 | | - |
22 | * Neither the name of the University of Cambridge nor the names of its | - |
23 | contributors may be used to endorse or promote products derived from | - |
24 | this software without specific prior written permission. | - |
25 | | - |
26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | - |
27 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | - |
28 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | - |
29 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | - |
30 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | - |
31 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | - |
32 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | - |
33 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | - |
34 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | - |
35 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | - |
36 | POSSIBILITY OF SUCH DAMAGE. | - |
37 | ----------------------------------------------------------------------------- | - |
38 | */ | - |
39 | | - |
40 | | - |
41 | /* This module contains an internal function for validating UTF-16 character | - |
42 | strings. */ | - |
43 | | - |
44 | | - |
45 | #ifdef PCRE_HAVE_CONFIG_H | - |
46 | #include "config.h" | - |
47 | #endif | - |
48 | | - |
49 | /* Generate code with 16 bit character support. */ | - |
50 | #define COMPILE_PCRE16 | - |
51 | | - |
52 | #include "pcre_internal.h" | - |
53 | | - |
54 | | - |
55 | /************************************************* | - |
56 | * Validate a UTF-16 string * | - |
57 | *************************************************/ | - |
58 | | - |
59 | /* This function is called (optionally) at the start of compile or match, to | - |
60 | check that a supposed UTF-16 string is actually valid. The early check means | - |
61 | that subsequent code can assume it is dealing with a valid string. The check | - |
62 | can be turned off for maximum performance, but the consequences of supplying an | - |
63 | invalid string are then undefined. | - |
64 | | - |
65 | From release 8.21 more information about the details of the error are passed | - |
66 | back in the returned value: | - |
67 | | - |
68 | PCRE_UTF16_ERR0 No error | - |
69 | PCRE_UTF16_ERR1 Missing low surrogate at the end of the string | - |
70 | PCRE_UTF16_ERR2 Invalid low surrogate | - |
71 | PCRE_UTF16_ERR3 Isolated low surrogate | - |
72 | PCRE_UTF16_ERR4 Not allowed character | - |
73 | | - |
74 | Arguments: | - |
75 | string points to the string | - |
76 | length length of string, or -1 if the string is zero-terminated | - |
77 | errp pointer to an error position offset variable | - |
78 | | - |
79 | Returns: = 0 if the string is a valid UTF-16 string | - |
80 | > 0 otherwise, setting the offset of the bad character | - |
81 | */ | - |
82 | | - |
83 | int | - |
84 | PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset) | - |
85 | { | - |
86 | #ifdef SUPPORT_UTF | - |
87 | register PCRE_PUCHAR p; executed (the execution status of this line is deduced): register const pcre_uchar * p; | - |
88 | register pcre_uchar c; executed (the execution status of this line is deduced): register pcre_uchar c; | - |
89 | | - |
90 | if (length < 0) evaluated: length < 0 yes Evaluation Count:369 | yes Evaluation Count:333 |
| 333-369 |
91 | { | - |
92 | for (p = string; *p != 0; p++); executed: ; Execution Count:11293 evaluated: *p != 0 yes Evaluation Count:11293 | yes Evaluation Count:369 |
| 369-11293 |
93 | length = p - string; executed (the execution status of this line is deduced): length = p - string; | - |
94 | } executed: } Execution Count:369 | 369 |
95 | | - |
96 | for (p = string; length-- > 0; p++) evaluated: length-- > 0 yes Evaluation Count:18081 | yes Evaluation Count:701 |
| 701-18081 |
97 | { | - |
98 | c = *p; executed (the execution status of this line is deduced): c = *p; | - |
99 | | - |
100 | if ((c & 0xf800) != 0xd800) evaluated: (c & 0xf800) != 0xd800 yes Evaluation Count:18073 | yes Evaluation Count:8 |
| 8-18073 |
101 | { | - |
102 | /* Normal UTF-16 code point. Neither high nor low surrogate. */ | - |
103 | | - |
104 | /* This is probably a BOM from a different byte-order. | - |
105 | Regardless, the string is rejected. */ | - |
106 | if (c == 0xfffe) partially evaluated: c == 0xfffe no Evaluation Count:0 | yes Evaluation Count:18073 |
| 0-18073 |
107 | { | - |
108 | *erroroffset = p - string; never executed (the execution status of this line is deduced): *erroroffset = p - string; | - |
109 | return PCRE_UTF16_ERR4; never executed: return 4; | 0 |
110 | } | - |
111 | } executed: } Execution Count:18073 | 18073 |
112 | else if ((c & 0x0400) == 0) partially evaluated: (c & 0x0400) == 0 yes Evaluation Count:8 | no Evaluation Count:0 |
| 0-8 |
113 | { | - |
114 | /* High surrogate. */ | - |
115 | | - |
116 | /* Must be a followed by a low surrogate. */ | - |
117 | if (length == 0) partially evaluated: length == 0 no Evaluation Count:0 | yes Evaluation Count:8 |
| 0-8 |
118 | { | - |
119 | *erroroffset = p - string; never executed (the execution status of this line is deduced): *erroroffset = p - string; | - |
120 | return PCRE_UTF16_ERR1; never executed: return 1; | 0 |
121 | } | - |
122 | p++; executed (the execution status of this line is deduced): p++; | - |
123 | length--; executed (the execution status of this line is deduced): length--; | - |
124 | if ((*p & 0xfc00) != 0xdc00) evaluated: (*p & 0xfc00) != 0xdc00 yes Evaluation Count:1 | yes Evaluation Count:7 |
| 1-7 |
125 | { | - |
126 | *erroroffset = p - string; executed (the execution status of this line is deduced): *erroroffset = p - string; | - |
127 | return PCRE_UTF16_ERR2; executed: return 2; Execution Count:1 | 1 |
128 | } | - |
129 | } executed: } Execution Count:7 | 7 |
130 | else | - |
131 | { | - |
132 | /* Isolated low surrogate. Always an error. */ | - |
133 | *erroroffset = p - string; never executed (the execution status of this line is deduced): *erroroffset = p - string; | - |
134 | return PCRE_UTF16_ERR3; never executed: return 3; | 0 |
135 | } | - |
136 | } | - |
137 | | - |
138 | #else /* SUPPORT_UTF */ | - |
139 | (void)(string); /* Keep picky compilers happy */ | - |
140 | (void)(length); | - |
141 | #endif /* SUPPORT_UTF */ | - |
142 | | - |
143 | return PCRE_UTF16_ERR0; /* This indicates success */ executed: return 0; Execution Count:701 | 701 |
144 | } | - |
145 | | - |
146 | /* End of pcre16_valid_utf16.c */ | - |
147 | | - |
| | |