compat/GnuRegex.h
/*
 * $Id$
 */

/*
 * Regular-expression interface selection for Squid.
 *
 * Unless USE_GNUREGEX is set, the system <regex.h> is used.  Otherwise this
 * header itself declares the interface of the GNU regular expression
 * library, version 0.12 (syntax bits, POSIX flag bits, error codes, the
 * compiled-pattern structure and the POSIX entry points).
 */
#ifndef SQUID_CONFIG_H
#include "config.h"
#endif

#ifndef SQUID_REGEXP_LIBRARY_H
#define SQUID_REGEXP_LIBRARY_H

#if !USE_GNUREGEX /* try the system one by default */

/* POSIX says that <sys/types.h> must be included (by the caller) before
 * <regex.h>. */
#if HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#if HAVE_REGEX_H
#include <regex.h>
#endif


#else /* USE_GNUREGEX */

/* size_t is used by `struct re_pattern_buffer' and by the regexec/regerror
 * prototypes below; include its standard definition so that this branch
 * does not depend on what "config.h" happens to pull in. */
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Definitions for data structures and routines for the regular
 * expression library, version 0.12.
 *
 * Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. */

/* POSIX says that <sys/types.h> must be included (by the caller) before
 * <regex.h>. */

/* The following bits are used to determine the regexp syntax we
 * recognize.  The set/not-set meanings are chosen so that Emacs syntax
 * remains the value 0.  The bits are given in alphabetical order, and
 * the definitions shifted by one from the previous bit; thus, when we
 * add or remove a bit, only one other definition need change. */
typedef unsigned reg_syntax_t;

/* If this bit is not set, then \ inside a bracket expression is literal.
 * If set, then such a \ quotes the following character. */
#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)

/* If this bit is not set, then + and ? are operators, and \+ and \? are
 * literals.
 * If set, then \+ and \? are operators and + and ? are literals. */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
 * [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
 * [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
 * If not set, then character classes are not supported. */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
 * expressions, of course).
 * If this bit is not set, then it depends:
 *   ^  is an anchor if it is at the beginning of a regular
 *      expression or after an open-group or an alternation operator;
 *   $  is an anchor if it is at the end of a regular expression, or
 *      before a close-group or an alternation operator.
 *
 * This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
 * POSIX draft 11.2 says that * etc. in leading positions is undefined.
 * We already implemented a previous draft which made those constructs
 * invalid, though, so we haven't changed the code back. */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
 * regardless of where they are in the pattern.
 * If this bit is not set, then special characters are special only in
 * some contexts; otherwise they are ordinary.  Specifically,
 * * + ? and intervals are only special when not after the beginning,
 * open-group, or alternation operator. */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
 * immediately after an alternation or begin-group operator. */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
 * If not set, then it doesn't. */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
 * If not set, then it does. */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
 * If not set, they do. */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
 * interval, depending on RE_NO_BK_BRACES.
 * If not set, \{, \}, {, and } are literals. */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
 * If not set, they are. */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
 * If not set, newline is literal. */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
 * are literals.
 * If not set, then `\{...\}' defines an interval. */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
 * If not set, \(...\) defines a group, and ( and ) are literals. */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then <digit> matches <digit>.
 * If not set, then <digit> is a back-reference. */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
 * If not set, then \| is an alternation operator, and | is literal. */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then an ending range point collating lower
 * than the starting range point, as in [z-a], is invalid.
 * If not set, then when the ending range point collates lower than the
 * starting range point, the range is ignored. */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
 * If not set, then an unmatched ) is invalid. */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)


/* Define combinations of the above bits for the standard possibilities.
 * (The [[[ comments delimit what gets put into the Texinfo file, so
 * don't delete them!) */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_AWK \
    (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
     | RE_NO_BK_PARENS | RE_NO_BK_REFS \
     | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
     | RE_UNMATCHED_RIGHT_PAREN_ORD)

#define RE_SYNTAX_POSIX_AWK \
    (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)

#define RE_SYNTAX_GREP \
    (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
     | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
     | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP \
    (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
     | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
     | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
     | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP \
    (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)

/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits common to both basic and extended POSIX regex syntax. */
#define _RE_SYNTAX_POSIX_COMMON \
    (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
     | RE_INTERVALS | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC \
    (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
 * RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
 * isn't minimal, since other operators, such as \`, aren't disabled. */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
    (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED \
    (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
     | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
     | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
     | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
 * replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
    (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
     | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
     | RE_NO_BK_PARENS | RE_NO_BK_REFS \
     | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */

/* Maximum number of duplicates an interval can allow.  Some systems
 * (erroneously) define this in other header files, but we want our
 * value, so remove any previous define. */
#ifdef RE_DUP_MAX
#undef RE_DUP_MAX
#endif
#define RE_DUP_MAX ((1 << 15) - 1)


/* POSIX `cflags' bits (i.e., information for `regcomp'). */

/* If this bit is set, then use extended regular expression syntax.
 * If not set, then use basic regular expression syntax. */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
 * If not set, then case is significant. */
#define REG_ICASE (REG_EXTENDED << 1)

/* If this bit is set, then anchors do not match at newline
 * characters in the string.
 * If not set, then anchors do match at newlines. */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then report only success or fail in regexec.
 * If not set, then returns differ between not matching and errors. */
#define REG_NOSUB (REG_NEWLINE << 1)


/* POSIX `eflags' bits (i.e., information for regexec). */

/* If this bit is set, then the beginning-of-line operator doesn't match
 * the beginning of the string (presumably because it's not the
 * beginning of a line).
 * If not set, then the beginning-of-line operator does match the
 * beginning of the string. */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line. */
#define REG_NOTEOL (1 << 1)


/* If any error codes are removed, changed, or added, update the
 * `re_error_msg' table in regex.c. */
typedef enum {
    REG_NOERROR = 0,    /* Success. */
    REG_NOMATCH,        /* Didn't find a match (for regexec). */

    /* POSIX regcomp return error codes.  (In the order listed in the
     * standard.) */
    REG_BADPAT,         /* Invalid pattern. */
    REG_ECOLLATE,       /* Not implemented. */
    REG_ECTYPE,         /* Invalid character class name. */
    REG_EESCAPE,        /* Trailing backslash. */
    REG_ESUBREG,        /* Invalid back reference. */
    REG_EBRACK,         /* Unmatched left bracket. */
    REG_EPAREN,         /* Parenthesis imbalance. */
    REG_EBRACE,         /* Unmatched \{. */
    REG_BADBR,          /* Invalid contents of \{\}. */
    REG_ERANGE,         /* Invalid range end. */
    REG_ESPACE,         /* Ran out of memory. */
    REG_BADRPT,         /* No preceding re for repetition op. */

    /* Error codes we've added. */
    REG_EEND,           /* Premature end. */
    REG_ESIZE,          /* Compiled pattern bigger than 2^16 bytes. */
    REG_ERPAREN         /* Unmatched ) or \); not returned from regcomp. */
} reg_errcode_t;

/* This data structure represents a compiled pattern.  Before calling
 * the pattern compiler, the fields `buffer', `allocated', `fastmap',
 * `translate', and `no_sub' can be set.  After the pattern has been
 * compiled, the `re_nsub' field is available.  All other fields are
 * private to the regex routines. */

struct re_pattern_buffer {
    /* [[[begin pattern_buffer]]] */
    /* Space that holds the compiled pattern.  It is declared as
     * `unsigned char *' because its elements are
     * sometimes used as array indexes. */
    unsigned char *buffer;

    /* Number of bytes to which `buffer' points. */
    unsigned long allocated;

    /* Number of bytes actually used in `buffer'. */
    unsigned long used;

    /* Syntax setting with which the pattern was compiled. */
    reg_syntax_t syntax;

    /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
     * the fastmap, if there is one, to skip over impossible
     * starting points for matches. */
    char *fastmap;

    /* Either a translate table to apply to all characters before
     * comparing them, or zero for no translation.  The translation
     * is applied to a pattern when it is compiled and to a string
     * when it is matched. */
    char *translate;

    /* Number of subexpressions found by the compiler. */
    size_t re_nsub;

    /* Zero if this pattern cannot match the empty string, one else.
     * Well, in truth it's used only in `re_search_2', to see
     * whether or not we should use the fastmap, so we don't set
     * this absolutely perfectly; see `re_compile_fastmap' (the
     * `duplicate' case). */
    unsigned can_be_null:1;

    /* If REGS_UNALLOCATED, allocate space in the `regs' structure
     * for `max (RE_NREGS, re_nsub + 1)' groups.
     * If REGS_REALLOCATE, reallocate space if necessary.
     * If REGS_FIXED, use what's there. */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
    unsigned regs_allocated:2;

    /* Set to zero when `regex_compile' compiles a pattern; set to one
     * by `re_compile_fastmap' if it updates the fastmap. */
    unsigned fastmap_accurate:1;

    /* If set, `re_match_2' does not return information about
     * subexpressions. */
    unsigned no_sub:1;

    /* If set, a beginning-of-line anchor doesn't match at the
     * beginning of the string. */
    unsigned not_bol:1;

    /* Similarly for an end-of-line anchor. */
    unsigned not_eol:1;

    /* If true, an anchor at a newline matches. */
    unsigned newline_anchor:1;

    /* [[[end pattern_buffer]]] */
};

typedef struct re_pattern_buffer regex_t;


/* search.c (search_buffer) in Emacs needs this one opcode value.  It is
 * defined both in `regex.c' and here. */
#define RE_EXACTN_VALUE 1

/* Type for byte offsets within the string.  POSIX mandates this. */
typedef int regoff_t;


/* This is the structure we store register match data in.  See
 * regex.texinfo for a full description of what registers match. */
struct re_registers {
    unsigned num_regs;
    regoff_t *start;
    regoff_t *end;
};


/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
 * `re_match_2' returns information about at least this many registers
 * the first time a `regs' structure is passed. */
#ifndef RE_NREGS
#define RE_NREGS 30
#endif


/* POSIX specification for registers.  Aside from the different names than
 * `re_registers', POSIX uses an array of structures, instead of a
 * structure of arrays. */
typedef struct {
    regoff_t rm_so;     /* Byte offset from string's start to substring's start. */
    regoff_t rm_eo;     /* Byte offset from string's start to substring's end. */
} regmatch_t;

/* Declarations for routines. */

/* The routines below are declared with plain ANSI prototypes only; this
 * header provides no argument-declaring macro for pre-ANSI compilers. */

/* POSIX compatibility. */
extern int regcomp(regex_t * preg, const char *pattern, int cflags);
extern int regexec(const regex_t * preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
extern size_t regerror(int errcode, const regex_t * preg, char *errbuf, size_t errbuf_size);
extern void regfree(regex_t * preg);

#ifdef __cplusplus
}
#endif

#endif /* USE_GNUREGEX */
#endif /* SQUID_REGEXP_LIBRARY_H */

/*
 * Local variables:
 * make-backup-files: t
 * version-control: t
 * trim-versions-without-asking: nil
 * End:
 */
Search
Introduction
- About Squid
- Why Squid?
- Squid Developers
- How to Help Out
- Getting Squid
- Donate
- Squid Deployment Case-Studies
Documentation
- FAQ | Wiki | Book
- Configuration Reference
- Configuration Guide - Visolve
- Configuration Examples
- Users guide
- Non-English
- Security Advisories
- More...
Support
- Bugzilla Database
- Mailing lists
- Contacting us
- Commercial services
- Project Sponsors
- Squid-based products
