00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
/* Do-nothing function defined ahead of the USE_GKREGEX guard, so it is
   compiled whether or not the regex code below is enabled (presumably
   to keep the translation unit non-empty -- TODO confirm).  */
void gkfooo()
{
}
00023
00024 #ifdef USE_GKREGEX
00025
00026 #ifdef HAVE_CONFIG_H
00027 #include "config.h"
00028 #endif
00029
00030 #ifdef _LIBC
00031
00032 # define regfree(preg) __regfree (preg)
00033 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
00034 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
00035 # define regerror(errcode, preg, errbuf, errbuf_size) \
00036 __regerror(errcode, preg, errbuf, errbuf_size)
00037 # define re_set_registers(bu, re, nu, st, en) \
00038 __re_set_registers (bu, re, nu, st, en)
00039 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
00040 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
00041 # define re_match(bufp, string, size, pos, regs) \
00042 __re_match (bufp, string, size, pos, regs)
00043 # define re_search(bufp, string, size, startpos, range, regs) \
00044 __re_search (bufp, string, size, startpos, range, regs)
00045 # define re_compile_pattern(pattern, length, bufp) \
00046 __re_compile_pattern (pattern, length, bufp)
00047 # define re_set_syntax(syntax) __re_set_syntax (syntax)
00048 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
00049 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
00050 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
00051
00052 # include "../locale/localeinfo.h"
00053 #endif
00054
00055 #include "GKlib.h"
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085 #ifndef _REGEX_INTERNAL_H
00086 #define _REGEX_INTERNAL_H 1
00087
00088 #include <assert.h>
00089 #include <ctype.h>
00090 #include <stdio.h>
00091 #include <stdlib.h>
00092 #include <string.h>
00093
00094 #if defined(__MINGW32_VERSION) || defined(_MSC_VER)
00095 #define strcasecmp stricmp
00096 #endif
00097
00098 #if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
00099 # include <langinfo.h>
00100 #endif
00101 #if defined HAVE_LOCALE_H || defined _LIBC
00102 # include <locale.h>
00103 #endif
00104 #if defined HAVE_WCHAR_H || defined _LIBC
00105 # include <wchar.h>
00106 #endif
00107 #if defined HAVE_WCTYPE_H || defined _LIBC
00108 # include <wctype.h>
00109 #endif
00110 #if defined HAVE_STDBOOL_H || defined _LIBC
00111 # include <stdbool.h>
00112 #else
00113 typedef enum { false, true } bool;
00114 #endif
00115 #if defined HAVE_STDINT_H || defined _LIBC
00116 # include <stdint.h>
00117 #endif
00118 #if defined _LIBC
00119 # include <bits/libc-lock.h>
00120 #else
00121 # define __libc_lock_define(CLASS,NAME)
00122 # define __libc_lock_init(NAME) do { } while (0)
00123 # define __libc_lock_lock(NAME) do { } while (0)
00124 # define __libc_lock_unlock(NAME) do { } while (0)
00125 #endif
00126
00127
00128 #if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
00129 # define isblank(ch) ((ch) == ' ' || (ch) == '\t')
00130 #endif
00131
00132 #ifdef _LIBC
00133 # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
00134 # define _RE_DEFINE_LOCALE_FUNCTIONS 1
00135 # include <locale/localeinfo.h>
00136 # include <locale/elem-hash.h>
00137 # include <locale/coll-lookup.h>
00138 # endif
00139 #endif
00140
00141
00142 #if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
00143 # include <libintl.h>
00144 # ifdef _LIBC
00145 # undef gettext
00146 # define gettext(msgid) \
00147 INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
00148 # endif
00149 #else
00150 # define gettext(msgid) (msgid)
00151 #endif
00152
00153 #ifndef gettext_noop
00154
00155
00156 # define gettext_noop(String) String
00157 #endif
00158
00159
00160 #ifndef SIZE_MAX
00161 # define SIZE_MAX ((size_t) -1)
00162 #endif
00163
00164 #if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
00165 # define RE_ENABLE_I18N
00166 #endif
00167
00168 #if __GNUC__ >= 3
00169 # define BE(expr, val) __builtin_expect (expr, val)
00170 #else
00171 # define BE(expr, val) (expr)
00172 # define inline
00173 #endif
00174
00175
00176 #define SBC_MAX 256
00177
00178 #define COLL_ELEM_LEN_MAX 8
00179
00180
00181 #define NEWLINE_CHAR '\n'
00182 #define WIDE_NEWLINE_CHAR L'\n'
00183
00184
00185 #ifndef _LIBC
00186 # define __wctype wctype
00187 # define __iswctype iswctype
00188 # define __btowc btowc
00189 # define __mempcpy mempcpy
00190 # define __wcrtomb wcrtomb
00191 # define __regfree regfree
00192 # define attribute_hidden
00193 #endif
00194
00195 #ifdef __GNUC__
00196 # define __attribute(arg) __attribute__ (arg)
00197 #else
00198 # define __attribute(arg)
00199 #endif
00200
00201 extern const char __re_error_msgid[] attribute_hidden;
00202 extern const size_t __re_error_msgid_idx[] attribute_hidden;
00203
00204
00205
/* Fixed-size bit set with one bit per single-byte character value
   (SBC_MAX bits in total), stored as an array of bitset_word_t.
   NOTE(review): ULONG_MAX and CHAR_BIT come from <limits.h>, which is
   not included in the visible part of this file -- confirm that
   GKlib.h provides it.  */
00206 typedef unsigned long int bitset_word_t;
00207 /* Value of a word with every bit set. */
00208 #define BITSET_WORD_MAX ULONG_MAX
00209 /* Number of bits held by one word. */
00210 #define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
00211 /* Number of words making up one bit set. */
00212 #define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
00213 typedef bitset_word_t bitset_t[BITSET_WORDS];
00214 typedef bitset_word_t *re_bitset_ptr_t;
00215 typedef const bitset_word_t *re_const_bitset_ptr_t;
00216
/* Single-bit operations on a bit set.  SET is a bitset_t (or a pointer
   to its first word) and I is the bit number, expected to lie in
   [0, SBC_MAX).  Both arguments are parenthesized so that expression
   arguments such as `base + off' select the intended word and bit.
   I is still evaluated twice, so it must not have side effects.  */
#define bitset_set(set,i) \
  ((set)[(i) / BITSET_WORD_BITS] |= (bitset_word_t) 1 << (i) % BITSET_WORD_BITS)
#define bitset_clear(set,i) \
  ((set)[(i) / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))
#define bitset_contain(set,i) \
  ((set)[(i) / BITSET_WORD_BITS] & ((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))
/* Whole-set operations; each one touches exactly sizeof (bitset_t) bytes.  */
#define bitset_empty(set) memset ((set), '\0', sizeof (bitset_t))
#define bitset_set_all(set) memset ((set), '\xff', sizeof (bitset_t))
#define bitset_copy(dest,src) memcpy ((dest), (src), sizeof (bitset_t))
00226
/* Bit flags stating what a node requires of the context around a
   position.  NOT_SATISFY_PREV_CONSTRAINT and NOT_SATISFY_NEXT_CONSTRAINT
   test the PREV_xxx and NEXT_xxx bits against a CONTEXT_xxx mask.  The
   ten flags occupy bits 0..9, matching the 10-bit `constraint' field of
   re_token_t.  */
00227 #define PREV_WORD_CONSTRAINT 0x0001
00228 #define PREV_NOTWORD_CONSTRAINT 0x0002
00229 #define NEXT_WORD_CONSTRAINT 0x0004
00230 #define NEXT_NOTWORD_CONSTRAINT 0x0008
00231 #define PREV_NEWLINE_CONSTRAINT 0x0010
00232 #define NEXT_NEWLINE_CONSTRAINT 0x0020
00233 #define PREV_BEGBUF_CONSTRAINT 0x0040
00234 #define NEXT_ENDBUF_CONSTRAINT 0x0080
00235 #define WORD_DELIM_CONSTRAINT 0x0100
00236 #define NOT_WORD_DELIM_CONSTRAINT 0x0200
00237 /* Named combinations of the constraint flags above. */
00238 typedef enum
00239 {
00240 INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
00241 WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
00242 WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
00243 INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
00244 LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
00245 LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
00246 BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
00247 BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
00248 WORD_DELIM = WORD_DELIM_CONSTRAINT,
00249 NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
00250 } re_context_type;
00251
/* A set of integer node indices kept in a heap array.  */
00252 typedef struct
00253 {
00254 int alloc; /* presumably the capacity of `elems' -- TODO confirm */
00255 int nelem; /* element count; re_node_set_empty resets it to 0 */
00256 int *elems; /* element storage; released by re_node_set_free */
00257 } re_node_set;
00258
/* Kinds of token / node.  The numbering is significant: IS_EPSILON_NODE
   tests EPSILON_BIT (8) in the value.  */
00259 typedef enum
00260 {
00261 NON_TYPE = 0,
00262 /* Values 1..7: EPSILON_BIT is clear for all of these. */
00263
00264 CHARACTER = 1,
00265 END_OF_RE = 2,
00266 SIMPLE_BRACKET = 3,
00267 OP_BACK_REF = 4,
00268 OP_PERIOD = 5,
00269 #ifdef RE_ENABLE_I18N
00270 COMPLEX_BRACKET = 6,
00271 OP_UTF8_PERIOD = 7,
00272 #endif
00273
00274 /* Values 8..12: EPSILON_BIT is set, so IS_EPSILON_NODE is true. */
00275
00276 #define EPSILON_BIT 8
00277 OP_OPEN_SUBEXP = EPSILON_BIT | 0,
00278 OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
00279 OP_ALT = EPSILON_BIT | 2,
00280 OP_DUP_ASTERISK = EPSILON_BIT | 3,
00281 ANCHOR = EPSILON_BIT | 4,
00282
00283 /* Values 16 and 17 (EPSILON_BIT clear). */
00284 CONCAT = 16,
00285 SUBEXP = 17,
00286
00287 /* Auto-numbered from 18.  NOTE(review): values 24..31 (OP_CLOSE_DUP_NUM through OP_CLOSE_CHAR_CLASS) also have bit 8 set, so IS_EPSILON_NODE is only meaningful for types that are actually stored as nodes -- confirm. */
00288 OP_DUP_PLUS = 18,
00289 OP_DUP_QUESTION,
00290 OP_OPEN_BRACKET,
00291 OP_CLOSE_BRACKET,
00292 OP_CHARSET_RANGE,
00293 OP_OPEN_DUP_NUM,
00294 OP_CLOSE_DUP_NUM,
00295 OP_NON_MATCH_LIST,
00296 OP_OPEN_COLL_ELEM,
00297 OP_CLOSE_COLL_ELEM,
00298 OP_OPEN_EQUIV_CLASS,
00299 OP_CLOSE_EQUIV_CLASS,
00300 OP_OPEN_CHAR_CLASS,
00301 OP_CLOSE_CHAR_CLASS,
00302 OP_WORD,
00303 OP_NOTWORD,
00304 OP_SPACE,
00305 OP_NOTSPACE,
00306 BACK_SLASH
00307
00308 } re_token_type_t;
00309
/* Description of a multibyte bracket expression; only compiled when
   RE_ENABLE_I18N is defined.  Each array below has an `n...' counter
   with a matching name.  NOTE(review): no code using these fields is
   visible here; the per-field notes are inferred from the names --
   confirm against the bracket parser.  */
00310 #ifdef RE_ENABLE_I18N
00311 typedef struct
00312 {
00313 /* presumably the individual wide characters in the set */
00314 wchar_t *mbchars;
00315
00316 /* presumably collating symbols; only present when built inside libc */
00317 # ifdef _LIBC
00318 int32_t *coll_syms;
00319 # endif
00320
00321 /* presumably equivalence classes; only present when built inside libc */
00322 # ifdef _LIBC
00323 int32_t *equiv_classes;
00324 # endif
00325
00326 /* presumably range bounds; element type differs between libc and standalone builds */
00327 # ifdef _LIBC
00328 uint32_t *range_starts;
00329 uint32_t *range_ends;
00330 # else
00331 wchar_t *range_starts;
00332 wchar_t *range_ends;
00333 # endif
00334
00335 /* presumably wctype() handles for named character classes */
00336 wctype_t *char_classes;
00337
00338 /* one-bit flag; presumably set for a negated bracket list -- TODO confirm */
00339 unsigned int non_match : 1;
00340
00341 /* presumably the number of entries in mbchars */
00342 int nmbchars;
00343
00344 /* declared unconditionally although coll_syms exists only under _LIBC */
00345 int ncoll_syms;
00346
00347 /* declared unconditionally although equiv_classes exists only under _LIBC */
00348 int nequiv_classes;
00349
00350 /* presumably the number of range_starts / range_ends pairs */
00351 int nranges;
00352
00353 /* presumably the number of entries in char_classes */
00354 int nchar_classes;
00355 } re_charset_t;
00356 #endif
00357
/* One token / node: an operand union, the node type, and a handful of
   flag bit-fields.  */
00358 typedef struct
00359 {
00360 union
00361 {
00362 unsigned char c; /* a single byte */
00363 re_bitset_ptr_t sbcset; /* pointer to the words of a bit set */
00364 #ifdef RE_ENABLE_I18N
00365 re_charset_t *mbcset; /* multibyte bracket description */
00366 #endif
00367 int idx; /* an integer index -- meaning depends on the type; TODO confirm */
00368 re_context_type ctx_type; /* a context constraint combination */
00369 } opr;
00370 #if __GNUC__ >= 2
00371 re_token_type_t type : 8; /* enum packed into 8 bits when the compiler is GCC 2 or later */
00372 #else
00373 re_token_type_t type;
00374 #endif
00375 unsigned int constraint : 10; /* ten bits: exactly the width of the xxx_CONSTRAINT flag set */
00376 unsigned int duplicated : 1;
00377 unsigned int opt_subexp : 1;
00378 #ifdef RE_ENABLE_I18N
00379 unsigned int accept_mb : 1;
00380
00381
00382 unsigned int mb_partial : 1;
00383 #endif
00384 unsigned int word_char : 1;
00385 } re_token_t;
00386 /* Non-zero when TYPE has EPSILON_BIT set. */
00387 #define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
00388
/* Working view of the string being scanned: the caller's raw bytes plus
   the (possibly translated / upper-cased) byte buffer and, for
   multibyte locales, a parallel wide-character buffer.  */
00389 struct re_string_t
00390 {
00391
00392 /* The caller's string, as passed to re_string_construct_common. */
00393 const unsigned char *raw_mbs;
00394
00395
00396 /* Bytes actually scanned: an owned buffer when mbs_allocated is set, otherwise an alias of the caller's string. */
00397 unsigned char *mbs;
00398 #ifdef RE_ENABLE_I18N
00399 /* wcs[i] holds the wide character that starts at byte i, or WEOF for the following bytes of the same character. */
00400 wint_t *wcs;
00401 int *offsets; /* offsets[i]: raw index for byte i of mbs; allocated lazily by build_wcs_upper_buffer */
00402 mbstate_t cur_state; /* conversion state handed to mbrtowc */
00403 #endif
00404
00405 /* Offset added to raw_mbs whenever raw bytes are read. */
00406 int raw_mbs_idx;
00407 /* Number of bytes of mbs (and wcs) filled in so far. */
00408 int valid_len;
00409 /* Number of raw bytes consumed to produce them. */
00410 int valid_raw_len;
00411 /* Element count last passed to re_string_realloc_buffers. */
00412 int bufs_len;
00413 /* Read cursor used by the re_string_xxx accessor macros. */
00414 int cur_idx;
00415 /* Length of the raw string. */
00416 int raw_len;
00417 /* Length of the processed string; starts equal to raw_len and is adjusted when upper-casing changes a character's byte length. */
00418 int len;
00419
00420
00421 /* Initialised to the string length. */
00422 int raw_stop;
00423 /* End index tested by re_string_eoi; adjusted together with len. */
00424 int stop;
00425
00426
00427
00428 /* NOTE(review): not touched by the code visible here. */
00429 unsigned int tip_context;
00430 /* Byte translation table, or NULL. */
00431 RE_TRANSLATE_TYPE trans;
00432 /* Copied from dfa->word_char by re_string_allocate. */
00433 re_const_bitset_ptr_t word_char;
00434 /* Flags; icase is normalised to 0/1, is_utf8 and map_notascii are copied from the dfa. */
00435 unsigned char icase;
00436 unsigned char is_utf8;
00437 unsigned char map_notascii;
00438 unsigned char mbs_allocated; /* set when trans != NULL or icase */
00439 unsigned char offsets_needed; /* set once mbs and raw indexes stop being identical */
00440 unsigned char newline_anchor;
00441 unsigned char word_ops_used;
00442 int mb_cur_max; /* copied from dfa->mb_cur_max */
00443 };
00444 typedef struct re_string_t re_string_t;
00445
00446
00447 struct re_dfa_t;
00448 typedef struct re_dfa_t re_dfa_t;
00449
00450 #ifndef _LIBC
00451 # ifdef __i386__
00452 # define internal_function __attribute ((regparm (3), stdcall))
00453 # else
00454 # define internal_function
00455 # endif
00456 #endif
00457
/* Forward declarations of the file-local re_string_t helpers.  The
   wide-character builders exist only when RE_ENABLE_I18N is defined.  */
00458 static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
00459 int new_buf_len)
00460 internal_function;
00461 #ifdef RE_ENABLE_I18N
00462 static void build_wcs_buffer (re_string_t *pstr) internal_function;
00463 static int build_wcs_upper_buffer (re_string_t *pstr) internal_function;
00464 #endif
00465 static void build_upper_buffer (re_string_t *pstr) internal_function;
00466 static void re_string_translate_buffer (re_string_t *pstr) internal_function;
00467 static unsigned int re_string_context_at (const re_string_t *input, int idx,
00468 int eflags)
00469 internal_function __attribute ((pure));
/* Accessors for a re_string_t.  PSTR is a pointer to the string; IDX and
   OFFSET are byte indexes into its buffers.  No bounds checking is done.  */

/* Byte OFFSET positions past the cursor.  OFFSET is parenthesized so
   that conditional or bitwise expressions are added as a whole.  */
#define re_string_peek_byte(pstr, offset) \
  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
/* Byte at the cursor; advances the cursor by one.  */
#define re_string_fetch_byte(pstr) \
  ((pstr)->mbs[(pstr)->cur_idx++])
/* Non-zero when IDX is the end of the valid data or starts a character
   (its wcs slot is not WEOF).  */
#define re_string_first_byte(pstr, idx) \
  ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
/* Non-zero when the character starting at IDX is exactly one byte long.  */
#define re_string_is_single_byte_char(pstr, idx) \
  ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
                                || (pstr)->wcs[(idx) + 1] != WEOF))
/* Non-zero when the cursor has reached `stop'.  */
#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
#define re_string_get_buffer(pstr) ((pstr)->mbs)
#define re_string_length(pstr) ((pstr)->len)
#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
/* Move the cursor forward by IDX bytes / set it to IDX.  */
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
00486
00487 #ifdef __GNUC__
00488 # define alloca(size) __builtin_alloca (size)
00489 # define HAVE_ALLOCA 1
00490 #elif defined(_MSC_VER)
00491 # include <malloc.h>
00492 # define alloca _alloca
00493 # define HAVE_ALLOCA 1
00494 #else
00495 # error No alloca()
00496 #endif
00497
00498 #ifndef _LIBC
00499 # if HAVE_ALLOCA
00500
00501
00502
00503
00504 # define __libc_use_alloca(n) ((n) < 4032)
00505 # else
00506
00507 # define __libc_use_alloca(n) 0
00508 # endif
00509 #endif
00510
/* Typed wrappers over malloc / realloc / free: T is the element type and
   N the element count.  NOTE(review): (n) * sizeof (t) is not checked
   for overflow here; callers must pass a sane N.  */
00511 #define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
00512 #define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
00513 #define re_free(p) free (p)
00514
/* Node of a binary tree whose payload is a re_token_t.  Besides the
   parent / left / right links it carries two extra links, `first' and
   `next' (their traversal meaning is not visible here -- TODO confirm).  */
00515 struct bin_tree_t
00516 {
00517 struct bin_tree_t *parent;
00518 struct bin_tree_t *left;
00519 struct bin_tree_t *right;
00520 struct bin_tree_t *first;
00521 struct bin_tree_t *next;
00522
00523 re_token_t token;
00524
00525
00526 /* presumably the index of the corresponding node in re_dfa_t.nodes -- TODO confirm */
00527 int node_idx;
00528 };
00529 typedef struct bin_tree_t bin_tree_t;
00530 /* Nodes per storage chunk: as many as fit in 1024 bytes after one pointer. */
00531 #define BIN_TREE_STORAGE_SIZE \
00532 ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
00533 /* Chunk of tree nodes; chunks are chained through `next'. */
00534 struct bin_tree_storage_t
00535 {
00536 struct bin_tree_storage_t *next;
00537 bin_tree_t data[BIN_TREE_STORAGE_SIZE];
00538 };
00539 typedef struct bin_tree_storage_t bin_tree_storage_t;
00540
/* Context flags: one bit each, built by successive left shifts
   (CONTEXT_WORD = 1, CONTEXT_NEWLINE = 2, CONTEXT_BEGBUF = 4,
   CONTEXT_ENDBUF = 8).  */
#define CONTEXT_WORD 1
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)

/* Tests on a context mask C.  The first four yield the masked bit
   (non-zero when set); IS_ORDINARY_CONTEXT is true when no bit is set.  */
#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
#define IS_ORDINARY_CONTEXT(c) ((c) == 0)

/* Character classification used to derive contexts.  CH is evaluated
   more than once.  */
#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)

/* True when CONTEXT (a CONTEXT_xxx mask describing what precedes the
   position) violates one of the PREV_xxx requirements in CONSTRAINT.
   Every use of CONSTRAINT is parenthesized, as in the NEXT variant
   below, so that expression arguments such as `a | b' are masked as a
   whole.  */
#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
 ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))

/* Same test for the NEXT_xxx requirements against the context that
   follows the position.  */
#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
 ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
00568
/* One DFA state: a hash value, three node sets (plus an optional pointer
   to a fourth), two transition tables and a few flags.
   NOTE(review): none of the code that fills these fields is visible
   here; the field notes below only restate the declarations.  */
00569 struct re_dfastate_t
00570 {
00571 unsigned int hash;
00572 re_node_set nodes;
00573 re_node_set non_eps_nodes;
00574 re_node_set inveclosure;
00575 re_node_set *entrance_nodes;
00576 struct re_dfastate_t **trtable, **word_trtable;
00577 unsigned int context : 4; /* four bits: the width of the CONTEXT_xxx flag set */
00578 unsigned int halt : 1;
00579
00580
00581
00582 unsigned int accept_mb : 1;
00583
00584 unsigned int has_backref : 1;
00585 unsigned int has_constraint : 1;
00586 };
00587 typedef struct re_dfastate_t re_dfastate_t;
00588 /* Array of state pointers with separate `num' and `alloc' counters (presumably used/allocated -- TODO confirm). */
00589 struct re_state_table_entry
00590 {
00591 int num;
00592 int alloc;
00593 re_dfastate_t **array;
00594 };
00595
00596
00597
/* Bookkeeping records for sub-expression and back-reference matching.
   NOTE(review): the code that uses them is outside the visible part of
   the file; the notes below only restate the declarations.  */

/* Array of state pointers with `next_idx' and `alloc' counters. */
00598 typedef struct
00599 {
00600 int next_idx;
00601 int alloc;
00602 re_dfastate_t **array;
00603 } state_array_t;
00604
00605
00606 /* A node / string-index pair with an embedded state array. */
00607 typedef struct
00608 {
00609 int node;
00610 int str_idx;
00611 state_array_t path;
00612 } re_sub_match_last_t;
00613
00614
00615
00616
00617 /* A node / string-index pair, an optional state array, and an array of re_sub_match_last_t pointers with alasts / nlasts counters (presumably allocated / used -- TODO confirm). */
00618 typedef struct
00619 {
00620 int str_idx;
00621 int node;
00622 state_array_t *path;
00623 int alasts;
00624 int nlasts;
00625 re_sub_match_last_t **lasts;
00626 } re_sub_match_top_t;
00627 /* One cached back-reference record. */
00628 struct re_backref_cache_entry
00629 {
00630 int node;
00631 int str_idx;
00632 int subexp_from;
00633 int subexp_to;
00634 char more;
00635 char unused;
00636 unsigned short int eps_reachable_subexps_map;
00637 };
00638
/* Per-match working state: the input string wrapper, the DFA being run,
   and several growable arrays.  NOTE(review): the matcher itself is not
   visible here; the field notes only restate the declarations.  */
00639 typedef struct
00640 {
00641
00642 re_string_t input;
00643 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
00644 const re_dfa_t *const dfa; /* the pointer itself is const only under libc or C99 and later */
00645 #else
00646 const re_dfa_t *dfa;
00647 #endif
00648
00649 int eflags;
00650
00651 int match_last;
00652 int last_node;
00653
00654 re_dfastate_t **state_log;
00655 int state_log_top;
00656 /* bkref_ents with nbkref_ents / abkref_ents counters (presumably used / allocated -- TODO confirm) */
00657 int nbkref_ents;
00658 int abkref_ents;
00659 struct re_backref_cache_entry *bkref_ents;
00660 int max_mb_elem_len;
00661 int nsub_tops;
00662 int asub_tops;
00663 re_sub_match_top_t **sub_tops;
00664 } re_match_context_t;
00665 /* Two state-pointer arrays, a last node / string index pair, and a node set of limits. */
00666 typedef struct
00667 {
00668 re_dfastate_t **sifted_states;
00669 re_dfastate_t **limited_states;
00670 int last_node;
00671 int last_str_idx;
00672 re_node_set limits;
00673 } re_sift_context_t;
00674 /* One saved entry of the fail stack below. */
00675 struct re_fail_stack_ent_t
00676 {
00677 int idx;
00678 int node;
00679 regmatch_t *regs;
00680 re_node_set eps_via_nodes;
00681 };
00682 /* Array of fail-stack entries with num / alloc counters. */
00683 struct re_fail_stack_t
00684 {
00685 int num;
00686 int alloc;
00687 struct re_fail_stack_ent_t *stack;
00688 };
00689
/* The compiled pattern.  Among the fields, mb_cur_max, is_utf8 and
   map_notascii are copied into every re_string_t by
   re_string_construct_common, and word_char / word_ops_used by
   re_string_allocate.  The remaining fields are filled by code outside
   the visible part of this file.  */
00690 struct re_dfa_t
00691 {
00692 re_token_t *nodes;
00693 size_t nodes_alloc;
00694 size_t nodes_len;
00695 int *nexts;
00696 int *org_indices;
00697 re_node_set *edests;
00698 re_node_set *eclosures;
00699 re_node_set *inveclosures;
00700 struct re_state_table_entry *state_table;
00701 re_dfastate_t *init_state;
00702 re_dfastate_t *init_state_word;
00703 re_dfastate_t *init_state_nl;
00704 re_dfastate_t *init_state_begbuf;
00705 bin_tree_t *str_tree;
00706 bin_tree_storage_t *str_tree_storage;
00707 re_bitset_ptr_t sb_char;
00708 int str_tree_storage_idx;
00709
00710
00711 unsigned int state_hash_mask;
00712 int init_node;
00713 int nbackref;
00714
00715 /* Each map is a single bitset_word_t. */
00716 bitset_word_t used_bkref_map;
00717 bitset_word_t completed_bkref_map;
00718
00719 unsigned int has_plural_match : 1;
00720
00721
00722
00723 unsigned int has_mb_node : 1;
00724 unsigned int is_utf8 : 1;
00725 unsigned int map_notascii : 1;
00726 unsigned int word_ops_used : 1;
00727 int mb_cur_max;
00728 bitset_t word_char;
00729 reg_syntax_t syntax;
00730 int *subexp_map;
00731 #ifdef DEBUG
00732 char* re_str;
00733 #endif
00734 __libc_lock_define (, lock) /* expands to nothing unless _LIBC is defined */
00735 };
00736
/* Helpers for re_node_set.  SET / P is a pointer to the set.  */
/* Zero every field of the set. */
00737 #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
/* Remove ID: re_node_set_contains yields a value one greater than the
   position passed to re_node_set_remove_at (both defined elsewhere).  */
00738 #define re_node_set_remove(set,id) \
00739 (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
/* Forget all elements; the storage is kept. */
00740 #define re_node_set_empty(p) ((p)->nelem = 0)
/* Release the element storage; the set structure itself is not freed. */
00741 #define re_node_set_free(set) re_free ((set)->elems)
00742
00743
/* Kind of a single element of a bracket expression. */
00744 typedef enum
00745 {
00746 SB_CHAR,
00747 MB_CHAR,
00748 EQUIV_CLASS,
00749 COLL_SYM,
00750 CHAR_CLASS
00751 } bracket_elem_type;
00752 /* A bracket element: its kind plus a byte, a name, or a wide character.  NOTE(review): which union member goes with which kind is not shown here -- presumably ch for SB_CHAR, wch for MB_CHAR and name for the rest; confirm against the parser. */
00753 typedef struct
00754 {
00755 bracket_elem_type type;
00756 union
00757 {
00758 unsigned char ch;
00759 unsigned char *name;
00760 wchar_t wch;
00761 } opr;
00762 } bracket_elem_t;
00763
00764
00765
00766 static inline void
00767 bitset_not (bitset_t set)
00768 {
00769 int bitset_i;
00770 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
00771 set[bitset_i] = ~set[bitset_i];
00772 }
00773
00774 static inline void
00775 bitset_merge (bitset_t dest, const bitset_t src)
00776 {
00777 int bitset_i;
00778 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
00779 dest[bitset_i] |= src[bitset_i];
00780 }
00781
00782 static inline void
00783 bitset_mask (bitset_t dest, const bitset_t src)
00784 {
00785 int bitset_i;
00786 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
00787 dest[bitset_i] &= src[bitset_i];
00788 }
00789
00790 #ifdef RE_ENABLE_I18N
00791
00792 static inline int
00793 internal_function __attribute ((pure))
00794 re_string_char_size_at (const re_string_t *pstr, int idx)
00795 {
00796 int byte_idx;
00797 if (pstr->mb_cur_max == 1)
00798 return 1;
00799 for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
00800 if (pstr->wcs[idx + byte_idx] != WEOF)
00801 break;
00802 return byte_idx;
00803 }
00804
00805 static inline wint_t
00806 internal_function __attribute ((pure))
00807 re_string_wchar_at (const re_string_t *pstr, int idx)
00808 {
00809 if (pstr->mb_cur_max == 1)
00810 return (wint_t) pstr->mbs[idx];
00811 return (wint_t) pstr->wcs[idx];
00812 }
00813
/* Return the byte length of the element starting at byte IDX of
   PSTR->mbs.  Outside libc the answer is always 1.  Inside libc, when
   the locale has collation rules (nrules != 0), findidx from
   <locale/weight.h> advances P past one element and the distance it
   moved is returned.  */
00814 static int
00815 internal_function __attribute ((pure))
00816 re_string_elem_size_at (const re_string_t *pstr, int idx)
00817 {
00818 # ifdef _LIBC
00819 const unsigned char *p, *extra;
00820 const int32_t *table, *indirect;
00821 int32_t tmp;
00822 # include <locale/weight.h> /* included inside the body; presumably defines findidx in terms of the locals above -- TODO confirm */
00823 uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
00824
00825 if (nrules != 0)
00826 {
00827 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
00828 extra = (const unsigned char *)
00829 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
00830 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
00831 _NL_COLLATE_INDIRECTMB);
00832 p = pstr->mbs + idx;
00833 tmp = findidx (&p); /* result unused; only the advance of p matters */
00834 return p - pstr->mbs - idx;
00835 }
00836 else
00837 # endif
00838 return 1;
00839 }
00840 #endif
00841
00842 #endif
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
/* Forward declarations: the shared re_string_t initialiser defined
   further down, and the two DFA state constructors (defined outside the
   visible part of this file).  */
00871 static void re_string_construct_common (const char *str, int len,
00872 re_string_t *pstr,
00873 RE_TRANSLATE_TYPE trans, int icase,
00874 const re_dfa_t *dfa) internal_function;
00875 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
00876 const re_node_set *nodes,
00877 unsigned int hash) internal_function;
00878 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
00879 const re_node_set *nodes,
00880 unsigned int context,
00881 unsigned int hash) internal_function;
00882
00883
00884
00885
00886
00887
00888 static reg_errcode_t
00889 internal_function
00890 re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
00891 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
00892 {
00893 reg_errcode_t ret;
00894 int init_buf_len;
00895
00896
00897 if (init_len < dfa->mb_cur_max)
00898 init_len = dfa->mb_cur_max;
00899 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
00900 re_string_construct_common (str, len, pstr, trans, icase, dfa);
00901
00902 ret = re_string_realloc_buffers (pstr, init_buf_len);
00903 if (BE (ret != REG_NOERROR, 0))
00904 return ret;
00905
00906 pstr->word_char = dfa->word_char;
00907 pstr->word_ops_used = dfa->word_ops_used;
00908 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
00909 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
00910 pstr->valid_raw_len = pstr->valid_len;
00911 return REG_NOERROR;
00912 }
00913
00914
00915
00916 static reg_errcode_t
00917 internal_function
00918 re_string_construct (re_string_t *pstr, const char *str, int len,
00919 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
00920 {
00921 reg_errcode_t ret;
00922 memset (pstr, '\0', sizeof (re_string_t));
00923 re_string_construct_common (str, len, pstr, trans, icase, dfa);
00924
00925 if (len > 0)
00926 {
00927 ret = re_string_realloc_buffers (pstr, len + 1);
00928 if (BE (ret != REG_NOERROR, 0))
00929 return ret;
00930 }
00931 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
00932
00933 if (icase)
00934 {
00935 #ifdef RE_ENABLE_I18N
00936 if (dfa->mb_cur_max > 1)
00937 {
00938 while (1)
00939 {
00940 ret = build_wcs_upper_buffer (pstr);
00941 if (BE (ret != REG_NOERROR, 0))
00942 return ret;
00943 if (pstr->valid_raw_len >= len)
00944 break;
00945 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
00946 break;
00947 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
00948 if (BE (ret != REG_NOERROR, 0))
00949 return ret;
00950 }
00951 }
00952 else
00953 #endif
00954 build_upper_buffer (pstr);
00955 }
00956 else
00957 {
00958 #ifdef RE_ENABLE_I18N
00959 if (dfa->mb_cur_max > 1)
00960 build_wcs_buffer (pstr);
00961 else
00962 #endif
00963 {
00964 if (trans != NULL)
00965 re_string_translate_buffer (pstr);
00966 else
00967 {
00968 pstr->valid_len = pstr->bufs_len;
00969 pstr->valid_raw_len = pstr->bufs_len;
00970 }
00971 }
00972 }
00973
00974 return REG_NOERROR;
00975 }
00976
00977
00978
00979 static reg_errcode_t
00980 internal_function
00981 re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
00982 {
00983 #ifdef RE_ENABLE_I18N
00984 if (pstr->mb_cur_max > 1)
00985 {
00986 wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
00987 if (BE (new_wcs == NULL, 0))
00988 return REG_ESPACE;
00989 pstr->wcs = new_wcs;
00990 if (pstr->offsets != NULL)
00991 {
00992 int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
00993 if (BE (new_offsets == NULL, 0))
00994 return REG_ESPACE;
00995 pstr->offsets = new_offsets;
00996 }
00997 }
00998 #endif
00999 if (pstr->mbs_allocated)
01000 {
01001 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
01002 new_buf_len);
01003 if (BE (new_mbs == NULL, 0))
01004 return REG_ESPACE;
01005 pstr->mbs = new_mbs;
01006 }
01007 pstr->bufs_len = new_buf_len;
01008 return REG_NOERROR;
01009 }
01010
01011
01012 static void
01013 internal_function
01014 re_string_construct_common (const char *str, int len, re_string_t *pstr,
01015 RE_TRANSLATE_TYPE trans, int icase,
01016 const re_dfa_t *dfa)
01017 {
01018 pstr->raw_mbs = (const unsigned char *) str;
01019 pstr->len = len;
01020 pstr->raw_len = len;
01021 pstr->trans = trans;
01022 pstr->icase = icase ? 1 : 0;
01023 pstr->mbs_allocated = (trans != NULL || icase);
01024 pstr->mb_cur_max = dfa->mb_cur_max;
01025 pstr->is_utf8 = dfa->is_utf8;
01026 pstr->map_notascii = dfa->map_notascii;
01027 pstr->stop = pstr->len;
01028 pstr->raw_stop = pstr->stop;
01029 }
01030
01031 #ifdef RE_ENABLE_I18N
01032
01033
01034
01035
01036
01037
01038
01039
01040
01041
01042
01043
01044 static void
01045 internal_function
01046 build_wcs_buffer (re_string_t *pstr)
01047 {
01048 #ifdef _LIBC
01049 unsigned char buf[MB_LEN_MAX];
01050 assert (MB_LEN_MAX >= pstr->mb_cur_max);
01051 #else
01052 unsigned char buf[64];
01053 #endif
01054 mbstate_t prev_st;
01055 int byte_idx, end_idx, remain_len;
01056 size_t mbclen;
01057
01058
01059
01060 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
01061 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
01062 {
01063 wchar_t wc;
01064 const char *p;
01065
01066 remain_len = end_idx - byte_idx;
01067 prev_st = pstr->cur_state;
01068
01069 if (BE (pstr->trans != NULL, 0))
01070 {
01071 int i, ch;
01072
01073 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
01074 {
01075 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
01076 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
01077 }
01078 p = (const char *) buf;
01079 }
01080 else
01081 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
01082 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
01083 if (BE (mbclen == (size_t) -2, 0))
01084 {
01085
01086 pstr->cur_state = prev_st;
01087 break;
01088 }
01089 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
01090 {
01091
01092 mbclen = 1;
01093 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
01094 if (BE (pstr->trans != NULL, 0))
01095 wc = pstr->trans[wc];
01096 pstr->cur_state = prev_st;
01097 }
01098
01099
01100 pstr->wcs[byte_idx++] = wc;
01101
01102 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
01103 pstr->wcs[byte_idx++] = WEOF;
01104 }
01105 pstr->valid_len = byte_idx;
01106 pstr->valid_raw_len = byte_idx;
01107 }
01108
01109
01110
01111
/* Extend the byte and wide-character buffers of PSTR with the
   upper-cased form of the input, continuing from valid_len (and, in the
   general loop, from valid_raw_len on the raw side).

   There are two loops.  The first is used while no translation table,
   no map_notascii and no offsets are involved, so one index serves both
   the raw input and the output.  The second tracks the raw index
   (src_idx) separately and maintains pstr->offsets once an upper-case
   form has a different byte length than the original.  The first loop
   jumps into the second (label offsets_needed) the first time it meets
   such a length change or a failed wcrtomb.

   Returns REG_NOERROR, or REG_ESPACE when the offsets array cannot be
   allocated.  */
01112 static reg_errcode_t
01113 internal_function
01114 build_wcs_upper_buffer (re_string_t *pstr)
01115 {
01116 mbstate_t prev_st;
01117 int src_idx, byte_idx, end_idx, remain_len;
01118 size_t mbclen;
01119 #ifdef _LIBC
01120 char buf[MB_LEN_MAX];
01121 assert (MB_LEN_MAX >= pstr->mb_cur_max);
01122 #else
01123 char buf[64];
01124 #endif
01125
01126 byte_idx = pstr->valid_len;
01127 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
01128
01129 /* First loop: raw and output indexes are identical (byte_idx). */
01130
01131 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
01132 {
01133 while (byte_idx < end_idx)
01134 {
01135 wchar_t wc;
01136
01137 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
01138 && mbsinit (&pstr->cur_state))
01139 {
01140 /* ASCII byte in the initial shift state: upper-case it directly. */
01141 pstr->mbs[byte_idx]
01142 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
01143
01144 /* The wide value is taken to be the byte value itself. */
01145 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
01146 ++byte_idx;
01147 continue;
01148 }
01149
01150 remain_len = end_idx - byte_idx;
01151 prev_st = pstr->cur_state;
01152 mbclen = mbrtowc (&wc,
01153 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
01154 + byte_idx), remain_len, &pstr->cur_state);
01155 if (BE (mbclen + 2 > 2, 1)) /* unsigned wraparound: false exactly for 0, (size_t) -1 and (size_t) -2 */
01156 {
01157 wchar_t wcu = wc;
01158 if (iswlower (wc))
01159 {
01160 size_t mbcdlen;
01161
01162 wcu = towupper (wc);
01163 mbcdlen = wcrtomb (buf, wcu, &prev_st); /* converts with the saved pre-mbrtowc state */
01164 if (BE (mbclen == mbcdlen, 1))
01165 memcpy (pstr->mbs + byte_idx, buf, mbclen);
01166 else
01167 {
01168 src_idx = byte_idx; /* length changed (or wcrtomb failed): continue in the second loop */
01169 goto offsets_needed;
01170 }
01171 }
01172 else
01173 memcpy (pstr->mbs + byte_idx,
01174 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
01175 pstr->wcs[byte_idx++] = wcu;
01176 /* Remaining bytes of the character are marked WEOF. */
01177 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
01178 pstr->wcs[byte_idx++] = WEOF;
01179 }
01180 else if (mbclen == (size_t) -1 || mbclen == 0)
01181 {
01182 /* Invalid sequence or NUL: copy the raw byte unchanged. */
01183 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
01184 pstr->mbs[byte_idx] = ch;
01185 /* Its wide value is the byte value. */
01186 pstr->wcs[byte_idx++] = (wchar_t) ch;
01187 if (BE (mbclen == (size_t) -1, 0))
01188 pstr->cur_state = prev_st;
01189 }
01190 else
01191 {
01192 /* (size_t) -2: incomplete character within the window; restore the state and stop. */
01193 pstr->cur_state = prev_st;
01194 break;
01195 }
01196 }
01197 pstr->valid_len = byte_idx;
01198 pstr->valid_raw_len = byte_idx;
01199 return REG_NOERROR;
01200 }
01201 else
01202 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
01203 {
01204 wchar_t wc;
01205 const char *p;
01206 offsets_needed: /* entry point for the goto in the first loop */
01207 remain_len = end_idx - byte_idx;
01208 prev_st = pstr->cur_state;
01209 if (BE (pstr->trans != NULL, 0))
01210 {
01211 int i, ch;
01212 /* Translate up to one character's worth of raw bytes into buf. */
01213 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
01214 {
01215 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
01216 buf[i] = pstr->trans[ch];
01217 }
01218 p = (const char *) buf;
01219 }
01220 else
01221 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
01222 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
01223 if (BE (mbclen + 2 > 2, 1))
01224 {
01225 wchar_t wcu = wc;
01226 if (iswlower (wc))
01227 {
01228 size_t mbcdlen;
01229
01230 wcu = towupper (wc);
01231 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
01232 if (BE (mbclen == mbcdlen, 1))
01233 memcpy (pstr->mbs + byte_idx, buf, mbclen);
01234 else if (mbcdlen != (size_t) -1)
01235 {
01236 size_t i;
01237 /* The upper-case form has a different byte length. */
01238 if (byte_idx + mbcdlen > pstr->bufs_len)
01239 {
01240 pstr->cur_state = prev_st; /* does not fit: stop here so the caller can enlarge the buffers */
01241 break;
01242 }
01243
01244 if (pstr->offsets == NULL)
01245 {
01246 pstr->offsets = re_malloc (int, pstr->bufs_len);
01247
01248 if (pstr->offsets == NULL)
01249 return REG_ESPACE;
01250 }
01251 if (!pstr->offsets_needed)
01252 {
01253 for (i = 0; i < (size_t) byte_idx; ++i)
01254 pstr->offsets[i] = i; /* everything before this point maps one to one */
01255 pstr->offsets_needed = 1;
01256 }
01257
01258 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
01259 pstr->wcs[byte_idx] = wcu;
01260 pstr->offsets[byte_idx] = src_idx;
01261 for (i = 1; i < mbcdlen; ++i)
01262 {
01263 pstr->offsets[byte_idx + i]
01264 = src_idx + (i < mbclen ? i : mbclen - 1); /* extra output bytes map to the last raw byte of the character */
01265 pstr->wcs[byte_idx + i] = WEOF;
01266 }
01267 pstr->len += mbcdlen - mbclen; /* the processed string changes length */
01268 if (pstr->raw_stop > src_idx)
01269 pstr->stop += mbcdlen - mbclen;
01270 end_idx = (pstr->bufs_len > pstr->len)
01271 ? pstr->len : pstr->bufs_len;
01272 byte_idx += mbcdlen;
01273 src_idx += mbclen;
01274 continue;
01275 }
01276 else
01277 memcpy (pstr->mbs + byte_idx, p, mbclen); /* wcrtomb failed: keep the original bytes */
01278 }
01279 else
01280 memcpy (pstr->mbs + byte_idx, p, mbclen);
01281
01282 if (BE (pstr->offsets_needed != 0, 0))
01283 {
01284 size_t i;
01285 for (i = 0; i < mbclen; ++i)
01286 pstr->offsets[byte_idx + i] = src_idx + i;
01287 }
01288 src_idx += mbclen;
01289
01290 pstr->wcs[byte_idx++] = wcu;
01291 /* Remaining bytes of the character are marked WEOF. */
01292 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
01293 pstr->wcs[byte_idx++] = WEOF;
01294 }
01295 else if (mbclen == (size_t) -1 || mbclen == 0)
01296 {
01297 /* Invalid sequence or NUL: copy one (possibly translated) byte. */
01298 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
01299
01300 if (BE (pstr->trans != NULL, 0))
01301 ch = pstr->trans [ch];
01302 pstr->mbs[byte_idx] = ch;
01303
01304 if (BE (pstr->offsets_needed != 0, 0))
01305 pstr->offsets[byte_idx] = src_idx;
01306 ++src_idx;
01307
01308 /* Its wide value is the byte value. */
01309 pstr->wcs[byte_idx++] = (wchar_t) ch;
01310 if (BE (mbclen == (size_t) -1, 0))
01311 pstr->cur_state = prev_st;
01312 }
01313 else
01314 {
01315 /* (size_t) -2: incomplete character within the window; restore the state and stop. */
01316 pstr->cur_state = prev_st;
01317 break;
01318 }
01319 }
01320 pstr->valid_len = byte_idx;
01321 pstr->valid_raw_len = src_idx;
01322 return REG_NOERROR;
01323 }
01324
01325
01326
01327
01328 static int
01329 internal_function
01330 re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
01331 {
01332 mbstate_t prev_st;
01333 int rawbuf_idx;
01334 size_t mbclen;
01335 wchar_t wc = WEOF;
01336
01337
01338 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
01339 rawbuf_idx < new_raw_idx;)
01340 {
01341 int remain_len;
01342 remain_len = pstr->len - rawbuf_idx;
01343 prev_st = pstr->cur_state;
01344 mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
01345 remain_len, &pstr->cur_state);
01346 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
01347 {
01348
01349 if (mbclen == 0 || remain_len == 0)
01350 wc = L'\0';
01351 else
01352 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
01353 mbclen = 1;
01354 pstr->cur_state = prev_st;
01355 }
01356
01357 rawbuf_idx += mbclen;
01358 }
01359 *last_wc = (wint_t) wc;
01360 return rawbuf_idx;
01361 }
01362 #endif
01363
01364
01365
01366
01367 static void
01368 internal_function
01369 build_upper_buffer (re_string_t *pstr)
01370 {
01371 int char_idx, end_idx;
01372 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
01373
01374 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
01375 {
01376 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
01377 if (BE (pstr->trans != NULL, 0))
01378 ch = pstr->trans[ch];
01379 if (islower (ch))
01380 pstr->mbs[char_idx] = toupper (ch);
01381 else
01382 pstr->mbs[char_idx] = ch;
01383 }
01384 pstr->valid_len = char_idx;
01385 pstr->valid_raw_len = char_idx;
01386 }
01387
01388
01389
01390 static void
01391 internal_function
01392 re_string_translate_buffer (re_string_t *pstr)
01393 {
01394 int buf_idx, end_idx;
01395 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
01396
01397 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
01398 {
01399 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
01400 pstr->mbs[buf_idx] = pstr->trans[ch];
01401 }
01402
01403 pstr->valid_len = buf_idx;
01404 pstr->valid_raw_len = buf_idx;
01405 }
01406
01407
01408
01409
01410
01411 static reg_errcode_t
01412 internal_function
01413 re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
01414 {
01415 int offset = idx - pstr->raw_mbs_idx;
01416 if (BE (offset < 0, 0))
01417 {
01418
01419 #ifdef RE_ENABLE_I18N
01420 if (pstr->mb_cur_max > 1)
01421 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
01422 #endif
01423 pstr->len = pstr->raw_len;
01424 pstr->stop = pstr->raw_stop;
01425 pstr->valid_len = 0;
01426 pstr->raw_mbs_idx = 0;
01427 pstr->valid_raw_len = 0;
01428 pstr->offsets_needed = 0;
01429 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
01430 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
01431 if (!pstr->mbs_allocated)
01432 pstr->mbs = (unsigned char *) pstr->raw_mbs;
01433 offset = idx;
01434 }
01435
01436 if (BE (offset != 0, 1))
01437 {
01438
01439 if (BE (offset < pstr->valid_raw_len, 1))
01440 {
01441
01442 #ifdef RE_ENABLE_I18N
01443 if (BE (pstr->offsets_needed, 0))
01444 {
01445 int low = 0, high = pstr->valid_len, mid;
01446 do
01447 {
01448 mid = (high + low) / 2;
01449 if (pstr->offsets[mid] > offset)
01450 high = mid;
01451 else if (pstr->offsets[mid] < offset)
01452 low = mid + 1;
01453 else
01454 break;
01455 }
01456 while (low < high);
01457 if (pstr->offsets[mid] < offset)
01458 ++mid;
01459 pstr->tip_context = re_string_context_at (pstr, mid - 1,
01460 eflags);
01461
01462
01463
01464
01465 if (pstr->valid_len > offset
01466 && mid == offset && pstr->offsets[mid] == offset)
01467 {
01468 memmove (pstr->wcs, pstr->wcs + offset,
01469 (pstr->valid_len - offset) * sizeof (wint_t));
01470 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
01471 pstr->valid_len -= offset;
01472 pstr->valid_raw_len -= offset;
01473 for (low = 0; low < pstr->valid_len; low++)
01474 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
01475 }
01476 else
01477 {
01478
01479
01480 pstr->len = pstr->raw_len - idx + offset;
01481 pstr->stop = pstr->raw_stop - idx + offset;
01482 pstr->offsets_needed = 0;
01483 while (mid > 0 && pstr->offsets[mid - 1] == offset)
01484 --mid;
01485 while (mid < pstr->valid_len)
01486 if (pstr->wcs[mid] != WEOF)
01487 break;
01488 else
01489 ++mid;
01490 if (mid == pstr->valid_len)
01491 pstr->valid_len = 0;
01492 else
01493 {
01494 pstr->valid_len = pstr->offsets[mid] - offset;
01495 if (pstr->valid_len)
01496 {
01497 for (low = 0; low < pstr->valid_len; ++low)
01498 pstr->wcs[low] = WEOF;
01499 memset (pstr->mbs, 255, pstr->valid_len);
01500 }
01501 }
01502 pstr->valid_raw_len = pstr->valid_len;
01503 }
01504 }
01505 else
01506 #endif
01507 {
01508 pstr->tip_context = re_string_context_at (pstr, offset - 1,
01509 eflags);
01510 #ifdef RE_ENABLE_I18N
01511 if (pstr->mb_cur_max > 1)
01512 memmove (pstr->wcs, pstr->wcs + offset,
01513 (pstr->valid_len - offset) * sizeof (wint_t));
01514 #endif
01515 if (BE (pstr->mbs_allocated, 0))
01516 memmove (pstr->mbs, pstr->mbs + offset,
01517 pstr->valid_len - offset);
01518 pstr->valid_len -= offset;
01519 pstr->valid_raw_len -= offset;
01520 #if DEBUG
01521 assert (pstr->valid_len > 0);
01522 #endif
01523 }
01524 }
01525 else
01526 {
01527
01528 int prev_valid_len = pstr->valid_len;
01529
01530 #ifdef RE_ENABLE_I18N
01531 if (BE (pstr->offsets_needed, 0))
01532 {
01533 pstr->len = pstr->raw_len - idx + offset;
01534 pstr->stop = pstr->raw_stop - idx + offset;
01535 pstr->offsets_needed = 0;
01536 }
01537 #endif
01538 pstr->valid_len = 0;
01539 #ifdef RE_ENABLE_I18N
01540 if (pstr->mb_cur_max > 1)
01541 {
01542 int wcs_idx;
01543 wint_t wc = WEOF;
01544
01545 if (pstr->is_utf8)
01546 {
01547 const unsigned char *raw, *p, *q, *end;
01548
01549
01550
01551 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
01552 end = raw + (offset - pstr->mb_cur_max);
01553 if (end < pstr->raw_mbs)
01554 end = pstr->raw_mbs;
01555 p = raw + offset - 1;
01556 #ifdef _LIBC
01557
01558
01559 if (isascii (*p) && BE (pstr->trans == NULL, 1))
01560 {
01561 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
01562
01563 wc = (wchar_t) *p;
01564 }
01565 else
01566 #endif
01567 for (; p >= end; --p)
01568 if ((*p & 0xc0) != 0x80)
01569 {
01570 mbstate_t cur_state;
01571 wchar_t wc2;
01572 int mlen = raw + pstr->len - p;
01573 unsigned char buf[6];
01574 size_t mbclen;
01575
01576 q = p;
01577 if (BE (pstr->trans != NULL, 0))
01578 {
01579 int i = mlen < 6 ? mlen : 6;
01580 while (--i >= 0)
01581 buf[i] = pstr->trans[p[i]];
01582 q = buf;
01583 }
01584
01585
01586 memset (&cur_state, 0, sizeof (cur_state));
01587 mbclen = mbrtowc (&wc2, (const char *) p, mlen,
01588 &cur_state);
01589 if (raw + offset - p <= mbclen
01590 && mbclen < (size_t) -2)
01591 {
01592 memset (&pstr->cur_state, '\0',
01593 sizeof (mbstate_t));
01594 pstr->valid_len = mbclen - (raw + offset - p);
01595 wc = wc2;
01596 }
01597 break;
01598 }
01599 }
01600
01601 if (wc == WEOF)
01602 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
01603 if (wc == WEOF)
01604 pstr->tip_context
01605 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
01606 else
01607 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
01608 && IS_WIDE_WORD_CHAR (wc))
01609 ? CONTEXT_WORD
01610 : ((IS_WIDE_NEWLINE (wc)
01611 && pstr->newline_anchor)
01612 ? CONTEXT_NEWLINE : 0));
01613 if (BE (pstr->valid_len, 0))
01614 {
01615 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
01616 pstr->wcs[wcs_idx] = WEOF;
01617 if (pstr->mbs_allocated)
01618 memset (pstr->mbs, 255, pstr->valid_len);
01619 }
01620 pstr->valid_raw_len = pstr->valid_len;
01621 }
01622 else
01623 #endif
01624 {
01625 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
01626 pstr->valid_raw_len = 0;
01627 if (pstr->trans)
01628 c = pstr->trans[c];
01629 pstr->tip_context = (bitset_contain (pstr->word_char, c)
01630 ? CONTEXT_WORD
01631 : ((IS_NEWLINE (c) && pstr->newline_anchor)
01632 ? CONTEXT_NEWLINE : 0));
01633 }
01634 }
01635 if (!BE (pstr->mbs_allocated, 0))
01636 pstr->mbs += offset;
01637 }
01638 pstr->raw_mbs_idx = idx;
01639 pstr->len -= offset;
01640 pstr->stop -= offset;
01641
01642
01643 #ifdef RE_ENABLE_I18N
01644 if (pstr->mb_cur_max > 1)
01645 {
01646 if (pstr->icase)
01647 {
01648 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
01649 if (BE (ret != REG_NOERROR, 0))
01650 return ret;
01651 }
01652 else
01653 build_wcs_buffer (pstr);
01654 }
01655 else
01656 #endif
01657 if (BE (pstr->mbs_allocated, 0))
01658 {
01659 if (pstr->icase)
01660 build_upper_buffer (pstr);
01661 else if (pstr->trans != NULL)
01662 re_string_translate_buffer (pstr);
01663 }
01664 else
01665 pstr->valid_len = pstr->len;
01666
01667 pstr->cur_idx = 0;
01668 return REG_NOERROR;
01669 }
01670
01671 static unsigned char
01672 internal_function __attribute ((pure))
01673 re_string_peek_byte_case (const re_string_t *pstr, int idx)
01674 {
01675 int ch, off;
01676
01677
01678 if (BE (!pstr->mbs_allocated, 1))
01679 return re_string_peek_byte (pstr, idx);
01680
01681 #ifdef RE_ENABLE_I18N
01682 if (pstr->mb_cur_max > 1
01683 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
01684 return re_string_peek_byte (pstr, idx);
01685 #endif
01686
01687 off = pstr->cur_idx + idx;
01688 #ifdef RE_ENABLE_I18N
01689 if (pstr->offsets_needed)
01690 off = pstr->offsets[off];
01691 #endif
01692
01693 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
01694
01695 #ifdef RE_ENABLE_I18N
01696
01697
01698
01699
01700 if (pstr->offsets_needed && !isascii (ch))
01701 return re_string_peek_byte (pstr, idx);
01702 #endif
01703
01704 return ch;
01705 }
01706
01707 static unsigned char
01708 internal_function __attribute ((pure))
01709 re_string_fetch_byte_case (re_string_t *pstr)
01710 {
01711 if (BE (!pstr->mbs_allocated, 1))
01712 return re_string_fetch_byte (pstr);
01713
01714 #ifdef RE_ENABLE_I18N
01715 if (pstr->offsets_needed)
01716 {
01717 int off, ch;
01718
01719
01720
01721
01722
01723
01724
01725
01726 if (!re_string_first_byte (pstr, pstr->cur_idx))
01727 return re_string_fetch_byte (pstr);
01728
01729 off = pstr->offsets[pstr->cur_idx];
01730 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
01731
01732 if (! isascii (ch))
01733 return re_string_fetch_byte (pstr);
01734
01735 re_string_skip_bytes (pstr,
01736 re_string_char_size_at (pstr, pstr->cur_idx));
01737 return ch;
01738 }
01739 #endif
01740
01741 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
01742 }
01743
01744 static void
01745 internal_function
01746 re_string_destruct (re_string_t *pstr)
01747 {
01748 #ifdef RE_ENABLE_I18N
01749 re_free (pstr->wcs);
01750 re_free (pstr->offsets);
01751 #endif
01752 if (pstr->mbs_allocated)
01753 re_free (pstr->mbs);
01754 }
01755
01756
01757
01758 static unsigned int
01759 internal_function
01760 re_string_context_at (const re_string_t *input, int idx, int eflags)
01761 {
01762 int c;
01763 if (BE (idx < 0, 0))
01764
01765
01766 return input->tip_context;
01767 if (BE (idx == input->len, 0))
01768 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
01769 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
01770 #ifdef RE_ENABLE_I18N
01771 if (input->mb_cur_max > 1)
01772 {
01773 wint_t wc;
01774 int wc_idx = idx;
01775 while(input->wcs[wc_idx] == WEOF)
01776 {
01777 #ifdef DEBUG
01778
01779 assert (wc_idx >= 0);
01780 #endif
01781 --wc_idx;
01782 if (wc_idx < 0)
01783 return input->tip_context;
01784 }
01785 wc = input->wcs[wc_idx];
01786 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
01787 return CONTEXT_WORD;
01788 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
01789 ? CONTEXT_NEWLINE : 0);
01790 }
01791 else
01792 #endif
01793 {
01794 c = re_string_byte_at (input, idx);
01795 if (bitset_contain (input->word_char, c))
01796 return CONTEXT_WORD;
01797 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
01798 }
01799 }
01800
01801
01802
01803 static reg_errcode_t
01804 internal_function
01805 re_node_set_alloc (re_node_set *set, int size)
01806 {
01807 set->alloc = size;
01808 set->nelem = 0;
01809 set->elems = re_malloc (int, size);
01810 if (BE (set->elems == NULL, 0))
01811 return REG_ESPACE;
01812 return REG_NOERROR;
01813 }
01814
01815 static reg_errcode_t
01816 internal_function
01817 re_node_set_init_1 (re_node_set *set, int elem)
01818 {
01819 set->alloc = 1;
01820 set->nelem = 1;
01821 set->elems = re_malloc (int, 1);
01822 if (BE (set->elems == NULL, 0))
01823 {
01824 set->alloc = set->nelem = 0;
01825 return REG_ESPACE;
01826 }
01827 set->elems[0] = elem;
01828 return REG_NOERROR;
01829 }
01830
01831 static reg_errcode_t
01832 internal_function
01833 re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
01834 {
01835 set->alloc = 2;
01836 set->elems = re_malloc (int, 2);
01837 if (BE (set->elems == NULL, 0))
01838 return REG_ESPACE;
01839 if (elem1 == elem2)
01840 {
01841 set->nelem = 1;
01842 set->elems[0] = elem1;
01843 }
01844 else
01845 {
01846 set->nelem = 2;
01847 if (elem1 < elem2)
01848 {
01849 set->elems[0] = elem1;
01850 set->elems[1] = elem2;
01851 }
01852 else
01853 {
01854 set->elems[0] = elem2;
01855 set->elems[1] = elem1;
01856 }
01857 }
01858 return REG_NOERROR;
01859 }
01860
01861 static reg_errcode_t
01862 internal_function
01863 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
01864 {
01865 dest->nelem = src->nelem;
01866 if (src->nelem > 0)
01867 {
01868 dest->alloc = dest->nelem;
01869 dest->elems = re_malloc (int, dest->alloc);
01870 if (BE (dest->elems == NULL, 0))
01871 {
01872 dest->alloc = dest->nelem = 0;
01873 return REG_ESPACE;
01874 }
01875 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
01876 }
01877 else
01878 re_node_set_init_empty (dest);
01879 return REG_NOERROR;
01880 }
01881
01882 /* Add to DEST every element present in both SRC1 and SRC2 that DEST */
01883 /* does not already hold.  Returns REG_NOERROR, or REG_ESPACE when DEST */
01884 /* cannot be enlarged.  The scans below walk the arrays from the high */
01885 /* end and presumably rely on all three being sorted ascending. */
01886 static reg_errcode_t
01887 internal_function
01888 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
01889 const re_node_set *src2)
01890 {
01891 int i1, i2, is, id, delta, sbase;
01892 if (src1->nelem == 0 || src2->nelem == 0)
01893 return REG_NOERROR;
01894
01895 /* Make room for dest->nelem + src1->nelem + src2->nelem entries: the */
01896 /* space above the current elements is used as a staging area. */
01897 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
01898 {
01899 int new_alloc = src1->nelem + src2->nelem + dest->alloc;
01900 int *new_elems = re_realloc (dest->elems, int, new_alloc);
01901 if (BE (new_elems == NULL, 0))
01902 return REG_ESPACE;
01903 dest->elems = new_elems;
01904 dest->alloc = new_alloc;
01905 }
01906
01907 /* Find the common elements of SRC1 and SRC2, highest first, and stage */
01908 /* those missing from DEST downwards from index SBASE. */
01909 sbase = dest->nelem + src1->nelem + src2->nelem;
01910 i1 = src1->nelem - 1;
01911 i2 = src2->nelem - 1;
01912 id = dest->nelem - 1;
01913 for (;;)
01914 {
01915 if (src1->elems[i1] == src2->elems[i2])
01916 {
01917 /* Common element: look for it in DEST by a downward linear scan. */
01918 while (id >= 0 && dest->elems[id] > src1->elems[i1])
01919 --id;
01920
01921 if (id < 0 || dest->elems[id] != src1->elems[i1])
01922 dest->elems[--sbase] = src1->elems[i1];
01923
01924 if (--i1 < 0 || --i2 < 0)
01925 break;
01926 }
01927
01928 /* Not equal: step down in whichever source holds the larger value. */
01929 else if (src1->elems[i1] < src2->elems[i2])
01930 {
01931 if (--i2 < 0)
01932 break;
01933 }
01934 else
01935 {
01936 if (--i1 < 0)
01937 break;
01938 }
01939 }
01940
01941 id = dest->nelem - 1;
01942 is = dest->nelem + src1->nelem + src2->nelem - 1;
01943 delta = is - sbase + 1;
01944
01945 /* DELTA is the number of staged elements.  Merge them with the old */
01946 /* DEST elements from the top down, writing each value at its final */
01947 /* index ID + DELTA; stop when either side runs out. */
01948 dest->nelem += delta;
01949 if (delta > 0 && id >= 0)
01950 for (;;)
01951 {
01952 if (dest->elems[is] > dest->elems[id])
01953 {
01954 /* The staged element is larger: place it. */
01955 dest->elems[id + delta--] = dest->elems[is--];
01956 if (delta == 0)
01957 break;
01958 }
01959 else
01960 {
01961 /* The old DEST element is larger: slide it up by DELTA. */
01962 dest->elems[id + delta] = dest->elems[id];
01963 if (--id < 0)
01964 break;
01965 }
01966 }
01967
01968 /* Staged elements still unplaced (DELTA of them) go to the front. */
01969 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
01970
01971 return REG_NOERROR;
01972 }
01973
01974
01975
01976
01977 static reg_errcode_t
01978 internal_function
01979 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
01980 const re_node_set *src2)
01981 {
01982 int i1, i2, id;
01983 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
01984 {
01985 dest->alloc = src1->nelem + src2->nelem;
01986 dest->elems = re_malloc (int, dest->alloc);
01987 if (BE (dest->elems == NULL, 0))
01988 return REG_ESPACE;
01989 }
01990 else
01991 {
01992 if (src1 != NULL && src1->nelem > 0)
01993 return re_node_set_init_copy (dest, src1);
01994 else if (src2 != NULL && src2->nelem > 0)
01995 return re_node_set_init_copy (dest, src2);
01996 else
01997 re_node_set_init_empty (dest);
01998 return REG_NOERROR;
01999 }
02000 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
02001 {
02002 if (src1->elems[i1] > src2->elems[i2])
02003 {
02004 dest->elems[id++] = src2->elems[i2++];
02005 continue;
02006 }
02007 if (src1->elems[i1] == src2->elems[i2])
02008 ++i2;
02009 dest->elems[id++] = src1->elems[i1++];
02010 }
02011 if (i1 < src1->nelem)
02012 {
02013 memcpy (dest->elems + id, src1->elems + i1,
02014 (src1->nelem - i1) * sizeof (int));
02015 id += src1->nelem - i1;
02016 }
02017 else if (i2 < src2->nelem)
02018 {
02019 memcpy (dest->elems + id, src2->elems + i2,
02020 (src2->nelem - i2) * sizeof (int));
02021 id += src2->nelem - i2;
02022 }
02023 dest->nelem = id;
02024 return REG_NOERROR;
02025 }
02026 /* Add to DEST every element of SRC that DEST does not already hold. */
02027 /* A NULL or empty SRC is a no-op.  Returns REG_NOERROR, or REG_ESPACE */
02028 /* when DEST cannot be enlarged.  The scans walk the arrays from the */
02029 /* high end and presumably rely on both being sorted ascending. */
02030 static reg_errcode_t
02031 internal_function
02032 re_node_set_merge (re_node_set *dest, const re_node_set *src)
02033 {
02034 int is, id, sbase, delta;
02035 if (src == NULL || src->nelem == 0)
02036 return REG_NOERROR;
02037 if (dest->alloc < 2 * src->nelem + dest->nelem)
02038 {
02039 int new_alloc = 2 * (src->nelem + dest->alloc);
02040 int *new_buffer = re_realloc (dest->elems, int, new_alloc);
02041 if (BE (new_buffer == NULL, 0))
02042 return REG_ESPACE;
02043 dest->elems = new_buffer;
02044 dest->alloc = new_alloc;
02045 }
02046
02047 if (BE (dest->nelem == 0, 0))
02048 {
02049 dest->nelem = src->nelem;
02050 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
02051 return REG_NOERROR;
02052 }
02053
02054 /* Stage the elements of SRC that are missing from DEST, highest */
02055 /* first, downwards from index dest->nelem + 2 * src->nelem. */
02056 for (sbase = dest->nelem + 2 * src->nelem,
02057 is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
02058 {
02059 if (dest->elems[id] == src->elems[is])
02060 is--, id--;
02061 else if (dest->elems[id] < src->elems[is])
02062 dest->elems[--sbase] = src->elems[is--];
02063 else
02064 --id;
02065 }
02066
02067 if (is >= 0)
02068 {
02069 /* DEST ran out first: the IS + 1 remaining SRC elements are staged too. */
02070 sbase -= is + 1;
02071 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
02072 }
02073
02074 id = dest->nelem - 1;
02075 is = dest->nelem + 2 * src->nelem - 1;
02076 delta = is - sbase + 1;
02077 if (delta == 0)
02078 return REG_NOERROR;
02079
02080 /* DELTA elements were staged.  Merge them with the old DEST elements */
02081 /* from the top down, writing each value at its final index ID + DELTA. */
02082 dest->nelem += delta;
02083 for (;;)
02084 {
02085 if (dest->elems[is] > dest->elems[id])
02086 {
02087 /* The staged element is larger: place it. */
02088 dest->elems[id + delta--] = dest->elems[is--];
02089 if (delta == 0)
02090 break;
02091 }
02092 else
02093 {
02094 /* The old DEST element is larger or equal: slide it up by DELTA. */
02095 dest->elems[id + delta] = dest->elems[id];
02096 if (--id < 0)
02097 {
02098 /* Old elements exhausted: the staged rest goes to the front. */
02099 memcpy (dest->elems, dest->elems + sbase,
02100 delta * sizeof (int));
02101 break;
02102 }
02103 }
02104 }
02105
02106 return REG_NOERROR;
02107 }
02108
02109
02110
02111
02112
02113 static int
02114 internal_function
02115 re_node_set_insert (re_node_set *set, int elem)
02116 {
02117 int idx;
02118
02119 if (set->alloc == 0)
02120 {
02121 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
02122 return 1;
02123 else
02124 return -1;
02125 }
02126
02127 if (BE (set->nelem, 0) == 0)
02128 {
02129
02130 set->elems[0] = elem;
02131 ++set->nelem;
02132 return 1;
02133 }
02134
02135
02136 if (set->alloc == set->nelem)
02137 {
02138 int *new_elems;
02139 set->alloc = set->alloc * 2;
02140 new_elems = re_realloc (set->elems, int, set->alloc);
02141 if (BE (new_elems == NULL, 0))
02142 return -1;
02143 set->elems = new_elems;
02144 }
02145
02146
02147
02148 if (elem < set->elems[0])
02149 {
02150 idx = 0;
02151 for (idx = set->nelem; idx > 0; idx--)
02152 set->elems[idx] = set->elems[idx - 1];
02153 }
02154 else
02155 {
02156 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
02157 set->elems[idx] = set->elems[idx - 1];
02158 }
02159
02160
02161 set->elems[idx] = elem;
02162 ++set->nelem;
02163 return 1;
02164 }
02165
02166
02167
02168
02169
02170 static int
02171 internal_function
02172 re_node_set_insert_last (re_node_set *set, int elem)
02173 {
02174
02175 if (set->alloc == set->nelem)
02176 {
02177 int *new_elems;
02178 set->alloc = (set->alloc + 1) * 2;
02179 new_elems = re_realloc (set->elems, int, set->alloc);
02180 if (BE (new_elems == NULL, 0))
02181 return -1;
02182 set->elems = new_elems;
02183 }
02184
02185
02186 set->elems[set->nelem++] = elem;
02187 return 1;
02188 }
02189
02190
02191
02192
02193 static int
02194 internal_function __attribute ((pure))
02195 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
02196 {
02197 int i;
02198 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
02199 return 0;
02200 for (i = set1->nelem ; --i >= 0 ; )
02201 if (set1->elems[i] != set2->elems[i])
02202 return 0;
02203 return 1;
02204 }
02205
02206
02207
02208 static int
02209 internal_function __attribute ((pure))
02210 re_node_set_contains (const re_node_set *set, int elem)
02211 {
02212 unsigned int idx, right, mid;
02213 if (set->nelem <= 0)
02214 return 0;
02215
02216
02217 idx = 0;
02218 right = set->nelem - 1;
02219 while (idx < right)
02220 {
02221 mid = (idx + right) / 2;
02222 if (set->elems[mid] < elem)
02223 idx = mid + 1;
02224 else
02225 right = mid;
02226 }
02227 return set->elems[idx] == elem ? idx + 1 : 0;
02228 }
02229
02230 static void
02231 internal_function
02232 re_node_set_remove_at (re_node_set *set, int idx)
02233 {
02234 if (idx < 0 || idx >= set->nelem)
02235 return;
02236 --set->nelem;
02237 for (; idx < set->nelem; idx++)
02238 set->elems[idx] = set->elems[idx + 1];
02239 }
02240
02241
02242
02243
02244
02245 static int
02246 internal_function
02247 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
02248 {
02249 int type = token.type;
02250 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
02251 {
02252 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
02253 int *new_nexts, *new_indices;
02254 re_node_set *new_edests, *new_eclosures;
02255 re_token_t *new_nodes;
02256
02257
02258 if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
02259 return -1;
02260
02261 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
02262 if (BE (new_nodes == NULL, 0))
02263 return -1;
02264 dfa->nodes = new_nodes;
02265 new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
02266 new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
02267 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
02268 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
02269 if (BE (new_nexts == NULL || new_indices == NULL
02270 || new_edests == NULL || new_eclosures == NULL, 0))
02271 return -1;
02272 dfa->nexts = new_nexts;
02273 dfa->org_indices = new_indices;
02274 dfa->edests = new_edests;
02275 dfa->eclosures = new_eclosures;
02276 dfa->nodes_alloc = new_nodes_alloc;
02277 }
02278 dfa->nodes[dfa->nodes_len] = token;
02279 dfa->nodes[dfa->nodes_len].constraint = 0;
02280 #ifdef RE_ENABLE_I18N
02281 dfa->nodes[dfa->nodes_len].accept_mb =
02282 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
02283 #endif
02284 dfa->nexts[dfa->nodes_len] = -1;
02285 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
02286 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
02287 return dfa->nodes_len++;
02288 }
02289
02290 static inline unsigned int
02291 internal_function
02292 calc_state_hash (const re_node_set *nodes, unsigned int context)
02293 {
02294 unsigned int hash = nodes->nelem + context;
02295 int i;
02296 for (i = 0 ; i < nodes->nelem ; i++)
02297 hash += nodes->elems[i];
02298 return hash;
02299 }
02300
02301
02302
02303
02304
02305
02306
02307
02308
02309
02310 static re_dfastate_t *
02311 internal_function
02312 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
02313 const re_node_set *nodes)
02314 {
02315 unsigned int hash;
02316 re_dfastate_t *new_state;
02317 struct re_state_table_entry *spot;
02318 int i;
02319 if (BE (nodes->nelem == 0, 0))
02320 {
02321 *err = REG_NOERROR;
02322 return NULL;
02323 }
02324 hash = calc_state_hash (nodes, 0);
02325 spot = dfa->state_table + (hash & dfa->state_hash_mask);
02326
02327 for (i = 0 ; i < spot->num ; i++)
02328 {
02329 re_dfastate_t *state = spot->array[i];
02330 if (hash != state->hash)
02331 continue;
02332 if (re_node_set_compare (&state->nodes, nodes))
02333 return state;
02334 }
02335
02336
02337 new_state = create_ci_newstate (dfa, nodes, hash);
02338 if (BE (new_state == NULL, 0))
02339 *err = REG_ESPACE;
02340
02341 return new_state;
02342 }
02343
02344
02345
02346
02347
02348
02349
02350
02351
02352
02353
02354 static re_dfastate_t *
02355 internal_function
02356 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
02357 const re_node_set *nodes, unsigned int context)
02358 {
02359 unsigned int hash;
02360 re_dfastate_t *new_state;
02361 struct re_state_table_entry *spot;
02362 int i;
02363 if (nodes->nelem == 0)
02364 {
02365 *err = REG_NOERROR;
02366 return NULL;
02367 }
02368 hash = calc_state_hash (nodes, context);
02369 spot = dfa->state_table + (hash & dfa->state_hash_mask);
02370
02371 for (i = 0 ; i < spot->num ; i++)
02372 {
02373 re_dfastate_t *state = spot->array[i];
02374 if (state->hash == hash
02375 && state->context == context
02376 && re_node_set_compare (state->entrance_nodes, nodes))
02377 return state;
02378 }
02379
02380 new_state = create_cd_newstate (dfa, nodes, context, hash);
02381 if (BE (new_state == NULL, 0))
02382 *err = REG_ESPACE;
02383
02384 return new_state;
02385 }
02386
02387
02388
02389
02390
02391 static reg_errcode_t
02392 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
02393 unsigned int hash)
02394 {
02395 struct re_state_table_entry *spot;
02396 reg_errcode_t err;
02397 int i;
02398
02399 newstate->hash = hash;
02400 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
02401 if (BE (err != REG_NOERROR, 0))
02402 return REG_ESPACE;
02403 for (i = 0; i < newstate->nodes.nelem; i++)
02404 {
02405 int elem = newstate->nodes.elems[i];
02406 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
02407 re_node_set_insert_last (&newstate->non_eps_nodes, elem);
02408 }
02409
02410 spot = dfa->state_table + (hash & dfa->state_hash_mask);
02411 if (BE (spot->alloc <= spot->num, 0))
02412 {
02413 int new_alloc = 2 * spot->num + 2;
02414 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
02415 new_alloc);
02416 if (BE (new_array == NULL, 0))
02417 return REG_ESPACE;
02418 spot->array = new_array;
02419 spot->alloc = new_alloc;
02420 }
02421 spot->array[spot->num++] = newstate;
02422 return REG_NOERROR;
02423 }
02424
02425 static void
02426 free_state (re_dfastate_t *state)
02427 {
02428 re_node_set_free (&state->non_eps_nodes);
02429 re_node_set_free (&state->inveclosure);
02430 if (state->entrance_nodes != &state->nodes)
02431 {
02432 re_node_set_free (state->entrance_nodes);
02433 re_free (state->entrance_nodes);
02434 }
02435 re_node_set_free (&state->nodes);
02436 re_free (state->word_trtable);
02437 re_free (state->trtable);
02438 re_free (state);
02439 }
02440
02441
02442
02443
02444 static re_dfastate_t *
02445 internal_function
02446 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
02447 unsigned int hash)
02448 {
02449 int i;
02450 reg_errcode_t err;
02451 re_dfastate_t *newstate;
02452
02453 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
02454 if (BE (newstate == NULL, 0))
02455 return NULL;
02456 err = re_node_set_init_copy (&newstate->nodes, nodes);
02457 if (BE (err != REG_NOERROR, 0))
02458 {
02459 re_free (newstate);
02460 return NULL;
02461 }
02462
02463 newstate->entrance_nodes = &newstate->nodes;
02464 for (i = 0 ; i < nodes->nelem ; i++)
02465 {
02466 re_token_t *node = dfa->nodes + nodes->elems[i];
02467 re_token_type_t type = node->type;
02468 if (type == CHARACTER && !node->constraint)
02469 continue;
02470 #ifdef RE_ENABLE_I18N
02471 newstate->accept_mb |= node->accept_mb;
02472 #endif
02473
02474
02475 if (type == END_OF_RE)
02476 newstate->halt = 1;
02477 else if (type == OP_BACK_REF)
02478 newstate->has_backref = 1;
02479 else if (type == ANCHOR || node->constraint)
02480 newstate->has_constraint = 1;
02481 }
02482 err = register_state (dfa, newstate, hash);
02483 if (BE (err != REG_NOERROR, 0))
02484 {
02485 free_state (newstate);
02486 newstate = NULL;
02487 }
02488 return newstate;
02489 }
02490
02491
02492
02493
02494 static re_dfastate_t *
02495 internal_function
02496 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
02497 unsigned int context, unsigned int hash)
02498 {
02499 int i, nctx_nodes = 0;
02500 reg_errcode_t err;
02501 re_dfastate_t *newstate;
02502
02503 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
02504 if (BE (newstate == NULL, 0))
02505 return NULL;
02506 err = re_node_set_init_copy (&newstate->nodes, nodes);
02507 if (BE (err != REG_NOERROR, 0))
02508 {
02509 re_free (newstate);
02510 return NULL;
02511 }
02512
02513 newstate->context = context;
02514 newstate->entrance_nodes = &newstate->nodes;
02515
02516 for (i = 0 ; i < nodes->nelem ; i++)
02517 {
02518 unsigned int constraint = 0;
02519 re_token_t *node = dfa->nodes + nodes->elems[i];
02520 re_token_type_t type = node->type;
02521 if (node->constraint)
02522 constraint = node->constraint;
02523
02524 if (type == CHARACTER && !constraint)
02525 continue;
02526 #ifdef RE_ENABLE_I18N
02527 newstate->accept_mb |= node->accept_mb;
02528 #endif
02529
02530
02531 if (type == END_OF_RE)
02532 newstate->halt = 1;
02533 else if (type == OP_BACK_REF)
02534 newstate->has_backref = 1;
02535 else if (type == ANCHOR)
02536 constraint = node->opr.ctx_type;
02537
02538 if (constraint)
02539 {
02540 if (newstate->entrance_nodes == &newstate->nodes)
02541 {
02542 newstate->entrance_nodes = re_malloc (re_node_set, 1);
02543 if (BE (newstate->entrance_nodes == NULL, 0))
02544 {
02545 free_state (newstate);
02546 return NULL;
02547 }
02548 re_node_set_init_copy (newstate->entrance_nodes, nodes);
02549 nctx_nodes = 0;
02550 newstate->has_constraint = 1;
02551 }
02552
02553 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
02554 {
02555 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
02556 ++nctx_nodes;
02557 }
02558 }
02559 }
02560 err = register_state (dfa, newstate, hash);
02561 if (BE (err != REG_NOERROR, 0))
02562 {
02563 free_state (newstate);
02564 newstate = NULL;
02565 }
02566 return newstate;
02567 }
02568
02569
02570
02571
02572
02573
02574
02575
02576
02577
02578
02579
02580
02581
02582
02583
02584
02585
02586
02587
02588
02589
02590
02591
02592
02593
02594
02595
02596 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
02597 size_t length, reg_syntax_t syntax);
02598 static void re_compile_fastmap_iter (regex_t *bufp,
02599 const re_dfastate_t *init_state,
02600 char *fastmap);
02601 static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
02602 #ifdef RE_ENABLE_I18N
02603 static void free_charset (re_charset_t *cset);
02604 #endif
02605 static void free_workarea_compile (regex_t *preg);
02606 static reg_errcode_t create_initial_state (re_dfa_t *dfa);
02607 #ifdef RE_ENABLE_I18N
02608 static void optimize_utf8 (re_dfa_t *dfa);
02609 #endif
02610 static reg_errcode_t analyze (regex_t *preg);
02611 static reg_errcode_t preorder (bin_tree_t *root,
02612 reg_errcode_t (fn (void *, bin_tree_t *)),
02613 void *extra);
02614 static reg_errcode_t postorder (bin_tree_t *root,
02615 reg_errcode_t (fn (void *, bin_tree_t *)),
02616 void *extra);
02617 static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
02618 static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
02619 static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
02620 bin_tree_t *node);
02621 static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
02622 static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
02623 static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
02624 static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
02625 static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
02626 unsigned int constraint);
02627 static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
02628 static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
02629 int node, int root);
02630 static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
02631 static int fetch_number (re_string_t *input, re_token_t *token,
02632 reg_syntax_t syntax);
02633 static int peek_token (re_token_t *token, re_string_t *input,
02634 reg_syntax_t syntax) internal_function;
02635 static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
02636 reg_syntax_t syntax, reg_errcode_t *err);
02637 static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
02638 re_token_t *token, reg_syntax_t syntax,
02639 int nest, reg_errcode_t *err);
02640 static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
02641 re_token_t *token, reg_syntax_t syntax,
02642 int nest, reg_errcode_t *err);
02643 static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
02644 re_token_t *token, reg_syntax_t syntax,
02645 int nest, reg_errcode_t *err);
02646 static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
02647 re_token_t *token, reg_syntax_t syntax,
02648 int nest, reg_errcode_t *err);
02649 static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
02650 re_dfa_t *dfa, re_token_t *token,
02651 reg_syntax_t syntax, reg_errcode_t *err);
02652 static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
02653 re_token_t *token, reg_syntax_t syntax,
02654 reg_errcode_t *err);
02655 static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
02656 re_string_t *regexp,
02657 re_token_t *token, int token_len,
02658 re_dfa_t *dfa,
02659 reg_syntax_t syntax,
02660 int accept_hyphen);
02661 static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
02662 re_string_t *regexp,
02663 re_token_t *token);
02664 #ifdef RE_ENABLE_I18N
02665 static reg_errcode_t build_equiv_class (bitset_t sbcset,
02666 re_charset_t *mbcset,
02667 int *equiv_class_alloc,
02668 const unsigned char *name);
02669 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
02670 bitset_t sbcset,
02671 re_charset_t *mbcset,
02672 int *char_class_alloc,
02673 const unsigned char *class_name,
02674 reg_syntax_t syntax);
02675 #else
02676 static reg_errcode_t build_equiv_class (bitset_t sbcset,
02677 const unsigned char *name);
02678 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
02679 bitset_t sbcset,
02680 const unsigned char *class_name,
02681 reg_syntax_t syntax);
02682 #endif
02683 static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
02684 RE_TRANSLATE_TYPE trans,
02685 const unsigned char *class_name,
02686 const unsigned char *extra,
02687 int non_match, reg_errcode_t *err);
02688 static bin_tree_t *create_tree (re_dfa_t *dfa,
02689 bin_tree_t *left, bin_tree_t *right,
02690 re_token_type_t type);
02691 static bin_tree_t *create_token_tree (re_dfa_t *dfa,
02692 bin_tree_t *left, bin_tree_t *right,
02693 const re_token_t *token);
02694 static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
02695 static void free_token (re_token_t *node);
02696 static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
02697 static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
02698
02699
02700
02701
02702
02703
02704 const char __re_error_msgid[] attribute_hidden =
02705 {
02706 #define REG_NOERROR_IDX 0
02707 gettext_noop ("Success")
02708 "\0"
02709 #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
02710 gettext_noop ("No match")
02711 "\0"
02712 #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
02713 gettext_noop ("Invalid regular expression")
02714 "\0"
02715 #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
02716 gettext_noop ("Invalid collation character")
02717 "\0"
02718 #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
02719 gettext_noop ("Invalid character class name")
02720 "\0"
02721 #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
02722 gettext_noop ("Trailing backslash")
02723 "\0"
02724 #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
02725 gettext_noop ("Invalid back reference")
02726 "\0"
02727 #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
02728 gettext_noop ("Unmatched [ or [^")
02729 "\0"
02730 #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
02731 gettext_noop ("Unmatched ( or \\(")
02732 "\0"
02733 #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
02734 gettext_noop ("Unmatched \\{")
02735 "\0"
02736 #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
02737 gettext_noop ("Invalid content of \\{\\}")
02738 "\0"
02739 #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
02740 gettext_noop ("Invalid range end")
02741 "\0"
02742 #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
02743 gettext_noop ("Memory exhausted")
02744 "\0"
02745 #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
02746 gettext_noop ("Invalid preceding regular expression")
02747 "\0"
02748 #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
02749 gettext_noop ("Premature end of regular expression")
02750 "\0"
02751 #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
02752 gettext_noop ("Regular expression too big")
02753 "\0"
02754 #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
02755 gettext_noop ("Unmatched ) or \\)")
02756 };
02757
02758 const size_t __re_error_msgid_idx[] attribute_hidden =
02759 {
02760 REG_NOERROR_IDX,
02761 REG_NOMATCH_IDX,
02762 REG_BADPAT_IDX,
02763 REG_ECOLLATE_IDX,
02764 REG_ECTYPE_IDX,
02765 REG_EESCAPE_IDX,
02766 REG_ESUBREG_IDX,
02767 REG_EBRACK_IDX,
02768 REG_EPAREN_IDX,
02769 REG_EBRACE_IDX,
02770 REG_BADBR_IDX,
02771 REG_ERANGE_IDX,
02772 REG_ESPACE_IDX,
02773 REG_BADRPT_IDX,
02774 REG_EEND_IDX,
02775 REG_ESIZE_IDX,
02776 REG_ERPAREN_IDX
02777 };
02778
02779
02780
02781
02782
02783
02784
02785
02786
02787
02788 const char *
02789 re_compile_pattern (pattern, length, bufp)
02790 const char *pattern;
02791 size_t length;
02792 struct re_pattern_buffer *bufp;
02793 {
02794 reg_errcode_t ret;
02795
02796
02797
02798
02799 bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
02800
02801
02802 bufp->newline_anchor = 1;
02803
02804 ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
02805
02806 if (!ret)
02807 return NULL;
02808 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
02809 }
02810 #ifdef _LIBC
02811 weak_alias (__re_compile_pattern, re_compile_pattern)
02812 #endif
02813
02814
02815
02816
02817
02818
02819 reg_syntax_t re_syntax_options;
02820
02821
02822
02823
02824
02825
02826
02827
02828
02829 reg_syntax_t
02830 re_set_syntax (syntax)
02831 reg_syntax_t syntax;
02832 {
02833 reg_syntax_t ret = re_syntax_options;
02834
02835 re_syntax_options = syntax;
02836 return ret;
02837 }
02838 #ifdef _LIBC
02839 weak_alias (__re_set_syntax, re_set_syntax)
02840 #endif
02841
02842 int
02843 re_compile_fastmap (bufp)
02844 struct re_pattern_buffer *bufp;
02845 {
02846 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
02847 char *fastmap = bufp->fastmap;
02848
02849 memset (fastmap, '\0', sizeof (char) * SBC_MAX);
02850 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
02851 if (dfa->init_state != dfa->init_state_word)
02852 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
02853 if (dfa->init_state != dfa->init_state_nl)
02854 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
02855 if (dfa->init_state != dfa->init_state_begbuf)
02856 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
02857 bufp->fastmap_accurate = 1;
02858 return 0;
02859 }
02860 #ifdef _LIBC
02861 weak_alias (__re_compile_fastmap, re_compile_fastmap)
02862 #endif
02863
/* Mark byte CH in FASTMAP.  When ICASE is nonzero the entry for
   tolower (CH) is marked as well.  CH is used directly as an array index,
   so it must be a valid index into FASTMAP.  */
static inline void
__attribute ((always_inline))
re_set_fastmap (char *fastmap, int icase, int ch)
{
  /* Without case folding the "folded" slot is simply CH again.  */
  const int folded = icase ? tolower (ch) : ch;

  fastmap[ch] = 1;
  fastmap[folded] = 1;
}
02872
02873
02874
02875
02876 static void
02877 re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
02878 char *fastmap)
02879 {
02880 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
02881 int node_cnt;
02882 int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
02883 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
02884 {
02885 int node = init_state->nodes.elems[node_cnt];
02886 re_token_type_t type = dfa->nodes[node].type;
02887
02888 if (type == CHARACTER)
02889 {
02890 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
02891 #ifdef RE_ENABLE_I18N
02892 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
02893 {
02894 unsigned char *buf = alloca (dfa->mb_cur_max), *p;
02895 wchar_t wc;
02896 mbstate_t state;
02897
02898 p = buf;
02899 *p++ = dfa->nodes[node].opr.c;
02900 while (++node < dfa->nodes_len
02901 && dfa->nodes[node].type == CHARACTER
02902 && dfa->nodes[node].mb_partial)
02903 *p++ = dfa->nodes[node].opr.c;
02904 memset (&state, '\0', sizeof (state));
02905 if (mbrtowc (&wc, (const char *) buf, p - buf,
02906 &state) == p - buf
02907 && (__wcrtomb ((char *) buf, towlower (wc), &state)
02908 != (size_t) -1))
02909 re_set_fastmap (fastmap, 0, buf[0]);
02910 }
02911 #endif
02912 }
02913 else if (type == SIMPLE_BRACKET)
02914 {
02915 int i, ch;
02916 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
02917 {
02918 int j;
02919 bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
02920 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
02921 if (w & ((bitset_word_t) 1 << j))
02922 re_set_fastmap (fastmap, icase, ch);
02923 }
02924 }
02925 #ifdef RE_ENABLE_I18N
02926 else if (type == COMPLEX_BRACKET)
02927 {
02928 int i;
02929 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
02930 if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
02931 || cset->nranges || cset->nchar_classes)
02932 {
02933 # ifdef _LIBC
02934 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
02935 {
02936
02937
02938
02939
02940
02941
02942 const int32_t *table = (const int32_t *)
02943 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
02944 for (i = 0; i < SBC_MAX; ++i)
02945 if (table[i] < 0)
02946 re_set_fastmap (fastmap, icase, i);
02947 }
02948 # else
02949 if (dfa->mb_cur_max > 1)
02950 for (i = 0; i < SBC_MAX; ++i)
02951 if (__btowc (i) == WEOF)
02952 re_set_fastmap (fastmap, icase, i);
02953 # endif
02954 }
02955 for (i = 0; i < cset->nmbchars; ++i)
02956 {
02957 char buf[256];
02958 mbstate_t state;
02959 memset (&state, '\0', sizeof (state));
02960 if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
02961 re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
02962 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
02963 {
02964 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
02965 != (size_t) -1)
02966 re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
02967 }
02968 }
02969 }
02970 #endif
02971 else if (type == OP_PERIOD
02972 #ifdef RE_ENABLE_I18N
02973 || type == OP_UTF8_PERIOD
02974 #endif
02975 || type == END_OF_RE)
02976 {
02977 memset (fastmap, '\1', sizeof (char) * SBC_MAX);
02978 if (type == END_OF_RE)
02979 bufp->can_be_null = 1;
02980 return;
02981 }
02982 }
02983 }
02984
02985
02986
02987
02988
02989
02990
02991
02992
02993
02994
02995
02996
02997
02998
02999
03000
03001
03002
03003
03004
03005
03006
03007
03008
03009
03010
03011
03012
03013
03014
03015
03016
03017
03018
03019
03020
03021 int
03022 regcomp (preg, pattern, cflags)
03023 regex_t *__restrict preg;
03024 const char *__restrict pattern;
03025 int cflags;
03026 {
03027 reg_errcode_t ret;
03028 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
03029 : RE_SYNTAX_POSIX_BASIC);
03030
03031 preg->buffer = NULL;
03032 preg->allocated = 0;
03033 preg->used = 0;
03034
03035
03036 preg->fastmap = re_malloc (char, SBC_MAX);
03037 if (BE (preg->fastmap == NULL, 0))
03038 return REG_ESPACE;
03039
03040 syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
03041
03042
03043 if (cflags & REG_NEWLINE)
03044 {
03045 syntax &= ~RE_DOT_NEWLINE;
03046 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
03047
03048 preg->newline_anchor = 1;
03049 }
03050 else
03051 preg->newline_anchor = 0;
03052 preg->no_sub = !!(cflags & REG_NOSUB);
03053 preg->translate = NULL;
03054
03055 ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
03056
03057
03058
03059 if (ret == REG_ERPAREN)
03060 ret = REG_EPAREN;
03061
03062
03063 if (BE (ret == REG_NOERROR, 1))
03064
03065
03066 (void) re_compile_fastmap (preg);
03067 else
03068 {
03069
03070 re_free (preg->fastmap);
03071 preg->fastmap = NULL;
03072 }
03073
03074 return (int) ret;
03075 }
03076 #ifdef _LIBC
03077 weak_alias (__regcomp, regcomp)
03078 #endif
03079
03080
03081
03082
03083
03084 size_t
03085 regerror (
03086 int errcode,
03087 const regex_t *__restrict preg,
03088 char *__restrict errbuf,
03089 size_t errbuf_size)
03090 {
03091 const char *msg;
03092 size_t msg_size;
03093
03094 if (BE (errcode < 0
03095 || errcode >= (int) (sizeof (__re_error_msgid_idx)
03096 / sizeof (__re_error_msgid_idx[0])), 0))
03097
03098
03099
03100
03101 abort ();
03102
03103 msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
03104
03105 msg_size = strlen (msg) + 1;
03106
03107 if (BE (errbuf_size != 0, 1))
03108 {
03109 if (BE (msg_size > errbuf_size, 0))
03110 {
03111 #if defined HAVE_MEMPCPY || defined _LIBC
03112 *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
03113 #else
03114 memcpy (errbuf, msg, errbuf_size - 1);
03115 errbuf[errbuf_size - 1] = 0;
03116 #endif
03117 }
03118 else
03119 memcpy (errbuf, msg, msg_size);
03120 }
03121
03122 return msg_size;
03123 }
03124 #ifdef _LIBC
03125 weak_alias (__regerror, regerror)
03126 #endif
03127
03128
#ifdef RE_ENABLE_I18N
/* Constant bitset installed as dfa->sb_char by init_dfa when the locale is
   UTF-8.  The words covering byte values below 0x80 are set to
   BITSET_WORD_MAX; the remaining words are zero-initialised.  Uses the GNU
   range-designator extension.  */
static const bitset_t utf8_sb_map =
  {
    [0 ... (0x80 / BITSET_WORD_BITS) - 1] = BITSET_WORD_MAX
  };
#endif
03140
03141
03142 static void
03143 free_dfa_content (re_dfa_t *dfa)
03144 {
03145 int i, j;
03146
03147 if (dfa->nodes)
03148 for (i = 0; i < dfa->nodes_len; ++i)
03149 free_token (dfa->nodes + i);
03150 re_free (dfa->nexts);
03151 for (i = 0; i < dfa->nodes_len; ++i)
03152 {
03153 if (dfa->eclosures != NULL)
03154 re_node_set_free (dfa->eclosures + i);
03155 if (dfa->inveclosures != NULL)
03156 re_node_set_free (dfa->inveclosures + i);
03157 if (dfa->edests != NULL)
03158 re_node_set_free (dfa->edests + i);
03159 }
03160 re_free (dfa->edests);
03161 re_free (dfa->eclosures);
03162 re_free (dfa->inveclosures);
03163 re_free (dfa->nodes);
03164
03165 if (dfa->state_table)
03166 for (i = 0; i <= dfa->state_hash_mask; ++i)
03167 {
03168 struct re_state_table_entry *entry = dfa->state_table + i;
03169 for (j = 0; j < entry->num; ++j)
03170 {
03171 re_dfastate_t *state = entry->array[j];
03172 free_state (state);
03173 }
03174 re_free (entry->array);
03175 }
03176 re_free (dfa->state_table);
03177 #ifdef RE_ENABLE_I18N
03178 if (dfa->sb_char != utf8_sb_map)
03179 re_free (dfa->sb_char);
03180 #endif
03181 re_free (dfa->subexp_map);
03182 #ifdef DEBUG
03183 re_free (dfa->re_str);
03184 #endif
03185
03186 re_free (dfa);
03187 }
03188
03189
03190
03191
03192 void
03193 regfree (preg)
03194 regex_t *preg;
03195 {
03196 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
03197 if (BE (dfa != NULL, 1))
03198 free_dfa_content (dfa);
03199 preg->buffer = NULL;
03200 preg->allocated = 0;
03201
03202 re_free (preg->fastmap);
03203 preg->fastmap = NULL;
03204
03205 re_free (preg->translate);
03206 preg->translate = NULL;
03207 }
03208 #ifdef _LIBC
03209 weak_alias (__regfree, regfree)
03210 #endif
03211
03212
03213
03214
03215 #if defined _REGEX_RE_COMP || defined _LIBC
03216
03217
03218 static struct re_pattern_buffer re_comp_buf;
03219
03220 char *
03221 # ifdef _LIBC
03222
03223
03224
03225 weak_function
03226 # endif
03227 re_comp (s)
03228 const char *s;
03229 {
03230 reg_errcode_t ret;
03231 char *fastmap;
03232
03233 if (!s)
03234 {
03235 if (!re_comp_buf.buffer)
03236 return gettext ("No previous regular expression");
03237 return 0;
03238 }
03239
03240 if (re_comp_buf.buffer)
03241 {
03242 fastmap = re_comp_buf.fastmap;
03243 re_comp_buf.fastmap = NULL;
03244 __regfree (&re_comp_buf);
03245 memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
03246 re_comp_buf.fastmap = fastmap;
03247 }
03248
03249 if (re_comp_buf.fastmap == NULL)
03250 {
03251 re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
03252 if (re_comp_buf.fastmap == NULL)
03253 return (char *) gettext (__re_error_msgid
03254 + __re_error_msgid_idx[(int) REG_ESPACE]);
03255 }
03256
03257
03258
03259
03260
03261 re_comp_buf.newline_anchor = 1;
03262
03263 ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
03264
03265 if (!ret)
03266 return NULL;
03267
03268
03269 return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
03270 }
03271
03272 #ifdef _LIBC
03273 libc_freeres_fn (free_mem)
03274 {
03275 __regfree (&re_comp_buf);
03276 }
03277 #endif
03278
03279 #endif
03280
03281
03282
03283
03284
03285 static reg_errcode_t
03286 re_compile_internal (regex_t *preg, const char * pattern, size_t length,
03287 reg_syntax_t syntax)
03288 {
03289 reg_errcode_t err = REG_NOERROR;
03290 re_dfa_t *dfa;
03291 re_string_t regexp;
03292
03293
03294 preg->fastmap_accurate = 0;
03295 preg->syntax = syntax;
03296 preg->not_bol = preg->not_eol = 0;
03297 preg->used = 0;
03298 preg->re_nsub = 0;
03299 preg->can_be_null = 0;
03300 preg->regs_allocated = REGS_UNALLOCATED;
03301
03302
03303 dfa = (re_dfa_t *) preg->buffer;
03304 if (BE (preg->allocated < sizeof (re_dfa_t), 0))
03305 {
03306
03307
03308
03309
03310 dfa = re_realloc (preg->buffer, re_dfa_t, 1);
03311 if (dfa == NULL)
03312 return REG_ESPACE;
03313 preg->allocated = sizeof (re_dfa_t);
03314 preg->buffer = (unsigned char *) dfa;
03315 }
03316 preg->used = sizeof (re_dfa_t);
03317
03318 err = init_dfa (dfa, length);
03319 if (BE (err != REG_NOERROR, 0))
03320 {
03321 free_dfa_content (dfa);
03322 preg->buffer = NULL;
03323 preg->allocated = 0;
03324 return err;
03325 }
03326 #ifdef DEBUG
03327
03328 dfa->re_str = re_malloc (char, length + 1);
03329 strncpy (dfa->re_str, pattern, length + 1);
03330 #endif
03331
03332 __libc_lock_init (dfa->lock);
03333
03334 err = re_string_construct (®exp, pattern, length, preg->translate,
03335 syntax & RE_ICASE, dfa);
03336 if (BE (err != REG_NOERROR, 0))
03337 {
03338 re_compile_internal_free_return:
03339 free_workarea_compile (preg);
03340 re_string_destruct (®exp);
03341 free_dfa_content (dfa);
03342 preg->buffer = NULL;
03343 preg->allocated = 0;
03344 return err;
03345 }
03346
03347
03348 preg->re_nsub = 0;
03349 dfa->str_tree = parse (®exp, preg, syntax, &err);
03350 if (BE (dfa->str_tree == NULL, 0))
03351 goto re_compile_internal_free_return;
03352
03353
03354 err = analyze (preg);
03355 if (BE (err != REG_NOERROR, 0))
03356 goto re_compile_internal_free_return;
03357
03358 #ifdef RE_ENABLE_I18N
03359
03360 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
03361 optimize_utf8 (dfa);
03362 #endif
03363
03364
03365 err = create_initial_state (dfa);
03366
03367
03368 free_workarea_compile (preg);
03369 re_string_destruct (®exp);
03370
03371 if (BE (err != REG_NOERROR, 0))
03372 {
03373 free_dfa_content (dfa);
03374 preg->buffer = NULL;
03375 preg->allocated = 0;
03376 }
03377
03378 return err;
03379 }
03380
03381
03382
03383
03384 static reg_errcode_t
03385 init_dfa (re_dfa_t *dfa, size_t pat_len)
03386 {
03387 unsigned int table_size;
03388 #ifndef _LIBC
03389 char *codeset_name;
03390 #endif
03391
03392 memset (dfa, '\0', sizeof (re_dfa_t));
03393
03394
03395 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
03396
03397
03398 if (pat_len == SIZE_MAX)
03399 return REG_ESPACE;
03400
03401 dfa->nodes_alloc = pat_len + 1;
03402 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
03403
03404
03405 for (table_size = 1; ; table_size <<= 1)
03406 if (table_size > pat_len)
03407 break;
03408
03409 dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
03410 dfa->state_hash_mask = table_size - 1;
03411
03412 dfa->mb_cur_max = MB_CUR_MAX;
03413 #ifdef _LIBC
03414 if (dfa->mb_cur_max == 6
03415 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
03416 dfa->is_utf8 = 1;
03417 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
03418 != 0);
03419 #else
03420 # ifdef HAVE_LANGINFO_CODESET
03421 codeset_name = nl_langinfo (CODESET);
03422 # else
03423 codeset_name = getenv ("LC_ALL");
03424 if (codeset_name == NULL || codeset_name[0] == '\0')
03425 codeset_name = getenv ("LC_CTYPE");
03426 if (codeset_name == NULL || codeset_name[0] == '\0')
03427 codeset_name = getenv ("LANG");
03428 if (codeset_name == NULL)
03429 codeset_name = "";
03430 else if (strchr (codeset_name, '.') != NULL)
03431 codeset_name = strchr (codeset_name, '.') + 1;
03432 # endif
03433
03434 if (strcasecmp (codeset_name, "UTF-8") == 0
03435 || strcasecmp (codeset_name, "UTF8") == 0)
03436 dfa->is_utf8 = 1;
03437
03438
03439
03440 dfa->map_notascii = 0;
03441 #endif
03442
03443 #ifdef RE_ENABLE_I18N
03444 if (dfa->mb_cur_max > 1)
03445 {
03446 if (dfa->is_utf8)
03447 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
03448 else
03449 {
03450 int i, j, ch;
03451
03452 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
03453 if (BE (dfa->sb_char == NULL, 0))
03454 return REG_ESPACE;
03455
03456
03457 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
03458 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
03459 {
03460 wint_t wch = __btowc (ch);
03461 if (wch != WEOF)
03462 dfa->sb_char[i] |= (bitset_word_t) 1 << j;
03463 # ifndef _LIBC
03464 if (isascii (ch) && wch != ch)
03465 dfa->map_notascii = 1;
03466 # endif
03467 }
03468 }
03469 }
03470 #endif
03471
03472 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
03473 return REG_ESPACE;
03474 return REG_NOERROR;
03475 }
03476
03477
03478
03479
03480
03481 static void
03482 internal_function
03483 init_word_char (re_dfa_t *dfa)
03484 {
03485 int i, j, ch;
03486 dfa->word_ops_used = 1;
03487 for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
03488 for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
03489 if (isalnum (ch) || ch == '_')
03490 dfa->word_char[i] |= (bitset_word_t) 1 << j;
03491 }
03492
03493
03494
03495 static void
03496 free_workarea_compile (regex_t *preg)
03497 {
03498 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
03499 bin_tree_storage_t *storage, *next;
03500 for (storage = dfa->str_tree_storage; storage; storage = next)
03501 {
03502 next = storage->next;
03503 re_free (storage);
03504 }
03505 dfa->str_tree_storage = NULL;
03506 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
03507 dfa->str_tree = NULL;
03508 re_free (dfa->org_indices);
03509 dfa->org_indices = NULL;
03510 }
03511
03512
03513
03514 static reg_errcode_t
03515 create_initial_state (re_dfa_t *dfa)
03516 {
03517 int first, i;
03518 reg_errcode_t err;
03519 re_node_set init_nodes;
03520
03521
03522
03523 first = dfa->str_tree->first->node_idx;
03524 dfa->init_node = first;
03525 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
03526 if (BE (err != REG_NOERROR, 0))
03527 return err;
03528
03529
03530
03531
03532
03533 if (dfa->nbackref > 0)
03534 for (i = 0; i < init_nodes.nelem; ++i)
03535 {
03536 int node_idx = init_nodes.elems[i];
03537 re_token_type_t type = dfa->nodes[node_idx].type;
03538
03539 int clexp_idx;
03540 if (type != OP_BACK_REF)
03541 continue;
03542 for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
03543 {
03544 re_token_t *clexp_node;
03545 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
03546 if (clexp_node->type == OP_CLOSE_SUBEXP
03547 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
03548 break;
03549 }
03550 if (clexp_idx == init_nodes.nelem)
03551 continue;
03552
03553 if (type == OP_BACK_REF)
03554 {
03555 int dest_idx = dfa->edests[node_idx].elems[0];
03556 if (!re_node_set_contains (&init_nodes, dest_idx))
03557 {
03558 re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
03559 i = 0;
03560 }
03561 }
03562 }
03563
03564
03565 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
03566
03567 if (BE (dfa->init_state == NULL, 0))
03568 return err;
03569 if (dfa->init_state->has_constraint)
03570 {
03571 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
03572 CONTEXT_WORD);
03573 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
03574 CONTEXT_NEWLINE);
03575 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
03576 &init_nodes,
03577 CONTEXT_NEWLINE
03578 | CONTEXT_BEGBUF);
03579 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
03580 || dfa->init_state_begbuf == NULL, 0))
03581 return err;
03582 }
03583 else
03584 dfa->init_state_word = dfa->init_state_nl
03585 = dfa->init_state_begbuf = dfa->init_state;
03586
03587 re_node_set_free (&init_nodes);
03588 return REG_NOERROR;
03589 }
03590
#ifdef RE_ENABLE_I18N
/* Try to switch DFA to single-byte matching.

   The node array is scanned first.  The function returns without changing
   anything if it finds an ANCHOR other than LINE_FIRST, LINE_LAST,
   BUF_FIRST or BUF_LAST, a COMPLEX_BRACKET, or a SIMPLE_BRACKET with any
   bit set for a byte >= 0x80.  A node type not listed in the switch aborts
   the program.

   Otherwise, if the pattern has a CHARACTER node >= 0x80 or an OP_PERIOD,
   mb_partial is cleared on those CHARACTER nodes and every OP_PERIOD
   becomes OP_UTF8_PERIOD.  Finally mb_cur_max is set to 1, is_utf8 is
   cleared, and has_mb_node records whether back references or a period
   are present.  */
static void
optimize_utf8 (re_dfa_t *dfa)
{
  int node, i, mb_chars = 0, has_period = 0;

  for (node = 0; node < dfa->nodes_len; ++node)
    switch (dfa->nodes[node].type)
      {
      case CHARACTER:
	if (dfa->nodes[node].opr.c >= 0x80)
	  mb_chars = 1;
	break;
      case ANCHOR:
	/* The anchor kind lives in opr.ctx_type (the member that
	   create_cd_newstate reads for ANCHOR nodes), not in opr.idx.  */
	switch (dfa->nodes[node].opr.ctx_type)
	  {
	  case LINE_FIRST:
	  case LINE_LAST:
	  case BUF_FIRST:
	  case BUF_LAST:
	    break;
	  default:
	    /* Any other anchor kind rules the optimisation out.  */
	    return;
	  }
	break;
      case OP_PERIOD:
	has_period = 1;
	break;
      case OP_BACK_REF:
      case OP_ALT:
      case END_OF_RE:
      case OP_DUP_ASTERISK:
      case OP_OPEN_SUBEXP:
      case OP_CLOSE_SUBEXP:
	break;
      case COMPLEX_BRACKET:
	return;
      case SIMPLE_BRACKET:
	/* The word-wise test below requires 0x80 to fall on a bitset word
	   boundary.  */
	assert (0x80 % BITSET_WORD_BITS == 0);
	for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
	  if (dfa->nodes[node].opr.sbcset[i])
	    return;
	break;
      default:
	abort ();
      }

  if (mb_chars || has_period)
    for (node = 0; node < dfa->nodes_len; ++node)
      {
	if (dfa->nodes[node].type == CHARACTER
	    && dfa->nodes[node].opr.c >= 0x80)
	  dfa->nodes[node].mb_partial = 0;
	else if (dfa->nodes[node].type == OP_PERIOD)
	  dfa->nodes[node].type = OP_UTF8_PERIOD;
      }

  /* From here on the DFA is treated as single-byte.  */
  dfa->mb_cur_max = 1;
  dfa->is_utf8 = 0;
  dfa->has_mb_node = dfa->nbackref > 0 || has_period;
}
#endif
03660
03661
03662
03663
03664 static reg_errcode_t
03665 analyze (regex_t *preg)
03666 {
03667 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
03668 reg_errcode_t ret;
03669
03670
03671 dfa->nexts = re_malloc (int, dfa->nodes_alloc);
03672 dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
03673 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
03674 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
03675 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
03676 || dfa->eclosures == NULL, 0))
03677 return REG_ESPACE;
03678
03679 dfa->subexp_map = re_malloc (int, preg->re_nsub);
03680 if (dfa->subexp_map != NULL)
03681 {
03682 int i;
03683 for (i = 0; i < preg->re_nsub; i++)
03684 dfa->subexp_map[i] = i;
03685 preorder (dfa->str_tree, optimize_subexps, dfa);
03686 for (i = 0; i < preg->re_nsub; i++)
03687 if (dfa->subexp_map[i] != i)
03688 break;
03689 if (i == preg->re_nsub)
03690 {
03691 free (dfa->subexp_map);
03692 dfa->subexp_map = NULL;
03693 }
03694 }
03695
03696 ret = postorder (dfa->str_tree, lower_subexps, preg);
03697 if (BE (ret != REG_NOERROR, 0))
03698 return ret;
03699 ret = postorder (dfa->str_tree, calc_first, dfa);
03700 if (BE (ret != REG_NOERROR, 0))
03701 return ret;
03702 preorder (dfa->str_tree, calc_next, dfa);
03703 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
03704 if (BE (ret != REG_NOERROR, 0))
03705 return ret;
03706 ret = calc_eclosure (dfa);
03707 if (BE (ret != REG_NOERROR, 0))
03708 return ret;
03709
03710
03711
03712 if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
03713 || dfa->nbackref)
03714 {
03715 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
03716 if (BE (dfa->inveclosures == NULL, 0))
03717 return REG_ESPACE;
03718 ret = calc_inveclosure (dfa);
03719 }
03720
03721 return ret;
03722 }
03723
03724
03725
03726
03727 static reg_errcode_t
03728 postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
03729 void *extra)
03730 {
03731 bin_tree_t *node, *prev;
03732
03733 for (node = root; ; )
03734 {
03735
03736
03737 while (node->left || node->right)
03738 if (node->left)
03739 node = node->left;
03740 else
03741 node = node->right;
03742
03743 do
03744 {
03745 reg_errcode_t err = fn (extra, node);
03746 if (BE (err != REG_NOERROR, 0))
03747 return err;
03748 if (node->parent == NULL)
03749 return REG_NOERROR;
03750 prev = node;
03751 node = node->parent;
03752 }
03753
03754 while (node->right == prev || node->right == NULL);
03755 node = node->right;
03756 }
03757 }
03758
03759 static reg_errcode_t
03760 preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
03761 void *extra)
03762 {
03763 bin_tree_t *node;
03764
03765 for (node = root; ; )
03766 {
03767 reg_errcode_t err = fn (extra, node);
03768 if (BE (err != REG_NOERROR, 0))
03769 return err;
03770
03771
03772 if (node->left)
03773 node = node->left;
03774 else
03775 {
03776 bin_tree_t *prev = NULL;
03777 while (node->right == prev || node->right == NULL)
03778 {
03779 prev = node;
03780 node = node->parent;
03781 if (!node)
03782 return REG_NOERROR;
03783 }
03784 node = node->right;
03785 }
03786 }
03787 }
03788
03789
03790
03791
03792 static reg_errcode_t
03793 optimize_subexps (void *extra, bin_tree_t *node)
03794 {
03795 re_dfa_t *dfa = (re_dfa_t *) extra;
03796
03797 if (node->token.type == OP_BACK_REF && dfa->subexp_map)
03798 {
03799 int idx = node->token.opr.idx;
03800 node->token.opr.idx = dfa->subexp_map[idx];
03801 dfa->used_bkref_map |= 1 << node->token.opr.idx;
03802 }
03803
03804 else if (node->token.type == SUBEXP
03805 && node->left && node->left->token.type == SUBEXP)
03806 {
03807 int other_idx = node->left->token.opr.idx;
03808
03809 node->left = node->left->left;
03810 if (node->left)
03811 node->left->parent = node;
03812
03813 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
03814 if (other_idx < BITSET_WORD_BITS)
03815 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
03816 }
03817
03818 return REG_NOERROR;
03819 }
03820
03821
03822
03823 static reg_errcode_t
03824 lower_subexps (void *extra, bin_tree_t *node)
03825 {
03826 regex_t *preg = (regex_t *) extra;
03827 reg_errcode_t err = REG_NOERROR;
03828
03829 if (node->left && node->left->token.type == SUBEXP)
03830 {
03831 node->left = lower_subexp (&err, preg, node->left);
03832 if (node->left)
03833 node->left->parent = node;
03834 }
03835 if (node->right && node->right->token.type == SUBEXP)
03836 {
03837 node->right = lower_subexp (&err, preg, node->right);
03838 if (node->right)
03839 node->right->parent = node;
03840 }
03841
03842 return err;
03843 }
03844
03845 static bin_tree_t *
03846 lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
03847 {
03848 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
03849 bin_tree_t *body = node->left;
03850 bin_tree_t *op, *cls, *tree1, *tree;
03851
03852 if (preg->no_sub
03853
03854
03855
03856
03857 && node->left != NULL
03858 && (node->token.opr.idx >= BITSET_WORD_BITS
03859 || !(dfa->used_bkref_map
03860 & ((bitset_word_t) 1 << node->token.opr.idx))))
03861 return node->left;
03862
03863
03864
03865 op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
03866 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
03867 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
03868 tree = create_tree (dfa, op, tree1, CONCAT);
03869 if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
03870 {
03871 *err = REG_ESPACE;
03872 return NULL;
03873 }
03874
03875 op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
03876 op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
03877 return tree;
03878 }
03879
03880
03881
03882 static reg_errcode_t
03883 calc_first (void *extra, bin_tree_t *node)
03884 {
03885 re_dfa_t *dfa = (re_dfa_t *) extra;
03886 if (node->token.type == CONCAT)
03887 {
03888 node->first = node->left->first;
03889 node->node_idx = node->left->node_idx;
03890 }
03891 else
03892 {
03893 node->first = node;
03894 node->node_idx = re_dfa_add_node (dfa, node->token);
03895 if (BE (node->node_idx == -1, 0))
03896 return REG_ESPACE;
03897 }
03898 return REG_NOERROR;
03899 }
03900
03901
03902 static reg_errcode_t
03903 calc_next (void *extra, bin_tree_t *node)
03904 {
03905 switch (node->token.type)
03906 {
03907 case OP_DUP_ASTERISK:
03908 node->left->next = node;
03909 break;
03910 case CONCAT:
03911 node->left->next = node->right->first;
03912 node->right->next = node->next;
03913 break;
03914 default:
03915 if (node->left)
03916 node->left->next = node->next;
03917 if (node->right)
03918 node->right->next = node->next;
03919 break;
03920 }
03921 return REG_NOERROR;
03922 }
03923
03924
03925 static reg_errcode_t
03926 link_nfa_nodes (void *extra, bin_tree_t *node)
03927 {
03928 re_dfa_t *dfa = (re_dfa_t *) extra;
03929 int idx = node->node_idx;
03930 reg_errcode_t err = REG_NOERROR;
03931
03932 switch (node->token.type)
03933 {
03934 case CONCAT:
03935 break;
03936
03937 case END_OF_RE:
03938 assert (node->next == NULL);
03939 break;
03940
03941 case OP_DUP_ASTERISK:
03942 case OP_ALT:
03943 {
03944 int left, right;
03945 dfa->has_plural_match = 1;
03946 if (node->left != NULL)
03947 left = node->left->first->node_idx;
03948 else
03949 left = node->next->node_idx;
03950 if (node->right != NULL)
03951 right = node->right->first->node_idx;
03952 else
03953 right = node->next->node_idx;
03954 assert (left > -1);
03955 assert (right > -1);
03956 err = re_node_set_init_2 (dfa->edests + idx, left, right);
03957 }
03958 break;
03959
03960 case ANCHOR:
03961 case OP_OPEN_SUBEXP:
03962 case OP_CLOSE_SUBEXP:
03963 err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
03964 break;
03965
03966 case OP_BACK_REF:
03967 dfa->nexts[idx] = node->next->node_idx;
03968 if (node->token.type == OP_BACK_REF)
03969 re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
03970 break;
03971
03972 default:
03973 assert (!IS_EPSILON_NODE (node->token.type));
03974 dfa->nexts[idx] = node->next->node_idx;
03975 break;
03976 }
03977
03978 return err;
03979 }
03980
03981
03982
03983
03984
/* Duplicate the chain of nodes reachable from TOP_ORG_NODE, hanging the
   copies below the already existing node TOP_CLONE_NODE by filling in
   dfa->edests (and dfa->nexts) of the clone nodes.  Every copy is made
   with duplicate_node using the accumulated constraint, which starts as
   INIT_CONSTRAINT and picks up the ctx_type of each ANCHOR node that is
   walked through.  ROOT_NODE is only compared with the current original
   node, to notice that the walk has returned to its starting anchor.
   Returns REG_NOERROR on success, REG_ESPACE when a node cannot be
   duplicated or inserted, or the error of the recursive call.  */
static reg_errcode_t
internal_function
duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
                        int root_node, unsigned int init_constraint)
{
  int org_node, clone_node, ret;
  unsigned int constraint = init_constraint;
  /* Each iteration handles one original node and its clone, then moves on
     to the destination pair chosen by the branch taken.  */
  for (org_node = top_org_node, clone_node = top_clone_node;;)
    {
      int org_dest, clone_dest;
      if (dfa->nodes[org_node].type == OP_BACK_REF)
        {
          /* Back reference: the destination is taken from `nexts' rather
             than from `edests'.  Duplicate that destination under the
             current constraint and make the copy the only epsilon
             destination of the clone; the clone's `nexts' entry keeps
             pointing at the original destination.  */
          org_dest = dfa->nexts[org_node];
          re_node_set_empty (dfa->edests + clone_node);
          clone_dest = duplicate_node (dfa, org_dest, constraint);
          if (BE (clone_dest == -1, 0))
            return REG_ESPACE;
          dfa->nexts[clone_node] = dfa->nexts[org_node];
          ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
          if (BE (ret < 0, 0))
            return REG_ESPACE;
        }
      else if (dfa->edests[org_node].nelem == 0)
        {
          /* The original has no epsilon destination: nothing more to
             duplicate.  The clone shares the original's `nexts' entry and
             the walk ends here.  */
          dfa->nexts[clone_node] = dfa->nexts[org_node];
          break;
        }
      else if (dfa->edests[org_node].nelem == 1)
        {
          /* Exactly one epsilon destination.  */
          org_dest = dfa->edests[org_node].elems[0];
          re_node_set_empty (dfa->edests + clone_node);
          if (dfa->nodes[org_node].type == ANCHOR)
            {
              /* Walked back to the root anchor through a clone.  */
              if (org_node == root_node && clone_node != org_node)
                {
                  /* Tie the clone to the original destination instead of
                     duplicating again, and stop.  NOTE(review): this
                     presumably closes a loop in the epsilon graph.  */
                  ret = re_node_set_insert (dfa->edests + clone_node,
                                            org_dest);
                  if (BE (ret < 0, 0))
                    return REG_ESPACE;
                  break;
                }
              /* Any other anchor adds its own context to the constraint
                 carried by the remaining copies.  */
              constraint |= dfa->nodes[org_node].opr.ctx_type;
            }
          clone_dest = duplicate_node (dfa, org_dest, constraint);
          if (BE (clone_dest == -1, 0))
            return REG_ESPACE;
          ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
          if (BE (ret < 0, 0))
            return REG_ESPACE;
        }
      else
        {
          /* More than one epsilon destination; only elems[0] and elems[1]
             are handled.  */
          org_dest = dfa->edests[org_node].elems[0];
          re_node_set_empty (dfa->edests + clone_node);
          /* Look for an existing duplicate of the first destination that
             carries the same constraint.  */
          clone_dest = search_duplicated_node (dfa, org_dest, constraint);
          if (clone_dest == -1)
            {
              /* None found: create one and duplicate everything below it
                 recursively.  */
              reg_errcode_t err;
              clone_dest = duplicate_node (dfa, org_dest, constraint);
              if (BE (clone_dest == -1, 0))
                return REG_ESPACE;
              ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
              if (BE (ret < 0, 0))
                return REG_ESPACE;
              err = duplicate_node_closure (dfa, org_dest, clone_dest,
                                            root_node, constraint);
              if (BE (err != REG_NOERROR, 0))
                return err;
            }
          else
            {
              /* Reuse the existing duplicate; its sub-graph is not
                 duplicated a second time.  */
              ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
              if (BE (ret < 0, 0))
                return REG_ESPACE;
            }

          /* The second destination is always duplicated, and the loop
             continues along it.  */
          org_dest = dfa->edests[org_node].elems[1];
          clone_dest = duplicate_node (dfa, org_dest, constraint);
          if (BE (clone_dest == -1, 0))
            return REG_ESPACE;
          ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
          if (BE (ret < 0, 0))
            return REG_ESPACE;
        }
      org_node = org_dest;
      clone_node = clone_dest;
    }
  return REG_NOERROR;
}
04093
04094
04095
04096
04097 static int
04098 search_duplicated_node (const re_dfa_t *dfa, int org_node,
04099 unsigned int constraint)
04100 {
04101 int idx;
04102 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
04103 {
04104 if (org_node == dfa->org_indices[idx]
04105 && constraint == dfa->nodes[idx].constraint)
04106 return idx;
04107 }
04108 return -1;
04109 }
04110
04111
04112
04113
04114
04115 static int
04116 duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
04117 {
04118 int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
04119 if (BE (dup_idx != -1, 1))
04120 {
04121 dfa->nodes[dup_idx].constraint = constraint;
04122 if (dfa->nodes[org_idx].type == ANCHOR)
04123 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
04124 dfa->nodes[dup_idx].duplicated = 1;
04125
04126
04127 dfa->org_indices[dup_idx] = org_idx;
04128 }
04129 return dup_idx;
04130 }
04131
04132 static reg_errcode_t
04133 calc_inveclosure (re_dfa_t *dfa)
04134 {
04135 int src, idx, ret;
04136 for (idx = 0; idx < dfa->nodes_len; ++idx)
04137 re_node_set_init_empty (dfa->inveclosures + idx);
04138
04139 for (src = 0; src < dfa->nodes_len; ++src)
04140 {
04141 int *elems = dfa->eclosures[src].elems;
04142 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
04143 {
04144 ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
04145 if (BE (ret == -1, 0))
04146 return REG_ESPACE;
04147 }
04148 }
04149
04150 return REG_NOERROR;
04151 }
04152
04153
04154
04155 static reg_errcode_t
04156 calc_eclosure (re_dfa_t *dfa)
04157 {
04158 int node_idx, incomplete;
04159 #ifdef DEBUG
04160 assert (dfa->nodes_len > 0);
04161 #endif
04162 incomplete = 0;
04163
04164 for (node_idx = 0; ; ++node_idx)
04165 {
04166 reg_errcode_t err;
04167 re_node_set eclosure_elem;
04168 if (node_idx == dfa->nodes_len)
04169 {
04170 if (!incomplete)
04171 break;
04172 incomplete = 0;
04173 node_idx = 0;
04174 }
04175
04176 #ifdef DEBUG
04177 assert (dfa->eclosures[node_idx].nelem != -1);
04178 #endif
04179
04180
04181 if (dfa->eclosures[node_idx].nelem != 0)
04182 continue;
04183
04184 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
04185 if (BE (err != REG_NOERROR, 0))
04186 return err;
04187
04188 if (dfa->eclosures[node_idx].nelem == 0)
04189 {
04190 incomplete = 1;
04191 re_node_set_free (&eclosure_elem);
04192 }
04193 }
04194 return REG_NOERROR;
04195 }
04196
04197
04198
04199 static reg_errcode_t
04200 calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
04201 {
04202 reg_errcode_t err;
04203 unsigned int constraint;
04204 int i, incomplete;
04205 re_node_set eclosure;
04206 incomplete = 0;
04207 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
04208 if (BE (err != REG_NOERROR, 0))
04209 return err;
04210
04211
04212
04213 dfa->eclosures[node].nelem = -1;
04214
04215 constraint = ((dfa->nodes[node].type == ANCHOR)
04216 ? dfa->nodes[node].opr.ctx_type : 0);
04217
04218
04219 if (constraint
04220 && dfa->edests[node].nelem
04221 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
04222 {
04223 err = duplicate_node_closure (dfa, node, node, node, constraint);
04224 if (BE (err != REG_NOERROR, 0))
04225 return err;
04226 }
04227
04228
04229 if (IS_EPSILON_NODE(dfa->nodes[node].type))
04230 for (i = 0; i < dfa->edests[node].nelem; ++i)
04231 {
04232 re_node_set eclosure_elem;
04233 int edest = dfa->edests[node].elems[i];
04234
04235
04236 if (dfa->eclosures[edest].nelem == -1)
04237 {
04238 incomplete = 1;
04239 continue;
04240 }
04241
04242
04243 if (dfa->eclosures[edest].nelem == 0)
04244 {
04245 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
04246 if (BE (err != REG_NOERROR, 0))
04247 return err;
04248 }
04249 else
04250 eclosure_elem = dfa->eclosures[edest];
04251
04252 re_node_set_merge (&eclosure, &eclosure_elem);
04253
04254
04255 if (dfa->eclosures[edest].nelem == 0)
04256 {
04257 incomplete = 1;
04258 re_node_set_free (&eclosure_elem);
04259 }
04260 }
04261
04262
04263 re_node_set_insert (&eclosure, node);
04264 if (incomplete && !root)
04265 dfa->eclosures[node].nelem = 0;
04266 else
04267 dfa->eclosures[node] = eclosure;
04268 *new_set = eclosure;
04269 return REG_NOERROR;
04270 }
04271
04272
04273
04274
04275
04276
04277 static void
04278 internal_function
04279 fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
04280 {
04281 re_string_skip_bytes (input, peek_token (result, input, syntax));
04282 }
04283
04284
04285
04286
/* Classify the token at the current position of INPUT, according to the
   syntax bits in SYNTAX, and store it in TOKEN.  The position of INPUT is
   the same on return as on entry.
   Returns the number of bytes the token occupies: 0 at the end of the
   input (END_OF_RE), 2 for a backslash sequence, 1 otherwise.  */
static int
internal_function
peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
{
  unsigned char c;

  if (re_string_eoi (input))
    {
      token->type = END_OF_RE;
      return 0;
    }

  c = re_string_peek_byte (input, 0);
  token->opr.c = c;

  token->word_char = 0;
#ifdef RE_ENABLE_I18N
  token->mb_partial = 0;
  /* A byte that is not the first byte of a multibyte character is always
     an ordinary character, flagged as a partial one.  */
  if (input->mb_cur_max > 1 &&
      !re_string_first_byte (input, re_string_cur_idx (input)))
    {
      token->type = CHARACTER;
      token->mb_partial = 1;
      return 1;
    }
#endif
  if (c == '\\')
    {
      unsigned char c2;
      /* A backslash that is the last byte of the pattern.  */
      if (re_string_cur_idx (input) + 1 >= re_string_length (input))
        {
          token->type = BACK_SLASH;
          return 1;
        }

      /* By default "\X" is the ordinary character X; the switch below
         upgrades it to an operator when SYNTAX allows.  */
      c2 = re_string_peek_byte_case (input, 1);
      token->opr.c = c2;
      token->type = CHARACTER;
#ifdef RE_ENABLE_I18N
      if (input->mb_cur_max > 1)
        {
          wint_t wc = re_string_wchar_at (input,
                                          re_string_cur_idx (input) + 1);
          token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
        }
      else
#endif
        token->word_char = IS_WORD_CHAR (c2) != 0;

      switch (c2)
        {
        case '|':
          if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
            token->type = OP_ALT;
          break;
        case '1': case '2': case '3': case '4': case '5':
        case '6': case '7': case '8': case '9':
          if (!(syntax & RE_NO_BK_REFS))
            {
              token->type = OP_BACK_REF;
              /* Back-reference indices are zero based: "\1" is 0.  */
              token->opr.idx = c2 - '1';
            }
          break;
        case '<':
          if (!(syntax & RE_NO_GNU_OPS))
            {
              token->type = ANCHOR;
              token->opr.ctx_type = WORD_FIRST;
            }
          break;
        case '>':
          if (!(syntax & RE_NO_GNU_OPS))
            {
              token->type = ANCHOR;
              token->opr.ctx_type = WORD_LAST;
            }
          break;
        case 'b':
          if (!(syntax & RE_NO_GNU_OPS))
            {
              token->type = ANCHOR;
              token->opr.ctx_type = WORD_DELIM;
            }
          break;
        case 'B':
          if (!(syntax & RE_NO_GNU_OPS))
            {
              token->type = ANCHOR;
              token->opr.ctx_type = NOT_WORD_DELIM;
            }
          break;
        case 'w':
          if (!(syntax & RE_NO_GNU_OPS))
            token->type = OP_WORD;
          break;
        case 'W':
          if (!(syntax & RE_NO_GNU_OPS))
            token->type = OP_NOTWORD;
          break;
        case 's':
          if (!(syntax & RE_NO_GNU_OPS))
            token->type = OP_SPACE;
          break;
        case 'S':
          if (!(syntax & RE_NO_GNU_OPS))
            token->type = OP_NOTSPACE;
          break;
        case '`':
          if (!(syntax & RE_NO_GNU_OPS))
            {
              token->type = ANCHOR;
              token->opr.ctx_type = BUF_FIRST;
            }
          break;
        case '\'':
          if (!(syntax & RE_NO_GNU_OPS))
            {
              token->type = ANCHOR;
              token->opr.ctx_type = BUF_LAST;
            }
          break;
        case '(':
          if (!(syntax & RE_NO_BK_PARENS))
            token->type = OP_OPEN_SUBEXP;
          break;
        case ')':
          if (!(syntax & RE_NO_BK_PARENS))
            token->type = OP_CLOSE_SUBEXP;
          break;
        case '+':
          if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
            token->type = OP_DUP_PLUS;
          break;
        case '?':
          if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
            token->type = OP_DUP_QUESTION;
          break;
        case '{':
          if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
            token->type = OP_OPEN_DUP_NUM;
          break;
        case '}':
          if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
            token->type = OP_CLOSE_DUP_NUM;
          break;
        default:
          break;
        }
      return 2;
    }

  /* Not a backslash sequence: an ordinary character unless the switch
     below recognises an unescaped operator.  */
  token->type = CHARACTER;
#ifdef RE_ENABLE_I18N
  if (input->mb_cur_max > 1)
    {
      wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
      token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
    }
  else
#endif
    token->word_char = IS_WORD_CHAR (token->opr.c);

  switch (c)
    {
    case '\n':
      if (syntax & RE_NEWLINE_ALT)
        token->type = OP_ALT;
      break;
    case '|':
      if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
        token->type = OP_ALT;
      break;
    case '*':
      token->type = OP_DUP_ASTERISK;
      break;
    case '+':
      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
        token->type = OP_DUP_PLUS;
      break;
    case '?':
      if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
        token->type = OP_DUP_QUESTION;
      break;
    case '{':
      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
        token->type = OP_OPEN_DUP_NUM;
      break;
    case '}':
      if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
        token->type = OP_CLOSE_DUP_NUM;
      break;
    case '(':
      if (syntax & RE_NO_BK_PARENS)
        token->type = OP_OPEN_SUBEXP;
      break;
    case ')':
      if (syntax & RE_NO_BK_PARENS)
        token->type = OP_CLOSE_SUBEXP;
      break;
    case '[':
      token->type = OP_OPEN_BRACKET;
      break;
    case '.':
      token->type = OP_PERIOD;
      break;
    case '^':
      /* Without RE_CONTEXT_INDEP_ANCHORS / RE_CARET_ANCHORS_HERE, a '^'
         that is not at index 0 is an anchor only right after a newline
         when RE_NEWLINE_ALT is set; otherwise it stays a CHARACTER.  */
      if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
          re_string_cur_idx (input) != 0)
        {
          char prev = re_string_peek_byte (input, -1);
          if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
            break;
        }
      token->type = ANCHOR;
      token->opr.ctx_type = LINE_FIRST;
      break;
    case '$':
      /* Without RE_CONTEXT_INDEP_ANCHORS, a '$' that is not the last byte
         is an anchor only when the token after it is an alternation or a
         closing parenthesis.  The look-ahead moves INPUT one byte forward
         and back again.  */
      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
          re_string_cur_idx (input) + 1 != re_string_length (input))
        {
          re_token_t next;
          re_string_skip_bytes (input, 1);
          peek_token (&next, input, syntax);
          re_string_skip_bytes (input, -1);
          if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
            break;
        }
      token->type = ANCHOR;
      token->opr.ctx_type = LINE_LAST;
      break;
    default:
      break;
    }
  return 1;
}
04522
04523
04524
04525
04526 static int
04527 internal_function
04528 peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
04529 {
04530 unsigned char c;
04531 if (re_string_eoi (input))
04532 {
04533 token->type = END_OF_RE;
04534 return 0;
04535 }
04536 c = re_string_peek_byte (input, 0);
04537 token->opr.c = c;
04538
04539 #ifdef RE_ENABLE_I18N
04540 if (input->mb_cur_max > 1 &&
04541 !re_string_first_byte (input, re_string_cur_idx (input)))
04542 {
04543 token->type = CHARACTER;
04544 return 1;
04545 }
04546 #endif
04547
04548 if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
04549 && re_string_cur_idx (input) + 1 < re_string_length (input))
04550 {
04551
04552 unsigned char c2;
04553 re_string_skip_bytes (input, 1);
04554 c2 = re_string_peek_byte (input, 0);
04555 token->opr.c = c2;
04556 token->type = CHARACTER;
04557 return 1;
04558 }
04559 if (c == '[')
04560 {
04561 unsigned char c2;
04562 int token_len;
04563 if (re_string_cur_idx (input) + 1 < re_string_length (input))
04564 c2 = re_string_peek_byte (input, 1);
04565 else
04566 c2 = 0;
04567 token->opr.c = c2;
04568 token_len = 2;
04569 switch (c2)
04570 {
04571 case '.':
04572 token->type = OP_OPEN_COLL_ELEM;
04573 break;
04574 case '=':
04575 token->type = OP_OPEN_EQUIV_CLASS;
04576 break;
04577 case ':':
04578 if (syntax & RE_CHAR_CLASSES)
04579 {
04580 token->type = OP_OPEN_CHAR_CLASS;
04581 break;
04582 }
04583
04584 default:
04585 token->type = CHARACTER;
04586 token->opr.c = c;
04587 token_len = 1;
04588 break;
04589 }
04590 return token_len;
04591 }
04592 switch (c)
04593 {
04594 case '-':
04595 token->type = OP_CHARSET_RANGE;
04596 break;
04597 case ']':
04598 token->type = OP_CLOSE_BRACKET;
04599 break;
04600 case '^':
04601 token->type = OP_NON_MATCH_LIST;
04602 break;
04603 default:
04604 token->type = CHARACTER;
04605 }
04606 return 1;
04607 }
04608
04609
04610
04611
04612
04613
04614
04615
04616
04617
04618
04619
04620
04621
04622
04623 static bin_tree_t *
04624 parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
04625 reg_errcode_t *err)
04626 {
04627 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
04628 bin_tree_t *tree, *eor, *root;
04629 re_token_t current_token;
04630 dfa->syntax = syntax;
04631 fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
04632 tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err);
04633 if (BE (*err != REG_NOERROR && tree == NULL, 0))
04634 return NULL;
04635 eor = create_tree (dfa, NULL, NULL, END_OF_RE);
04636 if (tree != NULL)
04637 root = create_tree (dfa, tree, eor, CONCAT);
04638 else
04639 root = eor;
04640 if (BE (eor == NULL || root == NULL, 0))
04641 {
04642 *err = REG_ESPACE;
04643 return NULL;
04644 }
04645 return root;
04646 }
04647
04648
04649
04650
04651
04652
04653
04654
04655
04656
04657 static bin_tree_t *
04658 parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
04659 reg_syntax_t syntax, int nest, reg_errcode_t *err)
04660 {
04661 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
04662 bin_tree_t *tree, *branch = NULL;
04663 tree = parse_branch (regexp, preg, token, syntax, nest, err);
04664 if (BE (*err != REG_NOERROR && tree == NULL, 0))
04665 return NULL;
04666
04667 while (token->type == OP_ALT)
04668 {
04669 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
04670 if (token->type != OP_ALT && token->type != END_OF_RE
04671 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
04672 {
04673 branch = parse_branch (regexp, preg, token, syntax, nest, err);
04674 if (BE (*err != REG_NOERROR && branch == NULL, 0))
04675 return NULL;
04676 }
04677 else
04678 branch = NULL;
04679 tree = create_tree (dfa, tree, branch, OP_ALT);
04680 if (BE (tree == NULL, 0))
04681 {
04682 *err = REG_ESPACE;
04683 return NULL;
04684 }
04685 }
04686 return tree;
04687 }
04688
04689
04690
04691
04692
04693
04694
04695
04696
04697
04698 static bin_tree_t *
04699 parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
04700 reg_syntax_t syntax, int nest, reg_errcode_t *err)
04701 {
04702 bin_tree_t *tree, *exp;
04703 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
04704 tree = parse_expression (regexp, preg, token, syntax, nest, err);
04705 if (BE (*err != REG_NOERROR && tree == NULL, 0))
04706 return NULL;
04707
04708 while (token->type != OP_ALT && token->type != END_OF_RE
04709 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
04710 {
04711 exp = parse_expression (regexp, preg, token, syntax, nest, err);
04712 if (BE (*err != REG_NOERROR && exp == NULL, 0))
04713 {
04714 return NULL;
04715 }
04716 if (tree != NULL && exp != NULL)
04717 {
04718 tree = create_tree (dfa, tree, exp, CONCAT);
04719 if (tree == NULL)
04720 {
04721 *err = REG_ESPACE;
04722 return NULL;
04723 }
04724 }
04725 else if (tree == NULL)
04726 tree = exp;
04727
04728 }
04729 return tree;
04730 }
04731
04732
04733
04734
04735
04736
04737
/* Parse one expression starting at TOKEN: a single atom (character,
   parenthesised sub-expression, bracket expression, back reference,
   anchor, '.', or one of the \w \W \s \S classes) followed by any number
   of duplication operators.  On return TOKEN holds the first token after
   the expression.
   Returns the tree of the expression.  NULL is returned with *ERR set on
   error, and with *ERR untouched when TOKEN is OP_ALT or END_OF_RE.  */
static bin_tree_t *
parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
                  reg_syntax_t syntax, int nest, reg_errcode_t *err)
{
  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
  bin_tree_t *tree;
  switch (token->type)
    {
    case CHARACTER:
      tree = create_token_tree (dfa, NULL, NULL, token);
      if (BE (tree == NULL, 0))
        {
          *err = REG_ESPACE;
          return NULL;
        }
#ifdef RE_ENABLE_I18N
      if (dfa->mb_cur_max > 1)
        {
          /* Chain the remaining bytes of a multibyte character onto the
             first one with CONCAT nodes.  */
          while (!re_string_eoi (regexp)
                 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
            {
              bin_tree_t *mbc_remain;
              fetch_token (token, regexp, syntax);
              mbc_remain = create_token_tree (dfa, NULL, NULL, token);
              tree = create_tree (dfa, tree, mbc_remain, CONCAT);
              if (BE (mbc_remain == NULL || tree == NULL, 0))
                {
                  *err = REG_ESPACE;
                  return NULL;
                }
            }
        }
#endif
      break;
    case OP_OPEN_SUBEXP:
      tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
        return NULL;
      break;
    case OP_OPEN_BRACKET:
      tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
        return NULL;
      break;
    case OP_BACK_REF:
      /* Only sub-expressions that have already been closed may be
         referenced.  */
      if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
        {
          *err = REG_ESUBREG;
          return NULL;
        }
      dfa->used_bkref_map |= 1 << token->opr.idx;
      tree = create_token_tree (dfa, NULL, NULL, token);
      if (BE (tree == NULL, 0))
        {
          *err = REG_ESPACE;
          return NULL;
        }
      ++dfa->nbackref;
      dfa->has_mb_node = 1;
      break;
    case OP_OPEN_DUP_NUM:
      if (syntax & RE_CONTEXT_INVALID_DUP)
        {
          *err = REG_BADRPT;
          return NULL;
        }
      /* FALLTHROUGH: an interval with nothing to repeat is treated like
         the other duplication operators below.  */
    case OP_DUP_ASTERISK:
    case OP_DUP_PLUS:
    case OP_DUP_QUESTION:
      /* A duplication operator with nothing before it.  */
      if (syntax & RE_CONTEXT_INVALID_OPS)
        {
          *err = REG_BADRPT;
          return NULL;
        }
      else if (syntax & RE_CONTEXT_INDEP_OPS)
        {
          /* Drop the operator and parse what follows it.  */
          fetch_token (token, regexp, syntax);
          return parse_expression (regexp, preg, token, syntax, nest, err);
        }
      /* FALLTHROUGH: otherwise the operator is an ordinary character.  */
    case OP_CLOSE_SUBEXP:
      if ((token->type == OP_CLOSE_SUBEXP) &&
          !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
        {
          *err = REG_ERPAREN;
          return NULL;
        }
      /* FALLTHROUGH */
    case OP_CLOSE_DUP_NUM:
      /* Every token type that reaches this point is turned into a plain
         CHARACTER token (token->opr.c was set by the tokenizer).  */
      token->type = CHARACTER;
      tree = create_token_tree (dfa, NULL, NULL, token);
      if (BE (tree == NULL, 0))
        {
          *err = REG_ESPACE;
          return NULL;
        }
      break;
    case ANCHOR:
      /* Word-related anchors need the word-character table; build it on
         first use.  */
      if ((token->opr.ctx_type
           & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
          && dfa->word_ops_used == 0)
        init_word_char (dfa);
      if (token->opr.ctx_type == WORD_DELIM
          || token->opr.ctx_type == NOT_WORD_DELIM)
        {
          /* \b becomes (WORD_FIRST | WORD_LAST) and \B becomes
             (INSIDE_WORD | INSIDE_NOTWORD), each as an OP_ALT of two
             anchor leaves built from the same token.  */
          bin_tree_t *tree_first, *tree_last;
          if (token->opr.ctx_type == WORD_DELIM)
            {
              token->opr.ctx_type = WORD_FIRST;
              tree_first = create_token_tree (dfa, NULL, NULL, token);
              token->opr.ctx_type = WORD_LAST;
            }
          else
            {
              token->opr.ctx_type = INSIDE_WORD;
              tree_first = create_token_tree (dfa, NULL, NULL, token);
              token->opr.ctx_type = INSIDE_NOTWORD;
            }
          tree_last = create_token_tree (dfa, NULL, NULL, token);
          tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
          if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
            {
              *err = REG_ESPACE;
              return NULL;
            }
        }
      else
        {
          tree = create_token_tree (dfa, NULL, NULL, token);
          if (BE (tree == NULL, 0))
            {
              *err = REG_ESPACE;
              return NULL;
            }
        }
      /* Return right away: the duplication loop at the end of this
         function is skipped, so an operator following an anchor is left
         in TOKEN for the caller to handle.  */
      fetch_token (token, regexp, syntax);
      return tree;
    case OP_PERIOD:
      tree = create_token_tree (dfa, NULL, NULL, token);
      if (BE (tree == NULL, 0))
        {
          *err = REG_ESPACE;
          return NULL;
        }
      if (dfa->mb_cur_max > 1)
        dfa->has_mb_node = 1;
      break;
    case OP_WORD:
    case OP_NOTWORD:
      /* \w and \W: class "alnum" plus '_', negated for \W.  */
      tree = build_charclass_op (dfa, regexp->trans,
                                 (const unsigned char *) "alnum",
                                 (const unsigned char *) "_",
                                 token->type == OP_NOTWORD, err);
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
        return NULL;
      break;
    case OP_SPACE:
    case OP_NOTSPACE:
      /* \s and \S: class "space", negated for \S.  */
      tree = build_charclass_op (dfa, regexp->trans,
                                 (const unsigned char *) "space",
                                 (const unsigned char *) "",
                                 token->type == OP_NOTSPACE, err);
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
        return NULL;
      break;
    case OP_ALT:
    case END_OF_RE:
      /* Empty expression: no tree and no error.  */
      return NULL;
    case BACK_SLASH:
      *err = REG_EESCAPE;
      return NULL;
    default:
      /* No other token type is expected here.  */
#ifdef DEBUG
      assert (0);
#endif
      return NULL;
    }
  fetch_token (token, regexp, syntax);

  /* Apply every duplication operator that follows the atom.  */
  while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
         || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
    {
      tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
      if (BE (*err != REG_NOERROR && tree == NULL, 0))
        return NULL;
      /* With RE_CONTEXT_INVALID_DUP, a '*' or an interval directly after
         another duplication operator is rejected.  */
      if ((syntax & RE_CONTEXT_INVALID_DUP)
          && (token->type == OP_DUP_ASTERISK
              || token->type == OP_OPEN_DUP_NUM))
        {
          *err = REG_BADRPT;
          return NULL;
        }
    }

  return tree;
}
04946
04947
04948
04949
04950
04951
04952
04953
04954 static bin_tree_t *
04955 parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
04956 reg_syntax_t syntax, int nest, reg_errcode_t *err)
04957 {
04958 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
04959 bin_tree_t *tree;
04960 size_t cur_nsub;
04961 cur_nsub = preg->re_nsub++;
04962
04963 fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
04964
04965
04966 if (token->type == OP_CLOSE_SUBEXP)
04967 tree = NULL;
04968 else
04969 {
04970 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
04971 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
04972 *err = REG_EPAREN;
04973 if (BE (*err != REG_NOERROR, 0))
04974 return NULL;
04975 }
04976
04977 if (cur_nsub <= '9' - '1')
04978 dfa->completed_bkref_map |= 1 << cur_nsub;
04979
04980 tree = create_tree (dfa, tree, NULL, SUBEXP);
04981 if (BE (tree == NULL, 0))
04982 {
04983 *err = REG_ESPACE;
04984 return NULL;
04985 }
04986 tree->token.opr.idx = cur_nsub;
04987 return tree;
04988 }
04989
04990
04991
04992 static bin_tree_t *
04993 parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
04994 re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
04995 {
04996 bin_tree_t *tree = NULL, *old_tree = NULL;
04997 int i, start, end, start_idx = re_string_cur_idx (regexp);
04998 re_token_t start_token = *token;
04999
05000 if (token->type == OP_OPEN_DUP_NUM)
05001 {
05002 end = 0;
05003 start = fetch_number (regexp, token, syntax);
05004 if (start == -1)
05005 {
05006 if (token->type == CHARACTER && token->opr.c == ',')
05007 start = 0;
05008 else
05009 {
05010 *err = REG_BADBR;
05011 return NULL;
05012 }
05013 }
05014 if (BE (start != -2, 1))
05015 {
05016
05017 end = ((token->type == OP_CLOSE_DUP_NUM) ? start
05018 : ((token->type == CHARACTER && token->opr.c == ',')
05019 ? fetch_number (regexp, token, syntax) : -2));
05020 }
05021 if (BE (start == -2 || end == -2, 0))
05022 {
05023
05024 if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
05025 {
05026 if (token->type == END_OF_RE)
05027 *err = REG_EBRACE;
05028 else
05029 *err = REG_BADBR;
05030
05031 return NULL;
05032 }
05033
05034
05035 re_string_set_index (regexp, start_idx);
05036 *token = start_token;
05037 token->type = CHARACTER;
05038
05039
05040 return elem;
05041 }
05042
05043 if (BE (end != -1 && start > end, 0))
05044 {
05045
05046 *err = REG_BADBR;
05047 return NULL;
05048 }
05049 }
05050 else
05051 {
05052 start = (token->type == OP_DUP_PLUS) ? 1 : 0;
05053 end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
05054 }
05055
05056 fetch_token (token, regexp, syntax);
05057
05058 if (BE (elem == NULL, 0))
05059 return NULL;
05060 if (BE (start == 0 && end == 0, 0))
05061 {
05062 postorder (elem, free_tree, NULL);
05063 return NULL;
05064 }
05065
05066
05067 if (BE (start > 0, 0))
05068 {
05069 tree = elem;
05070 for (i = 2; i <= start; ++i)
05071 {
05072 elem = duplicate_tree (elem, dfa);
05073 tree = create_tree (dfa, tree, elem, CONCAT);
05074 if (BE (elem == NULL || tree == NULL, 0))
05075 goto parse_dup_op_espace;
05076 }
05077
05078 if (start == end)
05079 return tree;
05080
05081
05082 elem = duplicate_tree (elem, dfa);
05083 old_tree = tree;
05084 }
05085 else
05086 old_tree = NULL;
05087
05088 if (elem->token.type == SUBEXP)
05089 postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
05090
05091 tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
05092 if (BE (tree == NULL, 0))
05093 goto parse_dup_op_espace;
05094
05095
05096
05097
05098 for (i = start + 2; i <= end; ++i)
05099 {
05100 elem = duplicate_tree (elem, dfa);
05101 tree = create_tree (dfa, tree, elem, CONCAT);
05102 if (BE (elem == NULL || tree == NULL, 0))
05103 goto parse_dup_op_espace;
05104
05105 tree = create_tree (dfa, tree, NULL, OP_ALT);
05106 if (BE (tree == NULL, 0))
05107 goto parse_dup_op_espace;
05108 }
05109
05110 if (old_tree)
05111 tree = create_tree (dfa, old_tree, tree, CONCAT);
05112
05113 return tree;
05114
05115 parse_dup_op_espace:
05116 *err = REG_ESPACE;
05117 return NULL;
05118 }
05119
05120
05121
/* Size, in bytes, of the buffers that receive the name found inside a
   bracket symbol.  parse_bracket_exp declares its start/end name buffers
   with this size, and parse_bracket_symbol fails with REG_EBRACK once the
   write index reaches it, so a stored name plus its terminating NUL always
   fits.  */
05122 #define BRACKET_NAME_BUF_SIZE 32
05123
05124 #ifndef _LIBC
05125
05126
05127
05128
05129
05130
05131
05132 static reg_errcode_t
05133 internal_function
05134 # ifdef RE_ENABLE_I18N
05135 build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
05136 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
05137 # else
05138 build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
05139 bracket_elem_t *end_elem)
05140 # endif
05141 {
05142 unsigned int start_ch, end_ch;
05143
05144 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
05145 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
05146 0))
05147 return REG_ERANGE;
05148
05149
05150
05151 if (BE ((start_elem->type == COLL_SYM
05152 && strlen ((char *) start_elem->opr.name) > 1)
05153 || (end_elem->type == COLL_SYM
05154 && strlen ((char *) end_elem->opr.name) > 1), 0))
05155 return REG_ECOLLATE;
05156
05157 # ifdef RE_ENABLE_I18N
05158 {
05159 wchar_t wc;
05160 wint_t start_wc;
05161 wint_t end_wc;
05162 wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
05163
05164 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
05165 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
05166 : 0));
05167 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
05168 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
05169 : 0));
05170 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
05171 ? __btowc (start_ch) : start_elem->opr.wch);
05172 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
05173 ? __btowc (end_ch) : end_elem->opr.wch);
05174 if (start_wc == WEOF || end_wc == WEOF)
05175 return REG_ECOLLATE;
05176 cmp_buf[0] = start_wc;
05177 cmp_buf[4] = end_wc;
05178 if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
05179 return REG_ERANGE;
05180
05181
05182
05183
05184
05185
05186 if (mbcset)
05187 {
05188
05189 if (BE (*range_alloc == mbcset->nranges, 0))
05190 {
05191
05192 wchar_t *new_array_start, *new_array_end;
05193 int new_nranges;
05194
05195
05196 new_nranges = 2 * mbcset->nranges + 1;
05197
05198
05199 new_array_start = re_realloc (mbcset->range_starts, wchar_t,
05200 new_nranges);
05201 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
05202 new_nranges);
05203
05204 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
05205 return REG_ESPACE;
05206
05207 mbcset->range_starts = new_array_start;
05208 mbcset->range_ends = new_array_end;
05209 *range_alloc = new_nranges;
05210 }
05211
05212 mbcset->range_starts[mbcset->nranges] = start_wc;
05213 mbcset->range_ends[mbcset->nranges++] = end_wc;
05214 }
05215
05216
05217 for (wc = 0; wc < SBC_MAX; ++wc)
05218 {
05219 cmp_buf[2] = wc;
05220 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
05221 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
05222 bitset_set (sbcset, wc);
05223 }
05224 }
05225 # else
05226 {
05227 unsigned int ch;
05228 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
05229 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
05230 : 0));
05231 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
05232 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
05233 : 0));
05234 if (start_ch > end_ch)
05235 return REG_ERANGE;
05236
05237 for (ch = 0; ch < SBC_MAX; ++ch)
05238 if (start_ch <= ch && ch <= end_ch)
05239 bitset_set (sbcset, ch);
05240 }
05241 # endif
05242 return REG_NOERROR;
05243 }
05244 #endif
05245
05246 #ifndef _LIBC
05247
05248
05249
05250
05251
05252
05253 static reg_errcode_t
05254 internal_function
05255 # ifdef RE_ENABLE_I18N
05256 build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
05257 int *coll_sym_alloc, const unsigned char *name)
05258 # else
05259 build_collating_symbol (bitset_t sbcset, const unsigned char *name)
05260 # endif
05261 {
05262 size_t name_len = strlen ((const char *) name);
05263 if (BE (name_len != 1, 0))
05264 return REG_ECOLLATE;
05265 else
05266 {
05267 bitset_set (sbcset, name[0]);
05268 return REG_NOERROR;
05269 }
05270 }
05271 #endif
05272
05273
05274
05275
05276 static bin_tree_t *
05277 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
05278 reg_syntax_t syntax, reg_errcode_t *err)
05279 {
05280 #ifdef _LIBC
05281 const unsigned char *collseqmb;
05282 const char *collseqwc;
05283 uint32_t nrules;
05284 int32_t table_size;
05285 const int32_t *symb_table;
05286 const unsigned char *extra;
05287
05288
05289
05290
05291
05292 auto inline int32_t
05293 __attribute ((always_inline))
05294 seek_collating_symbol_entry (name, name_len)
05295 const unsigned char *name;
05296 size_t name_len;
05297 {
05298 int32_t hash = elem_hash ((const char *) name, name_len);
05299 int32_t elem = hash % table_size;
05300 if (symb_table[2 * elem] != 0)
05301 {
05302 int32_t second = hash % (table_size - 2) + 1;
05303
05304 do
05305 {
05306
05307 if (symb_table[2 * elem] == hash
05308
05309 && name_len == extra[symb_table[2 * elem + 1]]
05310
05311 && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
05312 name_len) == 0)
05313 {
05314
05315 break;
05316 }
05317
05318
05319 elem += second;
05320 }
05321 while (symb_table[2 * elem] != 0);
05322 }
05323 return elem;
05324 }
05325
05326
05327
05328
05329
05330 auto inline unsigned int
05331 __attribute ((always_inline))
05332 lookup_collation_sequence_value (br_elem)
05333 bracket_elem_t *br_elem;
05334 {
05335 if (br_elem->type == SB_CHAR)
05336 {
05337
05338
05339
05340 if (nrules == 0)
05341 return collseqmb[br_elem->opr.ch];
05342 else
05343 {
05344 wint_t wc = __btowc (br_elem->opr.ch);
05345 return __collseq_table_lookup (collseqwc, wc);
05346 }
05347 }
05348 else if (br_elem->type == MB_CHAR)
05349 {
05350 return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
05351 }
05352 else if (br_elem->type == COLL_SYM)
05353 {
05354 size_t sym_name_len = strlen ((char *) br_elem->opr.name);
05355 if (nrules != 0)
05356 {
05357 int32_t elem, idx;
05358 elem = seek_collating_symbol_entry (br_elem->opr.name,
05359 sym_name_len);
05360 if (symb_table[2 * elem] != 0)
05361 {
05362
05363 idx = symb_table[2 * elem + 1];
05364
05365 idx += 1 + extra[idx];
05366
05367 idx += 1 + extra[idx];
05368
05369 idx = (idx + 3) & ~3;
05370
05371 idx += sizeof (unsigned int);
05372
05373 idx += sizeof (unsigned int) *
05374 (1 + *(unsigned int *) (extra + idx));
05375
05376 return *(unsigned int *) (extra + idx);
05377 }
05378 else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
05379 {
05380
05381
05382 return collseqmb[br_elem->opr.name[0]];
05383 }
05384 }
05385 else if (sym_name_len == 1)
05386 return collseqmb[br_elem->opr.name[0]];
05387 }
05388 return UINT_MAX;
05389 }
05390
05391
05392
05393
05394
05395
05396
05397
05398 auto inline reg_errcode_t
05399 __attribute ((always_inline))
05400 build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
05401 re_charset_t *mbcset;
05402 int *range_alloc;
05403 bitset_t sbcset;
05404 bracket_elem_t *start_elem, *end_elem;
05405 {
05406 unsigned int ch;
05407 uint32_t start_collseq;
05408 uint32_t end_collseq;
05409
05410
05411
05412 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
05413 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
05414 0))
05415 return REG_ERANGE;
05416
05417 start_collseq = lookup_collation_sequence_value (start_elem);
05418 end_collseq = lookup_collation_sequence_value (end_elem);
05419
05420 if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
05421 return REG_ECOLLATE;
05422 if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
05423 return REG_ERANGE;
05424
05425
05426
05427
05428
05429 if (nrules > 0 || dfa->mb_cur_max > 1)
05430 {
05431
05432 if (BE (*range_alloc == mbcset->nranges, 0))
05433 {
05434
05435 uint32_t *new_array_start;
05436 uint32_t *new_array_end;
05437 int new_nranges;
05438
05439
05440 new_nranges = 2 * mbcset->nranges + 1;
05441 new_array_start = re_realloc (mbcset->range_starts, uint32_t,
05442 new_nranges);
05443 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
05444 new_nranges);
05445
05446 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
05447 return REG_ESPACE;
05448
05449 mbcset->range_starts = new_array_start;
05450 mbcset->range_ends = new_array_end;
05451 *range_alloc = new_nranges;
05452 }
05453
05454 mbcset->range_starts[mbcset->nranges] = start_collseq;
05455 mbcset->range_ends[mbcset->nranges++] = end_collseq;
05456 }
05457
05458
05459 for (ch = 0; ch < SBC_MAX; ch++)
05460 {
05461 uint32_t ch_collseq;
05462
05463
05464
05465 if (nrules == 0)
05466 ch_collseq = collseqmb[ch];
05467 else
05468 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
05469 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
05470 bitset_set (sbcset, ch);
05471 }
05472 return REG_NOERROR;
05473 }
05474
05475
05476
05477
05478
05479
05480
05481 auto inline reg_errcode_t
05482 __attribute ((always_inline))
05483 build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
05484 re_charset_t *mbcset;
05485 int *coll_sym_alloc;
05486 bitset_t sbcset;
05487 const unsigned char *name;
05488 {
05489 int32_t elem, idx;
05490 size_t name_len = strlen ((const char *) name);
05491 if (nrules != 0)
05492 {
05493 elem = seek_collating_symbol_entry (name, name_len);
05494 if (symb_table[2 * elem] != 0)
05495 {
05496
05497 idx = symb_table[2 * elem + 1];
05498
05499 idx += 1 + extra[idx];
05500 }
05501 else if (symb_table[2 * elem] == 0 && name_len == 1)
05502 {
05503
05504
05505 bitset_set (sbcset, name[0]);
05506 return REG_NOERROR;
05507 }
05508 else
05509 return REG_ECOLLATE;
05510
05511
05512
05513 if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
05514 {
05515
05516
05517 int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
05518
05519
05520 int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
05521 new_coll_sym_alloc);
05522 if (BE (new_coll_syms == NULL, 0))
05523 return REG_ESPACE;
05524 mbcset->coll_syms = new_coll_syms;
05525 *coll_sym_alloc = new_coll_sym_alloc;
05526 }
05527 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
05528 return REG_NOERROR;
05529 }
05530 else
05531 {
05532 if (BE (name_len != 1, 0))
05533 return REG_ECOLLATE;
05534 else
05535 {
05536 bitset_set (sbcset, name[0]);
05537 return REG_NOERROR;
05538 }
05539 }
05540 }
05541 #endif
05542
05543 re_token_t br_token;
05544 re_bitset_ptr_t sbcset;
05545 #ifdef RE_ENABLE_I18N
05546 re_charset_t *mbcset;
05547 int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
05548 int equiv_class_alloc = 0, char_class_alloc = 0;
05549 #endif
05550 int non_match = 0;
05551 bin_tree_t *work_tree;
05552 int token_len;
05553 int first_round = 1;
05554 #ifdef _LIBC
05555 collseqmb = (const unsigned char *)
05556 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
05557 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
05558 if (nrules)
05559 {
05560
05561
05562
05563 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
05564 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
05565 symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
05566 _NL_COLLATE_SYMB_TABLEMB);
05567 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
05568 _NL_COLLATE_SYMB_EXTRAMB);
05569 }
05570 #endif
05571 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
05572 #ifdef RE_ENABLE_I18N
05573 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
05574 #endif
05575 #ifdef RE_ENABLE_I18N
05576 if (BE (sbcset == NULL || mbcset == NULL, 0))
05577 #else
05578 if (BE (sbcset == NULL, 0))
05579 #endif
05580 {
05581 *err = REG_ESPACE;
05582 return NULL;
05583 }
05584
05585 token_len = peek_token_bracket (token, regexp, syntax);
05586 if (BE (token->type == END_OF_RE, 0))
05587 {
05588 *err = REG_BADPAT;
05589 goto parse_bracket_exp_free_return;
05590 }
05591 if (token->type == OP_NON_MATCH_LIST)
05592 {
05593 #ifdef RE_ENABLE_I18N
05594 mbcset->non_match = 1;
05595 #endif
05596 non_match = 1;
05597 if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
05598 bitset_set (sbcset, '\0');
05599 re_string_skip_bytes (regexp, token_len);
05600 token_len = peek_token_bracket (token, regexp, syntax);
05601 if (BE (token->type == END_OF_RE, 0))
05602 {
05603 *err = REG_BADPAT;
05604 goto parse_bracket_exp_free_return;
05605 }
05606 }
05607
05608
05609 if (token->type == OP_CLOSE_BRACKET)
05610 token->type = CHARACTER;
05611
05612 while (1)
05613 {
05614 bracket_elem_t start_elem, end_elem;
05615 unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
05616 unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
05617 reg_errcode_t ret;
05618 int token_len2 = 0, is_range_exp = 0;
05619 re_token_t token2;
05620
05621 start_elem.opr.name = start_name_buf;
05622 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
05623 syntax, first_round);
05624 if (BE (ret != REG_NOERROR, 0))
05625 {
05626 *err = ret;
05627 goto parse_bracket_exp_free_return;
05628 }
05629 first_round = 0;
05630
05631
05632 token_len = peek_token_bracket (token, regexp, syntax);
05633
05634
05635 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
05636 {
05637 if (BE (token->type == END_OF_RE, 0))
05638 {
05639 *err = REG_EBRACK;
05640 goto parse_bracket_exp_free_return;
05641 }
05642 if (token->type == OP_CHARSET_RANGE)
05643 {
05644 re_string_skip_bytes (regexp, token_len);
05645 token_len2 = peek_token_bracket (&token2, regexp, syntax);
05646 if (BE (token2.type == END_OF_RE, 0))
05647 {
05648 *err = REG_EBRACK;
05649 goto parse_bracket_exp_free_return;
05650 }
05651 if (token2.type == OP_CLOSE_BRACKET)
05652 {
05653
05654 re_string_skip_bytes (regexp, -token_len);
05655 token->type = CHARACTER;
05656 }
05657 else
05658 is_range_exp = 1;
05659 }
05660 }
05661
05662 if (is_range_exp == 1)
05663 {
05664 end_elem.opr.name = end_name_buf;
05665 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
05666 dfa, syntax, 1);
05667 if (BE (ret != REG_NOERROR, 0))
05668 {
05669 *err = ret;
05670 goto parse_bracket_exp_free_return;
05671 }
05672
05673 token_len = peek_token_bracket (token, regexp, syntax);
05674
05675 #ifdef _LIBC
05676 *err = build_range_exp (sbcset, mbcset, &range_alloc,
05677 &start_elem, &end_elem);
05678 #else
05679 # ifdef RE_ENABLE_I18N
05680 *err = build_range_exp (sbcset,
05681 dfa->mb_cur_max > 1 ? mbcset : NULL,
05682 &range_alloc, &start_elem, &end_elem);
05683 # else
05684 *err = build_range_exp (sbcset, &start_elem, &end_elem);
05685 # endif
05686 #endif
05687 if (BE (*err != REG_NOERROR, 0))
05688 goto parse_bracket_exp_free_return;
05689 }
05690 else
05691 {
05692 switch (start_elem.type)
05693 {
05694 case SB_CHAR:
05695 bitset_set (sbcset, start_elem.opr.ch);
05696 break;
05697 #ifdef RE_ENABLE_I18N
05698 case MB_CHAR:
05699
05700 if (BE (mbchar_alloc == mbcset->nmbchars, 0))
05701 {
05702 wchar_t *new_mbchars;
05703
05704
05705 mbchar_alloc = 2 * mbcset->nmbchars + 1;
05706
05707 new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
05708 mbchar_alloc);
05709 if (BE (new_mbchars == NULL, 0))
05710 goto parse_bracket_exp_espace;
05711 mbcset->mbchars = new_mbchars;
05712 }
05713 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
05714 break;
05715 #endif
05716 case EQUIV_CLASS:
05717 *err = build_equiv_class (sbcset,
05718 #ifdef RE_ENABLE_I18N
05719 mbcset, &equiv_class_alloc,
05720 #endif
05721 start_elem.opr.name);
05722 if (BE (*err != REG_NOERROR, 0))
05723 goto parse_bracket_exp_free_return;
05724 break;
05725 case COLL_SYM:
05726 *err = build_collating_symbol (sbcset,
05727 #ifdef RE_ENABLE_I18N
05728 mbcset, &coll_sym_alloc,
05729 #endif
05730 start_elem.opr.name);
05731 if (BE (*err != REG_NOERROR, 0))
05732 goto parse_bracket_exp_free_return;
05733 break;
05734 case CHAR_CLASS:
05735 *err = build_charclass (regexp->trans, sbcset,
05736 #ifdef RE_ENABLE_I18N
05737 mbcset, &char_class_alloc,
05738 #endif
05739 start_elem.opr.name, syntax);
05740 if (BE (*err != REG_NOERROR, 0))
05741 goto parse_bracket_exp_free_return;
05742 break;
05743 default:
05744 assert (0);
05745 break;
05746 }
05747 }
05748 if (BE (token->type == END_OF_RE, 0))
05749 {
05750 *err = REG_EBRACK;
05751 goto parse_bracket_exp_free_return;
05752 }
05753 if (token->type == OP_CLOSE_BRACKET)
05754 break;
05755 }
05756
05757 re_string_skip_bytes (regexp, token_len);
05758
05759
05760 if (non_match)
05761 bitset_not (sbcset);
05762
05763 #ifdef RE_ENABLE_I18N
05764
05765 if (dfa->mb_cur_max > 1)
05766 bitset_mask (sbcset, dfa->sb_char);
05767
05768 if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
05769 || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
05770 || mbcset->non_match)))
05771 {
05772 bin_tree_t *mbc_tree;
05773 int sbc_idx;
05774
05775 dfa->has_mb_node = 1;
05776 br_token.type = COMPLEX_BRACKET;
05777 br_token.opr.mbcset = mbcset;
05778 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
05779 if (BE (mbc_tree == NULL, 0))
05780 goto parse_bracket_exp_espace;
05781 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
05782 if (sbcset[sbc_idx])
05783 break;
05784
05785
05786 if (sbc_idx < BITSET_WORDS)
05787 {
05788
05789 br_token.type = SIMPLE_BRACKET;
05790 br_token.opr.sbcset = sbcset;
05791 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
05792 if (BE (work_tree == NULL, 0))
05793 goto parse_bracket_exp_espace;
05794
05795
05796 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
05797 if (BE (work_tree == NULL, 0))
05798 goto parse_bracket_exp_espace;
05799 }
05800 else
05801 {
05802 re_free (sbcset);
05803 work_tree = mbc_tree;
05804 }
05805 }
05806 else
05807 #endif
05808 {
05809 #ifdef RE_ENABLE_I18N
05810 free_charset (mbcset);
05811 #endif
05812
05813 br_token.type = SIMPLE_BRACKET;
05814 br_token.opr.sbcset = sbcset;
05815 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
05816 if (BE (work_tree == NULL, 0))
05817 goto parse_bracket_exp_espace;
05818 }
05819 return work_tree;
05820
05821 parse_bracket_exp_espace:
05822 *err = REG_ESPACE;
05823 parse_bracket_exp_free_return:
05824 re_free (sbcset);
05825 #ifdef RE_ENABLE_I18N
05826 free_charset (mbcset);
05827 #endif
05828 return NULL;
05829 }
05830
05831
05832
05833 static reg_errcode_t
05834 parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
05835 re_token_t *token, int token_len, re_dfa_t *dfa,
05836 reg_syntax_t syntax, int accept_hyphen)
05837 {
05838 #ifdef RE_ENABLE_I18N
05839 int cur_char_size;
05840 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
05841 if (cur_char_size > 1)
05842 {
05843 elem->type = MB_CHAR;
05844 elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
05845 re_string_skip_bytes (regexp, cur_char_size);
05846 return REG_NOERROR;
05847 }
05848 #endif
05849 re_string_skip_bytes (regexp, token_len);
05850 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
05851 || token->type == OP_OPEN_EQUIV_CLASS)
05852 return parse_bracket_symbol (elem, regexp, token);
05853 if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
05854 {
05855
05856
05857 re_token_t token2;
05858 (void) peek_token_bracket (&token2, regexp, syntax);
05859 if (token2.type != OP_CLOSE_BRACKET)
05860
05861
05862 return REG_ERANGE;
05863 }
05864 elem->type = SB_CHAR;
05865 elem->opr.ch = token->opr.c;
05866 return REG_NOERROR;
05867 }
05868
05869
05870
05871
05872
05873 static reg_errcode_t
05874 parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
05875 re_token_t *token)
05876 {
05877 unsigned char ch, delim = token->opr.c;
05878 int i = 0;
05879 if (re_string_eoi(regexp))
05880 return REG_EBRACK;
05881 for (;; ++i)
05882 {
05883 if (i >= BRACKET_NAME_BUF_SIZE)
05884 return REG_EBRACK;
05885 if (token->type == OP_OPEN_CHAR_CLASS)
05886 ch = re_string_fetch_byte_case (regexp);
05887 else
05888 ch = re_string_fetch_byte (regexp);
05889 if (re_string_eoi(regexp))
05890 return REG_EBRACK;
05891 if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
05892 break;
05893 elem->opr.name[i] = ch;
05894 }
05895 re_string_skip_bytes (regexp, 1);
05896 elem->opr.name[i] = '\0';
05897 switch (token->type)
05898 {
05899 case OP_OPEN_COLL_ELEM:
05900 elem->type = COLL_SYM;
05901 break;
05902 case OP_OPEN_EQUIV_CLASS:
05903 elem->type = EQUIV_CLASS;
05904 break;
05905 case OP_OPEN_CHAR_CLASS:
05906 elem->type = CHAR_CLASS;
05907 break;
05908 default:
05909 break;
05910 }
05911 return REG_NOERROR;
05912 }
05913
05914
05915
05916
05917
05918
05919
05920 static reg_errcode_t
05921 #ifdef RE_ENABLE_I18N
05922 build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
05923 int *equiv_class_alloc, const unsigned char *name)
05924 #else
05925 build_equiv_class (bitset_t sbcset, const unsigned char *name)
05926 #endif
05927 {
05928 #ifdef _LIBC
05929 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
05930 if (nrules != 0)
05931 {
05932 const int32_t *table, *indirect;
05933 const unsigned char *weights, *extra, *cp;
05934 unsigned char char_buf[2];
05935 int32_t idx1, idx2;
05936 unsigned int ch;
05937 size_t len;
05938
05939 # include <locale/weight.h>
05940
05941 cp = name;
05942 table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
05943 weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
05944 _NL_COLLATE_WEIGHTMB);
05945 extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
05946 _NL_COLLATE_EXTRAMB);
05947 indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
05948 _NL_COLLATE_INDIRECTMB);
05949 idx1 = findidx (&cp);
05950 if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
05951
05952 return REG_ECOLLATE;
05953
05954
05955 char_buf[1] = (unsigned char) '\0';
05956 len = weights[idx1];
05957 for (ch = 0; ch < SBC_MAX; ++ch)
05958 {
05959 char_buf[0] = ch;
05960 cp = char_buf;
05961 idx2 = findidx (&cp);
05962
05963
05964
05965 if (idx2 == 0)
05966
05967 continue;
05968 if (len == weights[idx2])
05969 {
05970 int cnt = 0;
05971 while (cnt <= len &&
05972 weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
05973 ++cnt;
05974
05975 if (cnt > len)
05976 bitset_set (sbcset, ch);
05977 }
05978 }
05979
05980 if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
05981 {
05982
05983
05984 int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
05985
05986 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
05987 int32_t,
05988 new_equiv_class_alloc);
05989 if (BE (new_equiv_classes == NULL, 0))
05990 return REG_ESPACE;
05991 mbcset->equiv_classes = new_equiv_classes;
05992 *equiv_class_alloc = new_equiv_class_alloc;
05993 }
05994 mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
05995 }
05996 else
05997 #endif
05998 {
05999 if (BE (strlen ((const char *) name) != 1, 0))
06000 return REG_ECOLLATE;
06001 bitset_set (sbcset, *name);
06002 }
06003 return REG_NOERROR;
06004 }
06005
06006
06007
06008
06009
06010
06011
06012 static reg_errcode_t
06013 #ifdef RE_ENABLE_I18N
06014 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
06015 re_charset_t *mbcset, int *char_class_alloc,
06016 const unsigned char *class_name, reg_syntax_t syntax)
06017 #else
06018 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
06019 const unsigned char *class_name, reg_syntax_t syntax)
06020 #endif
06021 {
06022 int i;
06023 const char *name = (const char *) class_name;
06024
06025
06026
06027 if ((syntax & RE_ICASE)
06028 && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
06029 name = "alpha";
06030
06031 #ifdef RE_ENABLE_I18N
06032
06033 if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
06034 {
06035
06036
06037 int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
06038
06039 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
06040 new_char_class_alloc);
06041 if (BE (new_char_classes == NULL, 0))
06042 return REG_ESPACE;
06043 mbcset->char_classes = new_char_classes;
06044 *char_class_alloc = new_char_class_alloc;
06045 }
06046 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
06047 #endif
06048
06049 #define BUILD_CHARCLASS_LOOP(ctype_func) \
06050 do { \
06051 if (BE (trans != NULL, 0)) \
06052 { \
06053 for (i = 0; i < SBC_MAX; ++i) \
06054 if (ctype_func (i)) \
06055 bitset_set (sbcset, trans[i]); \
06056 } \
06057 else \
06058 { \
06059 for (i = 0; i < SBC_MAX; ++i) \
06060 if (ctype_func (i)) \
06061 bitset_set (sbcset, i); \
06062 } \
06063 } while (0)
06064
06065 if (strcmp (name, "alnum") == 0)
06066 BUILD_CHARCLASS_LOOP (isalnum);
06067 else if (strcmp (name, "cntrl") == 0)
06068 BUILD_CHARCLASS_LOOP (iscntrl);
06069 else if (strcmp (name, "lower") == 0)
06070 BUILD_CHARCLASS_LOOP (islower);
06071 else if (strcmp (name, "space") == 0)
06072 BUILD_CHARCLASS_LOOP (isspace);
06073 else if (strcmp (name, "alpha") == 0)
06074 BUILD_CHARCLASS_LOOP (isalpha);
06075 else if (strcmp (name, "digit") == 0)
06076 BUILD_CHARCLASS_LOOP (isdigit);
06077 else if (strcmp (name, "print") == 0)
06078 BUILD_CHARCLASS_LOOP (isprint);
06079 else if (strcmp (name, "upper") == 0)
06080 BUILD_CHARCLASS_LOOP (isupper);
06081 else if (strcmp (name, "blank") == 0)
06082 BUILD_CHARCLASS_LOOP (isblank);
06083 else if (strcmp (name, "graph") == 0)
06084 BUILD_CHARCLASS_LOOP (isgraph);
06085 else if (strcmp (name, "punct") == 0)
06086 BUILD_CHARCLASS_LOOP (ispunct);
06087 else if (strcmp (name, "xdigit") == 0)
06088 BUILD_CHARCLASS_LOOP (isxdigit);
06089 else
06090 return REG_ECTYPE;
06091
06092 return REG_NOERROR;
06093 }
06094
06095 static bin_tree_t *
06096 build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
06097 const unsigned char *class_name,
06098 const unsigned char *extra, int non_match,
06099 reg_errcode_t *err)
06100 {
06101 re_bitset_ptr_t sbcset;
06102 #ifdef RE_ENABLE_I18N
06103 re_charset_t *mbcset;
06104 int alloc = 0;
06105 #endif
06106 reg_errcode_t ret;
06107 re_token_t br_token;
06108 bin_tree_t *tree;
06109
06110 sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
06111 #ifdef RE_ENABLE_I18N
06112 mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
06113 #endif
06114
06115 #ifdef RE_ENABLE_I18N
06116 if (BE (sbcset == NULL || mbcset == NULL, 0))
06117 #else
06118 if (BE (sbcset == NULL, 0))
06119 #endif
06120 {
06121 *err = REG_ESPACE;
06122 return NULL;
06123 }
06124
06125 if (non_match)
06126 {
06127 #ifdef RE_ENABLE_I18N
06128
06129
06130
06131
06132 mbcset->non_match = 1;
06133 #endif
06134 }
06135
06136
06137 ret = build_charclass (trans, sbcset,
06138 #ifdef RE_ENABLE_I18N
06139 mbcset, &alloc,
06140 #endif
06141 class_name, 0);
06142
06143 if (BE (ret != REG_NOERROR, 0))
06144 {
06145 re_free (sbcset);
06146 #ifdef RE_ENABLE_I18N
06147 free_charset (mbcset);
06148 #endif
06149 *err = ret;
06150 return NULL;
06151 }
06152
06153 for (; *extra; extra++)
06154 bitset_set (sbcset, *extra);
06155
06156
06157 if (non_match)
06158 bitset_not (sbcset);
06159
06160 #ifdef RE_ENABLE_I18N
06161
06162 if (dfa->mb_cur_max > 1)
06163 bitset_mask (sbcset, dfa->sb_char);
06164 #endif
06165
06166
06167 br_token.type = SIMPLE_BRACKET;
06168 br_token.opr.sbcset = sbcset;
06169 tree = create_token_tree (dfa, NULL, NULL, &br_token);
06170 if (BE (tree == NULL, 0))
06171 goto build_word_op_espace;
06172
06173 #ifdef RE_ENABLE_I18N
06174 if (dfa->mb_cur_max > 1)
06175 {
06176 bin_tree_t *mbc_tree;
06177
06178 br_token.type = COMPLEX_BRACKET;
06179 br_token.opr.mbcset = mbcset;
06180 dfa->has_mb_node = 1;
06181 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
06182 if (BE (mbc_tree == NULL, 0))
06183 goto build_word_op_espace;
06184
06185 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
06186 if (BE (mbc_tree != NULL, 1))
06187 return tree;
06188 }
06189 else
06190 {
06191 free_charset (mbcset);
06192 return tree;
06193 }
06194 #else
06195 return tree;
06196 #endif
06197
06198 build_word_op_espace:
06199 re_free (sbcset);
06200 #ifdef RE_ENABLE_I18N
06201 free_charset (mbcset);
06202 #endif
06203 *err = REG_ESPACE;
06204 return NULL;
06205 }
06206
06207
06208
06209
06210
06211
06212 static int
06213 fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
06214 {
06215 int num = -1;
06216 unsigned char c;
06217 while (1)
06218 {
06219 fetch_token (token, input, syntax);
06220 c = token->opr.c;
06221 if (BE (token->type == END_OF_RE, 0))
06222 return -2;
06223 if (token->type == OP_CLOSE_DUP_NUM || c == ',')
06224 break;
06225 num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
06226 ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
06227 num = (num > RE_DUP_MAX) ? -2 : num;
06228 }
06229 return num;
06230 }
06231
#ifdef RE_ENABLE_I18N
/* Release the multibyte character set CSET together with every array it
   owns.  CSET must not be used afterwards.

   The range arrays are freed in every build: the non-_LIBC variant of
   build_range_exp allocates range_starts/range_ends too, so freeing them
   only under _LIBC leaked them.  The coll_syms and equiv_classes arrays
   are only touched under _LIBC, as before.  */
static void
free_charset (re_charset_t *cset)
{
  re_free (cset->mbchars);
# ifdef _LIBC
  re_free (cset->coll_syms);
  re_free (cset->equiv_classes);
# endif
  re_free (cset->range_starts);
  re_free (cset->range_ends);
  re_free (cset->char_classes);
  re_free (cset);
}
#endif /* RE_ENABLE_I18N */
06247
06248
06249
06250
06251
06252 static bin_tree_t *
06253 create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
06254 re_token_type_t type)
06255 {
06256 re_token_t t;
06257 t.type = type;
06258 return create_token_tree (dfa, left, right, &t);
06259 }
06260
06261 static bin_tree_t *
06262 create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
06263 const re_token_t *token)
06264 {
06265 bin_tree_t *tree;
06266 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
06267 {
06268 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
06269
06270 if (storage == NULL)
06271 return NULL;
06272 storage->next = dfa->str_tree_storage;
06273 dfa->str_tree_storage = storage;
06274 dfa->str_tree_storage_idx = 0;
06275 }
06276 tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
06277
06278 tree->parent = NULL;
06279 tree->left = left;
06280 tree->right = right;
06281 tree->token = *token;
06282 tree->token.duplicated = 0;
06283 tree->token.opt_subexp = 0;
06284 tree->first = NULL;
06285 tree->next = NULL;
06286 tree->node_idx = -1;
06287
06288 if (left != NULL)
06289 left->parent = tree;
06290 if (right != NULL)
06291 right->parent = tree;
06292 return tree;
06293 }
06294
06295
06296
06297
06298 static reg_errcode_t
06299 mark_opt_subexp (void *extra, bin_tree_t *node)
06300 {
06301 int idx = (int) (long) extra;
06302 if (node->token.type == SUBEXP && node->token.opr.idx == idx)
06303 node->token.opt_subexp = 1;
06304
06305 return REG_NOERROR;
06306 }
06307
06308
06309
06310 static void
06311 free_token (re_token_t *node)
06312 {
06313 #ifdef RE_ENABLE_I18N
06314 if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
06315 free_charset (node->opr.mbcset);
06316 else
06317 #endif
06318 if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
06319 re_free (node->opr.sbcset);
06320 }
06321
06322
06323
06324
06325 static reg_errcode_t
06326 free_tree (void *extra, bin_tree_t *node)
06327 {
06328 free_token (&node->token);
06329 return REG_NOERROR;
06330 }
06331
06332
06333
06334
06335
06336
06337
06338 static bin_tree_t *
06339 duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
06340 {
06341 const bin_tree_t *node;
06342 bin_tree_t *dup_root;
06343 bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
06344
06345 for (node = root; ; )
06346 {
06347
06348 *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
06349 if (*p_new == NULL)
06350 return NULL;
06351 (*p_new)->parent = dup_node;
06352 (*p_new)->token.duplicated = 1;
06353 dup_node = *p_new;
06354
06355
06356 if (node->left)
06357 {
06358 node = node->left;
06359 p_new = &dup_node->left;
06360 }
06361 else
06362 {
06363 const bin_tree_t *prev = NULL;
06364 while (node->right == prev || node->right == NULL)
06365 {
06366 prev = node;
06367 node = node->parent;
06368 dup_node = dup_node->parent;
06369 if (!node)
06370 return dup_root;
06371 }
06372 node = node->right;
06373 p_new = &dup_node->right;
06374 }
06375 }
06376 }
06377
06378
06379
06380
06381
06382
06383
06384
/* Forward declarations of the file-local (static) helpers used by the
   matching code that follows.  Each prototype carries the
   `internal_function' marker.  Declarations guarded by RE_ENABLE_I18N or
   _LIBC exist only in those configurations, and the one inside `#if 0'
   is compiled out.  */
06385 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
06386 int n) internal_function;
06387 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
06388 static void match_ctx_free (re_match_context_t *cache) internal_function;
06389 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
06390 int str_idx, int from, int to)
06391 internal_function;
06392 static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
06393 internal_function;
06394 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
06395 int str_idx) internal_function;
06396 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
06397 int node, int str_idx)
06398 internal_function;
06399 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
06400 re_dfastate_t **limited_sts, int last_node,
06401 int last_str_idx)
06402 internal_function;
06403 static reg_errcode_t re_search_internal (const regex_t *preg,
06404 const char *string, int length,
06405 int start, int range, int stop,
06406 size_t nmatch, regmatch_t pmatch[],
06407 int eflags) internal_function;
06408 static int re_search_2_stub (struct re_pattern_buffer *bufp,
06409 const char *string1, int length1,
06410 const char *string2, int length2,
06411 int start, int range, struct re_registers *regs,
06412 int stop, int ret_len) internal_function;
06413 static int re_search_stub (struct re_pattern_buffer *bufp,
06414 const char *string, int length, int start,
06415 int range, int stop, struct re_registers *regs,
06416 int ret_len) internal_function;
06417 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
06418 int nregs, int regs_allocated) internal_function;
06419 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
06420 internal_function;
06421 static int check_matching (re_match_context_t *mctx, int fl_longest_match,
06422 int *p_match_first) internal_function;
06423 static int check_halt_state_context (const re_match_context_t *mctx,
06424 const re_dfastate_t *state, int idx)
06425 internal_function;
06426 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
06427 regmatch_t *prev_idx_match, int cur_node,
06428 int cur_idx, int nmatch) internal_function;
06429 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
06430 int str_idx, int dest_node, int nregs,
06431 regmatch_t *regs,
06432 re_node_set *eps_via_nodes)
06433 internal_function;
06434 static reg_errcode_t set_regs (const regex_t *preg,
06435 const re_match_context_t *mctx,
06436 size_t nmatch, regmatch_t *pmatch,
06437 int fl_backtrack) internal_function;
06438 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
06439 internal_function;
06440
06441 #ifdef RE_ENABLE_I18N
06442 static int sift_states_iter_mb (const re_match_context_t *mctx,
06443 re_sift_context_t *sctx,
06444 int node_idx, int str_idx, int max_str_idx)
06445 internal_function;
06446 #endif
06447 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
06448 re_sift_context_t *sctx)
06449 internal_function;
06450 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
06451 re_sift_context_t *sctx, int str_idx,
06452 re_node_set *cur_dest)
06453 internal_function;
06454 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
06455 re_sift_context_t *sctx,
06456 int str_idx,
06457 re_node_set *dest_nodes)
06458 internal_function;
06459 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
06460 re_node_set *dest_nodes,
06461 const re_node_set *candidates)
06462 internal_function;
06463 static int check_dst_limits (const re_match_context_t *mctx,
06464 re_node_set *limits,
06465 int dst_node, int dst_idx, int src_node,
06466 int src_idx) internal_function;
06467 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
06468 int boundaries, int subexp_idx,
06469 int from_node, int bkref_idx)
06470 internal_function;
06471 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
06472 int limit, int subexp_idx,
06473 int node, int str_idx,
06474 int bkref_idx) internal_function;
06475 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
06476 re_node_set *dest_nodes,
06477 const re_node_set *candidates,
06478 re_node_set *limits,
06479 struct re_backref_cache_entry *bkref_ents,
06480 int str_idx) internal_function;
06481 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
06482 re_sift_context_t *sctx,
06483 int str_idx, const re_node_set *candidates)
06484 internal_function;
06485 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
06486 re_dfastate_t **dst,
06487 re_dfastate_t **src, int num)
06488 internal_function;
06489 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
06490 re_match_context_t *mctx) internal_function;
06491 static re_dfastate_t *transit_state (reg_errcode_t *err,
06492 re_match_context_t *mctx,
06493 re_dfastate_t *state) internal_function;
06494 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
06495 re_match_context_t *mctx,
06496 re_dfastate_t *next_state)
06497 internal_function;
06498 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
06499 re_node_set *cur_nodes,
06500 int str_idx) internal_function;
/* The next prototype is disabled; no definition of it is visible in this
   part of the file.  */
06501 #if 0
06502 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
06503 re_match_context_t *mctx,
06504 re_dfastate_t *pstate)
06505 internal_function;
06506 #endif
06507 #ifdef RE_ENABLE_I18N
06508 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
06509 re_dfastate_t *pstate)
06510 internal_function;
06511 #endif
06512 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
06513 const re_node_set *nodes)
06514 internal_function;
06515 static reg_errcode_t get_subexp (re_match_context_t *mctx,
06516 int bkref_node, int bkref_str_idx)
06517 internal_function;
06518 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
06519 const re_sub_match_top_t *sub_top,
06520 re_sub_match_last_t *sub_last,
06521 int bkref_node, int bkref_str)
06522 internal_function;
06523 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
06524 int subexp_idx, int type) internal_function;
06525 static reg_errcode_t check_arrival (re_match_context_t *mctx,
06526 state_array_t *path, int top_node,
06527 int top_str, int last_node, int last_str,
06528 int type) internal_function;
06529 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
06530 int str_idx,
06531 re_node_set *cur_nodes,
06532 re_node_set *next_nodes)
06533 internal_function;
06534 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
06535 re_node_set *cur_nodes,
06536 int ex_subexp, int type)
06537 internal_function;
06538 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
06539 re_node_set *dst_nodes,
06540 int target, int ex_subexp,
06541 int type) internal_function;
06542 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
06543 re_node_set *cur_nodes, int cur_str,
06544 int subexp_num, int type)
06545 internal_function;
06546 static int build_trtable (const re_dfa_t *dfa,
06547 re_dfastate_t *state) internal_function;
06548 #ifdef RE_ENABLE_I18N
06549 static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
06550 const re_string_t *input, int idx)
06551 internal_function;
06552 # ifdef _LIBC
06553 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
06554 size_t name_len)
06555 internal_function;
06556 # endif
06557 #endif
06558 static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
06559 const re_dfastate_t *state,
06560 re_node_set *states_node,
06561 bitset_t *states_ch) internal_function;
06562 static int check_node_accept (const re_match_context_t *mctx,
06563 const re_token_t *node, int idx)
06564 internal_function;
06565 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
06566 internal_function;
06567
06568
06569
06570
06571
06572
06573
06574
06575
06576
06577
06578
06579
06580
06581
06582
06583
06584 int
06585 regexec (preg, string, nmatch, pmatch, eflags)
06586 const regex_t *__restrict preg;
06587 const char *__restrict string;
06588 size_t nmatch;
06589 regmatch_t pmatch[];
06590 int eflags;
06591 {
06592 reg_errcode_t err;
06593 int start, length;
06594 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
06595
06596 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
06597 return REG_BADPAT;
06598
06599 if (eflags & REG_STARTEND)
06600 {
06601 start = pmatch[0].rm_so;
06602 length = pmatch[0].rm_eo;
06603 }
06604 else
06605 {
06606 start = 0;
06607 length = strlen (string);
06608 }
06609
06610 __libc_lock_lock (dfa->lock);
06611 if (preg->no_sub)
06612 err = re_search_internal (preg, string, length, start, length - start,
06613 length, 0, NULL, eflags);
06614 else
06615 err = re_search_internal (preg, string, length, start, length - start,
06616 length, nmatch, pmatch, eflags);
06617 __libc_lock_unlock (dfa->lock);
06618 return err != REG_NOERROR;
06619 }
06620
#ifdef _LIBC
# include <shlib-compat.h>
versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);

# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
__typeof__ (__regexec) __compat_regexec;

/* Compatibility entry point bound to the GLIBC_2_0 symbol version: it
   forwards to regexec after dropping every EFLAGS bit other than
   REG_NOTBOL and REG_NOTEOL.  */
int
attribute_compat_text_section
__compat_regexec (const regex_t *__restrict preg,
                  const char *__restrict string, size_t nmatch,
                  regmatch_t pmatch[], int eflags)
{
  const int legacy_flags = eflags & (REG_NOTBOL | REG_NOTEOL);

  return regexec (preg, string, nmatch, pmatch, legacy_flags);
}
compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
# endif
#endif
06640
06641
06642
06643
06644
06645
06646
06647
06648
06649
06650
06651
06652
06653
06654
06655
06656
06657
06658
06659
06660
06661
06662
06663
06664
06665
06666
06667
06668
06669
/* Try to match the pattern in BUFP against STRING (LENGTH bytes) at
   position START only.  Implemented as a search with a range of zero that
   asks re_search_stub for the length of the match rather than its start.
   The return value is whatever re_search_stub returns.  */

int
re_match (struct re_pattern_buffer *bufp, const char *string, int length,
          int start, struct re_registers *regs)
{
  const int no_range = 0;       /* Do not try any other start position.  */
  const int want_length = 1;    /* Report the match length.  */

  return re_search_stub (bufp, string, length, start, no_range, length,
                         regs, want_length);
}
#ifdef _LIBC
weak_alias (__re_match, re_match)
#endif
06682
/* Search STRING (LENGTH bytes) for the pattern in BUFP, trying start
   positions from START across RANGE.  Delegates to re_search_stub with
   the stop index set to LENGTH and asks for the start of the match.  The
   return value is whatever re_search_stub returns.  */

int
re_search (struct re_pattern_buffer *bufp, const char *string, int length,
           int start, int range, struct re_registers *regs)
{
  const int want_length = 0;    /* Report the match position.  */

  return re_search_stub (bufp, string, length, start, range, length,
                         regs, want_length);
}
#ifdef _LIBC
weak_alias (__re_search, re_search)
#endif
06695
/* Like re_match, but the subject is given as two pieces, STRING1
   (LENGTH1 bytes) followed by STRING2 (LENGTH2 bytes), and STOP is
   passed through as the stop index.  Delegates to re_search_2_stub with
   a range of zero, asking for the match length.  */

int
re_match_2 (struct re_pattern_buffer *bufp,
            const char *string1, int length1,
            const char *string2, int length2,
            int start, struct re_registers *regs, int stop)
{
  const int no_range = 0;
  const int want_length = 1;

  return re_search_2_stub (bufp, string1, length1, string2, length2,
                           start, no_range, regs, stop, want_length);
}
#ifdef _LIBC
weak_alias (__re_match_2, re_match_2)
#endif
06709
/* Like re_search, but the subject is given as two pieces, STRING1
   (LENGTH1 bytes) followed by STRING2 (LENGTH2 bytes), and STOP is
   passed through as the stop index.  Delegates to re_search_2_stub,
   asking for the match position.  */

int
re_search_2 (struct re_pattern_buffer *bufp,
             const char *string1, int length1,
             const char *string2, int length2,
             int start, int range, struct re_registers *regs, int stop)
{
  const int want_length = 0;

  return re_search_2_stub (bufp, string1, length1, string2, length2,
                           start, range, regs, stop, want_length);
}
#ifdef _LIBC
weak_alias (__re_search_2, re_search_2)
#endif
06723
06724 static int
06725 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
06726 stop, ret_len)
06727 struct re_pattern_buffer *bufp;
06728 const char *string1, *string2;
06729 int length1, length2, start, range, stop, ret_len;
06730 struct re_registers *regs;
06731 {
06732 const char *str;
06733 int rval;
06734 int len = length1 + length2;
06735 int free_str = 0;
06736
06737 if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
06738 return -2;
06739
06740
06741 if (length2 > 0)
06742 if (length1 > 0)
06743 {
06744 char *s = re_malloc (char, len);
06745
06746 if (BE (s == NULL, 0))
06747 return -2;
06748 #ifdef _LIBC
06749 memcpy (__mempcpy (s, string1, length1), string2, length2);
06750 #else
06751 memcpy (s, string1, length1);
06752 memcpy (s + length1, string2, length2);
06753 #endif
06754 str = s;
06755 free_str = 1;
06756 }
06757 else
06758 str = string2;
06759 else
06760 str = string1;
06761
06762 rval = re_search_stub (bufp, str, len, start, range, stop, regs,
06763 ret_len);
06764 if (free_str)
06765 re_free ((char *) str);
06766 return rval;
06767 }
06768
06769
06770
06771
06772
06773
06774 static int
06775 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
06776 struct re_pattern_buffer *bufp;
06777 const char *string;
06778 int length, start, range, stop, ret_len;
06779 struct re_registers *regs;
06780 {
06781 reg_errcode_t result;
06782 regmatch_t *pmatch;
06783 int nregs, rval;
06784 int eflags = 0;
06785 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
06786
06787
06788 if (BE (start < 0 || start > length, 0))
06789 return -1;
06790 if (BE (start + range > length, 0))
06791 range = length - start;
06792 else if (BE (start + range < 0, 0))
06793 range = -start;
06794
06795 __libc_lock_lock (dfa->lock);
06796
06797 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
06798 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
06799
06800
06801 if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
06802 re_compile_fastmap (bufp);
06803
06804 if (BE (bufp->no_sub, 0))
06805 regs = NULL;
06806
06807
06808 if (regs == NULL)
06809 nregs = 1;
06810 else if (BE (bufp->regs_allocated == REGS_FIXED &&
06811 regs->num_regs < bufp->re_nsub + 1, 0))
06812 {
06813 nregs = regs->num_regs;
06814 if (BE (nregs < 1, 0))
06815 {
06816
06817 regs = NULL;
06818 nregs = 1;
06819 }
06820 }
06821 else
06822 nregs = bufp->re_nsub + 1;
06823 pmatch = re_malloc (regmatch_t, nregs);
06824 if (BE (pmatch == NULL, 0))
06825 {
06826 rval = -2;
06827 goto out;
06828 }
06829
06830 result = re_search_internal (bufp, string, length, start, range, stop,
06831 nregs, pmatch, eflags);
06832
06833 rval = 0;
06834
06835
06836 if (result != REG_NOERROR)
06837 rval = -1;
06838 else if (regs != NULL)
06839 {
06840
06841 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
06842 bufp->regs_allocated);
06843 if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
06844 rval = -2;
06845 }
06846
06847 if (BE (rval == 0, 1))
06848 {
06849 if (ret_len)
06850 {
06851 assert (pmatch[0].rm_so == start);
06852 rval = pmatch[0].rm_eo - start;
06853 }
06854 else
06855 rval = pmatch[0].rm_so;
06856 }
06857 re_free (pmatch);
06858 out:
06859 __libc_lock_unlock (dfa->lock);
06860 return rval;
06861 }
06862
06863 static unsigned
06864 re_copy_regs (regs, pmatch, nregs, regs_allocated)
06865 struct re_registers *regs;
06866 regmatch_t *pmatch;
06867 int nregs, regs_allocated;
06868 {
06869 int rval = REGS_REALLOCATE;
06870 int i;
06871 int need_regs = nregs + 1;
06872
06873
06874
06875
06876 if (regs_allocated == REGS_UNALLOCATED)
06877 {
06878 regs->start = re_malloc (regoff_t, need_regs);
06879 regs->end = re_malloc (regoff_t, need_regs);
06880 if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
06881 return REGS_UNALLOCATED;
06882 regs->num_regs = need_regs;
06883 }
06884 else if (regs_allocated == REGS_REALLOCATE)
06885 {
06886
06887
06888 if (BE (need_regs > regs->num_regs, 0))
06889 {
06890 regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
06891 regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
06892 if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
06893 return REGS_UNALLOCATED;
06894 regs->start = new_start;
06895 regs->end = new_end;
06896 regs->num_regs = need_regs;
06897 }
06898 }
06899 else
06900 {
06901 assert (regs_allocated == REGS_FIXED);
06902
06903 assert (regs->num_regs >= nregs);
06904 rval = REGS_FIXED;
06905 }
06906
06907
06908 for (i = 0; i < nregs; ++i)
06909 {
06910 regs->start[i] = pmatch[i].rm_so;
06911 regs->end[i] = pmatch[i].rm_eo;
06912 }
06913 for ( ; i < regs->num_regs; ++i)
06914 regs->start[i] = regs->end[i] = -1;
06915
06916 return rval;
06917 }
06918
06919
06920
06921
06922
06923
06924
06925
06926
06927
06928
06929
06930
06931
06932 void
06933 re_set_registers (bufp, regs, num_regs, starts, ends)
06934 struct re_pattern_buffer *bufp;
06935 struct re_registers *regs;
06936 unsigned num_regs;
06937 regoff_t *starts, *ends;
06938 {
06939 if (num_regs)
06940 {
06941 bufp->regs_allocated = REGS_REALLOCATE;
06942 regs->num_regs = num_regs;
06943 regs->start = starts;
06944 regs->end = ends;
06945 }
06946 else
06947 {
06948 bufp->regs_allocated = REGS_UNALLOCATED;
06949 regs->num_regs = 0;
06950 regs->start = regs->end = (regoff_t *) 0;
06951 }
06952 }
06953 #ifdef _LIBC
06954 weak_alias (__re_set_registers, re_set_registers)
06955 #endif
06956
06957
06958
06959
#if defined _REGEX_RE_COMP || defined _LIBC
/* Match S against the pattern held in re_comp_buf, requesting no match
   registers and no flags.  Returns 1 if regexec reports a match (returns
   0) and 0 otherwise.  */
int
# ifdef _LIBC
weak_function
# endif
re_exec (const char *s)
{
  const int rc = regexec (&re_comp_buf, s, 0, NULL, 0);

  return rc == 0;
}
#endif
06971
06972
06973
06974
06975
06976
06977
06978
06979
06980
06981
06982
/* Core search routine behind regexec and re_search_stub.

   Searches STRING (LENGTH bytes) for a match of the compiled pattern
   PREG.  Candidate start positions run from START towards START + RANGE
   (downwards when RANGE is negative).  STOP is stored as the stop index
   of the input string.  EFLAGS is passed on to the string and match
   context helpers.

   On success returns REG_NOERROR and, if NMATCH is non-zero, fills
   PMATCH[0 .. NMATCH-1] with offsets into STRING; entries that took no
   part in the match are set to -1.  Returns REG_NOMATCH when no start
   position yields a match, and REG_ESPACE or another helper error on
   failure.  */
06983 static reg_errcode_t
06984 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
06985 eflags)
06986 const regex_t *preg;
06987 const char *string;
06988 int length, start, range, stop, eflags;
06989 size_t nmatch;
06990 regmatch_t pmatch[];
06991 {
06992 reg_errcode_t err;
06993 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
06994 int left_lim, right_lim, incr;
06995 int fl_longest_match, match_first, match_kind, match_last = -1;
06996 int extra_nmatch;
06997 int sb, ch;
/* With a C99 (or glibc) compiler the context is zero-initialised by the
   designated initialiser; otherwise it is cleared with memset below.  */
06998 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
06999 re_match_context_t mctx = { .dfa = dfa };
07000 #else
07001 re_match_context_t mctx;
07002 #endif
/* The fastmap is used only if it exists, is accurate, RANGE is non-zero
   and the pattern cannot match the empty string.  */
07003 char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
07004 && range && !preg->can_be_null) ? preg->fastmap : NULL;
07005 RE_TRANSLATE_TYPE t = preg->translate;
07006
07007 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
07008 memset (&mctx, '\0', sizeof (re_match_context_t));
07009 mctx.dfa = dfa;
07010 #endif
07011
/* Registers beyond re_nsub + 1 cannot belong to any group.  Remember how
   many there are (they are set to -1 near the end) and trim NMATCH.  */
07012 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
07013 nmatch -= extra_nmatch;
07014
07015
/* An unused pattern buffer or a missing initial state can never match.  */
07016 if (BE (preg->used == 0 || dfa->init_state == NULL
07017 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
07018 || dfa->init_state_begbuf == NULL, 0))
07019 return REG_NOMATCH;
07020
07021 #ifdef DEBUG
07022
07023 assert (start + range >= 0 && start + range <= length);
07024 #endif
07025
07026
07027
07028
/* When the ordinary and word initial states hold no nodes (and the
   newline one holds none or newline_anchor is off), only offset 0 is
   tried: fail unless 0 is one end of the requested range, then restrict
   the search to that single position.  */
07029 if (dfa->init_state->nodes.nelem == 0
07030 && dfa->init_state_word->nodes.nelem == 0
07031 && (dfa->init_state_nl->nodes.nelem == 0
07032 || !preg->newline_anchor))
07033 {
07034 if (start != 0 && start + range != 0)
07035 return REG_NOMATCH;
07036 start = range = 0;
07037 }
07038
07039
/* Ask check_matching for the longest match when registers are wanted or
   the pattern contains back-references.  */
07040 fl_longest_match = (nmatch != 0 || dfa->nbackref);
07041
07042 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
07043 preg->translate, preg->syntax & RE_ICASE, dfa);
07044 if (BE (err != REG_NOERROR, 0))
07045 goto free_return;
07046 mctx.input.stop = stop;
07047 mctx.input.raw_stop = stop;
07048 mctx.input.newline_anchor = preg->newline_anchor;
07049
07050 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
07051 if (BE (err != REG_NOERROR, 0))
07052 goto free_return;
07053
07054
07055
07056
07057
/* The state log is allocated only when more than one register is
   requested or dfa->has_mb_node is set; otherwise it stays NULL.  */
07058 if (nmatch > 1 || dfa->has_mb_node)
07059 {
07060 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
07061 if (BE (mctx.state_log == NULL, 0))
07062 {
07063 err = REG_ESPACE;
07064 goto free_return;
07065 }
07066 }
07067 else
07068 mctx.state_log = NULL;
07069
07070 match_first = start;
07071 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
07072 : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
07073
07074
/* Direction of the scan and the inclusive limits for match_first.  */
07075 incr = (range < 0) ? -1 : 1;
07076 left_lim = (range < 0) ? start + range : start;
07077 right_lim = (range < 0) ? start : start + range;
07078 sb = dfa->mb_cur_max == 1;
/* match_kind selects how candidate start positions are skipped:
     8      no fastmap, every position is tried;
     else   a 3-bit code: 4 = bytes are read straight from STRING
            (single-byte case, or neither RE_ICASE nor a translate
            table), 2 = forward search, 1 = translate table present.  */
07079 match_kind =
07080 (fastmap
07081 ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
07082 | (range >= 0 ? 2 : 0)
07083 | (t != NULL ? 1 : 0))
07084 : 8);
07085
/* Try each candidate start position in turn.  */
07086 for (;; match_first += incr)
07087 {
07088 err = REG_NOMATCH;
07089 if (match_first < left_lim || right_lim < match_first)
07090 goto free_return;
07091
07092
07093
07094
07095
07096
07097 switch (match_kind)
07098 {
07099 case 8:
/* No fastmap: nothing to skip.  */
07100
07101 break;
07102
07103 case 7:
/* Forward scan with a translate table.  */
07104
07105 while (BE (match_first < right_lim, 1)
07106 && !fastmap[t[(unsigned char) string[match_first]]])
07107 ++match_first;
07108 goto forward_match_found_start_or_reached_end;
07109
07110 case 6:
/* Forward scan without a translate table.  */
07111
07112 while (BE (match_first < right_lim, 1)
07113 && !fastmap[(unsigned char) string[match_first]])
07114 ++match_first;
07115
07116 forward_match_found_start_or_reached_end:
/* At right_lim, test the byte there (0 when past the end of STRING) and
   give up with REG_NOMATCH if it cannot start a match.  */
07117 if (BE (match_first == right_lim, 0))
07118 {
07119 ch = match_first >= length
07120 ? 0 : (unsigned char) string[match_first];
07121 if (!fastmap[t ? t[ch] : ch])
07122 goto free_return;
07123 }
07124 break;
07125
07126 case 4:
07127 case 5:
/* Backward scan, with or without a translate table.  */
07128
07129 while (match_first >= left_lim)
07130 {
07131 ch = match_first >= length
07132 ? 0 : (unsigned char) string[match_first];
07133 if (fastmap[t ? t[ch] : ch])
07134 break;
07135 --match_first;
07136 }
07137 if (match_first < left_lim)
07138 goto free_return;
07139 break;
07140
07141 default:
/* Remaining kinds (bit 4 clear): bytes are fetched through the re_string
   buffer, which is rebuilt whenever match_first falls outside its valid
   raw range.  */
07142
07143
07144
07145 for (;;)
07146 {
07147
07148
07149 unsigned int offset = match_first - mctx.input.raw_mbs_idx;
07150 if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
07151 {
07152 err = re_string_reconstruct (&mctx.input, match_first,
07153 eflags);
07154 if (BE (err != REG_NOERROR, 0))
07155 goto free_return;
07156
07157 offset = match_first - mctx.input.raw_mbs_idx;
07158 }
07159
07160
07161 ch = (match_first >= length
07162 ? 0 : re_string_byte_at (&mctx.input, offset));
07163 if (fastmap[ch])
07164 break;
07165 match_first += incr;
07166 if (match_first < left_lim || match_first > right_lim)
07167 {
07168 err = REG_NOMATCH;
07169 goto free_return;
07170 }
07171 }
07172 break;
07173 }
07174
07175
07176
/* Position the input buffer at the candidate start.  */
07177 err = re_string_reconstruct (&mctx.input, match_first, eflags);
07178 if (BE (err != REG_NOERROR, 0))
07179 goto free_return;
07180
07181 #ifdef RE_ENABLE_I18N
07182
07183
/* In the multibyte case, skip candidates for which re_string_first_byte
   is false (presumably positions inside a multibyte character; confirm
   against that macro).  */
07184 if (!sb && !re_string_first_byte (&mctx.input, 0))
07185 continue;
07186 #endif
07187
07188
07189
/* Reset the per-attempt counters and run the matcher.  check_matching
   yields -1 for no match and -2 for an error (mapped to REG_ESPACE);
   any other value is stored as the end index of the match.  */
07190 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
07191 match_last = check_matching (&mctx, fl_longest_match,
07192 range >= 0 ? &match_first : NULL);
07193 if (match_last != -1)
07194 {
07195 if (BE (match_last == -2, 0))
07196 {
07197 err = REG_ESPACE;
07198 goto free_return;
07199 }
07200 else
07201 {
07202 mctx.match_last = match_last;
07203 if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
07204 {
07205 re_dfastate_t *pstate = mctx.state_log[match_last];
07206 mctx.last_node = check_halt_state_context (&mctx, pstate,
07207 match_last);
07208 }
/* With sub-matches over a has_plural_match pattern, or with
   back-references, the logged states are pruned.  REG_NOMATCH from the
   pruning step makes the loop move on to the next candidate.  */
07209 if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
07210 || dfa->nbackref)
07211 {
07212 err = prune_impossible_nodes (&mctx);
07213 if (err == REG_NOERROR)
07214 break;
07215 if (BE (err != REG_NOMATCH, 0))
07216 goto free_return;
07217 match_last = -1;
07218 }
07219 else
07220 break;
07221 }
07222 }
07223
07224 match_ctx_clean (&mctx);
07225 }
07226
07227 #ifdef DEBUG
07228 assert (match_last != -1);
07229 assert (err == REG_NOERROR);
07230 #endif
07231
07232
/* A match was found: fill in the registers if any were requested.  */
07233 if (nmatch > 0)
07234 {
07235 int reg_idx;
07236
07237
07238 for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
07239 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
07240
07241
/* Offsets are first recorded relative to match_first and shifted to
   absolute positions further down.  */
07242 pmatch[0].rm_so = 0;
07243 pmatch[0].rm_eo = mctx.match_last;
07244
07245 if (!preg->no_sub && nmatch > 1)
07246 {
07247 err = set_regs (preg, &mctx, nmatch, pmatch,
07248 dfa->has_plural_match && dfa->nbackref > 0);
07249 if (BE (err != REG_NOERROR, 0))
07250 goto free_return;
07251 }
07252
07253
07254
07255
/* Convert every used register to an absolute offset: when
   offsets_needed is set, map it through the input's offsets table (the
   end of the valid buffer maps to valid_raw_len), then add match_first.  */
07256 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
07257 if (pmatch[reg_idx].rm_so != -1)
07258 {
07259 #ifdef RE_ENABLE_I18N
07260 if (BE (mctx.input.offsets_needed != 0, 0))
07261 {
07262 pmatch[reg_idx].rm_so =
07263 (pmatch[reg_idx].rm_so == mctx.input.valid_len
07264 ? mctx.input.valid_raw_len
07265 : mctx.input.offsets[pmatch[reg_idx].rm_so]);
07266 pmatch[reg_idx].rm_eo =
07267 (pmatch[reg_idx].rm_eo == mctx.input.valid_len
07268 ? mctx.input.valid_raw_len
07269 : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
07270 }
07271 #else
07272 assert (mctx.input.offsets_needed == 0);
07273 #endif
07274 pmatch[reg_idx].rm_so += match_first;
07275 pmatch[reg_idx].rm_eo += match_first;
07276 }
/* Registers the pattern has no group for are reported as unused.  */
07277 for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
07278 {
07279 pmatch[nmatch + reg_idx].rm_so = -1;
07280 pmatch[nmatch + reg_idx].rm_eo = -1;
07281 }
07282
/* Where subexp_map redirects a group to another one, copy that group's
   register.  */
07283 if (dfa->subexp_map)
07284 for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
07285 if (dfa->subexp_map[reg_idx] != reg_idx)
07286 {
07287 pmatch[reg_idx + 1].rm_so
07288 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
07289 pmatch[reg_idx + 1].rm_eo
07290 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
07291 }
07292 }
07293
/* Common exit: release the state log, the match context (only freed
   when the pattern has back-references) and the input string.  */
07294 free_return:
07295 re_free (mctx.state_log);
07296 if (dfa->nbackref)
07297 match_ctx_free (&mctx);
07298 re_string_destruct (&mctx.input);
07299 return err;
07300 }
07301
07302 static reg_errcode_t
07303 prune_impossible_nodes (mctx)
07304 re_match_context_t *mctx;
07305 {
07306 const re_dfa_t *const dfa = mctx->dfa;
07307 int halt_node, match_last;
07308 reg_errcode_t ret;
07309 re_dfastate_t **sifted_states;
07310 re_dfastate_t **lim_states = NULL;
07311 re_sift_context_t sctx;
07312 #ifdef DEBUG
07313 assert (mctx->state_log != NULL);
07314 #endif
07315 match_last = mctx->match_last;
07316 halt_node = mctx->last_node;
07317 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
07318 if (BE (sifted_states == NULL, 0))
07319 {
07320 ret = REG_ESPACE;
07321 goto free_return;
07322 }
07323 if (dfa->nbackref)
07324 {
07325 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
07326 if (BE (lim_states == NULL, 0))
07327 {
07328 ret = REG_ESPACE;
07329 goto free_return;
07330 }
07331 while (1)
07332 {
07333 memset (lim_states, '\0',
07334 sizeof (re_dfastate_t *) * (match_last + 1));
07335 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
07336 match_last);
07337 ret = sift_states_backward (mctx, &sctx);
07338 re_node_set_free (&sctx.limits);
07339 if (BE (ret != REG_NOERROR, 0))
07340 goto free_return;
07341 if (sifted_states[0] != NULL || lim_states[0] != NULL)
07342 break;
07343 do
07344 {
07345 --match_last;
07346 if (match_last < 0)
07347 {
07348 ret = REG_NOMATCH;
07349 goto free_return;
07350 }
07351 } while (mctx->state_log[match_last] == NULL
07352 || !mctx->state_log[match_last]->halt);
07353 halt_node = check_halt_state_context (mctx,
07354 mctx->state_log[match_last],
07355 match_last);
07356 }
07357 ret = merge_state_array (dfa, sifted_states, lim_states,
07358 match_last + 1);
07359 re_free (lim_states);
07360 lim_states = NULL;
07361 if (BE (ret != REG_NOERROR, 0))
07362 goto free_return;
07363 }
07364 else
07365 {
07366 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
07367 ret = sift_states_backward (mctx, &sctx);
07368 re_node_set_free (&sctx.limits);
07369 if (BE (ret != REG_NOERROR, 0))
07370 goto free_return;
07371 }
07372 re_free (mctx->state_log);
07373 mctx->state_log = sifted_states;
07374 sifted_states = NULL;
07375 mctx->last_node = halt_node;
07376 mctx->match_last = match_last;
07377 ret = REG_NOERROR;
07378 free_return:
07379 re_free (sifted_states);
07380 re_free (lim_states);
07381 return ret;
07382 }
07383
07384
07385
07386
07387
07388 static inline re_dfastate_t *
07389 __attribute ((always_inline)) internal_function
07390 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
07391 int idx)
07392 {
07393 const re_dfa_t *const dfa = mctx->dfa;
07394 if (dfa->init_state->has_constraint)
07395 {
07396 unsigned int context;
07397 context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
07398 if (IS_WORD_CONTEXT (context))
07399 return dfa->init_state_word;
07400 else if (IS_ORDINARY_CONTEXT (context))
07401 return dfa->init_state;
07402 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
07403 return dfa->init_state_begbuf;
07404 else if (IS_NEWLINE_CONTEXT (context))
07405 return dfa->init_state_nl;
07406 else if (IS_BEGBUF_CONTEXT (context))
07407 {
07408
07409 return re_acquire_state_context (err, dfa,
07410 dfa->init_state->entrance_nodes,
07411 context);
07412 }
07413 else
07414
07415 return dfa->init_state;
07416 }
07417 else
07418 return dfa->init_state;
07419 }
07420
07421
07422
07423
07424
07425
07426
07427
07428
07429
07430 static int
07431 internal_function
07432 check_matching (re_match_context_t *mctx, int fl_longest_match,
07433 int *p_match_first)
07434 {
07435 const re_dfa_t *const dfa = mctx->dfa;
07436 reg_errcode_t err;
07437 int match = 0;
07438 int match_last = -1;
07439 int cur_str_idx = re_string_cur_idx (&mctx->input);
07440 re_dfastate_t *cur_state;
07441 int at_init_state = p_match_first != NULL;
07442 int next_start_idx = cur_str_idx;
07443
07444 err = REG_NOERROR;
07445 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
07446
07447 if (BE (cur_state == NULL, 0))
07448 {
07449 assert (err == REG_ESPACE);
07450 return -2;
07451 }
07452
07453 if (mctx->state_log != NULL)
07454 {
07455 mctx->state_log[cur_str_idx] = cur_state;
07456
07457
07458
07459 if (BE (dfa->nbackref, 0))
07460 {
07461 at_init_state = 0;
07462 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
07463 if (BE (err != REG_NOERROR, 0))
07464 return err;
07465
07466 if (cur_state->has_backref)
07467 {
07468 err = transit_state_bkref (mctx, &cur_state->nodes);
07469 if (BE (err != REG_NOERROR, 0))
07470 return err;
07471 }
07472 }
07473 }
07474
07475
07476 if (BE (cur_state->halt, 0))
07477 {
07478 if (!cur_state->has_constraint
07479 || check_halt_state_context (mctx, cur_state, cur_str_idx))
07480 {
07481 if (!fl_longest_match)
07482 return cur_str_idx;
07483 else
07484 {
07485 match_last = cur_str_idx;
07486 match = 1;
07487 }
07488 }
07489 }
07490
07491 while (!re_string_eoi (&mctx->input))
07492 {
07493 re_dfastate_t *old_state = cur_state;
07494 int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
07495
07496 if (BE (next_char_idx >= mctx->input.bufs_len, 0)
07497 || (BE (next_char_idx >= mctx->input.valid_len, 0)
07498 && mctx->input.valid_len < mctx->input.len))
07499 {
07500 err = extend_buffers (mctx);
07501 if (BE (err != REG_NOERROR, 0))
07502 {
07503 assert (err == REG_ESPACE);
07504 return -2;
07505 }
07506 }
07507
07508 cur_state = transit_state (&err, mctx, cur_state);
07509 if (mctx->state_log != NULL)
07510 cur_state = merge_state_with_log (&err, mctx, cur_state);
07511
07512 if (cur_state == NULL)
07513 {
07514
07515
07516
07517 if (BE (err != REG_NOERROR, 0))
07518 return -2;
07519
07520 if (mctx->state_log == NULL
07521 || (match && !fl_longest_match)
07522 || (cur_state = find_recover_state (&err, mctx)) == NULL)
07523 break;
07524 }
07525
07526 if (BE (at_init_state, 0))
07527 {
07528 if (old_state == cur_state)
07529 next_start_idx = next_char_idx;
07530 else
07531 at_init_state = 0;
07532 }
07533
07534 if (cur_state->halt)
07535 {
07536
07537
07538 if (!cur_state->has_constraint
07539 || check_halt_state_context (mctx, cur_state,
07540 re_string_cur_idx (&mctx->input)))
07541 {
07542
07543 match_last = re_string_cur_idx (&mctx->input);
07544 match = 1;
07545
07546
07547 p_match_first = NULL;
07548 if (!fl_longest_match)
07549 break;
07550 }
07551 }
07552 }
07553
07554 if (p_match_first)
07555 *p_match_first += next_start_idx;
07556
07557 return match_last;
07558 }
07559
07560
07561
07562 static int
07563 internal_function
07564 check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
07565 {
07566 re_token_type_t type = dfa->nodes[node].type;
07567 unsigned int constraint = dfa->nodes[node].constraint;
07568 if (type != END_OF_RE)
07569 return 0;
07570 if (!constraint)
07571 return 1;
07572 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
07573 return 0;
07574 return 1;
07575 }
07576
07577
07578
07579
07580
07581 static int
07582 internal_function
07583 check_halt_state_context (const re_match_context_t *mctx,
07584 const re_dfastate_t *state, int idx)
07585 {
07586 int i;
07587 unsigned int context;
07588 #ifdef DEBUG
07589 assert (state->halt);
07590 #endif
07591 context = re_string_context_at (&mctx->input, idx, mctx->eflags);
07592 for (i = 0; i < state->nodes.nelem; ++i)
07593 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
07594 return state->nodes.elems[i];
07595 return 0;
07596 }
07597
07598
07599
07600
07601
07602
07603 static int
07604 internal_function
07605 proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
07606 int *pidx, int node, re_node_set *eps_via_nodes,
07607 struct re_fail_stack_t *fs)
07608 {
07609 const re_dfa_t *const dfa = mctx->dfa;
07610 int i, err;
07611 if (IS_EPSILON_NODE (dfa->nodes[node].type))
07612 {
07613 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
07614 re_node_set *edests = &dfa->edests[node];
07615 int dest_node;
07616 err = re_node_set_insert (eps_via_nodes, node);
07617 if (BE (err < 0, 0))
07618 return -2;
07619
07620 for (dest_node = -1, i = 0; i < edests->nelem; ++i)
07621 {
07622 int candidate = edests->elems[i];
07623 if (!re_node_set_contains (cur_nodes, candidate))
07624 continue;
07625 if (dest_node == -1)
07626 dest_node = candidate;
07627
07628 else
07629 {
07630
07631
07632 if (re_node_set_contains (eps_via_nodes, dest_node))
07633 return candidate;
07634
07635
07636 else if (fs != NULL
07637 && push_fail_stack (fs, *pidx, candidate, nregs, regs,
07638 eps_via_nodes))
07639 return -2;
07640
07641
07642 break;
07643 }
07644 }
07645 return dest_node;
07646 }
07647 else
07648 {
07649 int naccepted = 0;
07650 re_token_type_t type = dfa->nodes[node].type;
07651
07652 #ifdef RE_ENABLE_I18N
07653 if (dfa->nodes[node].accept_mb)
07654 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
07655 else
07656 #endif
07657 if (type == OP_BACK_REF)
07658 {
07659 int subexp_idx = dfa->nodes[node].opr.idx + 1;
07660 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
07661 if (fs != NULL)
07662 {
07663 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
07664 return -1;
07665 else if (naccepted)
07666 {
07667 char *buf = (char *) re_string_get_buffer (&mctx->input);
07668 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
07669 naccepted) != 0)
07670 return -1;
07671 }
07672 }
07673
07674 if (naccepted == 0)
07675 {
07676 int dest_node;
07677 err = re_node_set_insert (eps_via_nodes, node);
07678 if (BE (err < 0, 0))
07679 return -2;
07680 dest_node = dfa->edests[node].elems[0];
07681 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
07682 dest_node))
07683 return dest_node;
07684 }
07685 }
07686
07687 if (naccepted != 0
07688 || check_node_accept (mctx, dfa->nodes + node, *pidx))
07689 {
07690 int dest_node = dfa->nexts[node];
07691 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
07692 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
07693 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
07694 dest_node)))
07695 return -1;
07696 re_node_set_empty (eps_via_nodes);
07697 return dest_node;
07698 }
07699 }
07700 return -1;
07701 }
07702
07703 static reg_errcode_t
07704 internal_function
07705 push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
07706 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
07707 {
07708 reg_errcode_t err;
07709 int num = fs->num++;
07710 if (fs->num == fs->alloc)
07711 {
07712 struct re_fail_stack_ent_t *new_array;
07713 new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
07714 * fs->alloc * 2));
07715 if (new_array == NULL)
07716 return REG_ESPACE;
07717 fs->alloc *= 2;
07718 fs->stack = new_array;
07719 }
07720 fs->stack[num].idx = str_idx;
07721 fs->stack[num].node = dest_node;
07722 fs->stack[num].regs = re_malloc (regmatch_t, nregs);
07723 if (fs->stack[num].regs == NULL)
07724 return REG_ESPACE;
07725 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
07726 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
07727 return err;
07728 }
07729
07730 static int
07731 internal_function
07732 pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
07733 regmatch_t *regs, re_node_set *eps_via_nodes)
07734 {
07735 int num = --fs->num;
07736 assert (num >= 0);
07737 *pidx = fs->stack[num].idx;
07738 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
07739 re_node_set_free (eps_via_nodes);
07740 re_free (fs->stack[num].regs);
07741 *eps_via_nodes = fs->stack[num].eps_via_nodes;
07742 return fs->stack[num].node;
07743 }
07744
07745
07746
07747
07748
07749
07750 static reg_errcode_t
07751 internal_function
07752 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
07753 regmatch_t *pmatch, int fl_backtrack)
07754 {
07755 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
07756 int idx, cur_node;
07757 re_node_set eps_via_nodes;
07758 struct re_fail_stack_t *fs;
07759 struct re_fail_stack_t fs_body = { 0, 2, NULL };
07760 regmatch_t *prev_idx_match;
07761 int prev_idx_match_malloced = 0;
07762
07763 #ifdef DEBUG
07764 assert (nmatch > 1);
07765 assert (mctx->state_log != NULL);
07766 #endif
07767 if (fl_backtrack)
07768 {
07769 fs = &fs_body;
07770 fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
07771 if (fs->stack == NULL)
07772 return REG_ESPACE;
07773 }
07774 else
07775 fs = NULL;
07776
07777 cur_node = dfa->init_node;
07778 re_node_set_init_empty (&eps_via_nodes);
07779
07780 if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
07781 prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
07782 else
07783 {
07784 prev_idx_match = re_malloc (regmatch_t, nmatch);
07785 if (prev_idx_match == NULL)
07786 {
07787 free_fail_stack_return (fs);
07788 return REG_ESPACE;
07789 }
07790 prev_idx_match_malloced = 1;
07791 }
07792 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
07793
07794 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
07795 {
07796 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
07797
07798 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
07799 {
07800 int reg_idx;
07801 if (fs)
07802 {
07803 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
07804 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
07805 break;
07806 if (reg_idx == nmatch)
07807 {
07808 re_node_set_free (&eps_via_nodes);
07809 if (prev_idx_match_malloced)
07810 re_free (prev_idx_match);
07811 return free_fail_stack_return (fs);
07812 }
07813 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
07814 &eps_via_nodes);
07815 }
07816 else
07817 {
07818 re_node_set_free (&eps_via_nodes);
07819 if (prev_idx_match_malloced)
07820 re_free (prev_idx_match);
07821 return REG_NOERROR;
07822 }
07823 }
07824
07825
07826 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
07827 &eps_via_nodes, fs);
07828
07829 if (BE (cur_node < 0, 0))
07830 {
07831 if (BE (cur_node == -2, 0))
07832 {
07833 re_node_set_free (&eps_via_nodes);
07834 if (prev_idx_match_malloced)
07835 re_free (prev_idx_match);
07836 free_fail_stack_return (fs);
07837 return REG_ESPACE;
07838 }
07839 if (fs)
07840 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
07841 &eps_via_nodes);
07842 else
07843 {
07844 re_node_set_free (&eps_via_nodes);
07845 if (prev_idx_match_malloced)
07846 re_free (prev_idx_match);
07847 return REG_NOMATCH;
07848 }
07849 }
07850 }
07851 re_node_set_free (&eps_via_nodes);
07852 if (prev_idx_match_malloced)
07853 re_free (prev_idx_match);
07854 return free_fail_stack_return (fs);
07855 }
07856
07857 static reg_errcode_t
07858 internal_function
07859 free_fail_stack_return (struct re_fail_stack_t *fs)
07860 {
07861 if (fs)
07862 {
07863 int fs_idx;
07864 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
07865 {
07866 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
07867 re_free (fs->stack[fs_idx].regs);
07868 }
07869 re_free (fs->stack);
07870 }
07871 return REG_NOERROR;
07872 }
07873
07874 static void
07875 internal_function
07876 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
07877 regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
07878 {
07879 int type = dfa->nodes[cur_node].type;
07880 if (type == OP_OPEN_SUBEXP)
07881 {
07882 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
07883
07884
07885 if (reg_num < nmatch)
07886 {
07887 pmatch[reg_num].rm_so = cur_idx;
07888 pmatch[reg_num].rm_eo = -1;
07889 }
07890 }
07891 else if (type == OP_CLOSE_SUBEXP)
07892 {
07893 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
07894 if (reg_num < nmatch)
07895 {
07896
07897 if (pmatch[reg_num].rm_so < cur_idx)
07898 {
07899 pmatch[reg_num].rm_eo = cur_idx;
07900
07901
07902 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
07903 }
07904 else
07905 {
07906 if (dfa->nodes[cur_node].opt_subexp
07907 && prev_idx_match[reg_num].rm_so != -1)
07908
07909
07910
07911
07912
07913 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
07914 else
07915
07916
07917 pmatch[reg_num].rm_eo = cur_idx;
07918 }
07919 }
07920 }
07921 }
07922
07923
07924
07925
07926
07927
07928
07929
07930
07931
07932
07933
07934
07935
07936
07937
07938
07939
07940
07941
07942
/* Evaluates to 1 when STATE is non-NULL and its node set contains NODE,
   and to 0 otherwise.  STATE is evaluated twice.  */
#define STATE_NODE_CONTAINS(state,node) \
  ((state) != NULL && re_node_set_contains (&(state)->nodes, (node)))
07945
07946 static reg_errcode_t
07947 internal_function
07948 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
07949 {
07950 reg_errcode_t err;
07951 int null_cnt = 0;
07952 int str_idx = sctx->last_str_idx;
07953 re_node_set cur_dest;
07954
07955 #ifdef DEBUG
07956 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
07957 #endif
07958
07959
07960
07961 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
07962 if (BE (err != REG_NOERROR, 0))
07963 return err;
07964 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
07965 if (BE (err != REG_NOERROR, 0))
07966 goto free_return;
07967
07968
07969 while (str_idx > 0)
07970 {
07971
07972 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
07973 if (null_cnt > mctx->max_mb_elem_len)
07974 {
07975 memset (sctx->sifted_states, '\0',
07976 sizeof (re_dfastate_t *) * str_idx);
07977 re_node_set_free (&cur_dest);
07978 return REG_NOERROR;
07979 }
07980 re_node_set_empty (&cur_dest);
07981 --str_idx;
07982
07983 if (mctx->state_log[str_idx])
07984 {
07985 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
07986 if (BE (err != REG_NOERROR, 0))
07987 goto free_return;
07988 }
07989
07990
07991
07992
07993
07994 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
07995 if (BE (err != REG_NOERROR, 0))
07996 goto free_return;
07997 }
07998 err = REG_NOERROR;
07999 free_return:
08000 re_node_set_free (&cur_dest);
08001 return err;
08002 }
08003
08004 static reg_errcode_t
08005 internal_function
08006 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
08007 int str_idx, re_node_set *cur_dest)
08008 {
08009 const re_dfa_t *const dfa = mctx->dfa;
08010 const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
08011 int i;
08012
08013
08014
08015
08016
08017
08018
08019
08020 for (i = 0; i < cur_src->nelem; i++)
08021 {
08022 int prev_node = cur_src->elems[i];
08023 int naccepted = 0;
08024 int ret;
08025
08026 #ifdef DEBUG
08027 re_token_type_t type = dfa->nodes[prev_node].type;
08028 assert (!IS_EPSILON_NODE (type));
08029 #endif
08030 #ifdef RE_ENABLE_I18N
08031
08032 if (dfa->nodes[prev_node].accept_mb)
08033 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
08034 str_idx, sctx->last_str_idx);
08035 #endif
08036
08037
08038
08039 if (!naccepted
08040 && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
08041 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
08042 dfa->nexts[prev_node]))
08043 naccepted = 1;
08044
08045 if (naccepted == 0)
08046 continue;
08047
08048 if (sctx->limits.nelem)
08049 {
08050 int to_idx = str_idx + naccepted;
08051 if (check_dst_limits (mctx, &sctx->limits,
08052 dfa->nexts[prev_node], to_idx,
08053 prev_node, str_idx))
08054 continue;
08055 }
08056 ret = re_node_set_insert (cur_dest, prev_node);
08057 if (BE (ret == -1, 0))
08058 return REG_ESPACE;
08059 }
08060
08061 return REG_NOERROR;
08062 }
08063
08064
08065
08066 static reg_errcode_t
08067 internal_function
08068 clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
08069 {
08070 int top = mctx->state_log_top;
08071
08072 if (next_state_log_idx >= mctx->input.bufs_len
08073 || (next_state_log_idx >= mctx->input.valid_len
08074 && mctx->input.valid_len < mctx->input.len))
08075 {
08076 reg_errcode_t err;
08077 err = extend_buffers (mctx);
08078 if (BE (err != REG_NOERROR, 0))
08079 return err;
08080 }
08081
08082 if (top < next_state_log_idx)
08083 {
08084 memset (mctx->state_log + top + 1, '\0',
08085 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
08086 mctx->state_log_top = next_state_log_idx;
08087 }
08088 return REG_NOERROR;
08089 }
08090
08091 static reg_errcode_t
08092 internal_function
08093 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
08094 re_dfastate_t **src, int num)
08095 {
08096 int st_idx;
08097 reg_errcode_t err;
08098 for (st_idx = 0; st_idx < num; ++st_idx)
08099 {
08100 if (dst[st_idx] == NULL)
08101 dst[st_idx] = src[st_idx];
08102 else if (src[st_idx] != NULL)
08103 {
08104 re_node_set merged_set;
08105 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
08106 &src[st_idx]->nodes);
08107 if (BE (err != REG_NOERROR, 0))
08108 return err;
08109 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
08110 re_node_set_free (&merged_set);
08111 if (BE (err != REG_NOERROR, 0))
08112 return err;
08113 }
08114 }
08115 return REG_NOERROR;
08116 }
08117
08118 static reg_errcode_t
08119 internal_function
08120 update_cur_sifted_state (const re_match_context_t *mctx,
08121 re_sift_context_t *sctx, int str_idx,
08122 re_node_set *dest_nodes)
08123 {
08124 const re_dfa_t *const dfa = mctx->dfa;
08125 reg_errcode_t err = REG_NOERROR;
08126 const re_node_set *candidates;
08127 candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
08128 : &mctx->state_log[str_idx]->nodes);
08129
08130 if (dest_nodes->nelem == 0)
08131 sctx->sifted_states[str_idx] = NULL;
08132 else
08133 {
08134 if (candidates)
08135 {
08136
08137
08138 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
08139 if (BE (err != REG_NOERROR, 0))
08140 return err;
08141
08142
08143 if (sctx->limits.nelem)
08144 {
08145 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
08146 mctx->bkref_ents, str_idx);
08147 if (BE (err != REG_NOERROR, 0))
08148 return err;
08149 }
08150 }
08151
08152 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
08153 if (BE (err != REG_NOERROR, 0))
08154 return err;
08155 }
08156
08157 if (candidates && mctx->state_log[str_idx]->has_backref)
08158 {
08159 err = sift_states_bkref (mctx, sctx, str_idx, candidates);
08160 if (BE (err != REG_NOERROR, 0))
08161 return err;
08162 }
08163 return REG_NOERROR;
08164 }
08165
08166 static reg_errcode_t
08167 internal_function
08168 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
08169 const re_node_set *candidates)
08170 {
08171 reg_errcode_t err = REG_NOERROR;
08172 int i;
08173
08174 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
08175 if (BE (err != REG_NOERROR, 0))
08176 return err;
08177
08178 if (!state->inveclosure.alloc)
08179 {
08180 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
08181 if (BE (err != REG_NOERROR, 0))
08182 return REG_ESPACE;
08183 for (i = 0; i < dest_nodes->nelem; i++)
08184 re_node_set_merge (&state->inveclosure,
08185 dfa->inveclosures + dest_nodes->elems[i]);
08186 }
08187 return re_node_set_add_intersect (dest_nodes, candidates,
08188 &state->inveclosure);
08189 }
08190
08191 static reg_errcode_t
08192 internal_function
08193 sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
08194 const re_node_set *candidates)
08195 {
08196 int ecl_idx;
08197 reg_errcode_t err;
08198 re_node_set *inv_eclosure = dfa->inveclosures + node;
08199 re_node_set except_nodes;
08200 re_node_set_init_empty (&except_nodes);
08201 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
08202 {
08203 int cur_node = inv_eclosure->elems[ecl_idx];
08204 if (cur_node == node)
08205 continue;
08206 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
08207 {
08208 int edst1 = dfa->edests[cur_node].elems[0];
08209 int edst2 = ((dfa->edests[cur_node].nelem > 1)
08210 ? dfa->edests[cur_node].elems[1] : -1);
08211 if ((!re_node_set_contains (inv_eclosure, edst1)
08212 && re_node_set_contains (dest_nodes, edst1))
08213 || (edst2 > 0
08214 && !re_node_set_contains (inv_eclosure, edst2)
08215 && re_node_set_contains (dest_nodes, edst2)))
08216 {
08217 err = re_node_set_add_intersect (&except_nodes, candidates,
08218 dfa->inveclosures + cur_node);
08219 if (BE (err != REG_NOERROR, 0))
08220 {
08221 re_node_set_free (&except_nodes);
08222 return err;
08223 }
08224 }
08225 }
08226 }
08227 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
08228 {
08229 int cur_node = inv_eclosure->elems[ecl_idx];
08230 if (!re_node_set_contains (&except_nodes, cur_node))
08231 {
08232 int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
08233 re_node_set_remove_at (dest_nodes, idx);
08234 }
08235 }
08236 re_node_set_free (&except_nodes);
08237 return REG_NOERROR;
08238 }
08239
08240 static int
08241 internal_function
08242 check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
08243 int dst_node, int dst_idx, int src_node, int src_idx)
08244 {
08245 const re_dfa_t *const dfa = mctx->dfa;
08246 int lim_idx, src_pos, dst_pos;
08247
08248 int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
08249 int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
08250 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
08251 {
08252 int subexp_idx;
08253 struct re_backref_cache_entry *ent;
08254 ent = mctx->bkref_ents + limits->elems[lim_idx];
08255 subexp_idx = dfa->nodes[ent->node].opr.idx;
08256
08257 dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
08258 subexp_idx, dst_node, dst_idx,
08259 dst_bkref_idx);
08260 src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
08261 subexp_idx, src_node, src_idx,
08262 src_bkref_idx);
08263
08264
08265
08266
08267
08268 if (src_pos == dst_pos)
08269 continue;
08270 else
08271 return 1;
08272 }
08273 return 0;
08274 }
08275
08276 static int
08277 internal_function
08278 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
08279 int subexp_idx, int from_node, int bkref_idx)
08280 {
08281 const re_dfa_t *const dfa = mctx->dfa;
08282 const re_node_set *eclosures = dfa->eclosures + from_node;
08283 int node_idx;
08284
08285
08286
08287 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
08288 {
08289 int node = eclosures->elems[node_idx];
08290 switch (dfa->nodes[node].type)
08291 {
08292 case OP_BACK_REF:
08293 if (bkref_idx != -1)
08294 {
08295 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
08296 do
08297 {
08298 int dst, cpos;
08299
08300 if (ent->node != node)
08301 continue;
08302
08303 if (subexp_idx < BITSET_WORD_BITS
08304 && !(ent->eps_reachable_subexps_map
08305 & ((bitset_word_t) 1 << subexp_idx)))
08306 continue;
08307
08308
08309
08310
08311
08312
08313
08314 dst = dfa->edests[node].elems[0];
08315 if (dst == from_node)
08316 {
08317 if (boundaries & 1)
08318 return -1;
08319 else
08320 return 0;
08321 }
08322
08323 cpos =
08324 check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
08325 dst, bkref_idx);
08326 if (cpos == -1 )
08327 return -1;
08328 if (cpos == 0 && (boundaries & 2))
08329 return 0;
08330
08331 if (subexp_idx < BITSET_WORD_BITS)
08332 ent->eps_reachable_subexps_map
08333 &= ~((bitset_word_t) 1 << subexp_idx);
08334 }
08335 while (ent++->more);
08336 }
08337 break;
08338
08339 case OP_OPEN_SUBEXP:
08340 if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
08341 return -1;
08342 break;
08343
08344 case OP_CLOSE_SUBEXP:
08345 if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
08346 return 0;
08347 break;
08348
08349 default:
08350 break;
08351 }
08352 }
08353
08354 return (boundaries & 2) ? 1 : 0;
08355 }
08356
08357 static int
08358 internal_function
08359 check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
08360 int subexp_idx, int from_node, int str_idx,
08361 int bkref_idx)
08362 {
08363 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
08364 int boundaries;
08365
08366
08367 if (str_idx < lim->subexp_from)
08368 return -1;
08369
08370 if (lim->subexp_to < str_idx)
08371 return 1;
08372
08373
08374 boundaries = (str_idx == lim->subexp_from);
08375 boundaries |= (str_idx == lim->subexp_to) << 1;
08376 if (boundaries == 0)
08377 return 0;
08378
08379
08380 return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
08381 from_node, bkref_idx);
08382 }
08383
08384
08385
08386
08387 static reg_errcode_t
08388 internal_function
08389 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
08390 const re_node_set *candidates, re_node_set *limits,
08391 struct re_backref_cache_entry *bkref_ents, int str_idx)
08392 {
08393 reg_errcode_t err;
08394 int node_idx, lim_idx;
08395
08396 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
08397 {
08398 int subexp_idx;
08399 struct re_backref_cache_entry *ent;
08400 ent = bkref_ents + limits->elems[lim_idx];
08401
08402 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
08403 continue;
08404
08405 subexp_idx = dfa->nodes[ent->node].opr.idx;
08406 if (ent->subexp_to == str_idx)
08407 {
08408 int ops_node = -1;
08409 int cls_node = -1;
08410 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
08411 {
08412 int node = dest_nodes->elems[node_idx];
08413 re_token_type_t type = dfa->nodes[node].type;
08414 if (type == OP_OPEN_SUBEXP
08415 && subexp_idx == dfa->nodes[node].opr.idx)
08416 ops_node = node;
08417 else if (type == OP_CLOSE_SUBEXP
08418 && subexp_idx == dfa->nodes[node].opr.idx)
08419 cls_node = node;
08420 }
08421
08422
08423
08424 if (ops_node >= 0)
08425 {
08426 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
08427 candidates);
08428 if (BE (err != REG_NOERROR, 0))
08429 return err;
08430 }
08431
08432
08433 if (cls_node >= 0)
08434 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
08435 {
08436 int node = dest_nodes->elems[node_idx];
08437 if (!re_node_set_contains (dfa->inveclosures + node,
08438 cls_node)
08439 && !re_node_set_contains (dfa->eclosures + node,
08440 cls_node))
08441 {
08442
08443
08444 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
08445 candidates);
08446 if (BE (err != REG_NOERROR, 0))
08447 return err;
08448 --node_idx;
08449 }
08450 }
08451 }
08452 else
08453 {
08454 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
08455 {
08456 int node = dest_nodes->elems[node_idx];
08457 re_token_type_t type = dfa->nodes[node].type;
08458 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
08459 {
08460 if (subexp_idx != dfa->nodes[node].opr.idx)
08461 continue;
08462
08463
08464 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
08465 candidates);
08466 if (BE (err != REG_NOERROR, 0))
08467 return err;
08468 }
08469 }
08470 }
08471 }
08472 return REG_NOERROR;
08473 }
08474
08475 static reg_errcode_t
08476 internal_function
08477 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
08478 int str_idx, const re_node_set *candidates)
08479 {
08480 const re_dfa_t *const dfa = mctx->dfa;
08481 reg_errcode_t err;
08482 int node_idx, node;
08483 re_sift_context_t local_sctx;
08484 int first_idx = search_cur_bkref_entry (mctx, str_idx);
08485
08486 if (first_idx == -1)
08487 return REG_NOERROR;
08488
08489 local_sctx.sifted_states = NULL;
08490
08491 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
08492 {
08493 int enabled_idx;
08494 re_token_type_t type;
08495 struct re_backref_cache_entry *entry;
08496 node = candidates->elems[node_idx];
08497 type = dfa->nodes[node].type;
08498
08499 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
08500 continue;
08501 if (type != OP_BACK_REF)
08502 continue;
08503
08504 entry = mctx->bkref_ents + first_idx;
08505 enabled_idx = first_idx;
08506 do
08507 {
08508 int subexp_len;
08509 int to_idx;
08510 int dst_node;
08511 int ret;
08512 re_dfastate_t *cur_state;
08513
08514 if (entry->node != node)
08515 continue;
08516 subexp_len = entry->subexp_to - entry->subexp_from;
08517 to_idx = str_idx + subexp_len;
08518 dst_node = (subexp_len ? dfa->nexts[node]
08519 : dfa->edests[node].elems[0]);
08520
08521 if (to_idx > sctx->last_str_idx
08522 || sctx->sifted_states[to_idx] == NULL
08523 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
08524 || check_dst_limits (mctx, &sctx->limits, node,
08525 str_idx, dst_node, to_idx))
08526 continue;
08527
08528 if (local_sctx.sifted_states == NULL)
08529 {
08530 local_sctx = *sctx;
08531 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
08532 if (BE (err != REG_NOERROR, 0))
08533 goto free_return;
08534 }
08535 local_sctx.last_node = node;
08536 local_sctx.last_str_idx = str_idx;
08537 ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
08538 if (BE (ret < 0, 0))
08539 {
08540 err = REG_ESPACE;
08541 goto free_return;
08542 }
08543 cur_state = local_sctx.sifted_states[str_idx];
08544 err = sift_states_backward (mctx, &local_sctx);
08545 if (BE (err != REG_NOERROR, 0))
08546 goto free_return;
08547 if (sctx->limited_states != NULL)
08548 {
08549 err = merge_state_array (dfa, sctx->limited_states,
08550 local_sctx.sifted_states,
08551 str_idx + 1);
08552 if (BE (err != REG_NOERROR, 0))
08553 goto free_return;
08554 }
08555 local_sctx.sifted_states[str_idx] = cur_state;
08556 re_node_set_remove (&local_sctx.limits, enabled_idx);
08557
08558
08559 entry = mctx->bkref_ents + enabled_idx;
08560 }
08561 while (enabled_idx++, entry++->more);
08562 }
08563 err = REG_NOERROR;
08564 free_return:
08565 if (local_sctx.sifted_states != NULL)
08566 {
08567 re_node_set_free (&local_sctx.limits);
08568 }
08569
08570 return err;
08571 }
08572
08573
#ifdef RE_ENABLE_I18N
/* Return the number of bytes node NODE_IDX accepts at STR_IDX, or 0 when
   that many bytes still fit before MAX_STR_IDX but the node's successor is
   not in the sifted state at the position reached.  */
static int
internal_function
sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
                     int node_idx, int str_idx, int max_str_idx)
{
  const re_dfa_t *const dfa = mctx->dfa;
  const int naccepted
    = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);

  if (naccepted <= 0)
    return naccepted;
  if (str_idx + naccepted > max_str_idx)
    return naccepted;

  /* The bytes are accepted, but the destination was sifted away.  */
  if (!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
                            dfa->nexts[node_idx]))
    return 0;

  return naccepted;
}
#endif
08596
08597
08598
08599
08600
08601
08602
08603
08604
08605 static re_dfastate_t *
08606 internal_function
08607 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
08608 re_dfastate_t *state)
08609 {
08610 re_dfastate_t **trtable;
08611 unsigned char ch;
08612
08613 #ifdef RE_ENABLE_I18N
08614
08615 if (BE (state->accept_mb, 0))
08616 {
08617 *err = transit_state_mb (mctx, state);
08618 if (BE (*err != REG_NOERROR, 0))
08619 return NULL;
08620 }
08621 #endif
08622
08623
08624 #if 0
08625 if (0)
08626
08627 return transit_state_sb (err, mctx, state);
08628 #endif
08629
08630
08631 ch = re_string_fetch_byte (&mctx->input);
08632 for (;;)
08633 {
08634 trtable = state->trtable;
08635 if (BE (trtable != NULL, 1))
08636 return trtable[ch];
08637
08638 trtable = state->word_trtable;
08639 if (BE (trtable != NULL, 1))
08640 {
08641 unsigned int context;
08642 context
08643 = re_string_context_at (&mctx->input,
08644 re_string_cur_idx (&mctx->input) - 1,
08645 mctx->eflags);
08646 if (IS_WORD_CONTEXT (context))
08647 return trtable[ch + SBC_MAX];
08648 else
08649 return trtable[ch];
08650 }
08651
08652 if (!build_trtable (mctx->dfa, state))
08653 {
08654 *err = REG_ESPACE;
08655 return NULL;
08656 }
08657
08658
08659 }
08660 }
08661
08662
08663 re_dfastate_t *
08664 internal_function
08665 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
08666 re_dfastate_t *next_state)
08667 {
08668 const re_dfa_t *const dfa = mctx->dfa;
08669 int cur_idx = re_string_cur_idx (&mctx->input);
08670
08671 if (cur_idx > mctx->state_log_top)
08672 {
08673 mctx->state_log[cur_idx] = next_state;
08674 mctx->state_log_top = cur_idx;
08675 }
08676 else if (mctx->state_log[cur_idx] == 0)
08677 {
08678 mctx->state_log[cur_idx] = next_state;
08679 }
08680 else
08681 {
08682 re_dfastate_t *pstate;
08683 unsigned int context;
08684 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
08685
08686
08687
08688
08689 pstate = mctx->state_log[cur_idx];
08690 log_nodes = pstate->entrance_nodes;
08691 if (next_state != NULL)
08692 {
08693 table_nodes = next_state->entrance_nodes;
08694 *err = re_node_set_init_union (&next_nodes, table_nodes,
08695 log_nodes);
08696 if (BE (*err != REG_NOERROR, 0))
08697 return NULL;
08698 }
08699 else
08700 next_nodes = *log_nodes;
08701
08702
08703
08704 context = re_string_context_at (&mctx->input,
08705 re_string_cur_idx (&mctx->input) - 1,
08706 mctx->eflags);
08707 next_state = mctx->state_log[cur_idx]
08708 = re_acquire_state_context (err, dfa, &next_nodes, context);
08709
08710
08711
08712 if (table_nodes != NULL)
08713 re_node_set_free (&next_nodes);
08714 }
08715
08716 if (BE (dfa->nbackref, 0) && next_state != NULL)
08717 {
08718
08719
08720
08721 *err = check_subexp_matching_top (mctx, &next_state->nodes,
08722 cur_idx);
08723 if (BE (*err != REG_NOERROR, 0))
08724 return NULL;
08725
08726
08727 if (next_state->has_backref)
08728 {
08729 *err = transit_state_bkref (mctx, &next_state->nodes);
08730 if (BE (*err != REG_NOERROR, 0))
08731 return NULL;
08732 next_state = mctx->state_log[cur_idx];
08733 }
08734 }
08735
08736 return next_state;
08737 }
08738
08739
08740
08741
08742 re_dfastate_t *
08743 internal_function
08744 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
08745 {
08746 re_dfastate_t *cur_state;
08747 do
08748 {
08749 int max = mctx->state_log_top;
08750 int cur_str_idx = re_string_cur_idx (&mctx->input);
08751
08752 do
08753 {
08754 if (++cur_str_idx > max)
08755 return NULL;
08756 re_string_skip_bytes (&mctx->input, 1);
08757 }
08758 while (mctx->state_log[cur_str_idx] == NULL);
08759
08760 cur_state = merge_state_with_log (err, mctx, NULL);
08761 }
08762 while (*err == REG_NOERROR && cur_state == NULL);
08763 return cur_state;
08764 }
08765
08766
08767
08768
08769
08770
08771
08772
08773 static reg_errcode_t
08774 internal_function
08775 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
08776 int str_idx)
08777 {
08778 const re_dfa_t *const dfa = mctx->dfa;
08779 int node_idx;
08780 reg_errcode_t err;
08781
08782
08783
08784
08785
08786
08787 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
08788 {
08789 int node = cur_nodes->elems[node_idx];
08790 if (dfa->nodes[node].type == OP_OPEN_SUBEXP
08791 && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
08792 && (dfa->used_bkref_map
08793 & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
08794 {
08795 err = match_ctx_add_subtop (mctx, node, str_idx);
08796 if (BE (err != REG_NOERROR, 0))
08797 return err;
08798 }
08799 }
08800 return REG_NOERROR;
08801 }
08802
#if 0
/* NOTE: this function is compiled out by the surrounding "#if 0".

   Compute the state reached from STATE by consuming the byte at the
   current input position: the epsilon closures of the successors of
   every node of STATE that accepts that byte are merged, the state for
   the merged set is acquired, and the input cursor is advanced by one
   byte.  On failure *ERR is set and NULL is returned.  */
static re_dfastate_t *
transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
                  re_dfastate_t *state)
{
  const re_dfa_t *const dfa = mctx->dfa;
  const int here = re_string_cur_idx (&mctx->input);
  re_node_set reached;
  re_dfastate_t *result;
  unsigned int ctx;
  int k;

  *err = re_node_set_alloc (&reached, state->nodes.nelem + 1);
  if (BE (*err != REG_NOERROR, 0))
    return NULL;

  for (k = 0; k < state->nodes.nelem; ++k)
    {
      const int node = state->nodes.elems[k];

      if (!check_node_accept (mctx, dfa->nodes + node, here))
        continue;

      *err = re_node_set_merge (&reached, dfa->eclosures + dfa->nexts[node]);
      if (BE (*err != REG_NOERROR, 0))
        {
          re_node_set_free (&reached);
          return NULL;
        }
    }

  ctx = re_string_context_at (&mctx->input, here, mctx->eflags);
  result = re_acquire_state_context (err, dfa, &reached, ctx);

  /* The scratch set is released whether or not a state was obtained.  */
  re_node_set_free (&reached);
  re_string_skip_bytes (&mctx->input, 1);
  return result;
}
#endif
08844
#ifdef RE_ENABLE_I18N
/* Handle the nodes of PSTATE that are flagged accept_mb.

   For each such node whose constraint (if any) is satisfied at the
   current input position, check_node_accept_bytes reports how many bytes
   the node accepts there.  When that count is non-zero, the epsilon
   closure of the node's successor is merged into the state logged at
   (current position + count), and MCTX->max_mb_elem_len is raised to
   the count if it was smaller.

   Returns REG_NOERROR, or the first error encountered.  */
static reg_errcode_t
internal_function
transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
{
  const re_dfa_t *const dfa = mctx->dfa;
  reg_errcode_t err;
  int pos;

  for (pos = 0; pos < pstate->nodes.nelem; ++pos)
    {
      const int node = pstate->nodes.elems[pos];
      const int here = re_string_cur_idx (&mctx->input);
      re_node_set merged, *closure;
      re_dfastate_t *logged;
      unsigned int ctx;
      int nbytes, dest_idx;

      if (!dfa->nodes[node].accept_mb)
        continue;

      if (dfa->nodes[node].constraint)
        {
          ctx = re_string_context_at (&mctx->input, here, mctx->eflags);
          if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[node].constraint, ctx))
            continue;
        }

      /* How many bytes does this node accept at the current position?  */
      nbytes = check_node_accept_bytes (dfa, node, &mctx->input, here);
      if (nbytes == 0)
        continue;

      dest_idx = here + nbytes;
      if (mctx->max_mb_elem_len < nbytes)
        mctx->max_mb_elem_len = nbytes;

      err = clean_state_log_if_needed (mctx, dest_idx);
      if (BE (err != REG_NOERROR, 0))
        return err;
#ifdef DEBUG
      assert (dfa->nexts[node] != -1);
#endif
      closure = dfa->eclosures + dfa->nexts[node];

      logged = mctx->state_log[dest_idx];
      if (logged != NULL)
        {
          err = re_node_set_init_union (&merged, logged->entrance_nodes,
                                        closure);
          if (BE (err != REG_NOERROR, 0))
            return err;
        }
      else
        /* Nothing logged yet: borrow the closure (shallow copy, never
           freed through MERGED).  */
        merged = *closure;

      ctx = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
      mctx->state_log[dest_idx]
        = re_acquire_state_context (&err, dfa, &merged, ctx);

      /* Only the union built above owns storage.  */
      if (logged != NULL)
        re_node_set_free (&merged);

      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
        return err;
    }
  return REG_NOERROR;
}
#endif
08915
08916 static reg_errcode_t
08917 internal_function
08918 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
08919 {
08920 const re_dfa_t *const dfa = mctx->dfa;
08921 reg_errcode_t err;
08922 int i;
08923 int cur_str_idx = re_string_cur_idx (&mctx->input);
08924
08925 for (i = 0; i < nodes->nelem; ++i)
08926 {
08927 int dest_str_idx, prev_nelem, bkc_idx;
08928 int node_idx = nodes->elems[i];
08929 unsigned int context;
08930 const re_token_t *node = dfa->nodes + node_idx;
08931 re_node_set *new_dest_nodes;
08932
08933
08934 if (node->type != OP_BACK_REF)
08935 continue;
08936
08937 if (node->constraint)
08938 {
08939 context = re_string_context_at (&mctx->input, cur_str_idx,
08940 mctx->eflags);
08941 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
08942 continue;
08943 }
08944
08945
08946
08947 bkc_idx = mctx->nbkref_ents;
08948 err = get_subexp (mctx, node_idx, cur_str_idx);
08949 if (BE (err != REG_NOERROR, 0))
08950 goto free_return;
08951
08952
08953
08954 #ifdef DEBUG
08955 assert (dfa->nexts[node_idx] != -1);
08956 #endif
08957 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
08958 {
08959 int subexp_len;
08960 re_dfastate_t *dest_state;
08961 struct re_backref_cache_entry *bkref_ent;
08962 bkref_ent = mctx->bkref_ents + bkc_idx;
08963 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
08964 continue;
08965 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
08966 new_dest_nodes = (subexp_len == 0
08967 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
08968 : dfa->eclosures + dfa->nexts[node_idx]);
08969 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
08970 - bkref_ent->subexp_from);
08971 context = re_string_context_at (&mctx->input, dest_str_idx - 1,
08972 mctx->eflags);
08973 dest_state = mctx->state_log[dest_str_idx];
08974 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
08975 : mctx->state_log[cur_str_idx]->nodes.nelem);
08976
08977 if (dest_state == NULL)
08978 {
08979 mctx->state_log[dest_str_idx]
08980 = re_acquire_state_context (&err, dfa, new_dest_nodes,
08981 context);
08982 if (BE (mctx->state_log[dest_str_idx] == NULL
08983 && err != REG_NOERROR, 0))
08984 goto free_return;
08985 }
08986 else
08987 {
08988 re_node_set dest_nodes;
08989 err = re_node_set_init_union (&dest_nodes,
08990 dest_state->entrance_nodes,
08991 new_dest_nodes);
08992 if (BE (err != REG_NOERROR, 0))
08993 {
08994 re_node_set_free (&dest_nodes);
08995 goto free_return;
08996 }
08997 mctx->state_log[dest_str_idx]
08998 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
08999 re_node_set_free (&dest_nodes);
09000 if (BE (mctx->state_log[dest_str_idx] == NULL
09001 && err != REG_NOERROR, 0))
09002 goto free_return;
09003 }
09004
09005
09006 if (subexp_len == 0
09007 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
09008 {
09009 err = check_subexp_matching_top (mctx, new_dest_nodes,
09010 cur_str_idx);
09011 if (BE (err != REG_NOERROR, 0))
09012 goto free_return;
09013 err = transit_state_bkref (mctx, new_dest_nodes);
09014 if (BE (err != REG_NOERROR, 0))
09015 goto free_return;
09016 }
09017 }
09018 }
09019 err = REG_NOERROR;
09020 free_return:
09021 return err;
09022 }
09023
09024
09025
09026
09027
09028
09029
09030 static reg_errcode_t
09031 internal_function
09032 get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
09033 {
09034 const re_dfa_t *const dfa = mctx->dfa;
09035 int subexp_num, sub_top_idx;
09036 const char *buf = (const char *) re_string_get_buffer (&mctx->input);
09037
09038 int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
09039 if (cache_idx != -1)
09040 {
09041 const struct re_backref_cache_entry *entry
09042 = mctx->bkref_ents + cache_idx;
09043 do
09044 if (entry->node == bkref_node)
09045 return REG_NOERROR;
09046 while (entry++->more);
09047 }
09048
09049 subexp_num = dfa->nodes[bkref_node].opr.idx;
09050
09051
09052 for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
09053 {
09054 reg_errcode_t err;
09055 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
09056 re_sub_match_last_t *sub_last;
09057 int sub_last_idx, sl_str, bkref_str_off;
09058
09059 if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
09060 continue;
09061
09062 sl_str = sub_top->str_idx;
09063 bkref_str_off = bkref_str_idx;
09064
09065
09066 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
09067 {
09068 int sl_str_diff;
09069 sub_last = sub_top->lasts[sub_last_idx];
09070 sl_str_diff = sub_last->str_idx - sl_str;
09071
09072
09073 if (sl_str_diff > 0)
09074 {
09075 if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
09076 {
09077
09078 if (bkref_str_off + sl_str_diff > mctx->input.len)
09079 break;
09080
09081 err = clean_state_log_if_needed (mctx,
09082 bkref_str_off
09083 + sl_str_diff);
09084 if (BE (err != REG_NOERROR, 0))
09085 return err;
09086 buf = (const char *) re_string_get_buffer (&mctx->input);
09087 }
09088 if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
09089
09090 break;
09091 }
09092 bkref_str_off += sl_str_diff;
09093 sl_str += sl_str_diff;
09094 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
09095 bkref_str_idx);
09096
09097
09098
09099 buf = (const char *) re_string_get_buffer (&mctx->input);
09100
09101 if (err == REG_NOMATCH)
09102 continue;
09103 if (BE (err != REG_NOERROR, 0))
09104 return err;
09105 }
09106
09107 if (sub_last_idx < sub_top->nlasts)
09108 continue;
09109 if (sub_last_idx > 0)
09110 ++sl_str;
09111
09112 for (; sl_str <= bkref_str_idx; ++sl_str)
09113 {
09114 int cls_node, sl_str_off;
09115 const re_node_set *nodes;
09116 sl_str_off = sl_str - sub_top->str_idx;
09117
09118
09119 if (sl_str_off > 0)
09120 {
09121 if (BE (bkref_str_off >= mctx->input.valid_len, 0))
09122 {
09123
09124 if (bkref_str_off >= mctx->input.len)
09125 break;
09126
09127 err = extend_buffers (mctx);
09128 if (BE (err != REG_NOERROR, 0))
09129 return err;
09130
09131 buf = (const char *) re_string_get_buffer (&mctx->input);
09132 }
09133 if (buf [bkref_str_off++] != buf[sl_str - 1])
09134 break;
09135
09136 }
09137 if (mctx->state_log[sl_str] == NULL)
09138 continue;
09139
09140 nodes = &mctx->state_log[sl_str]->nodes;
09141 cls_node = find_subexp_node (dfa, nodes, subexp_num,
09142 OP_CLOSE_SUBEXP);
09143 if (cls_node == -1)
09144 continue;
09145 if (sub_top->path == NULL)
09146 {
09147 sub_top->path = calloc (sizeof (state_array_t),
09148 sl_str - sub_top->str_idx + 1);
09149 if (sub_top->path == NULL)
09150 return REG_ESPACE;
09151 }
09152
09153
09154 err = check_arrival (mctx, sub_top->path, sub_top->node,
09155 sub_top->str_idx, cls_node, sl_str,
09156 OP_CLOSE_SUBEXP);
09157 if (err == REG_NOMATCH)
09158 continue;
09159 if (BE (err != REG_NOERROR, 0))
09160 return err;
09161 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
09162 if (BE (sub_last == NULL, 0))
09163 return REG_ESPACE;
09164 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
09165 bkref_str_idx);
09166 if (err == REG_NOMATCH)
09167 continue;
09168 }
09169 }
09170 return REG_NOERROR;
09171 }
09172
09173
09174
09175
09176
09177
09178
09179 static reg_errcode_t
09180 internal_function
09181 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
09182 re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
09183 {
09184 reg_errcode_t err;
09185 int to_idx;
09186
09187 err = check_arrival (mctx, &sub_last->path, sub_last->node,
09188 sub_last->str_idx, bkref_node, bkref_str,
09189 OP_OPEN_SUBEXP);
09190 if (err != REG_NOERROR)
09191 return err;
09192 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
09193 sub_last->str_idx);
09194 if (BE (err != REG_NOERROR, 0))
09195 return err;
09196 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
09197 return clean_state_log_if_needed (mctx, to_idx);
09198 }
09199
09200
09201
09202
09203
09204
09205
09206
09207
09208 static int
09209 internal_function
09210 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
09211 int subexp_idx, int type)
09212 {
09213 int cls_idx;
09214 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
09215 {
09216 int cls_node = nodes->elems[cls_idx];
09217 const re_token_t *node = dfa->nodes + cls_node;
09218 if (node->type == type
09219 && node->opr.idx == subexp_idx)
09220 return cls_node;
09221 }
09222 return -1;
09223 }
09224
09225
09226
09227
09228
09229
09230 static reg_errcode_t
09231 internal_function
09232 check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
09233 int top_str, int last_node, int last_str, int type)
09234 {
09235 const re_dfa_t *const dfa = mctx->dfa;
09236 reg_errcode_t err = REG_NOERROR;
09237 int subexp_num, backup_cur_idx, str_idx, null_cnt;
09238 re_dfastate_t *cur_state = NULL;
09239 re_node_set *cur_nodes, next_nodes;
09240 re_dfastate_t **backup_state_log;
09241 unsigned int context;
09242
09243 subexp_num = dfa->nodes[top_node].opr.idx;
09244
09245 if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
09246 {
09247 re_dfastate_t **new_array;
09248 int old_alloc = path->alloc;
09249 path->alloc += last_str + mctx->max_mb_elem_len + 1;
09250 new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
09251 if (BE (new_array == NULL, 0))
09252 {
09253 path->alloc = old_alloc;
09254 return REG_ESPACE;
09255 }
09256 path->array = new_array;
09257 memset (new_array + old_alloc, '\0',
09258 sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
09259 }
09260
09261 str_idx = path->next_idx ? path->next_idx : top_str;
09262
09263
09264 backup_state_log = mctx->state_log;
09265 backup_cur_idx = mctx->input.cur_idx;
09266 mctx->state_log = path->array;
09267 mctx->input.cur_idx = str_idx;
09268
09269
09270 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
09271 if (str_idx == top_str)
09272 {
09273 err = re_node_set_init_1 (&next_nodes, top_node);
09274 if (BE (err != REG_NOERROR, 0))
09275 return err;
09276 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
09277 if (BE (err != REG_NOERROR, 0))
09278 {
09279 re_node_set_free (&next_nodes);
09280 return err;
09281 }
09282 }
09283 else
09284 {
09285 cur_state = mctx->state_log[str_idx];
09286 if (cur_state && cur_state->has_backref)
09287 {
09288 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
09289 if (BE (err != REG_NOERROR, 0))
09290 return err;
09291 }
09292 else
09293 re_node_set_init_empty (&next_nodes);
09294 }
09295 if (str_idx == top_str || (cur_state && cur_state->has_backref))
09296 {
09297 if (next_nodes.nelem)
09298 {
09299 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
09300 subexp_num, type);
09301 if (BE (err != REG_NOERROR, 0))
09302 {
09303 re_node_set_free (&next_nodes);
09304 return err;
09305 }
09306 }
09307 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
09308 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
09309 {
09310 re_node_set_free (&next_nodes);
09311 return err;
09312 }
09313 mctx->state_log[str_idx] = cur_state;
09314 }
09315
09316 for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
09317 {
09318 re_node_set_empty (&next_nodes);
09319 if (mctx->state_log[str_idx + 1])
09320 {
09321 err = re_node_set_merge (&next_nodes,
09322 &mctx->state_log[str_idx + 1]->nodes);
09323 if (BE (err != REG_NOERROR, 0))
09324 {
09325 re_node_set_free (&next_nodes);
09326 return err;
09327 }
09328 }
09329 if (cur_state)
09330 {
09331 err = check_arrival_add_next_nodes (mctx, str_idx,
09332 &cur_state->non_eps_nodes,
09333 &next_nodes);
09334 if (BE (err != REG_NOERROR, 0))
09335 {
09336 re_node_set_free (&next_nodes);
09337 return err;
09338 }
09339 }
09340 ++str_idx;
09341 if (next_nodes.nelem)
09342 {
09343 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
09344 if (BE (err != REG_NOERROR, 0))
09345 {
09346 re_node_set_free (&next_nodes);
09347 return err;
09348 }
09349 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
09350 subexp_num, type);
09351 if (BE (err != REG_NOERROR, 0))
09352 {
09353 re_node_set_free (&next_nodes);
09354 return err;
09355 }
09356 }
09357 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
09358 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
09359 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
09360 {
09361 re_node_set_free (&next_nodes);
09362 return err;
09363 }
09364 mctx->state_log[str_idx] = cur_state;
09365 null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
09366 }
09367 re_node_set_free (&next_nodes);
09368 cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
09369 : &mctx->state_log[last_str]->nodes);
09370 path->next_idx = str_idx;
09371
09372
09373 mctx->state_log = backup_state_log;
09374 mctx->input.cur_idx = backup_cur_idx;
09375
09376
09377 if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
09378 return REG_NOERROR;
09379
09380 return REG_NOMATCH;
09381 }
09382
09383
09384
09385
09386
09387
09388
09389
09390
09391 static reg_errcode_t
09392 internal_function
09393 check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
09394 re_node_set *cur_nodes, re_node_set *next_nodes)
09395 {
09396 const re_dfa_t *const dfa = mctx->dfa;
09397 int result;
09398 int cur_idx;
09399 reg_errcode_t err = REG_NOERROR;
09400 re_node_set union_set;
09401 re_node_set_init_empty (&union_set);
09402 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
09403 {
09404 int naccepted = 0;
09405 int cur_node = cur_nodes->elems[cur_idx];
09406 #ifdef DEBUG
09407 re_token_type_t type = dfa->nodes[cur_node].type;
09408 assert (!IS_EPSILON_NODE (type));
09409 #endif
09410 #ifdef RE_ENABLE_I18N
09411
09412 if (dfa->nodes[cur_node].accept_mb)
09413 {
09414 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
09415 str_idx);
09416 if (naccepted > 1)
09417 {
09418 re_dfastate_t *dest_state;
09419 int next_node = dfa->nexts[cur_node];
09420 int next_idx = str_idx + naccepted;
09421 dest_state = mctx->state_log[next_idx];
09422 re_node_set_empty (&union_set);
09423 if (dest_state)
09424 {
09425 err = re_node_set_merge (&union_set, &dest_state->nodes);
09426 if (BE (err != REG_NOERROR, 0))
09427 {
09428 re_node_set_free (&union_set);
09429 return err;
09430 }
09431 }
09432 result = re_node_set_insert (&union_set, next_node);
09433 if (BE (result < 0, 0))
09434 {
09435 re_node_set_free (&union_set);
09436 return REG_ESPACE;
09437 }
09438 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
09439 &union_set);
09440 if (BE (mctx->state_log[next_idx] == NULL
09441 && err != REG_NOERROR, 0))
09442 {
09443 re_node_set_free (&union_set);
09444 return err;
09445 }
09446 }
09447 }
09448 #endif
09449 if (naccepted
09450 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
09451 {
09452 result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
09453 if (BE (result < 0, 0))
09454 {
09455 re_node_set_free (&union_set);
09456 return REG_ESPACE;
09457 }
09458 }
09459 }
09460 re_node_set_free (&union_set);
09461 return REG_NOERROR;
09462 }
09463
09464
09465
09466
09467
09468
09469
09470 static reg_errcode_t
09471 internal_function
09472 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
09473 int ex_subexp, int type)
09474 {
09475 reg_errcode_t err;
09476 int idx, outside_node;
09477 re_node_set new_nodes;
09478 #ifdef DEBUG
09479 assert (cur_nodes->nelem);
09480 #endif
09481 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
09482 if (BE (err != REG_NOERROR, 0))
09483 return err;
09484
09485
09486
09487 for (idx = 0; idx < cur_nodes->nelem; ++idx)
09488 {
09489 int cur_node = cur_nodes->elems[idx];
09490 const re_node_set *eclosure = dfa->eclosures + cur_node;
09491 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
09492 if (outside_node == -1)
09493 {
09494
09495 err = re_node_set_merge (&new_nodes, eclosure);
09496 if (BE (err != REG_NOERROR, 0))
09497 {
09498 re_node_set_free (&new_nodes);
09499 return err;
09500 }
09501 }
09502 else
09503 {
09504
09505 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
09506 ex_subexp, type);
09507 if (BE (err != REG_NOERROR, 0))
09508 {
09509 re_node_set_free (&new_nodes);
09510 return err;
09511 }
09512 }
09513 }
09514 re_node_set_free (cur_nodes);
09515 *cur_nodes = new_nodes;
09516 return REG_NOERROR;
09517 }
09518
09519
09520
09521
09522
09523 static reg_errcode_t
09524 internal_function
09525 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
09526 int target, int ex_subexp, int type)
09527 {
09528 int cur_node;
09529 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
09530 {
09531 int err;
09532
09533 if (dfa->nodes[cur_node].type == type
09534 && dfa->nodes[cur_node].opr.idx == ex_subexp)
09535 {
09536 if (type == OP_CLOSE_SUBEXP)
09537 {
09538 err = re_node_set_insert (dst_nodes, cur_node);
09539 if (BE (err == -1, 0))
09540 return REG_ESPACE;
09541 }
09542 break;
09543 }
09544 err = re_node_set_insert (dst_nodes, cur_node);
09545 if (BE (err == -1, 0))
09546 return REG_ESPACE;
09547 if (dfa->edests[cur_node].nelem == 0)
09548 break;
09549 if (dfa->edests[cur_node].nelem == 2)
09550 {
09551 err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
09552 dfa->edests[cur_node].elems[1],
09553 ex_subexp, type);
09554 if (BE (err != REG_NOERROR, 0))
09555 return err;
09556 }
09557 cur_node = dfa->edests[cur_node].elems[0];
09558 }
09559 return REG_NOERROR;
09560 }
09561
09562
09563
09564
09565
09566
09567 static reg_errcode_t
09568 internal_function
09569 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
09570 int cur_str, int subexp_num, int type)
09571 {
09572 const re_dfa_t *const dfa = mctx->dfa;
09573 reg_errcode_t err;
09574 int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
09575 struct re_backref_cache_entry *ent;
09576
09577 if (cache_idx_start == -1)
09578 return REG_NOERROR;
09579
09580 restart:
09581 ent = mctx->bkref_ents + cache_idx_start;
09582 do
09583 {
09584 int to_idx, next_node;
09585
09586
09587 if (!re_node_set_contains (cur_nodes, ent->node))
09588 continue;
09589
09590 to_idx = cur_str + ent->subexp_to - ent->subexp_from;
09591
09592
09593 if (to_idx == cur_str)
09594 {
09595
09596
09597 re_node_set new_dests;
09598 reg_errcode_t err2, err3;
09599 next_node = dfa->edests[ent->node].elems[0];
09600 if (re_node_set_contains (cur_nodes, next_node))
09601 continue;
09602 err = re_node_set_init_1 (&new_dests, next_node);
09603 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
09604 err3 = re_node_set_merge (cur_nodes, &new_dests);
09605 re_node_set_free (&new_dests);
09606 if (BE (err != REG_NOERROR || err2 != REG_NOERROR
09607 || err3 != REG_NOERROR, 0))
09608 {
09609 err = (err != REG_NOERROR ? err
09610 : (err2 != REG_NOERROR ? err2 : err3));
09611 return err;
09612 }
09613
09614 goto restart;
09615 }
09616 else
09617 {
09618 re_node_set union_set;
09619 next_node = dfa->nexts[ent->node];
09620 if (mctx->state_log[to_idx])
09621 {
09622 int ret;
09623 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
09624 next_node))
09625 continue;
09626 err = re_node_set_init_copy (&union_set,
09627 &mctx->state_log[to_idx]->nodes);
09628 ret = re_node_set_insert (&union_set, next_node);
09629 if (BE (err != REG_NOERROR || ret < 0, 0))
09630 {
09631 re_node_set_free (&union_set);
09632 err = err != REG_NOERROR ? err : REG_ESPACE;
09633 return err;
09634 }
09635 }
09636 else
09637 {
09638 err = re_node_set_init_1 (&union_set, next_node);
09639 if (BE (err != REG_NOERROR, 0))
09640 return err;
09641 }
09642 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
09643 re_node_set_free (&union_set);
09644 if (BE (mctx->state_log[to_idx] == NULL
09645 && err != REG_NOERROR, 0))
09646 return err;
09647 }
09648 }
09649 while (ent++->more);
09650 return REG_NOERROR;
09651 }
09652
09653
09654
09655
09656 static int
09657 internal_function
09658 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
09659 {
09660 reg_errcode_t err;
09661 int i, j, ch, need_word_trtable = 0;
09662 bitset_word_t elem, mask;
09663 bool dests_node_malloced = false;
09664 bool dest_states_malloced = false;
09665 int ndests;
09666 re_dfastate_t **trtable;
09667 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
09668 re_node_set follows, *dests_node;
09669 bitset_t *dests_ch;
09670 bitset_t acceptable;
09671
09672 struct dests_alloc
09673 {
09674 re_node_set dests_node[SBC_MAX];
09675 bitset_t dests_ch[SBC_MAX];
09676 } *dests_alloc;
09677
09678
09679
09680
09681
09682 if (__libc_use_alloca (sizeof (struct dests_alloc)))
09683 dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
09684 else
09685 {
09686 dests_alloc = re_malloc (struct dests_alloc, 1);
09687 if (BE (dests_alloc == NULL, 0))
09688 return 0;
09689 dests_node_malloced = true;
09690 }
09691 dests_node = dests_alloc->dests_node;
09692 dests_ch = dests_alloc->dests_ch;
09693
09694
09695 state->word_trtable = state->trtable = NULL;
09696
09697
09698
09699 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
09700 if (BE (ndests <= 0, 0))
09701 {
09702 if (dests_node_malloced)
09703 free (dests_alloc);
09704
09705 if (ndests == 0)
09706 {
09707 state->trtable = (re_dfastate_t **)
09708 calloc (sizeof (re_dfastate_t *), SBC_MAX);
09709 return 1;
09710 }
09711 return 0;
09712 }
09713
09714 err = re_node_set_alloc (&follows, ndests + 1);
09715 if (BE (err != REG_NOERROR, 0))
09716 goto out_free;
09717
09718 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
09719 + ndests * 3 * sizeof (re_dfastate_t *)))
09720 dest_states = (re_dfastate_t **)
09721 alloca (ndests * 3 * sizeof (re_dfastate_t *));
09722 else
09723 {
09724 dest_states = (re_dfastate_t **)
09725 malloc (ndests * 3 * sizeof (re_dfastate_t *));
09726 if (BE (dest_states == NULL, 0))
09727 {
09728 out_free:
09729 if (dest_states_malloced)
09730 free (dest_states);
09731 re_node_set_free (&follows);
09732 for (i = 0; i < ndests; ++i)
09733 re_node_set_free (dests_node + i);
09734 if (dests_node_malloced)
09735 free (dests_alloc);
09736 return 0;
09737 }
09738 dest_states_malloced = true;
09739 }
09740 dest_states_word = dest_states + ndests;
09741 dest_states_nl = dest_states_word + ndests;
09742 bitset_empty (acceptable);
09743
09744
09745 for (i = 0; i < ndests; ++i)
09746 {
09747 int next_node;
09748 re_node_set_empty (&follows);
09749
09750 for (j = 0; j < dests_node[i].nelem; ++j)
09751 {
09752 next_node = dfa->nexts[dests_node[i].elems[j]];
09753 if (next_node != -1)
09754 {
09755 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
09756 if (BE (err != REG_NOERROR, 0))
09757 goto out_free;
09758 }
09759 }
09760 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
09761 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
09762 goto out_free;
09763
09764
09765 if (dest_states[i]->has_constraint)
09766 {
09767 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
09768 CONTEXT_WORD);
09769 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
09770 goto out_free;
09771
09772 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
09773 need_word_trtable = 1;
09774
09775 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
09776 CONTEXT_NEWLINE);
09777 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
09778 goto out_free;
09779 }
09780 else
09781 {
09782 dest_states_word[i] = dest_states[i];
09783 dest_states_nl[i] = dest_states[i];
09784 }
09785 bitset_merge (acceptable, dests_ch[i]);
09786 }
09787
09788 if (!BE (need_word_trtable, 0))
09789 {
09790
09791
09792
09793
09794 trtable = state->trtable =
09795 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
09796 if (BE (trtable == NULL, 0))
09797 goto out_free;
09798
09799
09800 for (i = 0; i < BITSET_WORDS; ++i)
09801 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
09802 elem;
09803 mask <<= 1, elem >>= 1, ++ch)
09804 if (BE (elem & 1, 0))
09805 {
09806
09807
09808 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
09809 ;
09810
09811
09812 if (dfa->word_char[i] & mask)
09813 trtable[ch] = dest_states_word[j];
09814 else
09815 trtable[ch] = dest_states[j];
09816 }
09817 }
09818 else
09819 {
09820
09821
09822
09823
09824
09825 trtable = state->word_trtable =
09826 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
09827 if (BE (trtable == NULL, 0))
09828 goto out_free;
09829
09830
09831 for (i = 0; i < BITSET_WORDS; ++i)
09832 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
09833 elem;
09834 mask <<= 1, elem >>= 1, ++ch)
09835 if (BE (elem & 1, 0))
09836 {
09837
09838
09839 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
09840 ;
09841
09842
09843 trtable[ch] = dest_states[j];
09844 trtable[ch + SBC_MAX] = dest_states_word[j];
09845 }
09846 }
09847
09848
09849 if (bitset_contain (acceptable, NEWLINE_CHAR))
09850 {
09851
09852 for (j = 0; j < ndests; ++j)
09853 if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
09854 {
09855
09856 trtable[NEWLINE_CHAR] = dest_states_nl[j];
09857 if (need_word_trtable)
09858 trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
09859
09860
09861 break;
09862 }
09863 }
09864
09865 if (dest_states_malloced)
09866 free (dest_states);
09867
09868 re_node_set_free (&follows);
09869 for (i = 0; i < ndests; ++i)
09870 re_node_set_free (dests_node + i);
09871
09872 if (dests_node_malloced)
09873 free (dests_alloc);
09874
09875 return 1;
09876 }
09877
09878
09879
09880
09881
09882
/* Partition the nodes of STATE into destination groups according to the
   single-byte characters each node accepts.

   On return, for every group g in [0, return value), DESTS_NODE[g] holds
   the node indices belonging to the group and DESTS_CH[g] holds the set of
   bytes that lead to it.  The character sets of distinct groups are kept
   disjoint by splitting an existing group whenever a new node accepts only
   part of it.

   Returns the number of groups built, or -1 when a node-set operation
   fails; in that case every node set in DESTS_NODE[0 .. ndests) is freed
   before returning.
   NOTE(review): the capacity of DESTS_NODE / DESTS_CH is not checked here;
   presumably the caller sizes them for the worst case -- confirm.  */
09883 static int
09884 internal_function
09885 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
09886 re_node_set *dests_node, bitset_t *dests_ch)
09887 {
09888 reg_errcode_t err;
09889 int result;
09890 int i, j, k;
09891 int ndests;
/* Bytes accepted by the node currently being examined.  Every path through
   the loop body below leaves it empty again before the next iteration.  */
09892 bitset_t accepts;
09893 const re_node_set *cur_nodes = &state->nodes;
09894 bitset_empty (accepts);
09895 ndests = 0;
09896
09897
/* Visit every node contained in STATE.  */
09898 for (i = 0; i < cur_nodes->nelem; ++i)
09899 {
09900 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
09901 re_token_type_t type = node->type;
09902 unsigned int constraint = node->constraint;
09903
09904
/* Step 1: collect in 'accepts' every single byte this node can accept.  */
09905 if (type == CHARACTER)
09906 bitset_set (accepts, node->opr.c);
09907 else if (type == SIMPLE_BRACKET)
09908 {
09909 bitset_merge (accepts, node->opr.sbcset);
09910 }
09911 else if (type == OP_PERIOD)
09912 {
09913 #ifdef RE_ENABLE_I18N
/* With a multi-byte locale only the bytes flagged in dfa->sb_char are
   taken; otherwise every byte is accepted.  */
09914 if (dfa->mb_cur_max > 1)
09915 bitset_merge (accepts, dfa->sb_char);
09916 else
09917 #endif
09918 bitset_set_all (accepts);
/* Apply the syntax options that exclude newline and NUL from '.'.  */
09919 if (!(dfa->syntax & RE_DOT_NEWLINE))
09920 bitset_clear (accepts, '\n');
09921 if (dfa->syntax & RE_DOT_NOT_NULL)
09922 bitset_clear (accepts, '\0');
09923 }
09924 #ifdef RE_ENABLE_I18N
09925 else if (type == OP_UTF8_PERIOD)
09926 {
/* Set every bit in the first half of the bitset (the low half of the byte
   range), then apply the same newline / NUL exclusions as for OP_PERIOD.  */
09927 memset (accepts, '\xff', sizeof (bitset_t) / 2);
09928 if (!(dfa->syntax & RE_DOT_NEWLINE))
09929 bitset_clear (accepts, '\n');
09930 if (dfa->syntax & RE_DOT_NOT_NULL)
09931 bitset_clear (accepts, '\0');
09932 }
09933 #endif
/* Any other node type contributes no single-byte transition.  */
09934 else
09935 continue;
09936
09937
09938
/* Step 2: narrow 'accepts' according to the node's context constraint.
   Whenever nothing is left the node is skipped with 'accepts' empty.  */
09939 if (constraint)
09940 {
09941 if (constraint & NEXT_NEWLINE_CONSTRAINT)
09942 {
/* Only NEWLINE_CHAR may remain; skip the node if it was not accepted.  */
09943 bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
09944 bitset_empty (accepts);
09945 if (accepts_newline)
09946 bitset_set (accepts, NEWLINE_CHAR);
09947 else
09948 continue;
09949 }
09950 if (constraint & NEXT_ENDBUF_CONSTRAINT)
09951 {
/* No byte is kept for this constraint: the node is dropped.  */
09952 bitset_empty (accepts);
09953 continue;
09954 }
09955
09956 if (constraint & NEXT_WORD_CONSTRAINT)
09957 {
/* OR of all remaining words of 'accepts'; zero means the set is empty.  */
09958 bitset_word_t any_set = 0;
09959 if (type == CHARACTER && !node->word_char)
09960 {
09961 bitset_empty (accepts);
09962 continue;
09963 }
09964 #ifdef RE_ENABLE_I18N
/* Multi-byte locale: keep word characters and also every byte that is not
   flagged in dfa->sb_char.  */
09965 if (dfa->mb_cur_max > 1)
09966 for (j = 0; j < BITSET_WORDS; ++j)
09967 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
09968 else
09969 #endif
/* Single-byte locale: keep only the word characters.  */
09970 for (j = 0; j < BITSET_WORDS; ++j)
09971 any_set |= (accepts[j] &= dfa->word_char[j]);
09972 if (!any_set)
09973 continue;
09974 }
09975 if (constraint & NEXT_NOTWORD_CONSTRAINT)
09976 {
09977 bitset_word_t any_set = 0;
09978 if (type == CHARACTER && node->word_char)
09979 {
09980 bitset_empty (accepts);
09981 continue;
09982 }
09983 #ifdef RE_ENABLE_I18N
/* Multi-byte locale: remove only the bytes that are both word characters
   and flagged in dfa->sb_char.  */
09984 if (dfa->mb_cur_max > 1)
09985 for (j = 0; j < BITSET_WORDS; ++j)
09986 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
09987 else
09988 #endif
/* Single-byte locale: remove every word character.  */
09989 for (j = 0; j < BITSET_WORDS; ++j)
09990 any_set |= (accepts[j] &= ~dfa->word_char[j]);
09991 if (!any_set)
09992 continue;
09993 }
09994 }
09995
09996
09997
/* Step 3: distribute 'accepts' over the groups built so far.  The bound
   'ndests' is re-read on every iteration, so groups appended inside the
   loop are visited too.  */
09998 for (j = 0; j < ndests; ++j)
09999 {
/* intersec = accepts & dests_ch[j]; remains = dests_ch[j] & ~accepts.  */
10000 bitset_t intersec;
10001 bitset_t remains;
10002
/* Each flag is the OR of all words of the corresponding set, i.e. non-zero
   exactly when that set is non-empty.  */
10003 bitset_word_t has_intersec, not_subset, not_consumed;
10004
10005
/* Shortcut for a single character: the group is irrelevant unless it
   already contains that character.  */
10006 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
10007 continue;
10008
10009
/* Compute the intersection of this group with 'accepts'.  */
10010 has_intersec = 0;
10011 for (k = 0; k < BITSET_WORDS; ++k)
10012 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
10013
/* Nothing in common: leave this group untouched.  */
10014 if (!has_intersec)
10015 continue;
10016
10017
/* Compute what the group has beyond 'accepts' (remains) and strip the
   group's characters from 'accepts' in the same pass.  */
10018 not_subset = not_consumed = 0;
10019 for (k = 0; k < BITSET_WORDS; ++k)
10020 {
10021 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
10022 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
10023 }
10024
10025
10026
/* The group is not fully covered by 'accepts': split it.  A new group with
   the same nodes takes the uncovered characters, and group j keeps only
   the intersection.  */
10027 if (not_subset)
10028 {
10029 bitset_copy (dests_ch[ndests], remains);
10030 bitset_copy (dests_ch[j], intersec);
10031 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
10032 if (BE (err != REG_NOERROR, 0))
10033 goto error_return;
10034 ++ndests;
10035 }
10036
10037
/* Add the current node to group j, which now accepts only characters the
   node accepts as well.  */
10038 result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
10039 if (BE (result < 0, 0))
10040 goto error_return;
10041
10042
/* Every accepted character has been assigned to a group; 'accepts' is
   empty, so move on to the next node.  */
10043 if (!not_consumed)
10044 break;
10045 }
10046
/* The loop ran to completion, so some characters are still unassigned:
   open a new group containing only the current node.  */
10047 if (j == ndests)
10048 {
10049 bitset_copy (dests_ch[ndests], accepts);
10050 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
10051 if (BE (err != REG_NOERROR, 0))
10052 goto error_return;
10053 ++ndests;
10054 bitset_empty (accepts);
10055 }
10056 }
10057 return ndests;
/* Failure path: release every node set created so far.  */
10058 error_return:
10059 for (j = 0; j < ndests; ++j)
10060 re_node_set_free (dests_node + j);
10061 return -1;
10062 }
10063
10064 #ifdef RE_ENABLE_I18N
10065
10066
10067
10068
10069
10070
10071
10072
/* Determine how many bytes of INPUT, starting at byte offset STR_IDX, are
   accepted as one unit by the node dfa->nodes[NODE_IDX].

   Only OP_UTF8_PERIOD, OP_PERIOD and COMPLEX_BRACKET nodes can produce a
   non-zero result; every other node type falls through to 'return 0'.
   Returns the number of bytes accepted, or 0 when the node accepts nothing
   at this position.  */
10073 static int
10074 internal_function
10075 check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
10076 const re_string_t *input, int str_idx)
10077 {
10078 const re_token_t *node = dfa->nodes + node_idx;
10079 int char_len, elem_len;
10080 int i;
10081
/* '.' with UTF-8 specific handling: decode the sequence length from the
   lead byte C and validate the following bytes by hand.  */
10082 if (BE (node->type == OP_UTF8_PERIOD, 0))
10083 {
10084 unsigned char c = re_string_byte_at (input, str_idx), d;
/* Lead bytes below 0xc2 never start an accepted multi-byte sequence.  */
10085 if (BE (c < 0xc2, 1))
10086 return 0;
10087
/* At least two bytes must be available before D is read.  */
10088 if (str_idx + 2 > input->len)
10089 return 0;
10090
10091 d = re_string_byte_at (input, str_idx + 1);
/* Two-byte sequence: accepted iff D lies in 0x80..0xbf.  */
10092 if (c < 0xe0)
10093 return (d < 0x80 || d > 0xbf) ? 0 : 2;
/* Longer sequences: pick the length from C and reject a second byte that
   is too small for the smallest lead byte of that length.  */
10094 else if (c < 0xf0)
10095 {
10096 char_len = 3;
10097 if (c == 0xe0 && d < 0xa0)
10098 return 0;
10099 }
10100 else if (c < 0xf8)
10101 {
10102 char_len = 4;
10103 if (c == 0xf0 && d < 0x90)
10104 return 0;
10105 }
10106 else if (c < 0xfc)
10107 {
10108 char_len = 5;
10109 if (c == 0xf8 && d < 0x88)
10110 return 0;
10111 }
10112 else if (c < 0xfe)
10113 {
10114 char_len = 6;
10115 if (c == 0xfc && d < 0x84)
10116 return 0;
10117 }
/* 0xfe and 0xff are never accepted as lead bytes.  */
10118 else
10119 return 0;
10120
/* The whole sequence must fit inside the input.  */
10121 if (str_idx + char_len > input->len)
10122 return 0;
10123
/* Every byte after the lead byte must lie in 0x80..0xbf.  */
10124 for (i = 1; i < char_len; ++i)
10125 {
10126 d = re_string_byte_at (input, str_idx + i);
10127 if (d < 0x80 || d > 0xbf)
10128 return 0;
10129 }
10130 return char_len;
10131 }
10132
/* Generic path: byte length of the character at STR_IDX.  */
10133 char_len = re_string_char_size_at (input, str_idx);
10134 if (node->type == OP_PERIOD)
10135 {
/* Characters of length <= 1 are not handled by this function.  */
10136 if (char_len <= 1)
10137 return 0;
10138
10139
10140
/* Honour the syntax bits that forbid '.' from matching newline or NUL;
   only the first byte of the character is inspected.  */
10141 if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
10142 re_string_byte_at (input, str_idx) == '\n') ||
10143 ((dfa->syntax & RE_DOT_NOT_NULL) &&
10144 re_string_byte_at (input, str_idx) == '\0'))
10145 return 0;
10146 return char_len;
10147 }
10148
/* Byte length of the element at STR_IDX (presumably a collating element --
   confirm against re_string_elem_size_at).  Give up when neither the
   element nor the character is longer than one byte, or when CHAR_LEN
   is 0.  */
10149 elem_len = re_string_elem_size_at (input, str_idx);
10150 if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
10151 return 0;
10152
10153 if (node->type == COMPLEX_BRACKET)
10154 {
10155 const re_charset_t *cset = node->opr.mbcset;
10156 # ifdef _LIBC
/* Raw bytes of the input at STR_IDX, used for byte-wise comparisons.  */
10157 const unsigned char *pin
10158 = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
10159 int j;
10160 uint32_t nrules;
10161 # endif
/* Length of the match found so far; 0 means no alternative has matched.  */
10162 int match_len = 0;
/* The wide character is fetched only when one of the tests below needs it.  */
10163 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
10164 ? re_string_wchar_at (input, str_idx) : 0);
10165
10166
/* 1. Explicitly listed multi-byte characters.  */
10167 for (i = 0; i < cset->nmbchars; ++i)
10168 if (wc == cset->mbchars[i])
10169 {
10170 match_len = char_len;
10171 goto check_node_accept_bytes_match;
10172 }
10173
/* 2. Character classes, tested with __iswctype.  */
10174 for (i = 0; i < cset->nchar_classes; ++i)
10175 {
10176 wctype_t wt = cset->char_classes[i];
10177 if (__iswctype (wc, wt))
10178 {
10179 match_len = char_len;
10180 goto check_node_accept_bytes_match;
10181 }
10182 }
10183
10184 # ifdef _LIBC
/* Inside libc the locale's collation tables are consulted directly when
   the locale defines collation rules (nrules != 0).  */
10185 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
10186 if (nrules != 0)
10187 {
10188 unsigned int in_collseq = 0;
10189 const int32_t *table, *indirect;
10190 const unsigned char *weights, *extra;
10191 const char *collseqwc;
10192 int32_t idx;
10193
/* NOTE(review): this header presumably supplies findidx(), which uses the
   local table/indirect/extra variables declared above -- confirm.  */
10194 # include <locale/weight.h>
10195
10196
/* 3. Collating symbols.  'extra' is only initialised when there is at
   least one symbol; the loop below does not run otherwise.  */
10197 if (cset->ncoll_syms)
10198 extra = (const unsigned char *)
10199 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
10200 for (i = 0; i < cset->ncoll_syms; ++i)
10201 {
/* Each symbol is stored as a length byte followed by that many bytes.  */
10202 const unsigned char *coll_sym = extra + cset->coll_syms[i];
10203
10204
/* Skip symbols whose length differs from the input element's length.  */
10205 if (*coll_sym != elem_len)
10206 continue;
10207
/* Compare byte by byte; a full-length run means the symbol matches.  */
10208 for (j = 0; j < *coll_sym; j++)
10209 if (pin[j] != coll_sym[1 + j])
10210 break;
10211 if (j == *coll_sym)
10212 {
10213
10214 match_len = j;
10215 goto check_node_accept_bytes_match;
10216 }
10217 }
10218
/* 4. Range expressions: obtain the collation sequence value of the input,
   from the wide-character table when the element is no longer than the
   character, otherwise from find_collation_sequence_value on the raw
   bytes.  */
10219 if (cset->nranges)
10220 {
10221 if (elem_len <= char_len)
10222 {
10223 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
10224 in_collseq = __collseq_table_lookup (collseqwc, wc);
10225 }
10226 else
10227 in_collseq = find_collation_sequence_value (pin, elem_len);
10228 }
10229
/* The input matches a range when its value lies within the inclusive
   bounds of that range.  */
10230 for (i = 0; i < cset->nranges; ++i)
10231 if (cset->range_starts[i] <= in_collseq
10232 && in_collseq <= cset->range_ends[i])
10233 {
10234 match_len = elem_len;
10235 goto check_node_accept_bytes_match;
10236 }
10237
10238
/* 5. Equivalence classes: compare the weight sequence of the input with
   the one of each class.  */
10239 if (cset->nequiv_classes)
10240 {
10241 const unsigned char *cp = pin;
10242 table = (const int32_t *)
10243 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
10244 weights = (const unsigned char *)
10245 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
10246 extra = (const unsigned char *)
10247 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
10248 indirect = (const int32_t *)
10249 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
10250 idx = findidx (&cp);
/* Only a positive index is compared against the classes.  */
10251 if (idx > 0)
10252 for (i = 0; i < cset->nequiv_classes; ++i)
10253 {
10254 int32_t equiv_class_idx = cset->equiv_classes[i];
/* weights[idx] is the length byte of the weight sequence that follows.  */
10255 size_t weight_len = weights[idx];
/* Sequences of different length cannot be equal.  */
10256 if (weight_len == weights[equiv_class_idx])
10257 {
10258 int cnt = 0;
/* Count matching bytes; the loop bound is inclusive, so a full match
   leaves cnt == weight_len + 1.  */
10259 while (cnt <= weight_len
10260 && (weights[equiv_class_idx + 1 + cnt]
10261 == weights[idx + 1 + cnt]))
10262 ++cnt;
10263 if (cnt > weight_len)
10264 {
10265 match_len = elem_len;
10266 goto check_node_accept_bytes_match;
10267 }
10268 }
10269 }
10270 }
10271 }
10272 else
10273 # endif
10274 {
10275
/* Portable range test.  cmp_buf holds three one-character, NUL-terminated
   wide strings at offsets 0 (range start), 2 (input character) and
   4 (range end), compared pairwise with wcscoll.  */
10276 #if __GNUC__ >= 2
10277 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
10278 #else
10279 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
10280 cmp_buf[2] = wc;
10281 #endif
10282 for (i = 0; i < cset->nranges; ++i)
10283 {
10284 cmp_buf[0] = cset->range_starts[i];
10285 cmp_buf[4] = cset->range_ends[i];
/* Match when start <= wc and wc <= end in collation order.  */
10286 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
10287 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
10288 {
10289 match_len = char_len;
10290 goto check_node_accept_bytes_match;
10291 }
10292 }
10293 }
/* Common exit: translate the raw match result according to whether the
   bracket expression is negated.  */
10294 check_node_accept_bytes_match:
10295 if (!cset->non_match)
10296 return match_len;
10297 else
10298 {
/* Negated set: a hit means rejection, a miss accepts the longer of the
   element and the character.  */
10299 if (match_len > 0)
10300 return 0;
10301 else
10302 return (elem_len > char_len) ? elem_len : char_len;
10303 }
10304 }
/* Any other node type accepts nothing here.  */
10305 return 0;
10306 }
10307
10308 # ifdef _LIBC
10309 static unsigned int
10310 internal_function
10311 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
10312 {
10313 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
10314 if (nrules == 0)
10315 {
10316 if (mbs_len == 1)
10317 {
10318
10319 const unsigned char *collseq = (const unsigned char *)
10320 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
10321 return collseq[mbs[0]];
10322 }
10323 return UINT_MAX;
10324 }
10325 else
10326 {
10327 int32_t idx;
10328 const unsigned char *extra = (const unsigned char *)
10329 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
10330 int32_t extrasize = (const unsigned char *)
10331 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
10332
10333 for (idx = 0; idx < extrasize;)
10334 {
10335 int mbs_cnt, found = 0;
10336 int32_t elem_mbs_len;
10337
10338 idx = idx + extra[idx] + 1;
10339 elem_mbs_len = extra[idx++];
10340 if (mbs_len == elem_mbs_len)
10341 {
10342 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
10343 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
10344 break;
10345 if (mbs_cnt == elem_mbs_len)
10346
10347 found = 1;
10348 }
10349
10350 idx += elem_mbs_len;
10351
10352 idx = (idx + 3) & ~3;
10353
10354 idx += sizeof (uint32_t);
10355
10356 idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
10357
10358 if (found)
10359 return *(uint32_t *) (extra + idx);
10360
10361 idx += sizeof (uint32_t);
10362 }
10363 return UINT_MAX;
10364 }
10365 }
10366 # endif
10367 #endif
10368
10369
10370
10371
10372 static int
10373 internal_function
10374 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
10375 int idx)
10376 {
10377 unsigned char ch;
10378 ch = re_string_byte_at (&mctx->input, idx);
10379 switch (node->type)
10380 {
10381 case CHARACTER:
10382 if (node->opr.c != ch)
10383 return 0;
10384 break;
10385
10386 case SIMPLE_BRACKET:
10387 if (!bitset_contain (node->opr.sbcset, ch))
10388 return 0;
10389 break;
10390
10391 #ifdef RE_ENABLE_I18N
10392 case OP_UTF8_PERIOD:
10393 if (ch >= 0x80)
10394 return 0;
10395
10396 #endif
10397 case OP_PERIOD:
10398 if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
10399 || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
10400 return 0;
10401 break;
10402
10403 default:
10404 return 0;
10405 }
10406
10407 if (node->constraint)
10408 {
10409
10410
10411 unsigned int context = re_string_context_at (&mctx->input, idx,
10412 mctx->eflags);
10413 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
10414 return 0;
10415 }
10416
10417 return 1;
10418 }
10419
10420
10421
10422 static reg_errcode_t
10423 internal_function
10424 extend_buffers (re_match_context_t *mctx)
10425 {
10426 reg_errcode_t ret;
10427 re_string_t *pstr = &mctx->input;
10428
10429
10430 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
10431 if (BE (ret != REG_NOERROR, 0))
10432 return ret;
10433
10434 if (mctx->state_log != NULL)
10435 {
10436
10437
10438
10439
10440 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
10441 pstr->bufs_len + 1);
10442 if (BE (new_array == NULL, 0))
10443 return REG_ESPACE;
10444 mctx->state_log = new_array;
10445 }
10446
10447
10448 if (pstr->icase)
10449 {
10450 #ifdef RE_ENABLE_I18N
10451 if (pstr->mb_cur_max > 1)
10452 {
10453 ret = build_wcs_upper_buffer (pstr);
10454 if (BE (ret != REG_NOERROR, 0))
10455 return ret;
10456 }
10457 else
10458 #endif
10459 build_upper_buffer (pstr);
10460 }
10461 else
10462 {
10463 #ifdef RE_ENABLE_I18N
10464 if (pstr->mb_cur_max > 1)
10465 build_wcs_buffer (pstr);
10466 else
10467 #endif
10468 {
10469 if (pstr->trans != NULL)
10470 re_string_translate_buffer (pstr);
10471 }
10472 }
10473 return REG_NOERROR;
10474 }
10475
10476
10477
10478
10479
10480
10481 static reg_errcode_t
10482 internal_function
10483 match_ctx_init (re_match_context_t *mctx, int eflags, int n)
10484 {
10485 mctx->eflags = eflags;
10486 mctx->match_last = -1;
10487 if (n > 0)
10488 {
10489 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
10490 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
10491 if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
10492 return REG_ESPACE;
10493 }
10494
10495
10496
10497
10498
10499 mctx->abkref_ents = n;
10500 mctx->max_mb_elem_len = 1;
10501 mctx->asub_tops = n;
10502 return REG_NOERROR;
10503 }
10504
10505
10506
10507
10508
10509 static void
10510 internal_function
10511 match_ctx_clean (re_match_context_t *mctx)
10512 {
10513 int st_idx;
10514 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
10515 {
10516 int sl_idx;
10517 re_sub_match_top_t *top = mctx->sub_tops[st_idx];
10518 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
10519 {
10520 re_sub_match_last_t *last = top->lasts[sl_idx];
10521 re_free (last->path.array);
10522 re_free (last);
10523 }
10524 re_free (top->lasts);
10525 if (top->path)
10526 {
10527 re_free (top->path->array);
10528 re_free (top->path);
10529 }
10530 free (top);
10531 }
10532
10533 mctx->nsub_tops = 0;
10534 mctx->nbkref_ents = 0;
10535 }
10536
10537
10538
10539 static void
10540 internal_function
10541 match_ctx_free (re_match_context_t *mctx)
10542 {
10543
10544 match_ctx_clean (mctx);
10545 re_free (mctx->sub_tops);
10546 re_free (mctx->bkref_ents);
10547 }
10548
10549
10550
10551
10552
10553
10554 static reg_errcode_t
10555 internal_function
10556 match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
10557 int to)
10558 {
10559 if (mctx->nbkref_ents >= mctx->abkref_ents)
10560 {
10561 struct re_backref_cache_entry* new_entry;
10562 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
10563 mctx->abkref_ents * 2);
10564 if (BE (new_entry == NULL, 0))
10565 {
10566 re_free (mctx->bkref_ents);
10567 return REG_ESPACE;
10568 }
10569 mctx->bkref_ents = new_entry;
10570 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
10571 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
10572 mctx->abkref_ents *= 2;
10573 }
10574 if (mctx->nbkref_ents > 0
10575 && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
10576 mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
10577
10578 mctx->bkref_ents[mctx->nbkref_ents].node = node;
10579 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
10580 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
10581 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
10582
10583
10584
10585
10586
10587
10588
10589
10590
10591 mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
10592 = (from == to ? ~0 : 0);
10593
10594 mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
10595 if (mctx->max_mb_elem_len < to - from)
10596 mctx->max_mb_elem_len = to - from;
10597 return REG_NOERROR;
10598 }
10599
10600
10601
10602
10603 static int
10604 internal_function
10605 search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
10606 {
10607 int left, right, mid, last;
10608 last = right = mctx->nbkref_ents;
10609 for (left = 0; left < right;)
10610 {
10611 mid = (left + right) / 2;
10612 if (mctx->bkref_ents[mid].str_idx < str_idx)
10613 left = mid + 1;
10614 else
10615 right = mid;
10616 }
10617 if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
10618 return left;
10619 else
10620 return -1;
10621 }
10622
10623
10624
10625
10626 static reg_errcode_t
10627 internal_function
10628 match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
10629 {
10630 #ifdef DEBUG
10631 assert (mctx->sub_tops != NULL);
10632 assert (mctx->asub_tops > 0);
10633 #endif
10634 if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
10635 {
10636 int new_asub_tops = mctx->asub_tops * 2;
10637 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
10638 re_sub_match_top_t *,
10639 new_asub_tops);
10640 if (BE (new_array == NULL, 0))
10641 return REG_ESPACE;
10642 mctx->sub_tops = new_array;
10643 mctx->asub_tops = new_asub_tops;
10644 }
10645 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
10646 if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
10647 return REG_ESPACE;
10648 mctx->sub_tops[mctx->nsub_tops]->node = node;
10649 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
10650 return REG_NOERROR;
10651 }
10652
10653
10654
10655
10656 static re_sub_match_last_t *
10657 internal_function
10658 match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
10659 {
10660 re_sub_match_last_t *new_entry;
10661 if (BE (subtop->nlasts == subtop->alasts, 0))
10662 {
10663 int new_alasts = 2 * subtop->alasts + 1;
10664 re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
10665 re_sub_match_last_t *,
10666 new_alasts);
10667 if (BE (new_array == NULL, 0))
10668 return NULL;
10669 subtop->lasts = new_array;
10670 subtop->alasts = new_alasts;
10671 }
10672 new_entry = calloc (1, sizeof (re_sub_match_last_t));
10673 if (BE (new_entry != NULL, 1))
10674 {
10675 subtop->lasts[subtop->nlasts] = new_entry;
10676 new_entry->node = node;
10677 new_entry->str_idx = str_idx;
10678 ++subtop->nlasts;
10679 }
10680 return new_entry;
10681 }
10682
10683 static void
10684 internal_function
10685 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
10686 re_dfastate_t **limited_sts, int last_node, int last_str_idx)
10687 {
10688 sctx->sifted_states = sifted_sts;
10689 sctx->limited_states = limited_sts;
10690 sctx->last_node = last_node;
10691 sctx->last_str_idx = last_str_idx;
10692 re_node_set_init_empty (&sctx->limits);
10693 }
10694
10695
10696
/* Compatibility definition of the 're_max_failures' variable.  It is
   compiled only when _LIBC is set and SHLIB_COMPAT selects the libc
   versions from GLIBC_2_0 to GLIBC_2_3.  The variable is initialised to
   2000, and a link-time warning marks it as obsolete.  */
10697 #if _LIBC
10698 # include <shlib-compat.h>
10699 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
10700 link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
10701 int re_max_failures = 2000;
10702 # endif
10703 #endif
10704 #endif