00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "regparse.h"
00031
00032 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
00033
00034 extern OnigCaseFoldType
00035 onig_get_default_case_fold_flag(void)
00036 {
00037 return OnigDefaultCaseFoldFlag;
00038 }
00039
00040 extern int
00041 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
00042 {
00043 OnigDefaultCaseFoldFlag = case_fold_flag;
00044 return 0;
00045 }
00046
00047 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
00048 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
00049 #endif
00050
00051 static UChar*
00052 str_dup(UChar* s, UChar* end)
00053 {
00054 ptrdiff_t len = end - s;
00055
00056 if (len > 0) {
00057 UChar* r = (UChar* )xmalloc(len + 1);
00058 CHECK_NULL_RETURN(r);
00059 xmemcpy(r, s, len);
00060 r[len] = (UChar )0;
00061 return r;
00062 }
00063 else return NULL;
00064 }
00065
00066 static void
00067 swap_node(Node* a, Node* b)
00068 {
00069 Node c;
00070 c = *a; *a = *b; *b = c;
00071
00072 if (NTYPE(a) == NT_STR) {
00073 StrNode* sn = NSTR(a);
00074 if (sn->capa == 0) {
00075 size_t len = sn->end - sn->s;
00076 sn->s = sn->buf;
00077 sn->end = sn->s + len;
00078 }
00079 }
00080
00081 if (NTYPE(b) == NT_STR) {
00082 StrNode* sn = NSTR(b);
00083 if (sn->capa == 0) {
00084 size_t len = sn->end - sn->s;
00085 sn->s = sn->buf;
00086 sn->end = sn->s + len;
00087 }
00088 }
00089 }
00090
00091 static OnigDistance
00092 distance_add(OnigDistance d1, OnigDistance d2)
00093 {
00094 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
00095 return ONIG_INFINITE_DISTANCE;
00096 else {
00097 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
00098 else return ONIG_INFINITE_DISTANCE;
00099 }
00100 }
00101
00102 static OnigDistance
00103 distance_multiply(OnigDistance d, int m)
00104 {
00105 if (m == 0) return 0;
00106
00107 if (d < ONIG_INFINITE_DISTANCE / m)
00108 return d * m;
00109 else
00110 return ONIG_INFINITE_DISTANCE;
00111 }
00112
00113 static int
00114 bitset_is_empty(BitSetRef bs)
00115 {
00116 int i;
00117 for (i = 0; i < (int )BITSET_SIZE; i++) {
00118 if (bs[i] != 0) return 0;
00119 }
00120 return 1;
00121 }
00122
00123 #ifdef ONIG_DEBUG
00124 static int
00125 onig_is_prelude(void)
00126 {
00127 return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
00128 }
00129
00130 static int
00131 bitset_on_num(BitSetRef bs)
00132 {
00133 int i, n;
00134
00135 n = 0;
00136 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
00137 if (BITSET_AT(bs, i)) n++;
00138 }
00139 return n;
00140 }
00141 #endif
00142
00143 extern int
00144 onig_bbuf_init(BBuf* buf, OnigDistance size)
00145 {
00146 if (size <= 0) {
00147 size = 0;
00148 buf->p = NULL;
00149 }
00150 else {
00151 buf->p = (UChar* )xmalloc(size);
00152 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
00153 }
00154
00155 buf->alloc = (unsigned int)size;
00156 buf->used = 0;
00157 return 0;
00158 }
00159
00160
00161 #ifdef USE_SUBEXP_CALL
00162
00163 static int
00164 unset_addr_list_init(UnsetAddrList* uslist, int size)
00165 {
00166 UnsetAddr* p;
00167
00168 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
00169 CHECK_NULL_RETURN_MEMERR(p);
00170 uslist->num = 0;
00171 uslist->alloc = size;
00172 uslist->us = p;
00173 return 0;
00174 }
00175
00176 static void
00177 unset_addr_list_end(UnsetAddrList* uslist)
00178 {
00179 if (IS_NOT_NULL(uslist->us))
00180 xfree(uslist->us);
00181 }
00182
00183 static int
00184 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
00185 {
00186 UnsetAddr* p;
00187 int size;
00188
00189 if (uslist->num >= uslist->alloc) {
00190 size = uslist->alloc * 2;
00191 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
00192 CHECK_NULL_RETURN_MEMERR(p);
00193 uslist->alloc = size;
00194 uslist->us = p;
00195 }
00196
00197 uslist->us[uslist->num].offset = offset;
00198 uslist->us[uslist->num].target = node;
00199 uslist->num++;
00200 return 0;
00201 }
00202 #endif
00203
00204
00205 static int
00206 add_opcode(regex_t* reg, int opcode)
00207 {
00208 BBUF_ADD1(reg, opcode);
00209 return 0;
00210 }
00211
00212 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00213 static int
00214 add_state_check_num(regex_t* reg, int num)
00215 {
00216 StateCheckNumType n = (StateCheckNumType )num;
00217
00218 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
00219 return 0;
00220 }
00221 #endif
00222
00223 static int
00224 add_rel_addr(regex_t* reg, int addr)
00225 {
00226 RelAddrType ra = (RelAddrType )addr;
00227
00228 BBUF_ADD(reg, &ra, SIZE_RELADDR);
00229 return 0;
00230 }
00231
00232 static int
00233 add_abs_addr(regex_t* reg, int addr)
00234 {
00235 AbsAddrType ra = (AbsAddrType )addr;
00236
00237 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
00238 return 0;
00239 }
00240
00241 static int
00242 add_length(regex_t* reg, OnigDistance len)
00243 {
00244 LengthType l = (LengthType )len;
00245
00246 BBUF_ADD(reg, &l, SIZE_LENGTH);
00247 return 0;
00248 }
00249
00250 static int
00251 add_mem_num(regex_t* reg, int num)
00252 {
00253 MemNumType n = (MemNumType )num;
00254
00255 BBUF_ADD(reg, &n, SIZE_MEMNUM);
00256 return 0;
00257 }
00258
00259 static int
00260 add_pointer(regex_t* reg, void* addr)
00261 {
00262 PointerType ptr = (PointerType )addr;
00263
00264 BBUF_ADD(reg, &ptr, SIZE_POINTER);
00265 return 0;
00266 }
00267
00268 static int
00269 add_option(regex_t* reg, OnigOptionType option)
00270 {
00271 BBUF_ADD(reg, &option, SIZE_OPTION);
00272 return 0;
00273 }
00274
00275 static int
00276 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
00277 {
00278 int r;
00279
00280 r = add_opcode(reg, opcode);
00281 if (r) return r;
00282 r = add_rel_addr(reg, addr);
00283 return r;
00284 }
00285
00286 static int
00287 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
00288 {
00289 BBUF_ADD(reg, bytes, len);
00290 return 0;
00291 }
00292
00293 static int
00294 add_bitset(regex_t* reg, BitSetRef bs)
00295 {
00296 BBUF_ADD(reg, bs, SIZE_BITSET);
00297 return 0;
00298 }
00299
00300 static int
00301 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
00302 {
00303 int r;
00304
00305 r = add_opcode(reg, opcode);
00306 if (r) return r;
00307 r = add_option(reg, option);
00308 return r;
00309 }
00310
00311 static int compile_length_tree(Node* node, regex_t* reg);
00312 static int compile_tree(Node* node, regex_t* reg);
00313
00314
00315 #define IS_NEED_STR_LEN_OP_EXACT(op) \
00316 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
00317 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
00318
00319 static int
00320 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
00321 {
00322 int op;
00323
00324 if (ignore_case) {
00325 switch (str_len) {
00326 case 1: op = OP_EXACT1_IC; break;
00327 default: op = OP_EXACTN_IC; break;
00328 }
00329 }
00330 else {
00331 switch (mb_len) {
00332 case 1:
00333 switch (str_len) {
00334 case 1: op = OP_EXACT1; break;
00335 case 2: op = OP_EXACT2; break;
00336 case 3: op = OP_EXACT3; break;
00337 case 4: op = OP_EXACT4; break;
00338 case 5: op = OP_EXACT5; break;
00339 default: op = OP_EXACTN; break;
00340 }
00341 break;
00342
00343 case 2:
00344 switch (str_len) {
00345 case 1: op = OP_EXACTMB2N1; break;
00346 case 2: op = OP_EXACTMB2N2; break;
00347 case 3: op = OP_EXACTMB2N3; break;
00348 default: op = OP_EXACTMB2N; break;
00349 }
00350 break;
00351
00352 case 3:
00353 op = OP_EXACTMB3N;
00354 break;
00355
00356 default:
00357 op = OP_EXACTMBN;
00358 break;
00359 }
00360 }
00361 return op;
00362 }
00363
00364 static int
00365 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
00366 {
00367 int r;
00368 int saved_num_null_check = reg->num_null_check;
00369
00370 if (empty_info != 0) {
00371 r = add_opcode(reg, OP_NULL_CHECK_START);
00372 if (r) return r;
00373 r = add_mem_num(reg, reg->num_null_check);
00374 if (r) return r;
00375 reg->num_null_check++;
00376 }
00377
00378 r = compile_tree(node, reg);
00379 if (r) return r;
00380
00381 if (empty_info != 0) {
00382 if (empty_info == NQ_TARGET_IS_EMPTY)
00383 r = add_opcode(reg, OP_NULL_CHECK_END);
00384 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
00385 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
00386 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
00387 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
00388
00389 if (r) return r;
00390 r = add_mem_num(reg, saved_num_null_check);
00391 }
00392 return r;
00393 }
00394
00395 #ifdef USE_SUBEXP_CALL
00396 static int
00397 compile_call(CallNode* node, regex_t* reg)
00398 {
00399 int r;
00400
00401 r = add_opcode(reg, OP_CALL);
00402 if (r) return r;
00403 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
00404 node->target);
00405 if (r) return r;
00406 r = add_abs_addr(reg, 0 );
00407 return r;
00408 }
00409 #endif
00410
00411 static int
00412 compile_tree_n_times(Node* node, int n, regex_t* reg)
00413 {
00414 int i, r;
00415
00416 for (i = 0; i < n; i++) {
00417 r = compile_tree(node, reg);
00418 if (r) return r;
00419 }
00420 return 0;
00421 }
00422
00423 static int
00424 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
00425 regex_t* reg ARG_UNUSED, int ignore_case)
00426 {
00427 int len;
00428 int op = select_str_opcode(mb_len, str_len, ignore_case);
00429
00430 len = SIZE_OPCODE;
00431
00432 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
00433 if (IS_NEED_STR_LEN_OP_EXACT(op))
00434 len += SIZE_LENGTH;
00435
00436 len += mb_len * str_len;
00437 return len;
00438 }
00439
00440 static int
00441 add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
00442 regex_t* reg, int ignore_case)
00443 {
00444 int op = select_str_opcode(mb_len, str_len, ignore_case);
00445 add_opcode(reg, op);
00446
00447 if (op == OP_EXACTMBN)
00448 add_length(reg, mb_len);
00449
00450 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
00451 if (op == OP_EXACTN_IC)
00452 add_length(reg, mb_len * str_len);
00453 else
00454 add_length(reg, str_len);
00455 }
00456
00457 add_bytes(reg, s, mb_len * str_len);
00458 return 0;
00459 }
00460
00461
00462 static int
00463 compile_length_string_node(Node* node, regex_t* reg)
00464 {
00465 int rlen, r, len, prev_len, slen, ambig;
00466 OnigEncoding enc = reg->enc;
00467 UChar *p, *prev;
00468 StrNode* sn;
00469
00470 sn = NSTR(node);
00471 if (sn->end <= sn->s)
00472 return 0;
00473
00474 ambig = NSTRING_IS_AMBIG(node);
00475
00476 p = prev = sn->s;
00477 prev_len = enclen(enc, p, sn->end);
00478 p += prev_len;
00479 slen = 1;
00480 rlen = 0;
00481
00482 for (; p < sn->end; ) {
00483 len = enclen(enc, p, sn->end);
00484 if (len == prev_len) {
00485 slen++;
00486 }
00487 else {
00488 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00489 rlen += r;
00490 prev = p;
00491 slen = 1;
00492 prev_len = len;
00493 }
00494 p += len;
00495 }
00496 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00497 rlen += r;
00498 return rlen;
00499 }
00500
00501 static int
00502 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
00503 {
00504 if (sn->end <= sn->s)
00505 return 0;
00506
00507 return add_compile_string_length(sn->s, 1 , sn->end - sn->s, reg, 0);
00508 }
00509
00510 static int
00511 compile_string_node(Node* node, regex_t* reg)
00512 {
00513 int r, len, prev_len, slen, ambig;
00514 OnigEncoding enc = reg->enc;
00515 UChar *p, *prev, *end;
00516 StrNode* sn;
00517
00518 sn = NSTR(node);
00519 if (sn->end <= sn->s)
00520 return 0;
00521
00522 end = sn->end;
00523 ambig = NSTRING_IS_AMBIG(node);
00524
00525 p = prev = sn->s;
00526 prev_len = enclen(enc, p, end);
00527 p += prev_len;
00528 slen = 1;
00529
00530 for (; p < end; ) {
00531 len = enclen(enc, p, end);
00532 if (len == prev_len) {
00533 slen++;
00534 }
00535 else {
00536 r = add_compile_string(prev, prev_len, slen, reg, ambig);
00537 if (r) return r;
00538
00539 prev = p;
00540 slen = 1;
00541 prev_len = len;
00542 }
00543
00544 p += len;
00545 }
00546 return add_compile_string(prev, prev_len, slen, reg, ambig);
00547 }
00548
00549 static int
00550 compile_string_raw_node(StrNode* sn, regex_t* reg)
00551 {
00552 if (sn->end <= sn->s)
00553 return 0;
00554
00555 return add_compile_string(sn->s, 1 , sn->end - sn->s, reg, 0);
00556 }
00557
00558 static int
00559 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
00560 {
00561 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00562 add_length(reg, mbuf->used);
00563 return add_bytes(reg, mbuf->p, mbuf->used);
00564 #else
00565 int r, pad_size;
00566 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
00567
00568 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
00569 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
00570 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00571
00572 r = add_bytes(reg, mbuf->p, mbuf->used);
00573
00574
00575 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
00576 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00577 return r;
00578 #endif
00579 }
00580
00581 static int
00582 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
00583 {
00584 int len;
00585
00586 if (IS_NCCLASS_SHARE(cc)) {
00587 len = SIZE_OPCODE + SIZE_POINTER;
00588 return len;
00589 }
00590
00591 if (IS_NULL(cc->mbuf)) {
00592 len = SIZE_OPCODE + SIZE_BITSET;
00593 }
00594 else {
00595 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00596 len = SIZE_OPCODE;
00597 }
00598 else {
00599 len = SIZE_OPCODE + SIZE_BITSET;
00600 }
00601 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00602 len += SIZE_LENGTH + cc->mbuf->used;
00603 #else
00604 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
00605 #endif
00606 }
00607
00608 return len;
00609 }
00610
00611 static int
00612 compile_cclass_node(CClassNode* cc, regex_t* reg)
00613 {
00614 int r;
00615
00616 if (IS_NCCLASS_SHARE(cc)) {
00617 add_opcode(reg, OP_CCLASS_NODE);
00618 r = add_pointer(reg, cc);
00619 return r;
00620 }
00621
00622 if (IS_NULL(cc->mbuf)) {
00623 if (IS_NCCLASS_NOT(cc))
00624 add_opcode(reg, OP_CCLASS_NOT);
00625 else
00626 add_opcode(reg, OP_CCLASS);
00627
00628 r = add_bitset(reg, cc->bs);
00629 }
00630 else {
00631 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00632 if (IS_NCCLASS_NOT(cc))
00633 add_opcode(reg, OP_CCLASS_MB_NOT);
00634 else
00635 add_opcode(reg, OP_CCLASS_MB);
00636
00637 r = add_multi_byte_cclass(cc->mbuf, reg);
00638 }
00639 else {
00640 if (IS_NCCLASS_NOT(cc))
00641 add_opcode(reg, OP_CCLASS_MIX_NOT);
00642 else
00643 add_opcode(reg, OP_CCLASS_MIX);
00644
00645 r = add_bitset(reg, cc->bs);
00646 if (r) return r;
00647 r = add_multi_byte_cclass(cc->mbuf, reg);
00648 }
00649 }
00650
00651 return r;
00652 }
00653
00654 static int
00655 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
00656 {
00657 #define REPEAT_RANGE_ALLOC 4
00658
00659 OnigRepeatRange* p;
00660
00661 if (reg->repeat_range_alloc == 0) {
00662 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
00663 CHECK_NULL_RETURN_MEMERR(p);
00664 reg->repeat_range = p;
00665 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
00666 }
00667 else if (reg->repeat_range_alloc <= id) {
00668 int n;
00669 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
00670 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
00671 sizeof(OnigRepeatRange) * n);
00672 CHECK_NULL_RETURN_MEMERR(p);
00673 reg->repeat_range = p;
00674 reg->repeat_range_alloc = n;
00675 }
00676 else {
00677 p = reg->repeat_range;
00678 }
00679
00680 p[id].lower = lower;
00681 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
00682 return 0;
00683 }
00684
00685 static int
00686 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
00687 regex_t* reg)
00688 {
00689 int r;
00690 int num_repeat = reg->num_repeat;
00691
00692 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
00693 if (r) return r;
00694 r = add_mem_num(reg, num_repeat);
00695 reg->num_repeat++;
00696 if (r) return r;
00697 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
00698 if (r) return r;
00699
00700 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
00701 if (r) return r;
00702
00703 r = compile_tree_empty_check(qn->target, reg, empty_info);
00704 if (r) return r;
00705
00706 if (
00707 #ifdef USE_SUBEXP_CALL
00708 reg->num_call > 0 ||
00709 #endif
00710 IS_QUANTIFIER_IN_REPEAT(qn)) {
00711 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
00712 }
00713 else {
00714 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
00715 }
00716 if (r) return r;
00717 r = add_mem_num(reg, num_repeat);
00718 return r;
00719 }
00720
00721 static int
00722 is_anychar_star_quantifier(QtfrNode* qn)
00723 {
00724 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
00725 NTYPE(qn->target) == NT_CANY)
00726 return 1;
00727 else
00728 return 0;
00729 }
00730
00731 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
00732 #define CKN_ON (ckn > 0)
00733
00734 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00735
00736 static int
00737 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00738 {
00739 int len, mod_tlen, cklen;
00740 int ckn;
00741 int infinite = IS_REPEAT_INFINITE(qn->upper);
00742 int empty_info = qn->target_empty_info;
00743 int tlen = compile_length_tree(qn->target, reg);
00744
00745 if (tlen < 0) return tlen;
00746
00747 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00748
00749 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
00750
00751
00752 if (NTYPE(qn->target) == NT_CANY) {
00753 if (qn->greedy && infinite) {
00754 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
00755 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
00756 else
00757 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
00758 }
00759 }
00760
00761 if (empty_info != 0)
00762 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00763 else
00764 mod_tlen = tlen;
00765
00766 if (infinite && qn->lower <= 1) {
00767 if (qn->greedy) {
00768 if (qn->lower == 1)
00769 len = SIZE_OP_JUMP;
00770 else
00771 len = 0;
00772
00773 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
00774 }
00775 else {
00776 if (qn->lower == 0)
00777 len = SIZE_OP_JUMP;
00778 else
00779 len = 0;
00780
00781 len += mod_tlen + SIZE_OP_PUSH + cklen;
00782 }
00783 }
00784 else if (qn->upper == 0) {
00785 if (qn->is_refered != 0)
00786 len = SIZE_OP_JUMP + tlen;
00787 else
00788 len = 0;
00789 }
00790 else if (qn->upper == 1 && qn->greedy) {
00791 if (qn->lower == 0) {
00792 if (CKN_ON) {
00793 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
00794 }
00795 else {
00796 len = SIZE_OP_PUSH + tlen;
00797 }
00798 }
00799 else {
00800 len = tlen;
00801 }
00802 }
00803 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
00804 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
00805 }
00806 else {
00807 len = SIZE_OP_REPEAT_INC
00808 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
00809 if (CKN_ON)
00810 len += SIZE_OP_STATE_CHECK;
00811 }
00812
00813 return len;
00814 }
00815
00816 static int
00817 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
00818 {
00819 int r, mod_tlen;
00820 int ckn;
00821 int infinite = IS_REPEAT_INFINITE(qn->upper);
00822 int empty_info = qn->target_empty_info;
00823 int tlen = compile_length_tree(qn->target, reg);
00824
00825 if (tlen < 0) return tlen;
00826
00827 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00828
00829 if (is_anychar_star_quantifier(qn)) {
00830 r = compile_tree_n_times(qn->target, qn->lower, reg);
00831 if (r) return r;
00832 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
00833 if (IS_MULTILINE(reg->options))
00834 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
00835 else
00836 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
00837 if (r) return r;
00838 if (CKN_ON) {
00839 r = add_state_check_num(reg, ckn);
00840 if (r) return r;
00841 }
00842
00843 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
00844 }
00845 else {
00846 if (IS_MULTILINE(reg->options)) {
00847 r = add_opcode(reg, (CKN_ON ?
00848 OP_STATE_CHECK_ANYCHAR_ML_STAR
00849 : OP_ANYCHAR_ML_STAR));
00850 }
00851 else {
00852 r = add_opcode(reg, (CKN_ON ?
00853 OP_STATE_CHECK_ANYCHAR_STAR
00854 : OP_ANYCHAR_STAR));
00855 }
00856 if (r) return r;
00857 if (CKN_ON)
00858 r = add_state_check_num(reg, ckn);
00859
00860 return r;
00861 }
00862 }
00863
00864 if (empty_info != 0)
00865 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00866 else
00867 mod_tlen = tlen;
00868
00869 if (infinite && qn->lower <= 1) {
00870 if (qn->greedy) {
00871 if (qn->lower == 1) {
00872 r = add_opcode_rel_addr(reg, OP_JUMP,
00873 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
00874 if (r) return r;
00875 }
00876
00877 if (CKN_ON) {
00878 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00879 if (r) return r;
00880 r = add_state_check_num(reg, ckn);
00881 if (r) return r;
00882 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
00883 }
00884 else {
00885 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
00886 }
00887 if (r) return r;
00888 r = compile_tree_empty_check(qn->target, reg, empty_info);
00889 if (r) return r;
00890 r = add_opcode_rel_addr(reg, OP_JUMP,
00891 -(mod_tlen + (int )SIZE_OP_JUMP
00892 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
00893 }
00894 else {
00895 if (qn->lower == 0) {
00896 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
00897 if (r) return r;
00898 }
00899 r = compile_tree_empty_check(qn->target, reg, empty_info);
00900 if (r) return r;
00901 if (CKN_ON) {
00902 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
00903 if (r) return r;
00904 r = add_state_check_num(reg, ckn);
00905 if (r) return r;
00906 r = add_rel_addr(reg,
00907 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
00908 }
00909 else
00910 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
00911 }
00912 }
00913 else if (qn->upper == 0) {
00914 if (qn->is_refered != 0) {
00915 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00916 if (r) return r;
00917 r = compile_tree(qn->target, reg);
00918 }
00919 else
00920 r = 0;
00921 }
00922 else if (qn->upper == 1 && qn->greedy) {
00923 if (qn->lower == 0) {
00924 if (CKN_ON) {
00925 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00926 if (r) return r;
00927 r = add_state_check_num(reg, ckn);
00928 if (r) return r;
00929 r = add_rel_addr(reg, tlen);
00930 }
00931 else {
00932 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
00933 }
00934 if (r) return r;
00935 }
00936
00937 r = compile_tree(qn->target, reg);
00938 }
00939 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
00940 if (CKN_ON) {
00941 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00942 if (r) return r;
00943 r = add_state_check_num(reg, ckn);
00944 if (r) return r;
00945 r = add_rel_addr(reg, SIZE_OP_JUMP);
00946 }
00947 else {
00948 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
00949 }
00950
00951 if (r) return r;
00952 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00953 if (r) return r;
00954 r = compile_tree(qn->target, reg);
00955 }
00956 else {
00957 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
00958 if (CKN_ON) {
00959 if (r) return r;
00960 r = add_opcode(reg, OP_STATE_CHECK);
00961 if (r) return r;
00962 r = add_state_check_num(reg, ckn);
00963 }
00964 }
00965 return r;
00966 }
00967
00968 #else
00969
00970 static int
00971 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00972 {
00973 int len, mod_tlen;
00974 int infinite = IS_REPEAT_INFINITE(qn->upper);
00975 int empty_info = qn->target_empty_info;
00976 int tlen = compile_length_tree(qn->target, reg);
00977
00978 if (tlen < 0) return tlen;
00979
00980
00981 if (NTYPE(qn->target) == NT_CANY) {
00982 if (qn->greedy && infinite) {
00983 if (IS_NOT_NULL(qn->next_head_exact))
00984 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
00985 else
00986 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
00987 }
00988 }
00989
00990 if (empty_info != 0)
00991 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00992 else
00993 mod_tlen = tlen;
00994
00995 if (infinite &&
00996 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
00997 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
00998 len = SIZE_OP_JUMP;
00999 }
01000 else {
01001 len = tlen * qn->lower;
01002 }
01003
01004 if (qn->greedy) {
01005 if (IS_NOT_NULL(qn->head_exact))
01006 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
01007 else if (IS_NOT_NULL(qn->next_head_exact))
01008 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
01009 else
01010 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
01011 }
01012 else
01013 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
01014 }
01015 else if (qn->upper == 0 && qn->is_refered != 0) {
01016 len = SIZE_OP_JUMP + tlen;
01017 }
01018 else if (!infinite && qn->greedy &&
01019 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01020 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01021 len = tlen * qn->lower;
01022 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
01023 }
01024 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
01025 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
01026 }
01027 else {
01028 len = SIZE_OP_REPEAT_INC
01029 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
01030 }
01031
01032 return len;
01033 }
01034
01035 static int
01036 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
01037 {
01038 int i, r, mod_tlen;
01039 int infinite = IS_REPEAT_INFINITE(qn->upper);
01040 int empty_info = qn->target_empty_info;
01041 int tlen = compile_length_tree(qn->target, reg);
01042
01043 if (tlen < 0) return tlen;
01044
01045 if (is_anychar_star_quantifier(qn)) {
01046 r = compile_tree_n_times(qn->target, qn->lower, reg);
01047 if (r) return r;
01048 if (IS_NOT_NULL(qn->next_head_exact)) {
01049 if (IS_MULTILINE(reg->options))
01050 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
01051 else
01052 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
01053 if (r) return r;
01054 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01055 }
01056 else {
01057 if (IS_MULTILINE(reg->options))
01058 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
01059 else
01060 return add_opcode(reg, OP_ANYCHAR_STAR);
01061 }
01062 }
01063
01064 if (empty_info != 0)
01065 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
01066 else
01067 mod_tlen = tlen;
01068
01069 if (infinite &&
01070 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01071 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01072 if (qn->greedy) {
01073 if (IS_NOT_NULL(qn->head_exact))
01074 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
01075 else if (IS_NOT_NULL(qn->next_head_exact))
01076 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
01077 else
01078 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
01079 }
01080 else {
01081 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
01082 }
01083 if (r) return r;
01084 }
01085 else {
01086 r = compile_tree_n_times(qn->target, qn->lower, reg);
01087 if (r) return r;
01088 }
01089
01090 if (qn->greedy) {
01091 if (IS_NOT_NULL(qn->head_exact)) {
01092 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
01093 mod_tlen + SIZE_OP_JUMP);
01094 if (r) return r;
01095 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
01096 r = compile_tree_empty_check(qn->target, reg, empty_info);
01097 if (r) return r;
01098 r = add_opcode_rel_addr(reg, OP_JUMP,
01099 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
01100 }
01101 else if (IS_NOT_NULL(qn->next_head_exact)) {
01102 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
01103 mod_tlen + SIZE_OP_JUMP);
01104 if (r) return r;
01105 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01106 r = compile_tree_empty_check(qn->target, reg, empty_info);
01107 if (r) return r;
01108 r = add_opcode_rel_addr(reg, OP_JUMP,
01109 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
01110 }
01111 else {
01112 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
01113 if (r) return r;
01114 r = compile_tree_empty_check(qn->target, reg, empty_info);
01115 if (r) return r;
01116 r = add_opcode_rel_addr(reg, OP_JUMP,
01117 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
01118 }
01119 }
01120 else {
01121 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
01122 if (r) return r;
01123 r = compile_tree_empty_check(qn->target, reg, empty_info);
01124 if (r) return r;
01125 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
01126 }
01127 }
01128 else if (qn->upper == 0 && qn->is_refered != 0) {
01129 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01130 if (r) return r;
01131 r = compile_tree(qn->target, reg);
01132 }
01133 else if (!infinite && qn->greedy &&
01134 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01135 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01136 int n = qn->upper - qn->lower;
01137
01138 r = compile_tree_n_times(qn->target, qn->lower, reg);
01139 if (r) return r;
01140
01141 for (i = 0; i < n; i++) {
01142 r = add_opcode_rel_addr(reg, OP_PUSH,
01143 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
01144 if (r) return r;
01145 r = compile_tree(qn->target, reg);
01146 if (r) return r;
01147 }
01148 }
01149 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
01150 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
01151 if (r) return r;
01152 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01153 if (r) return r;
01154 r = compile_tree(qn->target, reg);
01155 }
01156 else {
01157 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
01158 }
01159 return r;
01160 }
01161 #endif
01162
01163 static int
01164 compile_length_option_node(EncloseNode* node, regex_t* reg)
01165 {
01166 int tlen;
01167 OnigOptionType prev = reg->options;
01168
01169 reg->options = node->option;
01170 tlen = compile_length_tree(node->target, reg);
01171 reg->options = prev;
01172
01173 if (tlen < 0) return tlen;
01174
01175 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01176 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
01177 + tlen + SIZE_OP_SET_OPTION;
01178 }
01179 else
01180 return tlen;
01181 }
01182
01183 static int
01184 compile_option_node(EncloseNode* node, regex_t* reg)
01185 {
01186 int r;
01187 OnigOptionType prev = reg->options;
01188
01189 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01190 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
01191 if (r) return r;
01192 r = add_opcode_option(reg, OP_SET_OPTION, prev);
01193 if (r) return r;
01194 r = add_opcode(reg, OP_FAIL);
01195 if (r) return r;
01196 }
01197
01198 reg->options = node->option;
01199 r = compile_tree(node->target, reg);
01200 reg->options = prev;
01201
01202 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01203 if (r) return r;
01204 r = add_opcode_option(reg, OP_SET_OPTION, prev);
01205 }
01206 return r;
01207 }
01208
01209 static int
01210 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
01211 {
01212 int len;
01213 int tlen;
01214
01215 if (node->type == ENCLOSE_OPTION)
01216 return compile_length_option_node(node, reg);
01217
01218 if (node->target) {
01219 tlen = compile_length_tree(node->target, reg);
01220 if (tlen < 0) return tlen;
01221 }
01222 else
01223 tlen = 0;
01224
01225 switch (node->type) {
01226 case ENCLOSE_MEMORY:
01227 #ifdef USE_SUBEXP_CALL
01228 if (IS_ENCLOSE_CALLED(node)) {
01229 len = SIZE_OP_MEMORY_START_PUSH + tlen
01230 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
01231 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01232 len += (IS_ENCLOSE_RECURSION(node)
01233 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01234 else
01235 len += (IS_ENCLOSE_RECURSION(node)
01236 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01237 }
01238 else
01239 #endif
01240 {
01241 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01242 len = SIZE_OP_MEMORY_START_PUSH;
01243 else
01244 len = SIZE_OP_MEMORY_START;
01245
01246 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
01247 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
01248 }
01249 break;
01250
01251 case ENCLOSE_STOP_BACKTRACK:
01252 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01253 QtfrNode* qn = NQTFR(node->target);
01254 tlen = compile_length_tree(qn->target, reg);
01255 if (tlen < 0) return tlen;
01256
01257 len = tlen * qn->lower
01258 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
01259 }
01260 else {
01261 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
01262 }
01263 break;
01264
01265 default:
01266 return ONIGERR_TYPE_BUG;
01267 break;
01268 }
01269
01270 return len;
01271 }
01272
01273 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
01274
01275 static int
01276 compile_enclose_node(EncloseNode* node, regex_t* reg)
01277 {
01278 int r, len;
01279
01280 if (node->type == ENCLOSE_OPTION)
01281 return compile_option_node(node, reg);
01282
01283 switch (node->type) {
01284 case ENCLOSE_MEMORY:
01285 #ifdef USE_SUBEXP_CALL
01286 if (IS_ENCLOSE_CALLED(node)) {
01287 r = add_opcode(reg, OP_CALL);
01288 if (r) return r;
01289 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
01290 node->state |= NST_ADDR_FIXED;
01291 r = add_abs_addr(reg, (int )node->call_addr);
01292 if (r) return r;
01293 len = compile_length_tree(node->target, reg);
01294 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
01295 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01296 len += (IS_ENCLOSE_RECURSION(node)
01297 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01298 else
01299 len += (IS_ENCLOSE_RECURSION(node)
01300 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01301
01302 r = add_opcode_rel_addr(reg, OP_JUMP, len);
01303 if (r) return r;
01304 }
01305 #endif
01306 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01307 r = add_opcode(reg, OP_MEMORY_START_PUSH);
01308 else
01309 r = add_opcode(reg, OP_MEMORY_START);
01310 if (r) return r;
01311 r = add_mem_num(reg, node->regnum);
01312 if (r) return r;
01313 r = compile_tree(node->target, reg);
01314 if (r) return r;
01315 #ifdef USE_SUBEXP_CALL
01316 if (IS_ENCLOSE_CALLED(node)) {
01317 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01318 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01319 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
01320 else
01321 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01322 ? OP_MEMORY_END_REC : OP_MEMORY_END));
01323
01324 if (r) return r;
01325 r = add_mem_num(reg, node->regnum);
01326 if (r) return r;
01327 r = add_opcode(reg, OP_RETURN);
01328 }
01329 else
01330 #endif
01331 {
01332 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01333 r = add_opcode(reg, OP_MEMORY_END_PUSH);
01334 else
01335 r = add_opcode(reg, OP_MEMORY_END);
01336 if (r) return r;
01337 r = add_mem_num(reg, node->regnum);
01338 }
01339 break;
01340
01341 case ENCLOSE_STOP_BACKTRACK:
01342 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01343 QtfrNode* qn = NQTFR(node->target);
01344 r = compile_tree_n_times(qn->target, qn->lower, reg);
01345 if (r) return r;
01346
01347 len = compile_length_tree(qn->target, reg);
01348 if (len < 0) return len;
01349
01350 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
01351 if (r) return r;
01352 r = compile_tree(qn->target, reg);
01353 if (r) return r;
01354 r = add_opcode(reg, OP_POP);
01355 if (r) return r;
01356 r = add_opcode_rel_addr(reg, OP_JUMP,
01357 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
01358 }
01359 else {
01360 r = add_opcode(reg, OP_PUSH_STOP_BT);
01361 if (r) return r;
01362 r = compile_tree(node->target, reg);
01363 if (r) return r;
01364 r = add_opcode(reg, OP_POP_STOP_BT);
01365 }
01366 break;
01367
01368 default:
01369 return ONIGERR_TYPE_BUG;
01370 break;
01371 }
01372
01373 return r;
01374 }
01375
01376 static int
01377 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
01378 {
01379 int len;
01380 int tlen = 0;
01381
01382 if (node->target) {
01383 tlen = compile_length_tree(node->target, reg);
01384 if (tlen < 0) return tlen;
01385 }
01386
01387 switch (node->type) {
01388 case ANCHOR_PREC_READ:
01389 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
01390 break;
01391 case ANCHOR_PREC_READ_NOT:
01392 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
01393 break;
01394 case ANCHOR_LOOK_BEHIND:
01395 len = SIZE_OP_LOOK_BEHIND + tlen;
01396 break;
01397 case ANCHOR_LOOK_BEHIND_NOT:
01398 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
01399 break;
01400
01401 default:
01402 len = SIZE_OPCODE;
01403 break;
01404 }
01405
01406 return len;
01407 }
01408
01409 static int
01410 compile_anchor_node(AnchorNode* node, regex_t* reg)
01411 {
01412 int r, len;
01413
01414 switch (node->type) {
01415 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
01416 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
01417 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
01418 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
01419 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
01420 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
01421
01422 case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
01423 case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
01424 #ifdef USE_WORD_BEGIN_END
01425 case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
01426 case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
01427 #endif
01428
01429 case ANCHOR_PREC_READ:
01430 r = add_opcode(reg, OP_PUSH_POS);
01431 if (r) return r;
01432 r = compile_tree(node->target, reg);
01433 if (r) return r;
01434 r = add_opcode(reg, OP_POP_POS);
01435 break;
01436
01437 case ANCHOR_PREC_READ_NOT:
01438 len = compile_length_tree(node->target, reg);
01439 if (len < 0) return len;
01440 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
01441 if (r) return r;
01442 r = compile_tree(node->target, reg);
01443 if (r) return r;
01444 r = add_opcode(reg, OP_FAIL_POS);
01445 break;
01446
01447 case ANCHOR_LOOK_BEHIND:
01448 {
01449 int n;
01450 r = add_opcode(reg, OP_LOOK_BEHIND);
01451 if (r) return r;
01452 if (node->char_len < 0) {
01453 r = get_char_length_tree(node->target, reg, &n);
01454 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01455 }
01456 else
01457 n = node->char_len;
01458 r = add_length(reg, n);
01459 if (r) return r;
01460 r = compile_tree(node->target, reg);
01461 }
01462 break;
01463
01464 case ANCHOR_LOOK_BEHIND_NOT:
01465 {
01466 int n;
01467 len = compile_length_tree(node->target, reg);
01468 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
01469 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
01470 if (r) return r;
01471 if (node->char_len < 0) {
01472 r = get_char_length_tree(node->target, reg, &n);
01473 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01474 }
01475 else
01476 n = node->char_len;
01477 r = add_length(reg, n);
01478 if (r) return r;
01479 r = compile_tree(node->target, reg);
01480 if (r) return r;
01481 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
01482 }
01483 break;
01484
01485 default:
01486 return ONIGERR_TYPE_BUG;
01487 break;
01488 }
01489
01490 return r;
01491 }
01492
01493 static int
01494 compile_length_tree(Node* node, regex_t* reg)
01495 {
01496 int len, type, r;
01497
01498 type = NTYPE(node);
01499 switch (type) {
01500 case NT_LIST:
01501 len = 0;
01502 do {
01503 r = compile_length_tree(NCAR(node), reg);
01504 if (r < 0) return r;
01505 len += r;
01506 } while (IS_NOT_NULL(node = NCDR(node)));
01507 r = len;
01508 break;
01509
01510 case NT_ALT:
01511 {
01512 int n;
01513
01514 n = r = 0;
01515 do {
01516 r += compile_length_tree(NCAR(node), reg);
01517 n++;
01518 } while (IS_NOT_NULL(node = NCDR(node)));
01519 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
01520 }
01521 break;
01522
01523 case NT_STR:
01524 if (NSTRING_IS_RAW(node))
01525 r = compile_length_string_raw_node(NSTR(node), reg);
01526 else
01527 r = compile_length_string_node(node, reg);
01528 break;
01529
01530 case NT_CCLASS:
01531 r = compile_length_cclass_node(NCCLASS(node), reg);
01532 break;
01533
01534 case NT_CTYPE:
01535 case NT_CANY:
01536 r = SIZE_OPCODE;
01537 break;
01538
01539 case NT_BREF:
01540 {
01541 BRefNode* br = NBREF(node);
01542
01543 #ifdef USE_BACKREF_WITH_LEVEL
01544 if (IS_BACKREF_NEST_LEVEL(br)) {
01545 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
01546 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01547 }
01548 else
01549 #endif
01550 if (br->back_num == 1) {
01551 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
01552 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
01553 }
01554 else {
01555 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01556 }
01557 }
01558 break;
01559
01560 #ifdef USE_SUBEXP_CALL
01561 case NT_CALL:
01562 r = SIZE_OP_CALL;
01563 break;
01564 #endif
01565
01566 case NT_QTFR:
01567 r = compile_length_quantifier_node(NQTFR(node), reg);
01568 break;
01569
01570 case NT_ENCLOSE:
01571 r = compile_length_enclose_node(NENCLOSE(node), reg);
01572 break;
01573
01574 case NT_ANCHOR:
01575 r = compile_length_anchor_node(NANCHOR(node), reg);
01576 break;
01577
01578 default:
01579 return ONIGERR_TYPE_BUG;
01580 break;
01581 }
01582
01583 return r;
01584 }
01585
01586 static int
01587 compile_tree(Node* node, regex_t* reg)
01588 {
01589 int n, type, len, pos, r = 0;
01590
01591 type = NTYPE(node);
01592 switch (type) {
01593 case NT_LIST:
01594 do {
01595 r = compile_tree(NCAR(node), reg);
01596 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01597 break;
01598
01599 case NT_ALT:
01600 {
01601 Node* x = node;
01602 len = 0;
01603 do {
01604 len += compile_length_tree(NCAR(x), reg);
01605 if (NCDR(x) != NULL) {
01606 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
01607 }
01608 } while (IS_NOT_NULL(x = NCDR(x)));
01609 pos = reg->used + len;
01610
01611 do {
01612 len = compile_length_tree(NCAR(node), reg);
01613 if (IS_NOT_NULL(NCDR(node))) {
01614 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
01615 if (r) break;
01616 }
01617 r = compile_tree(NCAR(node), reg);
01618 if (r) break;
01619 if (IS_NOT_NULL(NCDR(node))) {
01620 len = pos - (reg->used + SIZE_OP_JUMP);
01621 r = add_opcode_rel_addr(reg, OP_JUMP, len);
01622 if (r) break;
01623 }
01624 } while (IS_NOT_NULL(node = NCDR(node)));
01625 }
01626 break;
01627
01628 case NT_STR:
01629 if (NSTRING_IS_RAW(node))
01630 r = compile_string_raw_node(NSTR(node), reg);
01631 else
01632 r = compile_string_node(node, reg);
01633 break;
01634
01635 case NT_CCLASS:
01636 r = compile_cclass_node(NCCLASS(node), reg);
01637 break;
01638
01639 case NT_CTYPE:
01640 {
01641 int op;
01642
01643 switch (NCTYPE(node)->ctype) {
01644 case ONIGENC_CTYPE_WORD:
01645 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
01646 else op = OP_WORD;
01647 break;
01648 default:
01649 return ONIGERR_TYPE_BUG;
01650 break;
01651 }
01652 r = add_opcode(reg, op);
01653 }
01654 break;
01655
01656 case NT_CANY:
01657 if (IS_MULTILINE(reg->options))
01658 r = add_opcode(reg, OP_ANYCHAR_ML);
01659 else
01660 r = add_opcode(reg, OP_ANYCHAR);
01661 break;
01662
01663 case NT_BREF:
01664 {
01665 BRefNode* br = NBREF(node);
01666
01667 #ifdef USE_BACKREF_WITH_LEVEL
01668 if (IS_BACKREF_NEST_LEVEL(br)) {
01669 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
01670 if (r) return r;
01671 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
01672 if (r) return r;
01673 r = add_length(reg, br->nest_level);
01674 if (r) return r;
01675
01676 goto add_bacref_mems;
01677 }
01678 else
01679 #endif
01680 if (br->back_num == 1) {
01681 n = br->back_static[0];
01682 if (IS_IGNORECASE(reg->options)) {
01683 r = add_opcode(reg, OP_BACKREFN_IC);
01684 if (r) return r;
01685 r = add_mem_num(reg, n);
01686 }
01687 else {
01688 switch (n) {
01689 case 1: r = add_opcode(reg, OP_BACKREF1); break;
01690 case 2: r = add_opcode(reg, OP_BACKREF2); break;
01691 default:
01692 r = add_opcode(reg, OP_BACKREFN);
01693 if (r) return r;
01694 r = add_mem_num(reg, n);
01695 break;
01696 }
01697 }
01698 }
01699 else {
01700 int i;
01701 int* p;
01702
01703 if (IS_IGNORECASE(reg->options)) {
01704 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
01705 }
01706 else {
01707 r = add_opcode(reg, OP_BACKREF_MULTI);
01708 }
01709 if (r) return r;
01710
01711 #ifdef USE_BACKREF_WITH_LEVEL
01712 add_bacref_mems:
01713 #endif
01714 r = add_length(reg, br->back_num);
01715 if (r) return r;
01716 p = BACKREFS_P(br);
01717 for (i = br->back_num - 1; i >= 0; i--) {
01718 r = add_mem_num(reg, p[i]);
01719 if (r) return r;
01720 }
01721 }
01722 }
01723 break;
01724
01725 #ifdef USE_SUBEXP_CALL
01726 case NT_CALL:
01727 r = compile_call(NCALL(node), reg);
01728 break;
01729 #endif
01730
01731 case NT_QTFR:
01732 r = compile_quantifier_node(NQTFR(node), reg);
01733 break;
01734
01735 case NT_ENCLOSE:
01736 r = compile_enclose_node(NENCLOSE(node), reg);
01737 break;
01738
01739 case NT_ANCHOR:
01740 r = compile_anchor_node(NANCHOR(node), reg);
01741 break;
01742
01743 default:
01744 #ifdef ONIG_DEBUG
01745 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
01746 #endif
01747 break;
01748 }
01749
01750 return r;
01751 }
01752
01753 #ifdef USE_NAMED_GROUP
01754
01755 static int
01756 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
01757 {
01758 int r = 0;
01759 Node* node = *plink;
01760
01761 switch (NTYPE(node)) {
01762 case NT_LIST:
01763 case NT_ALT:
01764 do {
01765 r = noname_disable_map(&(NCAR(node)), map, counter);
01766 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01767 break;
01768
01769 case NT_QTFR:
01770 {
01771 Node** ptarget = &(NQTFR(node)->target);
01772 Node* old = *ptarget;
01773 r = noname_disable_map(ptarget, map, counter);
01774 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
01775 onig_reduce_nested_quantifier(node, *ptarget);
01776 }
01777 }
01778 break;
01779
01780 case NT_ENCLOSE:
01781 {
01782 EncloseNode* en = NENCLOSE(node);
01783 if (en->type == ENCLOSE_MEMORY) {
01784 if (IS_ENCLOSE_NAMED_GROUP(en)) {
01785 (*counter)++;
01786 map[en->regnum].new_val = *counter;
01787 en->regnum = *counter;
01788 r = noname_disable_map(&(en->target), map, counter);
01789 }
01790 else {
01791 *plink = en->target;
01792 en->target = NULL_NODE;
01793 onig_node_free(node);
01794 r = noname_disable_map(plink, map, counter);
01795 }
01796 }
01797 else
01798 r = noname_disable_map(&(en->target), map, counter);
01799 }
01800 break;
01801
01802 case NT_ANCHOR:
01803 {
01804 AnchorNode* an = NANCHOR(node);
01805 switch (an->type) {
01806 case ANCHOR_PREC_READ:
01807 case ANCHOR_PREC_READ_NOT:
01808 case ANCHOR_LOOK_BEHIND:
01809 case ANCHOR_LOOK_BEHIND_NOT:
01810 r = noname_disable_map(&(an->target), map, counter);
01811 break;
01812 }
01813 }
01814 break;
01815
01816 default:
01817 break;
01818 }
01819
01820 return r;
01821 }
01822
01823 static int
01824 renumber_node_backref(Node* node, GroupNumRemap* map)
01825 {
01826 int i, pos, n, old_num;
01827 int *backs;
01828 BRefNode* bn = NBREF(node);
01829
01830 if (! IS_BACKREF_NAME_REF(bn))
01831 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01832
01833 old_num = bn->back_num;
01834 if (IS_NULL(bn->back_dynamic))
01835 backs = bn->back_static;
01836 else
01837 backs = bn->back_dynamic;
01838
01839 for (i = 0, pos = 0; i < old_num; i++) {
01840 n = map[backs[i]].new_val;
01841 if (n > 0) {
01842 backs[pos] = n;
01843 pos++;
01844 }
01845 }
01846
01847 bn->back_num = pos;
01848 return 0;
01849 }
01850
01851 static int
01852 renumber_by_map(Node* node, GroupNumRemap* map)
01853 {
01854 int r = 0;
01855
01856 switch (NTYPE(node)) {
01857 case NT_LIST:
01858 case NT_ALT:
01859 do {
01860 r = renumber_by_map(NCAR(node), map);
01861 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01862 break;
01863 case NT_QTFR:
01864 r = renumber_by_map(NQTFR(node)->target, map);
01865 break;
01866 case NT_ENCLOSE:
01867 r = renumber_by_map(NENCLOSE(node)->target, map);
01868 break;
01869
01870 case NT_BREF:
01871 r = renumber_node_backref(node, map);
01872 break;
01873
01874 case NT_ANCHOR:
01875 {
01876 AnchorNode* an = NANCHOR(node);
01877 switch (an->type) {
01878 case ANCHOR_PREC_READ:
01879 case ANCHOR_PREC_READ_NOT:
01880 case ANCHOR_LOOK_BEHIND:
01881 case ANCHOR_LOOK_BEHIND_NOT:
01882 r = renumber_by_map(an->target, map);
01883 break;
01884 }
01885 }
01886 break;
01887
01888 default:
01889 break;
01890 }
01891
01892 return r;
01893 }
01894
01895 static int
01896 numbered_ref_check(Node* node)
01897 {
01898 int r = 0;
01899
01900 switch (NTYPE(node)) {
01901 case NT_LIST:
01902 case NT_ALT:
01903 do {
01904 r = numbered_ref_check(NCAR(node));
01905 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01906 break;
01907 case NT_QTFR:
01908 r = numbered_ref_check(NQTFR(node)->target);
01909 break;
01910 case NT_ENCLOSE:
01911 r = numbered_ref_check(NENCLOSE(node)->target);
01912 break;
01913
01914 case NT_BREF:
01915 if (! IS_BACKREF_NAME_REF(NBREF(node)))
01916 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01917 break;
01918
01919 default:
01920 break;
01921 }
01922
01923 return r;
01924 }
01925
01926 static int
01927 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
01928 {
01929 int r, i, pos, counter;
01930 BitStatusType loc;
01931 GroupNumRemap* map;
01932
01933 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
01934 CHECK_NULL_RETURN_MEMERR(map);
01935 for (i = 1; i <= env->num_mem; i++) {
01936 map[i].new_val = 0;
01937 }
01938 counter = 0;
01939 r = noname_disable_map(root, map, &counter);
01940 if (r != 0) return r;
01941
01942 r = renumber_by_map(*root, map);
01943 if (r != 0) return r;
01944
01945 for (i = 1, pos = 1; i <= env->num_mem; i++) {
01946 if (map[i].new_val > 0) {
01947 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
01948 pos++;
01949 }
01950 }
01951
01952 loc = env->capture_history;
01953 BIT_STATUS_CLEAR(env->capture_history);
01954 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
01955 if (BIT_STATUS_AT(loc, i)) {
01956 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
01957 }
01958 }
01959
01960 env->num_mem = env->num_named;
01961 reg->num_mem = env->num_named;
01962
01963 return onig_renumber_name_table(reg, map);
01964 }
01965 #endif
01966
01967 #ifdef USE_SUBEXP_CALL
01968 static int
01969 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
01970 {
01971 int i, offset;
01972 EncloseNode* en;
01973 AbsAddrType addr;
01974
01975 for (i = 0; i < uslist->num; i++) {
01976 en = NENCLOSE(uslist->us[i].target);
01977 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
01978 addr = en->call_addr;
01979 offset = uslist->us[i].offset;
01980
01981 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
01982 }
01983 return 0;
01984 }
01985 #endif
01986
01987 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
01988 static int
01989 quantifiers_memory_node_info(Node* node)
01990 {
01991 int r = 0;
01992
01993 switch (NTYPE(node)) {
01994 case NT_LIST:
01995 case NT_ALT:
01996 {
01997 int v;
01998 do {
01999 v = quantifiers_memory_node_info(NCAR(node));
02000 if (v > r) r = v;
02001 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
02002 }
02003 break;
02004
02005 #ifdef USE_SUBEXP_CALL
02006 case NT_CALL:
02007 if (IS_CALL_RECURSION(NCALL(node))) {
02008 return NQ_TARGET_IS_EMPTY_REC;
02009 }
02010 else
02011 r = quantifiers_memory_node_info(NCALL(node)->target);
02012 break;
02013 #endif
02014
02015 case NT_QTFR:
02016 {
02017 QtfrNode* qn = NQTFR(node);
02018 if (qn->upper != 0) {
02019 r = quantifiers_memory_node_info(qn->target);
02020 }
02021 }
02022 break;
02023
02024 case NT_ENCLOSE:
02025 {
02026 EncloseNode* en = NENCLOSE(node);
02027 switch (en->type) {
02028 case ENCLOSE_MEMORY:
02029 return NQ_TARGET_IS_EMPTY_MEM;
02030 break;
02031
02032 case ENCLOSE_OPTION:
02033 case ENCLOSE_STOP_BACKTRACK:
02034 r = quantifiers_memory_node_info(en->target);
02035 break;
02036 default:
02037 break;
02038 }
02039 }
02040 break;
02041
02042 case NT_BREF:
02043 case NT_STR:
02044 case NT_CTYPE:
02045 case NT_CCLASS:
02046 case NT_CANY:
02047 case NT_ANCHOR:
02048 default:
02049 break;
02050 }
02051
02052 return r;
02053 }
02054 #endif
02055
02056 static int
02057 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
02058 {
02059 OnigDistance tmin;
02060 int r = 0;
02061
02062 *min = 0;
02063 switch (NTYPE(node)) {
02064 case NT_BREF:
02065 {
02066 int i;
02067 int* backs;
02068 Node** nodes = SCANENV_MEM_NODES(env);
02069 BRefNode* br = NBREF(node);
02070 if (br->state & NST_RECURSION) break;
02071
02072 backs = BACKREFS_P(br);
02073 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
02074 r = get_min_match_length(nodes[backs[0]], min, env);
02075 if (r != 0) break;
02076 for (i = 1; i < br->back_num; i++) {
02077 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
02078 r = get_min_match_length(nodes[backs[i]], &tmin, env);
02079 if (r != 0) break;
02080 if (*min > tmin) *min = tmin;
02081 }
02082 }
02083 break;
02084
02085 #ifdef USE_SUBEXP_CALL
02086 case NT_CALL:
02087 if (IS_CALL_RECURSION(NCALL(node))) {
02088 EncloseNode* en = NENCLOSE(NCALL(node)->target);
02089 if (IS_ENCLOSE_MIN_FIXED(en))
02090 *min = en->min_len;
02091 }
02092 else
02093 r = get_min_match_length(NCALL(node)->target, min, env);
02094 break;
02095 #endif
02096
02097 case NT_LIST:
02098 do {
02099 r = get_min_match_length(NCAR(node), &tmin, env);
02100 if (r == 0) *min += tmin;
02101 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02102 break;
02103
02104 case NT_ALT:
02105 {
02106 Node *x, *y;
02107 y = node;
02108 do {
02109 x = NCAR(y);
02110 r = get_min_match_length(x, &tmin, env);
02111 if (r != 0) break;
02112 if (y == node) *min = tmin;
02113 else if (*min > tmin) *min = tmin;
02114 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
02115 }
02116 break;
02117
02118 case NT_STR:
02119 {
02120 StrNode* sn = NSTR(node);
02121 *min = sn->end - sn->s;
02122 }
02123 break;
02124
02125 case NT_CTYPE:
02126 *min = 1;
02127 break;
02128
02129 case NT_CCLASS:
02130 case NT_CANY:
02131 *min = 1;
02132 break;
02133
02134 case NT_QTFR:
02135 {
02136 QtfrNode* qn = NQTFR(node);
02137
02138 if (qn->lower > 0) {
02139 r = get_min_match_length(qn->target, min, env);
02140 if (r == 0)
02141 *min = distance_multiply(*min, qn->lower);
02142 }
02143 }
02144 break;
02145
02146 case NT_ENCLOSE:
02147 {
02148 EncloseNode* en = NENCLOSE(node);
02149 switch (en->type) {
02150 case ENCLOSE_MEMORY:
02151 #ifdef USE_SUBEXP_CALL
02152 if (IS_ENCLOSE_MIN_FIXED(en))
02153 *min = en->min_len;
02154 else {
02155 r = get_min_match_length(en->target, min, env);
02156 if (r == 0) {
02157 en->min_len = *min;
02158 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
02159 }
02160 }
02161 break;
02162 #endif
02163 case ENCLOSE_OPTION:
02164 case ENCLOSE_STOP_BACKTRACK:
02165 r = get_min_match_length(en->target, min, env);
02166 break;
02167 }
02168 }
02169 break;
02170
02171 case NT_ANCHOR:
02172 default:
02173 break;
02174 }
02175
02176 return r;
02177 }
02178
02179 static int
02180 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
02181 {
02182 OnigDistance tmax;
02183 int r = 0;
02184
02185 *max = 0;
02186 switch (NTYPE(node)) {
02187 case NT_LIST:
02188 do {
02189 r = get_max_match_length(NCAR(node), &tmax, env);
02190 if (r == 0)
02191 *max = distance_add(*max, tmax);
02192 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02193 break;
02194
02195 case NT_ALT:
02196 do {
02197 r = get_max_match_length(NCAR(node), &tmax, env);
02198 if (r == 0 && *max < tmax) *max = tmax;
02199 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02200 break;
02201
02202 case NT_STR:
02203 {
02204 StrNode* sn = NSTR(node);
02205 *max = sn->end - sn->s;
02206 }
02207 break;
02208
02209 case NT_CTYPE:
02210 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02211 break;
02212
02213 case NT_CCLASS:
02214 case NT_CANY:
02215 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02216 break;
02217
02218 case NT_BREF:
02219 {
02220 int i;
02221 int* backs;
02222 Node** nodes = SCANENV_MEM_NODES(env);
02223 BRefNode* br = NBREF(node);
02224 if (br->state & NST_RECURSION) {
02225 *max = ONIG_INFINITE_DISTANCE;
02226 break;
02227 }
02228 backs = BACKREFS_P(br);
02229 for (i = 0; i < br->back_num; i++) {
02230 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
02231 r = get_max_match_length(nodes[backs[i]], &tmax, env);
02232 if (r != 0) break;
02233 if (*max < tmax) *max = tmax;
02234 }
02235 }
02236 break;
02237
02238 #ifdef USE_SUBEXP_CALL
02239 case NT_CALL:
02240 if (! IS_CALL_RECURSION(NCALL(node)))
02241 r = get_max_match_length(NCALL(node)->target, max, env);
02242 else
02243 *max = ONIG_INFINITE_DISTANCE;
02244 break;
02245 #endif
02246
02247 case NT_QTFR:
02248 {
02249 QtfrNode* qn = NQTFR(node);
02250
02251 if (qn->upper != 0) {
02252 r = get_max_match_length(qn->target, max, env);
02253 if (r == 0 && *max != 0) {
02254 if (! IS_REPEAT_INFINITE(qn->upper))
02255 *max = distance_multiply(*max, qn->upper);
02256 else
02257 *max = ONIG_INFINITE_DISTANCE;
02258 }
02259 }
02260 }
02261 break;
02262
02263 case NT_ENCLOSE:
02264 {
02265 EncloseNode* en = NENCLOSE(node);
02266 switch (en->type) {
02267 case ENCLOSE_MEMORY:
02268 #ifdef USE_SUBEXP_CALL
02269 if (IS_ENCLOSE_MAX_FIXED(en))
02270 *max = en->max_len;
02271 else {
02272 r = get_max_match_length(en->target, max, env);
02273 if (r == 0) {
02274 en->max_len = *max;
02275 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
02276 }
02277 }
02278 break;
02279 #endif
02280 case ENCLOSE_OPTION:
02281 case ENCLOSE_STOP_BACKTRACK:
02282 r = get_max_match_length(en->target, max, env);
02283 break;
02284 }
02285 }
02286 break;
02287
02288 case NT_ANCHOR:
02289 default:
02290 break;
02291 }
02292
02293 return r;
02294 }
02295
02296 #define GET_CHAR_LEN_VARLEN -1
02297 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
02298
02299
02300 static int
02301 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
02302 {
02303 int tlen;
02304 int r = 0;
02305
02306 level++;
02307 *len = 0;
02308 switch (NTYPE(node)) {
02309 case NT_LIST:
02310 do {
02311 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02312 if (r == 0)
02313 *len = (int)distance_add(*len, tlen);
02314 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02315 break;
02316
02317 case NT_ALT:
02318 {
02319 int tlen2;
02320 int varlen = 0;
02321
02322 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02323 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
02324 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
02325 if (r == 0) {
02326 if (tlen != tlen2)
02327 varlen = 1;
02328 }
02329 }
02330 if (r == 0) {
02331 if (varlen != 0) {
02332 if (level == 1)
02333 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
02334 else
02335 r = GET_CHAR_LEN_VARLEN;
02336 }
02337 else
02338 *len = tlen;
02339 }
02340 }
02341 break;
02342
02343 case NT_STR:
02344 {
02345 StrNode* sn = NSTR(node);
02346 UChar *s = sn->s;
02347 while (s < sn->end) {
02348 s += enclen(reg->enc, s, sn->end);
02349 (*len)++;
02350 }
02351 }
02352 break;
02353
02354 case NT_QTFR:
02355 {
02356 QtfrNode* qn = NQTFR(node);
02357 if (qn->lower == qn->upper) {
02358 r = get_char_length_tree1(qn->target, reg, &tlen, level);
02359 if (r == 0)
02360 *len = (int)distance_multiply(tlen, qn->lower);
02361 }
02362 else
02363 r = GET_CHAR_LEN_VARLEN;
02364 }
02365 break;
02366
02367 #ifdef USE_SUBEXP_CALL
02368 case NT_CALL:
02369 if (! IS_CALL_RECURSION(NCALL(node)))
02370 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
02371 else
02372 r = GET_CHAR_LEN_VARLEN;
02373 break;
02374 #endif
02375
02376 case NT_CTYPE:
02377 *len = 1;
02378 break;
02379
02380 case NT_CCLASS:
02381 case NT_CANY:
02382 *len = 1;
02383 break;
02384
02385 case NT_ENCLOSE:
02386 {
02387 EncloseNode* en = NENCLOSE(node);
02388 switch (en->type) {
02389 case ENCLOSE_MEMORY:
02390 #ifdef USE_SUBEXP_CALL
02391 if (IS_ENCLOSE_CLEN_FIXED(en))
02392 *len = en->char_len;
02393 else {
02394 r = get_char_length_tree1(en->target, reg, len, level);
02395 if (r == 0) {
02396 en->char_len = *len;
02397 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
02398 }
02399 }
02400 break;
02401 #endif
02402 case ENCLOSE_OPTION:
02403 case ENCLOSE_STOP_BACKTRACK:
02404 r = get_char_length_tree1(en->target, reg, len, level);
02405 break;
02406 default:
02407 break;
02408 }
02409 }
02410 break;
02411
02412 case NT_ANCHOR:
02413 break;
02414
02415 default:
02416 r = GET_CHAR_LEN_VARLEN;
02417 break;
02418 }
02419
02420 return r;
02421 }
02422
02423 static int
02424 get_char_length_tree(Node* node, regex_t* reg, int* len)
02425 {
02426 return get_char_length_tree1(node, reg, len, 0);
02427 }
02428
02429
02430 static int
02431 is_not_included(Node* x, Node* y, regex_t* reg)
02432 {
02433 int i;
02434 OnigDistance len;
02435 OnigCodePoint code;
02436 UChar *p, c;
02437 int ytype;
02438
02439 retry:
02440 ytype = NTYPE(y);
02441 switch (NTYPE(x)) {
02442 case NT_CTYPE:
02443 {
02444 switch (ytype) {
02445 case NT_CTYPE:
02446 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
02447 NCTYPE(y)->not != NCTYPE(x)->not)
02448 return 1;
02449 else
02450 return 0;
02451 break;
02452
02453 case NT_CCLASS:
02454 swap:
02455 {
02456 Node* tmp;
02457 tmp = x; x = y; y = tmp;
02458 goto retry;
02459 }
02460 break;
02461
02462 case NT_STR:
02463 goto swap;
02464 break;
02465
02466 default:
02467 break;
02468 }
02469 }
02470 break;
02471
02472 case NT_CCLASS:
02473 {
02474 CClassNode* xc = NCCLASS(x);
02475 switch (ytype) {
02476 case NT_CTYPE:
02477 switch (NCTYPE(y)->ctype) {
02478 case ONIGENC_CTYPE_WORD:
02479 if (NCTYPE(y)->not == 0) {
02480 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
02481 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02482 if (BITSET_AT(xc->bs, i)) {
02483 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
02484 }
02485 }
02486 return 1;
02487 }
02488 return 0;
02489 }
02490 else {
02491 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02492 if (! IS_CODE_SB_WORD(reg->enc, i)) {
02493 if (!IS_NCCLASS_NOT(xc)) {
02494 if (BITSET_AT(xc->bs, i))
02495 return 0;
02496 }
02497 else {
02498 if (! BITSET_AT(xc->bs, i))
02499 return 0;
02500 }
02501 }
02502 }
02503 return 1;
02504 }
02505 break;
02506
02507 default:
02508 break;
02509 }
02510 break;
02511
02512 case NT_CCLASS:
02513 {
02514 int v;
02515 CClassNode* yc = NCCLASS(y);
02516
02517 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02518 v = BITSET_AT(xc->bs, i);
02519 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
02520 (v == 0 && IS_NCCLASS_NOT(xc))) {
02521 v = BITSET_AT(yc->bs, i);
02522 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
02523 (v == 0 && IS_NCCLASS_NOT(yc)))
02524 return 0;
02525 }
02526 }
02527 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
02528 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
02529 return 1;
02530 return 0;
02531 }
02532 break;
02533
02534 case NT_STR:
02535 goto swap;
02536 break;
02537
02538 default:
02539 break;
02540 }
02541 }
02542 break;
02543
02544 case NT_STR:
02545 {
02546 StrNode* xs = NSTR(x);
02547 if (NSTRING_LEN(x) == 0)
02548 break;
02549
02550 c = *(xs->s);
02551 switch (ytype) {
02552 case NT_CTYPE:
02553 switch (NCTYPE(y)->ctype) {
02554 case ONIGENC_CTYPE_WORD:
02555 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
02556 return NCTYPE(y)->not;
02557 else
02558 return !(NCTYPE(y)->not);
02559 break;
02560 default:
02561 break;
02562 }
02563 break;
02564
02565 case NT_CCLASS:
02566 {
02567 CClassNode* cc = NCCLASS(y);
02568
02569 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
02570 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
02571 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
02572 }
02573 break;
02574
02575 case NT_STR:
02576 {
02577 UChar *q;
02578 StrNode* ys = NSTR(y);
02579 len = NSTRING_LEN(x);
02580 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
02581 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
02582
02583 return 0;
02584 }
02585 else {
02586 for (i = 0, p = ys->s, q = xs->s; (OnigDistance)i < len; i++, p++, q++) {
02587 if (*p != *q) return 1;
02588 }
02589 }
02590 }
02591 break;
02592
02593 default:
02594 break;
02595 }
02596 }
02597 break;
02598
02599 default:
02600 break;
02601 }
02602
02603 return 0;
02604 }
02605
02606 static Node*
02607 get_head_value_node(Node* node, int exact, regex_t* reg)
02608 {
02609 Node* n = NULL_NODE;
02610
02611 switch (NTYPE(node)) {
02612 case NT_BREF:
02613 case NT_ALT:
02614 case NT_CANY:
02615 #ifdef USE_SUBEXP_CALL
02616 case NT_CALL:
02617 #endif
02618 break;
02619
02620 case NT_CTYPE:
02621 case NT_CCLASS:
02622 if (exact == 0) {
02623 n = node;
02624 }
02625 break;
02626
02627 case NT_LIST:
02628 n = get_head_value_node(NCAR(node), exact, reg);
02629 break;
02630
02631 case NT_STR:
02632 {
02633 StrNode* sn = NSTR(node);
02634
02635 if (sn->end <= sn->s)
02636 break;
02637
02638 if (exact != 0 &&
02639 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
02640 }
02641 else {
02642 n = node;
02643 }
02644 }
02645 break;
02646
02647 case NT_QTFR:
02648 {
02649 QtfrNode* qn = NQTFR(node);
02650 if (qn->lower > 0) {
02651 if (IS_NOT_NULL(qn->head_exact))
02652 n = qn->head_exact;
02653 else
02654 n = get_head_value_node(qn->target, exact, reg);
02655 }
02656 }
02657 break;
02658
02659 case NT_ENCLOSE:
02660 {
02661 EncloseNode* en = NENCLOSE(node);
02662 switch (en->type) {
02663 case ENCLOSE_OPTION:
02664 {
02665 OnigOptionType options = reg->options;
02666
02667 reg->options = NENCLOSE(node)->option;
02668 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
02669 reg->options = options;
02670 }
02671 break;
02672
02673 case ENCLOSE_MEMORY:
02674 case ENCLOSE_STOP_BACKTRACK:
02675 n = get_head_value_node(en->target, exact, reg);
02676 break;
02677 }
02678 }
02679 break;
02680
02681 case NT_ANCHOR:
02682 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
02683 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
02684 break;
02685
02686 default:
02687 break;
02688 }
02689
02690 return n;
02691 }
02692
02693 static int
02694 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
02695 {
02696 int type, r = 0;
02697
02698 type = NTYPE(node);
02699 if ((NTYPE2BIT(type) & type_mask) == 0)
02700 return 1;
02701
02702 switch (type) {
02703 case NT_LIST:
02704 case NT_ALT:
02705 do {
02706 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
02707 anchor_mask);
02708 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02709 break;
02710
02711 case NT_QTFR:
02712 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
02713 anchor_mask);
02714 break;
02715
02716 case NT_ENCLOSE:
02717 {
02718 EncloseNode* en = NENCLOSE(node);
02719 if ((en->type & enclose_mask) == 0)
02720 return 1;
02721
02722 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
02723 }
02724 break;
02725
02726 case NT_ANCHOR:
02727 type = NANCHOR(node)->type;
02728 if ((type & anchor_mask) == 0)
02729 return 1;
02730
02731 if (NANCHOR(node)->target)
02732 r = check_type_tree(NANCHOR(node)->target,
02733 type_mask, enclose_mask, anchor_mask);
02734 break;
02735
02736 default:
02737 break;
02738 }
02739 return r;
02740 }
02741
02742 #ifdef USE_SUBEXP_CALL
02743
02744 #define RECURSION_EXIST 1
02745 #define RECURSION_INFINITE 2
02746
02747 static int
02748 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
02749 {
02750 int type;
02751 int r = 0;
02752
02753 type = NTYPE(node);
02754 switch (type) {
02755 case NT_LIST:
02756 {
02757 Node *x;
02758 OnigDistance min;
02759 int ret;
02760
02761 x = node;
02762 do {
02763 ret = subexp_inf_recursive_check(NCAR(x), env, head);
02764 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02765 r |= ret;
02766 if (head) {
02767 ret = get_min_match_length(NCAR(x), &min, env);
02768 if (ret != 0) return ret;
02769 if (min != 0) head = 0;
02770 }
02771 } while (IS_NOT_NULL(x = NCDR(x)));
02772 }
02773 break;
02774
02775 case NT_ALT:
02776 {
02777 int ret;
02778 r = RECURSION_EXIST;
02779 do {
02780 ret = subexp_inf_recursive_check(NCAR(node), env, head);
02781 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02782 r &= ret;
02783 } while (IS_NOT_NULL(node = NCDR(node)));
02784 }
02785 break;
02786
02787 case NT_QTFR:
02788 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
02789 if (r == RECURSION_EXIST) {
02790 if (NQTFR(node)->lower == 0) r = 0;
02791 }
02792 break;
02793
02794 case NT_ANCHOR:
02795 {
02796 AnchorNode* an = NANCHOR(node);
02797 switch (an->type) {
02798 case ANCHOR_PREC_READ:
02799 case ANCHOR_PREC_READ_NOT:
02800 case ANCHOR_LOOK_BEHIND:
02801 case ANCHOR_LOOK_BEHIND_NOT:
02802 r = subexp_inf_recursive_check(an->target, env, head);
02803 break;
02804 }
02805 }
02806 break;
02807
02808 case NT_CALL:
02809 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
02810 break;
02811
02812 case NT_ENCLOSE:
02813 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02814 return 0;
02815 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02816 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
02817 else {
02818 SET_ENCLOSE_STATUS(node, NST_MARK2);
02819 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
02820 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02821 }
02822 break;
02823
02824 default:
02825 break;
02826 }
02827
02828 return r;
02829 }
02830
02831 static int
02832 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
02833 {
02834 int type;
02835 int r = 0;
02836
02837 type = NTYPE(node);
02838 switch (type) {
02839 case NT_LIST:
02840 case NT_ALT:
02841 do {
02842 r = subexp_inf_recursive_check_trav(NCAR(node), env);
02843 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02844 break;
02845
02846 case NT_QTFR:
02847 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
02848 break;
02849
02850 case NT_ANCHOR:
02851 {
02852 AnchorNode* an = NANCHOR(node);
02853 switch (an->type) {
02854 case ANCHOR_PREC_READ:
02855 case ANCHOR_PREC_READ_NOT:
02856 case ANCHOR_LOOK_BEHIND:
02857 case ANCHOR_LOOK_BEHIND_NOT:
02858 r = subexp_inf_recursive_check_trav(an->target, env);
02859 break;
02860 }
02861 }
02862 break;
02863
02864 case NT_ENCLOSE:
02865 {
02866 EncloseNode* en = NENCLOSE(node);
02867
02868 if (IS_ENCLOSE_RECURSION(en)) {
02869 SET_ENCLOSE_STATUS(node, NST_MARK1);
02870 r = subexp_inf_recursive_check(en->target, env, 1);
02871 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
02872 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02873 }
02874 r = subexp_inf_recursive_check_trav(en->target, env);
02875 }
02876
02877 break;
02878
02879 default:
02880 break;
02881 }
02882
02883 return r;
02884 }
02885
02886 static int
02887 subexp_recursive_check(Node* node)
02888 {
02889 int r = 0;
02890
02891 switch (NTYPE(node)) {
02892 case NT_LIST:
02893 case NT_ALT:
02894 do {
02895 r |= subexp_recursive_check(NCAR(node));
02896 } while (IS_NOT_NULL(node = NCDR(node)));
02897 break;
02898
02899 case NT_QTFR:
02900 r = subexp_recursive_check(NQTFR(node)->target);
02901 break;
02902
02903 case NT_ANCHOR:
02904 {
02905 AnchorNode* an = NANCHOR(node);
02906 switch (an->type) {
02907 case ANCHOR_PREC_READ:
02908 case ANCHOR_PREC_READ_NOT:
02909 case ANCHOR_LOOK_BEHIND:
02910 case ANCHOR_LOOK_BEHIND_NOT:
02911 r = subexp_recursive_check(an->target);
02912 break;
02913 }
02914 }
02915 break;
02916
02917 case NT_CALL:
02918 r = subexp_recursive_check(NCALL(node)->target);
02919 if (r != 0) SET_CALL_RECURSION(node);
02920 break;
02921
02922 case NT_ENCLOSE:
02923 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02924 return 0;
02925 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02926 return 1;
02927 else {
02928 SET_ENCLOSE_STATUS(node, NST_MARK2);
02929 r = subexp_recursive_check(NENCLOSE(node)->target);
02930 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02931 }
02932 break;
02933
02934 default:
02935 break;
02936 }
02937
02938 return r;
02939 }
02940
02941
02942 static int
02943 subexp_recursive_check_trav(Node* node, ScanEnv* env)
02944 {
02945 #define FOUND_CALLED_NODE 1
02946
02947 int type;
02948 int r = 0;
02949
02950 type = NTYPE(node);
02951 switch (type) {
02952 case NT_LIST:
02953 case NT_ALT:
02954 {
02955 int ret;
02956 do {
02957 ret = subexp_recursive_check_trav(NCAR(node), env);
02958 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
02959 else if (ret < 0) return ret;
02960 } while (IS_NOT_NULL(node = NCDR(node)));
02961 }
02962 break;
02963
02964 case NT_QTFR:
02965 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
02966 if (NQTFR(node)->upper == 0) {
02967 if (r == FOUND_CALLED_NODE)
02968 NQTFR(node)->is_refered = 1;
02969 }
02970 break;
02971
02972 case NT_ANCHOR:
02973 {
02974 AnchorNode* an = NANCHOR(node);
02975 switch (an->type) {
02976 case ANCHOR_PREC_READ:
02977 case ANCHOR_PREC_READ_NOT:
02978 case ANCHOR_LOOK_BEHIND:
02979 case ANCHOR_LOOK_BEHIND_NOT:
02980 r = subexp_recursive_check_trav(an->target, env);
02981 break;
02982 }
02983 }
02984 break;
02985
02986 case NT_ENCLOSE:
02987 {
02988 EncloseNode* en = NENCLOSE(node);
02989
02990 if (! IS_ENCLOSE_RECURSION(en)) {
02991 if (IS_ENCLOSE_CALLED(en)) {
02992 SET_ENCLOSE_STATUS(node, NST_MARK1);
02993 r = subexp_recursive_check(en->target);
02994 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
02995 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02996 }
02997 }
02998 r = subexp_recursive_check_trav(en->target, env);
02999 if (IS_ENCLOSE_CALLED(en))
03000 r |= FOUND_CALLED_NODE;
03001 }
03002 break;
03003
03004 default:
03005 break;
03006 }
03007
03008 return r;
03009 }
03010
03011 static int
03012 setup_subexp_call(Node* node, ScanEnv* env)
03013 {
03014 int type;
03015 int r = 0;
03016
03017 type = NTYPE(node);
03018 switch (type) {
03019 case NT_LIST:
03020 do {
03021 r = setup_subexp_call(NCAR(node), env);
03022 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03023 break;
03024
03025 case NT_ALT:
03026 do {
03027 r = setup_subexp_call(NCAR(node), env);
03028 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03029 break;
03030
03031 case NT_QTFR:
03032 r = setup_subexp_call(NQTFR(node)->target, env);
03033 break;
03034 case NT_ENCLOSE:
03035 r = setup_subexp_call(NENCLOSE(node)->target, env);
03036 break;
03037
03038 case NT_CALL:
03039 {
03040 CallNode* cn = NCALL(node);
03041 Node** nodes = SCANENV_MEM_NODES(env);
03042
03043 if (cn->group_num != 0) {
03044 int gnum = cn->group_num;
03045
03046 #ifdef USE_NAMED_GROUP
03047 if (env->num_named > 0 &&
03048 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
03049 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
03050 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
03051 }
03052 #endif
03053 if (gnum > env->num_mem) {
03054 onig_scan_env_set_error_string(env,
03055 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
03056 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
03057 }
03058
03059 #ifdef USE_NAMED_GROUP
03060 set_call_attr:
03061 #endif
03062 cn->target = nodes[cn->group_num];
03063 if (IS_NULL(cn->target)) {
03064 onig_scan_env_set_error_string(env,
03065 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03066 return ONIGERR_UNDEFINED_NAME_REFERENCE;
03067 }
03068 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
03069 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
03070 cn->unset_addr_list = env->unset_addr_list;
03071 }
03072 #ifdef USE_NAMED_GROUP
03073 else {
03074 int *refs;
03075
03076 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
03077 &refs);
03078 if (n <= 0) {
03079 onig_scan_env_set_error_string(env,
03080 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03081 return ONIGERR_UNDEFINED_NAME_REFERENCE;
03082 }
03083 else if (n > 1) {
03084 onig_scan_env_set_error_string(env,
03085 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
03086 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
03087 }
03088 else {
03089 cn->group_num = refs[0];
03090 goto set_call_attr;
03091 }
03092 }
03093 #endif
03094 }
03095 break;
03096
03097 case NT_ANCHOR:
03098 {
03099 AnchorNode* an = NANCHOR(node);
03100
03101 switch (an->type) {
03102 case ANCHOR_PREC_READ:
03103 case ANCHOR_PREC_READ_NOT:
03104 case ANCHOR_LOOK_BEHIND:
03105 case ANCHOR_LOOK_BEHIND_NOT:
03106 r = setup_subexp_call(an->target, env);
03107 break;
03108 }
03109 }
03110 break;
03111
03112 default:
03113 break;
03114 }
03115
03116 return r;
03117 }
03118 #endif
03119
03120
03121
03122
03123
03124 static int
03125 divide_look_behind_alternatives(Node* node)
03126 {
03127 Node *head, *np, *insert_node;
03128 AnchorNode* an = NANCHOR(node);
03129 int anc_type = an->type;
03130
03131 head = an->target;
03132 np = NCAR(head);
03133 swap_node(node, head);
03134 NCAR(node) = head;
03135 NANCHOR(head)->target = np;
03136
03137 np = node;
03138 while ((np = NCDR(np)) != NULL_NODE) {
03139 insert_node = onig_node_new_anchor(anc_type);
03140 CHECK_NULL_RETURN_MEMERR(insert_node);
03141 NANCHOR(insert_node)->target = NCAR(np);
03142 NCAR(np) = insert_node;
03143 }
03144
03145 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
03146 np = node;
03147 do {
03148 SET_NTYPE(np, NT_LIST);
03149 } while ((np = NCDR(np)) != NULL_NODE);
03150 }
03151 return 0;
03152 }
03153
03154 static int
03155 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
03156 {
03157 int r, len;
03158 AnchorNode* an = NANCHOR(node);
03159
03160 r = get_char_length_tree(an->target, reg, &len);
03161 if (r == 0)
03162 an->char_len = len;
03163 else if (r == GET_CHAR_LEN_VARLEN)
03164 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03165 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
03166 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
03167 r = divide_look_behind_alternatives(node);
03168 else
03169 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03170 }
03171
03172 return r;
03173 }
03174
03175 static int
03176 next_setup(Node* node, Node* next_node, regex_t* reg)
03177 {
03178 int type;
03179
03180 retry:
03181 type = NTYPE(node);
03182 if (type == NT_QTFR) {
03183 QtfrNode* qn = NQTFR(node);
03184 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
03185 #ifdef USE_QTFR_PEEK_NEXT
03186 Node* n = get_head_value_node(next_node, 1, reg);
03187
03188 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
03189 qn->next_head_exact = n;
03190 }
03191 #endif
03192
03193 if (qn->lower <= 1) {
03194 int ttype = NTYPE(qn->target);
03195 if (IS_NODE_TYPE_SIMPLE(ttype)) {
03196 Node *x, *y;
03197 x = get_head_value_node(qn->target, 0, reg);
03198 if (IS_NOT_NULL(x)) {
03199 y = get_head_value_node(next_node, 0, reg);
03200 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
03201 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
03202 CHECK_NULL_RETURN_MEMERR(en);
03203 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
03204 swap_node(node, en);
03205 NENCLOSE(node)->target = en;
03206 }
03207 }
03208 }
03209 }
03210 }
03211 }
03212 else if (type == NT_ENCLOSE) {
03213 EncloseNode* en = NENCLOSE(node);
03214 if (en->type == ENCLOSE_MEMORY) {
03215 node = en->target;
03216 goto retry;
03217 }
03218 }
03219 return 0;
03220 }
03221
03222
03223 static int
03224 update_string_node_case_fold(regex_t* reg, Node *node)
03225 {
03226 UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
03227 UChar *sbuf, *ebuf, *sp;
03228 int r, i, len;
03229 OnigDistance sbuf_size;
03230 StrNode* sn = NSTR(node);
03231
03232 end = sn->end;
03233 sbuf_size = (end - sn->s) * 2;
03234 sbuf = (UChar* )xmalloc(sbuf_size);
03235 CHECK_NULL_RETURN_MEMERR(sbuf);
03236 ebuf = sbuf + sbuf_size;
03237
03238 sp = sbuf;
03239 p = sn->s;
03240 while (p < end) {
03241 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
03242 q = buf;
03243 for (i = 0; i < len; i++) {
03244 if (sp >= ebuf) {
03245 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
03246 CHECK_NULL_RETURN_MEMERR(sbuf);
03247 sp = sbuf + sbuf_size;
03248 sbuf_size *= 2;
03249 ebuf = sbuf + sbuf_size;
03250 }
03251
03252 *sp++ = buf[i];
03253 }
03254 }
03255
03256 r = onig_node_str_set(node, sbuf, sp);
03257 if (r != 0) {
03258 xfree(sbuf);
03259 return r;
03260 }
03261
03262 xfree(sbuf);
03263 return 0;
03264 }
03265
03266 static int
03267 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
03268 regex_t* reg)
03269 {
03270 int r;
03271 Node *node;
03272
03273 node = onig_node_new_str(s, end);
03274 if (IS_NULL(node)) return ONIGERR_MEMORY;
03275
03276 r = update_string_node_case_fold(reg, node);
03277 if (r != 0) {
03278 onig_node_free(node);
03279 return r;
03280 }
03281
03282 NSTRING_SET_AMBIG(node);
03283 NSTRING_SET_DONT_GET_OPT_INFO(node);
03284 *rnode = node;
03285 return 0;
03286 }
03287
03288 static int
03289 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
03290 UChar *p, int slen, UChar *end,
03291 regex_t* reg, Node **rnode)
03292 {
03293 int r, i, j, len, varlen;
03294 Node *anode, *var_anode, *snode, *xnode, *an;
03295 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
03296
03297 *rnode = var_anode = NULL_NODE;
03298
03299 varlen = 0;
03300 for (i = 0; i < item_num; i++) {
03301 if (items[i].byte_len != slen) {
03302 varlen = 1;
03303 break;
03304 }
03305 }
03306
03307 if (varlen != 0) {
03308 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03309 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
03310
03311 xnode = onig_node_new_list(NULL, NULL);
03312 if (IS_NULL(xnode)) goto mem_err;
03313 NCAR(var_anode) = xnode;
03314
03315 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03316 if (IS_NULL(anode)) goto mem_err;
03317 NCAR(xnode) = anode;
03318 }
03319 else {
03320 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03321 if (IS_NULL(anode)) return ONIGERR_MEMORY;
03322 }
03323
03324 snode = onig_node_new_str(p, p + slen);
03325 if (IS_NULL(snode)) goto mem_err;
03326
03327 NCAR(anode) = snode;
03328
03329 for (i = 0; i < item_num; i++) {
03330 snode = onig_node_new_str(NULL, NULL);
03331 if (IS_NULL(snode)) goto mem_err;
03332
03333 for (j = 0; j < items[i].code_len; j++) {
03334 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
03335 if (len < 0) {
03336 r = len;
03337 goto mem_err2;
03338 }
03339
03340 r = onig_node_str_cat(snode, buf, buf + len);
03341 if (r != 0) goto mem_err2;
03342 }
03343
03344 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
03345 if (IS_NULL(an)) {
03346 goto mem_err2;
03347 }
03348
03349 if (items[i].byte_len != slen) {
03350 Node *rem;
03351 UChar *q = p + items[i].byte_len;
03352
03353 if (q < end) {
03354 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
03355 if (r != 0) {
03356 onig_node_free(an);
03357 goto mem_err2;
03358 }
03359
03360 xnode = onig_node_list_add(NULL_NODE, snode);
03361 if (IS_NULL(xnode)) {
03362 onig_node_free(an);
03363 onig_node_free(rem);
03364 goto mem_err2;
03365 }
03366 if (IS_NULL(onig_node_list_add(xnode, rem))) {
03367 onig_node_free(an);
03368 onig_node_free(xnode);
03369 onig_node_free(rem);
03370 goto mem_err;
03371 }
03372
03373 NCAR(an) = xnode;
03374 }
03375 else {
03376 NCAR(an) = snode;
03377 }
03378
03379 NCDR(var_anode) = an;
03380 var_anode = an;
03381 }
03382 else {
03383 NCAR(an) = snode;
03384 NCDR(anode) = an;
03385 anode = an;
03386 }
03387 }
03388
03389 return varlen;
03390
03391 mem_err2:
03392 onig_node_free(snode);
03393
03394 mem_err:
03395 onig_node_free(*rnode);
03396
03397 return ONIGERR_MEMORY;
03398 }
03399
03400 static int
03401 expand_case_fold_string(Node* node, regex_t* reg)
03402 {
03403 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
03404
03405 int r, n, len, alt_num;
03406 UChar *start, *end, *p;
03407 Node *top_root, *root, *snode, *prev_node;
03408 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
03409 StrNode* sn = NSTR(node);
03410
03411 if (NSTRING_IS_AMBIG(node)) return 0;
03412
03413 start = sn->s;
03414 end = sn->end;
03415 if (start >= end) return 0;
03416
03417 r = 0;
03418 top_root = root = prev_node = snode = NULL_NODE;
03419 alt_num = 1;
03420 p = start;
03421 while (p < end) {
03422 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
03423 p, end, items);
03424 if (n < 0) {
03425 r = n;
03426 goto err;
03427 }
03428
03429 len = enclen(reg->enc, p, end);
03430
03431 if (n == 0) {
03432 if (IS_NULL(snode)) {
03433 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03434 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03435 if (IS_NULL(root)) {
03436 onig_node_free(prev_node);
03437 goto mem_err;
03438 }
03439 }
03440
03441 prev_node = snode = onig_node_new_str(NULL, NULL);
03442 if (IS_NULL(snode)) goto mem_err;
03443 if (IS_NOT_NULL(root)) {
03444 if (IS_NULL(onig_node_list_add(root, snode))) {
03445 onig_node_free(snode);
03446 goto mem_err;
03447 }
03448 }
03449 }
03450
03451 r = onig_node_str_cat(snode, p, p + len);
03452 if (r != 0) goto err;
03453 }
03454 else {
03455 alt_num *= (n + 1);
03456 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
03457
03458 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03459 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03460 if (IS_NULL(root)) {
03461 onig_node_free(prev_node);
03462 goto mem_err;
03463 }
03464 }
03465
03466 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
03467 if (r < 0) goto mem_err;
03468 if (r == 1) {
03469 if (IS_NULL(root)) {
03470 top_root = prev_node;
03471 }
03472 else {
03473 if (IS_NULL(onig_node_list_add(root, prev_node))) {
03474 onig_node_free(prev_node);
03475 goto mem_err;
03476 }
03477 }
03478
03479 root = NCAR(prev_node);
03480 }
03481 else {
03482 if (IS_NOT_NULL(root)) {
03483 if (IS_NULL(onig_node_list_add(root, prev_node))) {
03484 onig_node_free(prev_node);
03485 goto mem_err;
03486 }
03487 }
03488 }
03489
03490 snode = NULL_NODE;
03491 }
03492
03493 p += len;
03494 }
03495
03496 if (p < end) {
03497 Node *srem;
03498
03499 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
03500 if (r != 0) goto mem_err;
03501
03502 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
03503 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03504 if (IS_NULL(root)) {
03505 onig_node_free(srem);
03506 onig_node_free(prev_node);
03507 goto mem_err;
03508 }
03509 }
03510
03511 if (IS_NULL(root)) {
03512 prev_node = srem;
03513 }
03514 else {
03515 if (IS_NULL(onig_node_list_add(root, srem))) {
03516 onig_node_free(srem);
03517 goto mem_err;
03518 }
03519 }
03520 }
03521
03522
03523 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
03524 swap_node(node, top_root);
03525 onig_node_free(top_root);
03526 return 0;
03527
03528 mem_err:
03529 r = ONIGERR_MEMORY;
03530
03531 err:
03532 onig_node_free(top_root);
03533 return r;
03534 }
03535
03536
03537 #ifdef USE_COMBINATION_EXPLOSION_CHECK
03538
03539 #define CEC_THRES_NUM_BIG_REPEAT 512
03540 #define CEC_INFINITE_NUM 0x7fffffff
03541
03542 #define CEC_IN_INFINITE_REPEAT (1<<0)
03543 #define CEC_IN_FINITE_REPEAT (1<<1)
03544 #define CEC_CONT_BIG_REPEAT (1<<2)
03545
03546 static int
03547 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
03548 {
03549 int type;
03550 int r = state;
03551
03552 type = NTYPE(node);
03553 switch (type) {
03554 case NT_LIST:
03555 {
03556 Node* prev = NULL_NODE;
03557 do {
03558 r = setup_comb_exp_check(NCAR(node), r, env);
03559 prev = NCAR(node);
03560 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
03561 }
03562 break;
03563
03564 case NT_ALT:
03565 {
03566 int ret;
03567 do {
03568 ret = setup_comb_exp_check(NCAR(node), state, env);
03569 r |= ret;
03570 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
03571 }
03572 break;
03573
03574 case NT_QTFR:
03575 {
03576 int child_state = state;
03577 int add_state = 0;
03578 QtfrNode* qn = NQTFR(node);
03579 Node* target = qn->target;
03580 int var_num;
03581
03582 if (! IS_REPEAT_INFINITE(qn->upper)) {
03583 if (qn->upper > 1) {
03584
03585 child_state |= CEC_IN_FINITE_REPEAT;
03586
03587
03588 if (env->backrefed_mem == 0) {
03589 if (NTYPE(qn->target) == NT_ENCLOSE) {
03590 EncloseNode* en = NENCLOSE(qn->target);
03591 if (en->type == ENCLOSE_MEMORY) {
03592 if (NTYPE(en->target) == NT_QTFR) {
03593 QtfrNode* q = NQTFR(en->target);
03594 if (IS_REPEAT_INFINITE(q->upper)
03595 && q->greedy == qn->greedy) {
03596 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
03597 if (qn->upper == 1)
03598 child_state = state;
03599 }
03600 }
03601 }
03602 }
03603 }
03604 }
03605 }
03606
03607 if (state & CEC_IN_FINITE_REPEAT) {
03608 qn->comb_exp_check_num = -1;
03609 }
03610 else {
03611 if (IS_REPEAT_INFINITE(qn->upper)) {
03612 var_num = CEC_INFINITE_NUM;
03613 child_state |= CEC_IN_INFINITE_REPEAT;
03614 }
03615 else {
03616 var_num = qn->upper - qn->lower;
03617 }
03618
03619 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
03620 add_state |= CEC_CONT_BIG_REPEAT;
03621
03622 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
03623 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
03624 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
03625 if (qn->comb_exp_check_num == 0) {
03626 env->num_comb_exp_check++;
03627 qn->comb_exp_check_num = env->num_comb_exp_check;
03628 if (env->curr_max_regnum > env->comb_exp_max_regnum)
03629 env->comb_exp_max_regnum = env->curr_max_regnum;
03630 }
03631 }
03632 }
03633
03634 r = setup_comb_exp_check(target, child_state, env);
03635 r |= add_state;
03636 }
03637 break;
03638
03639 case NT_ENCLOSE:
03640 {
03641 EncloseNode* en = NENCLOSE(node);
03642
03643 switch (en->type) {
03644 case ENCLOSE_MEMORY:
03645 {
03646 if (env->curr_max_regnum < en->regnum)
03647 env->curr_max_regnum = en->regnum;
03648
03649 r = setup_comb_exp_check(en->target, state, env);
03650 }
03651 break;
03652
03653 default:
03654 r = setup_comb_exp_check(en->target, state, env);
03655 break;
03656 }
03657 }
03658 break;
03659
03660 #ifdef USE_SUBEXP_CALL
03661 case NT_CALL:
03662 if (IS_CALL_RECURSION(NCALL(node)))
03663 env->has_recursion = 1;
03664 else
03665 r = setup_comb_exp_check(NCALL(node)->target, state, env);
03666 break;
03667 #endif
03668
03669 default:
03670 break;
03671 }
03672
03673 return r;
03674 }
03675 #endif
03676
03677 #define IN_ALT (1<<0)
03678 #define IN_NOT (1<<1)
03679 #define IN_REPEAT (1<<2)
03680 #define IN_VAR_REPEAT (1<<3)
03681
03682
03683
03684
03685
03686
03687
03688
03689
03690 static int
03691 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
03692 {
03693 int type;
03694 int r = 0;
03695
03696 restart:
03697 type = NTYPE(node);
03698 switch (type) {
03699 case NT_LIST:
03700 {
03701 Node* prev = NULL_NODE;
03702 do {
03703 r = setup_tree(NCAR(node), reg, state, env);
03704 if (IS_NOT_NULL(prev) && r == 0) {
03705 r = next_setup(prev, NCAR(node), reg);
03706 }
03707 prev = NCAR(node);
03708 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03709 }
03710 break;
03711
03712 case NT_ALT:
03713 do {
03714 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
03715 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03716 break;
03717
03718 case NT_CCLASS:
03719 break;
03720
03721 case NT_STR:
03722 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
03723 r = expand_case_fold_string(node, reg);
03724 }
03725 break;
03726
03727 case NT_CTYPE:
03728 case NT_CANY:
03729 break;
03730
03731 #ifdef USE_SUBEXP_CALL
03732 case NT_CALL:
03733 break;
03734 #endif
03735
03736 case NT_BREF:
03737 {
03738 int i;
03739 int* p;
03740 Node** nodes = SCANENV_MEM_NODES(env);
03741 BRefNode* br = NBREF(node);
03742 p = BACKREFS_P(br);
03743 for (i = 0; i < br->back_num; i++) {
03744 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
03745 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
03746 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
03747 #ifdef USE_BACKREF_WITH_LEVEL
03748 if (IS_BACKREF_NEST_LEVEL(br)) {
03749 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
03750 }
03751 #endif
03752 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
03753 }
03754 }
03755 break;
03756
03757 case NT_QTFR:
03758 {
03759 OnigDistance d;
03760 QtfrNode* qn = NQTFR(node);
03761 Node* target = qn->target;
03762
03763 if ((state & IN_REPEAT) != 0) {
03764 qn->state |= NST_IN_REPEAT;
03765 }
03766
03767 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
03768 r = get_min_match_length(target, &d, env);
03769 if (r) break;
03770 if (d == 0) {
03771 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
03772 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
03773 r = quantifiers_memory_node_info(target);
03774 if (r < 0) break;
03775 if (r > 0) {
03776 qn->target_empty_info = r;
03777 }
03778 #endif
03779 #if 0
03780 r = get_max_match_length(target, &d, env);
03781 if (r == 0 && d == 0) {
03782
03783 qn->upper = 1;
03784 if (qn->lower > 1) qn->lower = 1;
03785 if (NTYPE(target) == NT_STR) {
03786 qn->upper = qn->lower = 0;
03787 }
03788 }
03789 #endif
03790 }
03791 }
03792
03793 state |= IN_REPEAT;
03794 if (qn->lower != qn->upper)
03795 state |= IN_VAR_REPEAT;
03796 r = setup_tree(target, reg, state, env);
03797 if (r) break;
03798
03799
03800 #define EXPAND_STRING_MAX_LENGTH 100
03801 if (NTYPE(target) == NT_STR) {
03802 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
03803 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03804 OnigDistance len = NSTRING_LEN(target);
03805 StrNode* sn = NSTR(target);
03806
03807 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03808 int i, n = qn->lower;
03809 onig_node_conv_to_str_node(node, NSTR(target)->flag);
03810 for (i = 0; i < n; i++) {
03811 r = onig_node_str_cat(node, sn->s, sn->end);
03812 if (r) break;
03813 }
03814 onig_node_free(target);
03815 break;
03816 }
03817 }
03818 }
03819
03820 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
03821 if (qn->greedy && (qn->target_empty_info != 0)) {
03822 if (NTYPE(target) == NT_QTFR) {
03823 QtfrNode* tqn = NQTFR(target);
03824 if (IS_NOT_NULL(tqn->head_exact)) {
03825 qn->head_exact = tqn->head_exact;
03826 tqn->head_exact = NULL;
03827 }
03828 }
03829 else {
03830 qn->head_exact = get_head_value_node(qn->target, 1, reg);
03831 }
03832 }
03833 #endif
03834 }
03835 break;
03836
03837 case NT_ENCLOSE:
03838 {
03839 EncloseNode* en = NENCLOSE(node);
03840
03841 switch (en->type) {
03842 case ENCLOSE_OPTION:
03843 {
03844 OnigOptionType options = reg->options;
03845 reg->options = NENCLOSE(node)->option;
03846 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
03847 reg->options = options;
03848 }
03849 break;
03850
03851 case ENCLOSE_MEMORY:
03852 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
03853 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
03854
03855 }
03856 r = setup_tree(en->target, reg, state, env);
03857 break;
03858
03859 case ENCLOSE_STOP_BACKTRACK:
03860 {
03861 Node* target = en->target;
03862 r = setup_tree(target, reg, state, env);
03863 if (NTYPE(target) == NT_QTFR) {
03864 QtfrNode* tqn = NQTFR(target);
03865 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
03866 tqn->greedy != 0) {
03867 int qtype = NTYPE(tqn->target);
03868 if (IS_NODE_TYPE_SIMPLE(qtype))
03869 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
03870 }
03871 }
03872 }
03873 break;
03874 }
03875 }
03876 break;
03877
03878 case NT_ANCHOR:
03879 {
03880 AnchorNode* an = NANCHOR(node);
03881
03882 switch (an->type) {
03883 case ANCHOR_PREC_READ:
03884 r = setup_tree(an->target, reg, state, env);
03885 break;
03886 case ANCHOR_PREC_READ_NOT:
03887 r = setup_tree(an->target, reg, (state | IN_NOT), env);
03888 break;
03889
03890
03891 #define ALLOWED_TYPE_IN_LB \
03892 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
03893 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
03894
03895 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
03896 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
03897
03898 #define ALLOWED_ANCHOR_IN_LB \
03899 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03900 #define ALLOWED_ANCHOR_IN_LB_NOT \
03901 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03902
03903 case ANCHOR_LOOK_BEHIND:
03904 {
03905 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03906 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
03907 if (r < 0) return r;
03908 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03909 r = setup_look_behind(node, reg, env);
03910 if (r != 0) return r;
03911 if (NTYPE(node) != NT_ANCHOR) goto restart;
03912 r = setup_tree(an->target, reg, state, env);
03913 }
03914 break;
03915
03916 case ANCHOR_LOOK_BEHIND_NOT:
03917 {
03918 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03919 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
03920 if (r < 0) return r;
03921 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03922 r = setup_look_behind(node, reg, env);
03923 if (r != 0) return r;
03924 if (NTYPE(node) != NT_ANCHOR) goto restart;
03925 r = setup_tree(an->target, reg, (state | IN_NOT), env);
03926 }
03927 break;
03928 }
03929 }
03930 break;
03931
03932 default:
03933 break;
03934 }
03935
03936 return r;
03937 }
03938
03939
03940 static int
03941 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
03942 UChar skip[], int** int_skip)
03943 {
03944 OnigDistance i, len;
03945
03946 len = end - s;
03947 if (len < ONIG_CHAR_TABLE_SIZE) {
03948 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
03949
03950 for (i = 0; i < len - 1; i++)
03951 skip[s[i]] = len - 1 - i;
03952 }
03953 else {
03954 if (IS_NULL(*int_skip)) {
03955 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
03956 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
03957 }
03958 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int)len;
03959
03960 for (i = 0; i < len - 1; i++)
03961 (*int_skip)[s[i]] = (int)(len - 1 - i);
03962 }
03963 return 0;
03964 }
03965
03966 #define OPT_EXACT_MAXLEN 24
03967
03968 typedef struct {
03969 OnigDistance min;
03970 OnigDistance max;
03971 } MinMaxLen;
03972
03973 typedef struct {
03974 MinMaxLen mmd;
03975 OnigEncoding enc;
03976 OnigOptionType options;
03977 OnigCaseFoldType case_fold_flag;
03978 ScanEnv* scan_env;
03979 } OptEnv;
03980
03981 typedef struct {
03982 int left_anchor;
03983 int right_anchor;
03984 } OptAncInfo;
03985
03986 typedef struct {
03987 MinMaxLen mmd;
03988 OptAncInfo anc;
03989
03990 int reach_end;
03991 int ignore_case;
03992 int len;
03993 UChar s[OPT_EXACT_MAXLEN];
03994 } OptExactInfo;
03995
03996 typedef struct {
03997 MinMaxLen mmd;
03998 OptAncInfo anc;
03999
04000 int value;
04001 UChar map[ONIG_CHAR_TABLE_SIZE];
04002 } OptMapInfo;
04003
04004 typedef struct {
04005 MinMaxLen len;
04006
04007 OptAncInfo anc;
04008 OptExactInfo exb;
04009 OptExactInfo exm;
04010 OptExactInfo expr;
04011
04012 OptMapInfo map;
04013 } NodeOptInfo;
04014
04015
04016 static int
04017 map_position_value(OnigEncoding enc, int i)
04018 {
04019 static const short int ByteValTable[] = {
04020 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
04021 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
04022 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
04023 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
04024 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
04025 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
04026 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
04027 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
04028 };
04029
04030 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
04031 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
04032 return 20;
04033 else
04034 return (int )ByteValTable[i];
04035 }
04036 else
04037 return 4;
04038 }
04039
04040 static int
04041 distance_value(MinMaxLen* mm)
04042 {
04043
04044 static const short int dist_vals[] = {
04045 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
04046 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
04047 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
04048 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
04049 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
04050 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
04051 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
04052 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
04053 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
04054 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
04055 };
04056
04057 OnigDistance d;
04058
04059 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
04060
04061 d = mm->max - mm->min;
04062 if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
04063
04064 return (int )dist_vals[d];
04065 else
04066 return 1;
04067 }
04068
04069 static int
04070 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
04071 {
04072 if (v2 <= 0) return -1;
04073 if (v1 <= 0) return 1;
04074
04075 v1 *= distance_value(d1);
04076 v2 *= distance_value(d2);
04077
04078 if (v2 > v1) return 1;
04079 if (v2 < v1) return -1;
04080
04081 if (d2->min < d1->min) return 1;
04082 if (d2->min > d1->min) return -1;
04083 return 0;
04084 }
04085
04086 static int
04087 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
04088 {
04089 return (a->min == b->min && a->max == b->max) ? 1 : 0;
04090 }
04091
04092
04093 static void
04094 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
04095 {
04096 mml->min = min;
04097 mml->max = max;
04098 }
04099
04100 static void
04101 clear_mml(MinMaxLen* mml)
04102 {
04103 mml->min = mml->max = 0;
04104 }
04105
04106 static void
04107 copy_mml(MinMaxLen* to, MinMaxLen* from)
04108 {
04109 to->min = from->min;
04110 to->max = from->max;
04111 }
04112
04113 static void
04114 add_mml(MinMaxLen* to, MinMaxLen* from)
04115 {
04116 to->min = distance_add(to->min, from->min);
04117 to->max = distance_add(to->max, from->max);
04118 }
04119
04120 #if 0
04121 static void
04122 add_len_mml(MinMaxLen* to, OnigDistance len)
04123 {
04124 to->min = distance_add(to->min, len);
04125 to->max = distance_add(to->max, len);
04126 }
04127 #endif
04128
04129 static void
04130 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
04131 {
04132 if (to->min > from->min) to->min = from->min;
04133 if (to->max < from->max) to->max = from->max;
04134 }
04135
04136 static void
04137 copy_opt_env(OptEnv* to, OptEnv* from)
04138 {
04139 *to = *from;
04140 }
04141
04142 static void
04143 clear_opt_anc_info(OptAncInfo* anc)
04144 {
04145 anc->left_anchor = 0;
04146 anc->right_anchor = 0;
04147 }
04148
04149 static void
04150 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
04151 {
04152 *to = *from;
04153 }
04154
04155 static void
04156 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
04157 OnigDistance left_len, OnigDistance right_len)
04158 {
04159 clear_opt_anc_info(to);
04160
04161 to->left_anchor = left->left_anchor;
04162 if (left_len == 0) {
04163 to->left_anchor |= right->left_anchor;
04164 }
04165
04166 to->right_anchor = right->right_anchor;
04167 if (right_len == 0) {
04168 to->right_anchor |= left->right_anchor;
04169 }
04170 }
04171
04172 static int
04173 is_left_anchor(int anc)
04174 {
04175 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
04176 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
04177 anc == ANCHOR_PREC_READ_NOT)
04178 return 0;
04179
04180 return 1;
04181 }
04182
04183 static int
04184 is_set_opt_anc_info(OptAncInfo* to, int anc)
04185 {
04186 if ((to->left_anchor & anc) != 0) return 1;
04187
04188 return ((to->right_anchor & anc) != 0 ? 1 : 0);
04189 }
04190
04191 static void
04192 add_opt_anc_info(OptAncInfo* to, int anc)
04193 {
04194 if (is_left_anchor(anc))
04195 to->left_anchor |= anc;
04196 else
04197 to->right_anchor |= anc;
04198 }
04199
04200 static void
04201 remove_opt_anc_info(OptAncInfo* to, int anc)
04202 {
04203 if (is_left_anchor(anc))
04204 to->left_anchor &= ~anc;
04205 else
04206 to->right_anchor &= ~anc;
04207 }
04208
04209 static void
04210 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
04211 {
04212 to->left_anchor &= add->left_anchor;
04213 to->right_anchor &= add->right_anchor;
04214 }
04215
04216 static int
04217 is_full_opt_exact_info(OptExactInfo* ex)
04218 {
04219 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
04220 }
04221
04222 static void
04223 clear_opt_exact_info(OptExactInfo* ex)
04224 {
04225 clear_mml(&ex->mmd);
04226 clear_opt_anc_info(&ex->anc);
04227 ex->reach_end = 0;
04228 ex->ignore_case = 0;
04229 ex->len = 0;
04230 ex->s[0] = '\0';
04231 }
04232
04233 static void
04234 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
04235 {
04236 *to = *from;
04237 }
04238
04239 static void
04240 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
04241 {
04242 int i, j, len;
04243 UChar *p, *end;
04244 OptAncInfo tanc;
04245
04246 if (! to->ignore_case && add->ignore_case) {
04247 if (to->len >= add->len) return ;
04248
04249 to->ignore_case = 1;
04250 }
04251
04252 p = add->s;
04253 end = p + add->len;
04254 for (i = to->len; p < end; ) {
04255 len = enclen(enc, p, end);
04256 if (i + len > OPT_EXACT_MAXLEN) break;
04257 for (j = 0; j < len && p < end; j++)
04258 to->s[i++] = *p++;
04259 }
04260
04261 to->len = i;
04262 to->reach_end = (p == end ? add->reach_end : 0);
04263
04264 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
04265 if (! to->reach_end) tanc.right_anchor = 0;
04266 copy_opt_anc_info(&to->anc, &tanc);
04267 }
04268
04269 static void
04270 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
04271 int raw ARG_UNUSED, OnigEncoding enc)
04272 {
04273 int i, j, len;
04274 UChar *p;
04275
04276 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
04277 len = enclen(enc, p, end);
04278 if (i + len > OPT_EXACT_MAXLEN) break;
04279 for (j = 0; j < len && p < end; j++)
04280 to->s[i++] = *p++;
04281 }
04282
04283 to->len = i;
04284 }
04285
04286 static void
04287 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
04288 {
04289 int i, j, len;
04290
04291 if (add->len == 0 || to->len == 0) {
04292 clear_opt_exact_info(to);
04293 return ;
04294 }
04295
04296 if (! is_equal_mml(&to->mmd, &add->mmd)) {
04297 clear_opt_exact_info(to);
04298 return ;
04299 }
04300
04301 for (i = 0; i < to->len && i < add->len; ) {
04302 if (to->s[i] != add->s[i]) break;
04303 len = enclen(env->enc, to->s + i, to->s + to->len);
04304
04305 for (j = 1; j < len; j++) {
04306 if (to->s[i+j] != add->s[i+j]) break;
04307 }
04308 if (j < len) break;
04309 i += len;
04310 }
04311
04312 if (! add->reach_end || i < add->len || i < to->len) {
04313 to->reach_end = 0;
04314 }
04315 to->len = i;
04316 to->ignore_case |= add->ignore_case;
04317
04318 alt_merge_opt_anc_info(&to->anc, &add->anc);
04319 if (! to->reach_end) to->anc.right_anchor = 0;
04320 }
04321
04322 static void
04323 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
04324 {
04325 int v1, v2;
04326
04327 v1 = now->len;
04328 v2 = alt->len;
04329
04330 if (v2 == 0) {
04331 return ;
04332 }
04333 else if (v1 == 0) {
04334 copy_opt_exact_info(now, alt);
04335 return ;
04336 }
04337 else if (v1 <= 2 && v2 <= 2) {
04338
04339 v2 = map_position_value(enc, now->s[0]);
04340 v1 = map_position_value(enc, alt->s[0]);
04341
04342 if (now->len > 1) v1 += 5;
04343 if (alt->len > 1) v2 += 5;
04344 }
04345
04346 if (now->ignore_case == 0) v1 *= 2;
04347 if (alt->ignore_case == 0) v2 *= 2;
04348
04349 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04350 copy_opt_exact_info(now, alt);
04351 }
04352
04353 static void
04354 clear_opt_map_info(OptMapInfo* map)
04355 {
04356 static const OptMapInfo clean_info = {
04357 {0, 0}, {0, 0}, 0,
04358 {
04359 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04360 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04362 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04363 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04364 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04365 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04366 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04367 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04368 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04369 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04370 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04371 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04372 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04373 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04374 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
04375 }
04376 };
04377
04378 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
04379 }
04380
04381 static void
04382 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
04383 {
04384 *to = *from;
04385 }
04386
04387 static void
04388 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
04389 {
04390 if (map->map[c] == 0) {
04391 map->map[c] = 1;
04392 map->value += map_position_value(enc, c);
04393 }
04394 }
04395
04396 static int
04397 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
04398 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
04399 {
04400 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04401 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04402 int i, n;
04403
04404 add_char_opt_map_info(map, p[0], enc);
04405
04406 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
04407 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
04408 if (n < 0) return n;
04409
04410 for (i = 0; i < n; i++) {
04411 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
04412 add_char_opt_map_info(map, buf[0], enc);
04413 }
04414
04415 return 0;
04416 }
04417
04418 static void
04419 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
04420 {
04421 const int z = 1<<15;
04422
04423 int v1, v2;
04424
04425 if (alt->value == 0) return ;
04426 if (now->value == 0) {
04427 copy_opt_map_info(now, alt);
04428 return ;
04429 }
04430
04431 v1 = z / now->value;
04432 v2 = z / alt->value;
04433 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04434 copy_opt_map_info(now, alt);
04435 }
04436
04437 static int
04438 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
04439 {
04440 #define COMP_EM_BASE 20
04441 int ve, vm;
04442
04443 if (m->value <= 0) return -1;
04444
04445 ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
04446 vm = COMP_EM_BASE * 5 * 2 / m->value;
04447 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
04448 }
04449
04450 static void
04451 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
04452 {
04453 int i, val;
04454
04455
04456 if (to->value == 0) return ;
04457 if (add->value == 0 || to->mmd.max < add->mmd.min) {
04458 clear_opt_map_info(to);
04459 return ;
04460 }
04461
04462 alt_merge_mml(&to->mmd, &add->mmd);
04463
04464 val = 0;
04465 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
04466 if (add->map[i])
04467 to->map[i] = 1;
04468
04469 if (to->map[i])
04470 val += map_position_value(enc, i);
04471 }
04472 to->value = val;
04473
04474 alt_merge_opt_anc_info(&to->anc, &add->anc);
04475 }
04476
04477 static void
04478 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
04479 {
04480 copy_mml(&(opt->exb.mmd), mmd);
04481 copy_mml(&(opt->expr.mmd), mmd);
04482 copy_mml(&(opt->map.mmd), mmd);
04483 }
04484
04485 static void
04486 clear_node_opt_info(NodeOptInfo* opt)
04487 {
04488 clear_mml(&opt->len);
04489 clear_opt_anc_info(&opt->anc);
04490 clear_opt_exact_info(&opt->exb);
04491 clear_opt_exact_info(&opt->exm);
04492 clear_opt_exact_info(&opt->expr);
04493 clear_opt_map_info(&opt->map);
04494 }
04495
04496 static void
04497 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
04498 {
04499 *to = *from;
04500 }
04501
04502 static void
04503 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
04504 {
04505 int exb_reach, exm_reach;
04506 OptAncInfo tanc;
04507
04508 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
04509 copy_opt_anc_info(&to->anc, &tanc);
04510
04511 if (add->exb.len > 0 && to->len.max == 0) {
04512 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
04513 to->len.max, add->len.max);
04514 copy_opt_anc_info(&add->exb.anc, &tanc);
04515 }
04516
04517 if (add->map.value > 0 && to->len.max == 0) {
04518 if (add->map.mmd.max == 0)
04519 add->map.anc.left_anchor |= to->anc.left_anchor;
04520 }
04521
04522 exb_reach = to->exb.reach_end;
04523 exm_reach = to->exm.reach_end;
04524
04525 if (add->len.max != 0)
04526 to->exb.reach_end = to->exm.reach_end = 0;
04527
04528 if (add->exb.len > 0) {
04529 if (exb_reach) {
04530 concat_opt_exact_info(&to->exb, &add->exb, enc);
04531 clear_opt_exact_info(&add->exb);
04532 }
04533 else if (exm_reach) {
04534 concat_opt_exact_info(&to->exm, &add->exb, enc);
04535 clear_opt_exact_info(&add->exb);
04536 }
04537 }
04538 select_opt_exact_info(enc, &to->exm, &add->exb);
04539 select_opt_exact_info(enc, &to->exm, &add->exm);
04540
04541 if (to->expr.len > 0) {
04542 if (add->len.max > 0) {
04543 if (to->expr.len > (int )add->len.max)
04544 to->expr.len = (int)add->len.max;
04545
04546 if (to->expr.mmd.max == 0)
04547 select_opt_exact_info(enc, &to->exb, &to->expr);
04548 else
04549 select_opt_exact_info(enc, &to->exm, &to->expr);
04550 }
04551 }
04552 else if (add->expr.len > 0) {
04553 copy_opt_exact_info(&to->expr, &add->expr);
04554 }
04555
04556 select_opt_map_info(&to->map, &add->map);
04557
04558 add_mml(&to->len, &add->len);
04559 }
04560
04561 static void
04562 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
04563 {
04564 alt_merge_opt_anc_info (&to->anc, &add->anc);
04565 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
04566 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
04567 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
04568 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
04569
04570 alt_merge_mml(&to->len, &add->len);
04571 }
04572
04573
04574 #define MAX_NODE_OPT_INFO_REF_COUNT 5
04575
04576 static int
04577 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
04578 {
04579 int type;
04580 int r = 0;
04581
04582 clear_node_opt_info(opt);
04583 set_bound_node_opt_info(opt, &env->mmd);
04584
04585 type = NTYPE(node);
04586 switch (type) {
04587 case NT_LIST:
04588 {
04589 OptEnv nenv;
04590 NodeOptInfo nopt;
04591 Node* nd = node;
04592
04593 copy_opt_env(&nenv, env);
04594 do {
04595 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
04596 if (r == 0) {
04597 add_mml(&nenv.mmd, &nopt.len);
04598 concat_left_node_opt_info(env->enc, opt, &nopt);
04599 }
04600 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
04601 }
04602 break;
04603
04604 case NT_ALT:
04605 {
04606 NodeOptInfo nopt;
04607 Node* nd = node;
04608
04609 do {
04610 r = optimize_node_left(NCAR(nd), &nopt, env);
04611 if (r == 0) {
04612 if (nd == node) copy_node_opt_info(opt, &nopt);
04613 else alt_merge_node_opt_info(opt, &nopt, env);
04614 }
04615 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
04616 }
04617 break;
04618
04619 case NT_STR:
04620 {
04621 StrNode* sn = NSTR(node);
04622 OnigDistance slen = sn->end - sn->s;
04623 int is_raw = NSTRING_IS_RAW(node);
04624
04625 if (! NSTRING_IS_AMBIG(node)) {
04626 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04627 NSTRING_IS_RAW(node), env->enc);
04628 if (slen > 0) {
04629 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
04630 }
04631 set_mml(&opt->len, slen, slen);
04632 }
04633 else {
04634 OnigDistance max;
04635
04636 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
04637 int n = onigenc_strlen(env->enc, sn->s, sn->end);
04638 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
04639 }
04640 else {
04641 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04642 is_raw, env->enc);
04643 opt->exb.ignore_case = 1;
04644
04645 if (slen > 0) {
04646 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
04647 env->enc, env->case_fold_flag);
04648 if (r != 0) break;
04649 }
04650
04651 max = slen;
04652 }
04653
04654 set_mml(&opt->len, slen, max);
04655 }
04656
04657 if ((OnigDistance)opt->exb.len == slen)
04658 opt->exb.reach_end = 1;
04659 }
04660 break;
04661
04662 case NT_CCLASS:
04663 {
04664 int i, z;
04665 CClassNode* cc = NCCLASS(node);
04666
04667
04668
04669 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
04670 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04671 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04672
04673 set_mml(&opt->len, min, max);
04674 }
04675 else {
04676 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04677 z = BITSET_AT(cc->bs, i);
04678 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
04679 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04680 }
04681 }
04682 set_mml(&opt->len, 1, 1);
04683 }
04684 }
04685 break;
04686
04687 case NT_CTYPE:
04688 {
04689 int i, min, max;
04690
04691 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04692
04693 if (max == 1) {
04694 min = 1;
04695
04696 switch (NCTYPE(node)->ctype) {
04697 case ONIGENC_CTYPE_WORD:
04698 if (NCTYPE(node)->not != 0) {
04699 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04700 if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
04701 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04702 }
04703 }
04704 }
04705 else {
04706 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04707 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
04708 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04709 }
04710 }
04711 }
04712 break;
04713 }
04714 }
04715 else {
04716 min = ONIGENC_MBC_MINLEN(env->enc);
04717 }
04718 set_mml(&opt->len, min, max);
04719 }
04720 break;
04721
04722 case NT_CANY:
04723 {
04724 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04725 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04726 set_mml(&opt->len, min, max);
04727 }
04728 break;
04729
04730 case NT_ANCHOR:
04731 switch (NANCHOR(node)->type) {
04732 case ANCHOR_BEGIN_BUF:
04733 case ANCHOR_BEGIN_POSITION:
04734 case ANCHOR_BEGIN_LINE:
04735 case ANCHOR_END_BUF:
04736 case ANCHOR_SEMI_END_BUF:
04737 case ANCHOR_END_LINE:
04738 case ANCHOR_LOOK_BEHIND:
04739 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
04740 break;
04741
04742 case ANCHOR_PREC_READ:
04743 {
04744 NodeOptInfo nopt;
04745
04746 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
04747 if (r == 0) {
04748 if (nopt.exb.len > 0)
04749 copy_opt_exact_info(&opt->expr, &nopt.exb);
04750 else if (nopt.exm.len > 0)
04751 copy_opt_exact_info(&opt->expr, &nopt.exm);
04752
04753 opt->expr.reach_end = 0;
04754
04755 if (nopt.map.value > 0)
04756 copy_opt_map_info(&opt->map, &nopt.map);
04757 }
04758 }
04759 break;
04760
04761 case ANCHOR_PREC_READ_NOT:
04762 case ANCHOR_LOOK_BEHIND_NOT:
04763 break;
04764 }
04765 break;
04766
04767 case NT_BREF:
04768 {
04769 int i;
04770 int* backs;
04771 OnigDistance min, max, tmin, tmax;
04772 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
04773 BRefNode* br = NBREF(node);
04774
04775 if (br->state & NST_RECURSION) {
04776 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04777 break;
04778 }
04779 backs = BACKREFS_P(br);
04780 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
04781 if (r != 0) break;
04782 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
04783 if (r != 0) break;
04784 for (i = 1; i < br->back_num; i++) {
04785 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
04786 if (r != 0) break;
04787 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
04788 if (r != 0) break;
04789 if (min > tmin) min = tmin;
04790 if (max < tmax) max = tmax;
04791 }
04792 if (r == 0) set_mml(&opt->len, min, max);
04793 }
04794 break;
04795
04796 #ifdef USE_SUBEXP_CALL
04797 case NT_CALL:
04798 if (IS_CALL_RECURSION(NCALL(node)))
04799 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04800 else {
04801 OnigOptionType save = env->options;
04802 env->options = NENCLOSE(NCALL(node)->target)->option;
04803 r = optimize_node_left(NCALL(node)->target, opt, env);
04804 env->options = save;
04805 }
04806 break;
04807 #endif
04808
04809 case NT_QTFR:
04810 {
04811 int i;
04812 OnigDistance min, max;
04813 NodeOptInfo nopt;
04814 QtfrNode* qn = NQTFR(node);
04815
04816 r = optimize_node_left(qn->target, &nopt, env);
04817 if (r) break;
04818
04819 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
04820 if (env->mmd.max == 0 &&
04821 NTYPE(qn->target) == NT_CANY && qn->greedy) {
04822 if (IS_MULTILINE(env->options))
04823 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
04824 else
04825 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
04826 }
04827 }
04828 else {
04829 if (qn->lower > 0) {
04830 copy_node_opt_info(opt, &nopt);
04831 if (nopt.exb.len > 0) {
04832 if (nopt.exb.reach_end) {
04833 for (i = 2; i <= qn->lower &&
04834 ! is_full_opt_exact_info(&opt->exb); i++) {
04835 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
04836 }
04837 if (i < qn->lower) {
04838 opt->exb.reach_end = 0;
04839 }
04840 }
04841 }
04842
04843 if (qn->lower != qn->upper) {
04844 opt->exb.reach_end = 0;
04845 opt->exm.reach_end = 0;
04846 }
04847 if (qn->lower > 1)
04848 opt->exm.reach_end = 0;
04849 }
04850 }
04851
04852 min = distance_multiply(nopt.len.min, qn->lower);
04853 if (IS_REPEAT_INFINITE(qn->upper))
04854 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
04855 else
04856 max = distance_multiply(nopt.len.max, qn->upper);
04857
04858 set_mml(&opt->len, min, max);
04859 }
04860 break;
04861
04862 case NT_ENCLOSE:
04863 {
04864 EncloseNode* en = NENCLOSE(node);
04865
04866 switch (en->type) {
04867 case ENCLOSE_OPTION:
04868 {
04869 OnigOptionType save = env->options;
04870
04871 env->options = en->option;
04872 r = optimize_node_left(en->target, opt, env);
04873 env->options = save;
04874 }
04875 break;
04876
04877 case ENCLOSE_MEMORY:
04878 #ifdef USE_SUBEXP_CALL
04879 en->opt_count++;
04880 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
04881 OnigDistance min, max;
04882
04883 min = 0;
04884 max = ONIG_INFINITE_DISTANCE;
04885 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
04886 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
04887 set_mml(&opt->len, min, max);
04888 }
04889 else
04890 #endif
04891 {
04892 r = optimize_node_left(en->target, opt, env);
04893
04894 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
04895 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
04896 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
04897 }
04898 }
04899 break;
04900
04901 case ENCLOSE_STOP_BACKTRACK:
04902 r = optimize_node_left(en->target, opt, env);
04903 break;
04904 }
04905 }
04906 break;
04907
04908 default:
04909 #ifdef ONIG_DEBUG
04910 if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
04911 NTYPE(node));
04912 #endif
04913 r = ONIGERR_TYPE_BUG;
04914 break;
04915 }
04916
04917 return r;
04918 }
04919
04920 static int
04921 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
04922 {
04923 int r;
04924
04925 if (e->len == 0) return 0;
04926
04927 if (e->ignore_case) {
04928 reg->exact = (UChar* )xmalloc(e->len);
04929 CHECK_NULL_RETURN_MEMERR(reg->exact);
04930 xmemcpy(reg->exact, e->s, e->len);
04931 reg->exact_end = reg->exact + e->len;
04932 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
04933 }
04934 else {
04935 int allow_reverse;
04936
04937 reg->exact = str_dup(e->s, e->s + e->len);
04938 CHECK_NULL_RETURN_MEMERR(reg->exact);
04939 reg->exact_end = reg->exact + e->len;
04940
04941 allow_reverse =
04942 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
04943
04944 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
04945 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
04946 reg->map, &(reg->int_map));
04947 if (r) return r;
04948
04949 reg->optimize = (allow_reverse != 0
04950 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
04951 }
04952 else {
04953 reg->optimize = ONIG_OPTIMIZE_EXACT;
04954 }
04955 }
04956
04957 reg->dmin = e->mmd.min;
04958 reg->dmax = e->mmd.max;
04959
04960 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04961 reg->threshold_len = (int)(reg->dmin + (reg->exact_end - reg->exact));
04962 }
04963
04964 return 0;
04965 }
04966
04967 static void
04968 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
04969 {
04970 int i;
04971
04972 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
04973 reg->map[i] = m->map[i];
04974
04975 reg->optimize = ONIG_OPTIMIZE_MAP;
04976 reg->dmin = m->mmd.min;
04977 reg->dmax = m->mmd.max;
04978
04979 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04980 reg->threshold_len = (int)(reg->dmin + 1);
04981 }
04982 }
04983
04984 static void
04985 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
04986 {
04987 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
04988 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
04989 }
04990
04991 #ifdef ONIG_DEBUG
04992 static void print_optimize_info(FILE* f, regex_t* reg);
04993 #endif
04994
04995 static int
04996 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
04997 {
04998
04999 int r;
05000 NodeOptInfo opt;
05001 OptEnv env;
05002
05003 env.enc = reg->enc;
05004 env.options = reg->options;
05005 env.case_fold_flag = reg->case_fold_flag;
05006 env.scan_env = scan_env;
05007 clear_mml(&env.mmd);
05008
05009 r = optimize_node_left(node, &opt, &env);
05010 if (r) return r;
05011
05012 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
05013 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
05014 ANCHOR_LOOK_BEHIND);
05015
05016 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
05017
05018 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
05019 reg->anchor_dmin = opt.len.min;
05020 reg->anchor_dmax = opt.len.max;
05021 }
05022
05023 if (opt.exb.len > 0 || opt.exm.len > 0) {
05024 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
05025 if (opt.map.value > 0 &&
05026 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
05027 goto set_map;
05028 }
05029 else {
05030 r = set_optimize_exact_info(reg, &opt.exb);
05031 set_sub_anchor(reg, &opt.exb.anc);
05032 }
05033 }
05034 else if (opt.map.value > 0) {
05035 set_map:
05036 set_optimize_map_info(reg, &opt.map);
05037 set_sub_anchor(reg, &opt.map.anc);
05038 }
05039 else {
05040 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
05041 if (opt.len.max == 0)
05042 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
05043 }
05044
05045 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
05046 if (!onig_is_prelude()) print_optimize_info(stderr, reg);
05047 #endif
05048 return r;
05049 }
05050
05051 static void
05052 clear_optimize_info(regex_t* reg)
05053 {
05054 reg->optimize = ONIG_OPTIMIZE_NONE;
05055 reg->anchor = 0;
05056 reg->anchor_dmin = 0;
05057 reg->anchor_dmax = 0;
05058 reg->sub_anchor = 0;
05059 reg->exact_end = (UChar* )NULL;
05060 reg->threshold_len = 0;
05061 if (IS_NOT_NULL(reg->exact)) {
05062 xfree(reg->exact);
05063 reg->exact = (UChar* )NULL;
05064 }
05065 }
05066
05067 #ifdef ONIG_DEBUG
05068
05069 static void print_enc_string(FILE* fp, OnigEncoding enc,
05070 const UChar *s, const UChar *end)
05071 {
05072 fprintf(fp, "\nPATTERN: /");
05073
05074 if (ONIGENC_MBC_MINLEN(enc) > 1) {
05075 const UChar *p;
05076 OnigCodePoint code;
05077
05078 p = s;
05079 while (p < end) {
05080 code = ONIGENC_MBC_TO_CODE(enc, p, end);
05081 if (code >= 0x80) {
05082 fprintf(fp, " 0x%04x ", (int )code);
05083 }
05084 else {
05085 fputc((int )code, fp);
05086 }
05087
05088 p += enclen(enc, p, end);
05089 }
05090 }
05091 else {
05092 while (s < end) {
05093 fputc((int )*s, fp);
05094 s++;
05095 }
05096 }
05097
05098 fprintf(fp, "/ (%s)\n", enc->name);
05099 }
05100
05101 static void
05102 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
05103 {
05104 if (a == ONIG_INFINITE_DISTANCE)
05105 fputs("inf", f);
05106 else
05107 fprintf(f, "(%"PRIuSIZE")", a);
05108
05109 fputs("-", f);
05110
05111 if (b == ONIG_INFINITE_DISTANCE)
05112 fputs("inf", f);
05113 else
05114 fprintf(f, "(%"PRIuSIZE")", b);
05115 }
05116
05117 static void
05118 print_anchor(FILE* f, int anchor)
05119 {
05120 int q = 0;
05121
05122 fprintf(f, "[");
05123
05124 if (anchor & ANCHOR_BEGIN_BUF) {
05125 fprintf(f, "begin-buf");
05126 q = 1;
05127 }
05128 if (anchor & ANCHOR_BEGIN_LINE) {
05129 if (q) fprintf(f, ", ");
05130 q = 1;
05131 fprintf(f, "begin-line");
05132 }
05133 if (anchor & ANCHOR_BEGIN_POSITION) {
05134 if (q) fprintf(f, ", ");
05135 q = 1;
05136 fprintf(f, "begin-pos");
05137 }
05138 if (anchor & ANCHOR_END_BUF) {
05139 if (q) fprintf(f, ", ");
05140 q = 1;
05141 fprintf(f, "end-buf");
05142 }
05143 if (anchor & ANCHOR_SEMI_END_BUF) {
05144 if (q) fprintf(f, ", ");
05145 q = 1;
05146 fprintf(f, "semi-end-buf");
05147 }
05148 if (anchor & ANCHOR_END_LINE) {
05149 if (q) fprintf(f, ", ");
05150 q = 1;
05151 fprintf(f, "end-line");
05152 }
05153 if (anchor & ANCHOR_ANYCHAR_STAR) {
05154 if (q) fprintf(f, ", ");
05155 q = 1;
05156 fprintf(f, "anychar-star");
05157 }
05158 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
05159 if (q) fprintf(f, ", ");
05160 fprintf(f, "anychar-star-pl");
05161 }
05162
05163 fprintf(f, "]");
05164 }
05165
05166 static void
05167 print_optimize_info(FILE* f, regex_t* reg)
05168 {
05169 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
05170 "EXACT_IC", "MAP" };
05171
05172 fprintf(f, "optimize: %s\n", on[reg->optimize]);
05173 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
05174 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
05175 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
05176 fprintf(f, "\n");
05177
05178 if (reg->optimize) {
05179 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
05180 fprintf(f, "\n");
05181 }
05182 fprintf(f, "\n");
05183
05184 if (reg->exact) {
05185 UChar *p;
05186 fprintf(f, "exact: [");
05187 for (p = reg->exact; p < reg->exact_end; p++) {
05188 fputc(*p, f);
05189 }
05190 fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
05191 }
05192 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
05193 int c, i, n = 0;
05194
05195 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
05196 if (reg->map[i]) n++;
05197
05198 fprintf(f, "map: n=%d\n", n);
05199 if (n > 0) {
05200 c = 0;
05201 fputc('[', f);
05202 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
05203 if (reg->map[i] != 0) {
05204 if (c > 0) fputs(", ", f);
05205 c++;
05206 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
05207 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
05208 fputc(i, f);
05209 else
05210 fprintf(f, "%d", i);
05211 }
05212 }
05213 fprintf(f, "]\n");
05214 }
05215 }
05216 }
05217 #endif
05218
05219
05220 extern void
05221 onig_free_body(regex_t* reg)
05222 {
05223 if (IS_NOT_NULL(reg)) {
05224 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
05225 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
05226 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
05227 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
05228 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
05229 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
05230
05231 #ifdef USE_NAMED_GROUP
05232 onig_names_free(reg);
05233 #endif
05234 }
05235 }
05236
05237 extern void
05238 onig_free(regex_t* reg)
05239 {
05240 if (IS_NOT_NULL(reg)) {
05241 onig_free_body(reg);
05242 xfree(reg);
05243 }
05244 }
05245
05246 size_t
05247 onig_memsize(const regex_t *reg)
05248 {
05249 size_t size = sizeof(regex_t);
05250 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
05251 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
05252 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05253 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05254 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
05255 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
05256
05257 return size;
05258 }
05259
05260 #define REGEX_TRANSFER(to,from) do {\
05261 (to)->state = ONIG_STATE_MODIFY;\
05262 onig_free_body(to);\
05263 xmemcpy(to, from, sizeof(regex_t));\
05264 xfree(from);\
05265 } while (0)
05266
05267 extern void
05268 onig_transfer(regex_t* to, regex_t* from)
05269 {
05270 THREAD_ATOMIC_START;
05271 REGEX_TRANSFER(to, from);
05272 THREAD_ATOMIC_END;
05273 }
05274
05275 #define REGEX_CHAIN_HEAD(reg) do {\
05276 while (IS_NOT_NULL((reg)->chain)) {\
05277 (reg) = (reg)->chain;\
05278 }\
05279 } while (0)
05280
05281 extern void
05282 onig_chain_link_add(regex_t* to, regex_t* add)
05283 {
05284 THREAD_ATOMIC_START;
05285 REGEX_CHAIN_HEAD(to);
05286 to->chain = add;
05287 THREAD_ATOMIC_END;
05288 }
05289
05290 extern void
05291 onig_chain_reduce(regex_t* reg)
05292 {
05293 regex_t *head, *prev;
05294
05295 prev = reg;
05296 head = prev->chain;
05297 if (IS_NOT_NULL(head)) {
05298 reg->state = ONIG_STATE_MODIFY;
05299 while (IS_NOT_NULL(head->chain)) {
05300 prev = head;
05301 head = head->chain;
05302 }
05303 prev->chain = (regex_t* )NULL;
05304 REGEX_TRANSFER(reg, head);
05305 }
05306 }
05307
05308 #ifdef ONIG_DEBUG
05309 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
05310 #endif
05311 #ifdef ONIG_DEBUG_PARSE_TREE
05312 static void print_tree P_((FILE* f, Node* node));
05313 #endif
05314
05315 extern int
05316 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05317 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
05318 {
05319 #define COMPILE_INIT_SIZE 20
05320
05321 int r;
05322 OnigDistance init_size;
05323 Node* root;
05324 ScanEnv scan_env = {0};
05325 #ifdef USE_SUBEXP_CALL
05326 UnsetAddrList uslist;
05327 #endif
05328
05329 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
05330
05331 scan_env.sourcefile = sourcefile;
05332 scan_env.sourceline = sourceline;
05333 reg->state = ONIG_STATE_COMPILING;
05334
05335 #ifdef ONIG_DEBUG
05336 if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
05337 #endif
05338
05339 if (reg->alloc == 0) {
05340 init_size = (pattern_end - pattern) * 2;
05341 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
05342 r = BBUF_INIT(reg, init_size);
05343 if (r != 0) goto end;
05344 }
05345 else
05346 reg->used = 0;
05347
05348 reg->num_mem = 0;
05349 reg->num_repeat = 0;
05350 reg->num_null_check = 0;
05351 reg->repeat_range_alloc = 0;
05352 reg->repeat_range = (OnigRepeatRange* )NULL;
05353 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05354 reg->num_comb_exp_check = 0;
05355 #endif
05356
05357 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
05358 if (r != 0) goto err;
05359
05360 #ifdef ONIG_DEBUG_PARSE_TREE
05361 # if 0
05362 fprintf(stderr, "ORIGINAL PARSE TREE:\n");
05363 if (!onig_is_prelude()) {
05364 print_tree(stderr, root);
05365 }
05366 # endif
05367 #endif
05368
05369 #ifdef USE_NAMED_GROUP
05370
05371 if (scan_env.num_named > 0 &&
05372 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
05373 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
05374 if (scan_env.num_named != scan_env.num_mem)
05375 r = disable_noname_group_capture(&root, reg, &scan_env);
05376 else
05377 r = numbered_ref_check(root);
05378
05379 if (r != 0) goto err;
05380 }
05381 #endif
05382
05383 #ifdef USE_SUBEXP_CALL
05384 if (scan_env.num_call > 0) {
05385 r = unset_addr_list_init(&uslist, scan_env.num_call);
05386 if (r != 0) goto err;
05387 scan_env.unset_addr_list = &uslist;
05388 r = setup_subexp_call(root, &scan_env);
05389 if (r != 0) goto err_unset;
05390 r = subexp_recursive_check_trav(root, &scan_env);
05391 if (r < 0) goto err_unset;
05392 r = subexp_inf_recursive_check_trav(root, &scan_env);
05393 if (r != 0) goto err_unset;
05394
05395 reg->num_call = scan_env.num_call;
05396 }
05397 else
05398 reg->num_call = 0;
05399 #endif
05400
05401 r = setup_tree(root, reg, 0, &scan_env);
05402 if (r != 0) goto err_unset;
05403
05404 #ifdef ONIG_DEBUG_PARSE_TREE
05405 if (!onig_is_prelude()) print_tree(stderr, root);
05406 #endif
05407
05408 reg->capture_history = scan_env.capture_history;
05409 reg->bt_mem_start = scan_env.bt_mem_start;
05410 reg->bt_mem_start |= reg->capture_history;
05411 if (IS_FIND_CONDITION(reg->options))
05412 BIT_STATUS_ON_ALL(reg->bt_mem_end);
05413 else {
05414 reg->bt_mem_end = scan_env.bt_mem_end;
05415 reg->bt_mem_end |= reg->capture_history;
05416 }
05417
05418 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05419 if (scan_env.backrefed_mem == 0
05420 #ifdef USE_SUBEXP_CALL
05421 || scan_env.num_call == 0
05422 #endif
05423 ) {
05424 setup_comb_exp_check(root, 0, &scan_env);
05425 #ifdef USE_SUBEXP_CALL
05426 if (scan_env.has_recursion != 0) {
05427 scan_env.num_comb_exp_check = 0;
05428 }
05429 else
05430 #endif
05431 if (scan_env.comb_exp_max_regnum > 0) {
05432 int i;
05433 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
05434 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
05435 scan_env.num_comb_exp_check = 0;
05436 break;
05437 }
05438 }
05439 }
05440 }
05441
05442 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
05443 #endif
05444
05445 clear_optimize_info(reg);
05446 #ifndef ONIG_DONT_OPTIMIZE
05447 r = set_optimize_info_from_tree(root, reg, &scan_env);
05448 if (r != 0) goto err_unset;
05449 #endif
05450
05451 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
05452 xfree(scan_env.mem_nodes_dynamic);
05453 scan_env.mem_nodes_dynamic = (Node** )NULL;
05454 }
05455
05456 r = compile_tree(root, reg);
05457 if (r == 0) {
05458 r = add_opcode(reg, OP_END);
05459 #ifdef USE_SUBEXP_CALL
05460 if (scan_env.num_call > 0) {
05461 r = unset_addr_list_fix(&uslist, reg);
05462 unset_addr_list_end(&uslist);
05463 if (r) goto err;
05464 }
05465 #endif
05466
05467 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
05468 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
05469 else {
05470 if (reg->bt_mem_start != 0)
05471 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
05472 else
05473 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
05474 }
05475 }
05476 #ifdef USE_SUBEXP_CALL
05477 else if (scan_env.num_call > 0) {
05478 unset_addr_list_end(&uslist);
05479 }
05480 #endif
05481 onig_node_free(root);
05482
05483 #ifdef ONIG_DEBUG_COMPILE
05484 #ifdef USE_NAMED_GROUP
05485 if (!onig_is_prelude()) onig_print_names(stderr, reg);
05486 #endif
05487 if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
05488 #endif
05489
05490 end:
05491 reg->state = ONIG_STATE_NORMAL;
05492 return r;
05493
05494 err_unset:
05495 #ifdef USE_SUBEXP_CALL
05496 if (scan_env.num_call > 0) {
05497 unset_addr_list_end(&uslist);
05498 }
05499 #endif
05500 err:
05501 if (IS_NOT_NULL(scan_env.error)) {
05502 if (IS_NOT_NULL(einfo)) {
05503 einfo->enc = scan_env.enc;
05504 einfo->par = scan_env.error;
05505 einfo->par_end = scan_env.error_end;
05506 }
05507 }
05508
05509 onig_node_free(root);
05510 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
05511 xfree(scan_env.mem_nodes_dynamic);
05512 return r;
05513 }
05514
05515 #ifdef USE_RECOMPILE_API
05516 extern int
05517 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05518 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
05519 OnigErrorInfo* einfo)
05520 {
05521 int r;
05522 regex_t *new_reg;
05523
05524 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
05525 if (r) return r;
05526 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
05527 onig_transfer(reg, new_reg);
05528 }
05529 else {
05530 onig_chain_link_add(reg, new_reg);
05531 }
05532 return 0;
05533 }
05534 #endif
05535
05536 static int onig_inited = 0;
05537
05538 extern int
05539 onig_reg_init(regex_t* reg, OnigOptionType option,
05540 OnigCaseFoldType case_fold_flag,
05541 OnigEncoding enc, const OnigSyntaxType* syntax)
05542 {
05543 if (! onig_inited)
05544 onig_init();
05545
05546 if (IS_NULL(reg))
05547 return ONIGERR_INVALID_ARGUMENT;
05548
05549 if (ONIGENC_IS_UNDEF(enc))
05550 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
05551
05552 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
05553 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
05554 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
05555 }
05556
05557 (reg)->state = ONIG_STATE_MODIFY;
05558
05559 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
05560 option |= syntax->options;
05561 option &= ~ONIG_OPTION_SINGLELINE;
05562 }
05563 else
05564 option |= syntax->options;
05565
05566 (reg)->enc = enc;
05567 (reg)->options = option;
05568 (reg)->syntax = syntax;
05569 (reg)->optimize = 0;
05570 (reg)->exact = (UChar* )NULL;
05571 (reg)->int_map = (int* )NULL;
05572 (reg)->int_map_backward = (int* )NULL;
05573 (reg)->chain = (regex_t* )NULL;
05574
05575 (reg)->p = (UChar* )NULL;
05576 (reg)->alloc = 0;
05577 (reg)->used = 0;
05578 (reg)->name_table = (void* )NULL;
05579
05580 (reg)->case_fold_flag = case_fold_flag;
05581 return 0;
05582 }
05583
05584 extern int
05585 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
05586 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
05587 OnigSyntaxType* syntax, OnigErrorInfo* einfo)
05588 {
05589 int r;
05590
05591 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05592 if (r) return r;
05593
05594 r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
05595 return r;
05596 }
05597
05598 extern int
05599 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
05600 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
05601 OnigErrorInfo* einfo)
05602 {
05603 int r;
05604
05605 *reg = (regex_t* )xmalloc(sizeof(regex_t));
05606 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
05607
05608 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05609 if (r) goto err;
05610
05611 r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
05612 if (r) {
05613 err:
05614 onig_free(*reg);
05615 *reg = NULL;
05616 }
05617 return r;
05618 }
05619
05620
05621 extern int
05622 onig_init(void)
05623 {
05624 if (onig_inited != 0)
05625 return 0;
05626
05627 THREAD_SYSTEM_INIT;
05628 THREAD_ATOMIC_START;
05629
05630 onig_inited = 1;
05631
05632 onigenc_init();
05633
05634
05635 #ifdef ONIG_DEBUG_STATISTICS
05636 onig_statistics_init();
05637 #endif
05638
05639 THREAD_ATOMIC_END;
05640 return 0;
05641 }
05642
05643
05644 extern int
05645 onig_end(void)
05646 {
05647 THREAD_ATOMIC_START;
05648
05649 #ifdef ONIG_DEBUG_STATISTICS
05650 if (!onig_is_prelude()) onig_print_statistics(stderr);
05651 #endif
05652
05653 #ifdef USE_SHARED_CCLASS_TABLE
05654 onig_free_shared_cclass_table();
05655 #endif
05656
05657 #ifdef USE_PARSE_TREE_NODE_RECYCLE
05658 onig_free_node_list();
05659 #endif
05660
05661 onig_inited = 0;
05662
05663 THREAD_ATOMIC_END;
05664 THREAD_SYSTEM_END;
05665 return 0;
05666 }
05667
05668 extern int
05669 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
05670 {
05671 OnigCodePoint n, *data;
05672 OnigCodePoint low, high, x;
05673
05674 GET_CODE_POINT(n, p);
05675 data = (OnigCodePoint* )p;
05676 data++;
05677
05678 for (low = 0, high = n; low < high; ) {
05679 x = (low + high) >> 1;
05680 if (code > data[x * 2 + 1])
05681 low = x + 1;
05682 else
05683 high = x;
05684 }
05685
05686 return ((low < n && code >= data[low * 2]) ? 1 : 0);
05687 }
05688
05689 extern int
05690 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
05691 {
05692 int found;
05693
05694 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
05695 if (IS_NULL(cc->mbuf)) {
05696 found = 0;
05697 }
05698 else {
05699 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
05700 }
05701 }
05702 else {
05703 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
05704 }
05705
05706 if (IS_NCCLASS_NOT(cc))
05707 return !found;
05708 else
05709 return found;
05710 }
05711
05712 extern int
05713 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
05714 {
05715 int len;
05716
05717 if (ONIGENC_MBC_MINLEN(enc) > 1) {
05718 len = 2;
05719 }
05720 else {
05721 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
05722 }
05723 return onig_is_code_in_cc_len(len, code, cc);
05724 }
05725
05726
05727 #ifdef ONIG_DEBUG
05728
05729
05730 #define ARG_SPECIAL -1
05731 #define ARG_NON 0
05732 #define ARG_RELADDR 1
05733 #define ARG_ABSADDR 2
05734 #define ARG_LENGTH 3
05735 #define ARG_MEMNUM 4
05736 #define ARG_OPTION 5
05737 #define ARG_STATE_CHECK 6
05738
05739 OnigOpInfoType OnigOpInfo[] = {
05740 { OP_FINISH, "finish", ARG_NON },
05741 { OP_END, "end", ARG_NON },
05742 { OP_EXACT1, "exact1", ARG_SPECIAL },
05743 { OP_EXACT2, "exact2", ARG_SPECIAL },
05744 { OP_EXACT3, "exact3", ARG_SPECIAL },
05745 { OP_EXACT4, "exact4", ARG_SPECIAL },
05746 { OP_EXACT5, "exact5", ARG_SPECIAL },
05747 { OP_EXACTN, "exactn", ARG_SPECIAL },
05748 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
05749 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
05750 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
05751 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
05752 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
05753 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
05754 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
05755 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
05756 { OP_CCLASS, "cclass", ARG_SPECIAL },
05757 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
05758 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
05759 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
05760 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
05761 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
05762 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
05763 { OP_ANYCHAR, "anychar", ARG_NON },
05764 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
05765 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
05766 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
05767 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
05768 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
05769 { OP_WORD, "word", ARG_NON },
05770 { OP_NOT_WORD, "not-word", ARG_NON },
05771 { OP_WORD_BOUND, "word-bound", ARG_NON },
05772 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
05773 { OP_WORD_BEGIN, "word-begin", ARG_NON },
05774 { OP_WORD_END, "word-end", ARG_NON },
05775 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
05776 { OP_END_BUF, "end-buf", ARG_NON },
05777 { OP_BEGIN_LINE, "begin-line", ARG_NON },
05778 { OP_END_LINE, "end-line", ARG_NON },
05779 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
05780 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
05781 { OP_BACKREF1, "backref1", ARG_NON },
05782 { OP_BACKREF2, "backref2", ARG_NON },
05783 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
05784 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
05785 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
05786 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
05787 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
05788 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
05789 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
05790 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
05791 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
05792 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
05793 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
05794 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
05795 { OP_SET_OPTION, "set-option", ARG_OPTION },
05796 { OP_FAIL, "fail", ARG_NON },
05797 { OP_JUMP, "jump", ARG_RELADDR },
05798 { OP_PUSH, "push", ARG_RELADDR },
05799 { OP_POP, "pop", ARG_NON },
05800 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
05801 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
05802 { OP_REPEAT, "repeat", ARG_SPECIAL },
05803 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
05804 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
05805 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
05806 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
05807 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
05808 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
05809 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
05810 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
05811 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
05812 { OP_PUSH_POS, "push-pos", ARG_NON },
05813 { OP_POP_POS, "pop-pos", ARG_NON },
05814 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
05815 { OP_FAIL_POS, "fail-pos", ARG_NON },
05816 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
05817 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
05818 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
05819 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
05820 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
05821 { OP_CALL, "call", ARG_ABSADDR },
05822 { OP_RETURN, "return", ARG_NON },
05823 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
05824 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
05825 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
05826 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
05827 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
05828 "state-check-anychar-ml*", ARG_STATE_CHECK },
05829 { -1, "", ARG_NON }
05830 };
05831
05832 static const char*
05833 op2name(int opcode)
05834 {
05835 int i;
05836
05837 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05838 if (opcode == OnigOpInfo[i].opcode)
05839 return OnigOpInfo[i].name;
05840 }
05841 return "";
05842 }
05843
05844 static int
05845 op2arg_type(int opcode)
05846 {
05847 int i;
05848
05849 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05850 if (opcode == OnigOpInfo[i].opcode)
05851 return OnigOpInfo[i].arg_type;
05852 }
05853 return ARG_SPECIAL;
05854 }
05855
05856 static void
05857 Indent(FILE* f, int indent)
05858 {
05859 int i;
05860 for (i = 0; i < indent; i++) putc(' ', f);
05861 }
05862
05863 static void
05864 p_string(FILE* f, int len, UChar* s)
05865 {
05866 fputs(":", f);
05867 while (len-- > 0) { fputc(*s++, f); }
05868 }
05869
05870 static void
05871 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
05872 {
05873 int x = len * mb_len;
05874
05875 fprintf(f, ":%d:", len);
05876 while (x-- > 0) { fputc(*s++, f); }
05877 }
05878
05879 extern void
05880 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
05881 OnigEncoding enc)
05882 {
05883 int i, n, arg_type;
05884 RelAddrType addr;
05885 LengthType len;
05886 MemNumType mem;
05887 StateCheckNumType scn;
05888 OnigCodePoint code;
05889 UChar *q;
05890
05891 fprintf(f, "[%s", op2name(*bp));
05892 arg_type = op2arg_type(*bp);
05893 if (arg_type != ARG_SPECIAL) {
05894 bp++;
05895 switch (arg_type) {
05896 case ARG_NON:
05897 break;
05898 case ARG_RELADDR:
05899 GET_RELADDR_INC(addr, bp);
05900 fprintf(f, ":(%d)", addr);
05901 break;
05902 case ARG_ABSADDR:
05903 GET_ABSADDR_INC(addr, bp);
05904 fprintf(f, ":(%d)", addr);
05905 break;
05906 case ARG_LENGTH:
05907 GET_LENGTH_INC(len, bp);
05908 fprintf(f, ":%d", len);
05909 break;
05910 case ARG_MEMNUM:
05911 mem = *((MemNumType* )bp);
05912 bp += SIZE_MEMNUM;
05913 fprintf(f, ":%d", mem);
05914 break;
05915 case ARG_OPTION:
05916 {
05917 OnigOptionType option = *((OnigOptionType* )bp);
05918 bp += SIZE_OPTION;
05919 fprintf(f, ":%d", option);
05920 }
05921 break;
05922
05923 case ARG_STATE_CHECK:
05924 scn = *((StateCheckNumType* )bp);
05925 bp += SIZE_STATE_CHECK_NUM;
05926 fprintf(f, ":%d", scn);
05927 break;
05928 }
05929 }
05930 else {
05931 switch (*bp++) {
05932 case OP_EXACT1:
05933 case OP_ANYCHAR_STAR_PEEK_NEXT:
05934 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
05935 p_string(f, 1, bp++); break;
05936 case OP_EXACT2:
05937 p_string(f, 2, bp); bp += 2; break;
05938 case OP_EXACT3:
05939 p_string(f, 3, bp); bp += 3; break;
05940 case OP_EXACT4:
05941 p_string(f, 4, bp); bp += 4; break;
05942 case OP_EXACT5:
05943 p_string(f, 5, bp); bp += 5; break;
05944 case OP_EXACTN:
05945 GET_LENGTH_INC(len, bp);
05946 p_len_string(f, len, 1, bp);
05947 bp += len;
05948 break;
05949
05950 case OP_EXACTMB2N1:
05951 p_string(f, 2, bp); bp += 2; break;
05952 case OP_EXACTMB2N2:
05953 p_string(f, 4, bp); bp += 4; break;
05954 case OP_EXACTMB2N3:
05955 p_string(f, 6, bp); bp += 6; break;
05956 case OP_EXACTMB2N:
05957 GET_LENGTH_INC(len, bp);
05958 p_len_string(f, len, 2, bp);
05959 bp += len * 2;
05960 break;
05961 case OP_EXACTMB3N:
05962 GET_LENGTH_INC(len, bp);
05963 p_len_string(f, len, 3, bp);
05964 bp += len * 3;
05965 break;
05966 case OP_EXACTMBN:
05967 {
05968 int mb_len;
05969
05970 GET_LENGTH_INC(mb_len, bp);
05971 GET_LENGTH_INC(len, bp);
05972 fprintf(f, ":%d:%d:", mb_len, len);
05973 n = len * mb_len;
05974 while (n-- > 0) { fputc(*bp++, f); }
05975 }
05976 break;
05977
05978 case OP_EXACT1_IC:
05979 len = enclen(enc, bp, bpend);
05980 p_string(f, len, bp);
05981 bp += len;
05982 break;
05983 case OP_EXACTN_IC:
05984 GET_LENGTH_INC(len, bp);
05985 p_len_string(f, len, 1, bp);
05986 bp += len;
05987 break;
05988
05989 case OP_CCLASS:
05990 n = bitset_on_num((BitSetRef )bp);
05991 bp += SIZE_BITSET;
05992 fprintf(f, ":%d", n);
05993 break;
05994
05995 case OP_CCLASS_NOT:
05996 n = bitset_on_num((BitSetRef )bp);
05997 bp += SIZE_BITSET;
05998 fprintf(f, ":%d", n);
05999 break;
06000
06001 case OP_CCLASS_MB:
06002 case OP_CCLASS_MB_NOT:
06003 GET_LENGTH_INC(len, bp);
06004 q = bp;
06005 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06006 ALIGNMENT_RIGHT(q);
06007 #endif
06008 GET_CODE_POINT(code, q);
06009 bp += len;
06010 fprintf(f, ":%d:%d", (int )code, len);
06011 break;
06012
06013 case OP_CCLASS_MIX:
06014 case OP_CCLASS_MIX_NOT:
06015 n = bitset_on_num((BitSetRef )bp);
06016 bp += SIZE_BITSET;
06017 GET_LENGTH_INC(len, bp);
06018 q = bp;
06019 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06020 ALIGNMENT_RIGHT(q);
06021 #endif
06022 GET_CODE_POINT(code, q);
06023 bp += len;
06024 fprintf(f, ":%d:%d:%d", n, (int )code, len);
06025 break;
06026
06027 case OP_CCLASS_NODE:
06028 {
06029 CClassNode *cc;
06030
06031 GET_POINTER_INC(cc, bp);
06032 n = bitset_on_num(cc->bs);
06033 fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
06034 }
06035 break;
06036
06037 case OP_BACKREFN_IC:
06038 mem = *((MemNumType* )bp);
06039 bp += SIZE_MEMNUM;
06040 fprintf(f, ":%d", mem);
06041 break;
06042
06043 case OP_BACKREF_MULTI_IC:
06044 case OP_BACKREF_MULTI:
06045 fputs(" ", f);
06046 GET_LENGTH_INC(len, bp);
06047 for (i = 0; i < len; i++) {
06048 GET_MEMNUM_INC(mem, bp);
06049 if (i > 0) fputs(", ", f);
06050 fprintf(f, "%d", mem);
06051 }
06052 break;
06053
06054 case OP_BACKREF_WITH_LEVEL:
06055 {
06056 OnigOptionType option;
06057 LengthType level;
06058
06059 GET_OPTION_INC(option, bp);
06060 fprintf(f, ":%d", option);
06061 GET_LENGTH_INC(level, bp);
06062 fprintf(f, ":%d", level);
06063
06064 fputs(" ", f);
06065 GET_LENGTH_INC(len, bp);
06066 for (i = 0; i < len; i++) {
06067 GET_MEMNUM_INC(mem, bp);
06068 if (i > 0) fputs(", ", f);
06069 fprintf(f, "%d", mem);
06070 }
06071 }
06072 break;
06073
06074 case OP_REPEAT:
06075 case OP_REPEAT_NG:
06076 {
06077 mem = *((MemNumType* )bp);
06078 bp += SIZE_MEMNUM;
06079 addr = *((RelAddrType* )bp);
06080 bp += SIZE_RELADDR;
06081 fprintf(f, ":%d:%d", mem, addr);
06082 }
06083 break;
06084
06085 case OP_PUSH_OR_JUMP_EXACT1:
06086 case OP_PUSH_IF_PEEK_NEXT:
06087 addr = *((RelAddrType* )bp);
06088 bp += SIZE_RELADDR;
06089 fprintf(f, ":(%d)", addr);
06090 p_string(f, 1, bp);
06091 bp += 1;
06092 break;
06093
06094 case OP_LOOK_BEHIND:
06095 GET_LENGTH_INC(len, bp);
06096 fprintf(f, ":%d", len);
06097 break;
06098
06099 case OP_PUSH_LOOK_BEHIND_NOT:
06100 GET_RELADDR_INC(addr, bp);
06101 GET_LENGTH_INC(len, bp);
06102 fprintf(f, ":%d:(%d)", len, addr);
06103 break;
06104
06105 case OP_STATE_CHECK_PUSH:
06106 case OP_STATE_CHECK_PUSH_OR_JUMP:
06107 scn = *((StateCheckNumType* )bp);
06108 bp += SIZE_STATE_CHECK_NUM;
06109 addr = *((RelAddrType* )bp);
06110 bp += SIZE_RELADDR;
06111 fprintf(f, ":%d:(%d)", scn, addr);
06112 break;
06113
06114 default:
06115 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
06116 *--bp);
06117 }
06118 }
06119 fputs("]", f);
06120 if (nextp) *nextp = bp;
06121 }
06122
06123 static void
06124 print_compiled_byte_code_list(FILE* f, regex_t* reg)
06125 {
06126 int ncode;
06127 UChar* bp = reg->p;
06128 UChar* end = reg->p + reg->used;
06129
06130 fprintf(f, "code length: %d\n", reg->used);
06131
06132 ncode = -1;
06133 while (bp < end) {
06134 ncode++;
06135 if (bp > reg->p) {
06136 if (ncode % 5 == 0)
06137 fprintf(f, "\n%ld:", bp-reg->p);
06138 else
06139 fprintf(f, " %ld:", bp-reg->p);
06140 }
06141 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
06142 }
06143
06144 fprintf(f, "\n");
06145 }
06146
06147 static void
06148 print_indent_tree(FILE* f, Node* node, int indent)
06149 {
06150 int i, type, container_p = 0;
06151 int add = 3;
06152 UChar* p;
06153
06154 Indent(f, indent);
06155 if (IS_NULL(node)) {
06156 fprintf(f, "ERROR: null node!!!\n");
06157 exit (0);
06158 }
06159
06160 type = NTYPE(node);
06161 switch (type) {
06162 case NT_LIST:
06163 case NT_ALT:
06164 if (NTYPE(node) == NT_LIST)
06165 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
06166 else
06167 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
06168
06169 print_indent_tree(f, NCAR(node), indent + add);
06170 while (IS_NOT_NULL(node = NCDR(node))) {
06171 if (NTYPE(node) != type) {
06172 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
06173 exit(0);
06174 }
06175 print_indent_tree(f, NCAR(node), indent + add);
06176 }
06177 break;
06178
06179 case NT_STR:
06180 fprintf(f, "<string%s:%"PRIxPTR">",
06181 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
06182 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
06183 if (*p >= 0x20 && *p < 0x7f)
06184 fputc(*p, f);
06185 else {
06186 fprintf(f, " 0x%02x", *p);
06187 }
06188 }
06189 break;
06190
06191 case NT_CCLASS:
06192 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
06193 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
06194 if (NCCLASS(node)->mbuf) {
06195 BBuf* bbuf = NCCLASS(node)->mbuf;
06196 for (i = 0; i < (int)bbuf->used; i++) {
06197 if (i > 0) fprintf(f, ",");
06198 fprintf(f, "%0x", bbuf->p[i]);
06199 }
06200 }
06201 break;
06202
06203 case NT_CTYPE:
06204 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
06205 switch (NCTYPE(node)->ctype) {
06206 case ONIGENC_CTYPE_WORD:
06207 if (NCTYPE(node)->not != 0)
06208 fputs("not word", f);
06209 else
06210 fputs("word", f);
06211 break;
06212
06213 default:
06214 fprintf(f, "ERROR: undefined ctype.\n");
06215 exit(0);
06216 }
06217 break;
06218
06219 case NT_CANY:
06220 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
06221 break;
06222
06223 case NT_ANCHOR:
06224 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
06225 switch (NANCHOR(node)->type) {
06226 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
06227 case ANCHOR_END_BUF: fputs("end buf", f); break;
06228 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
06229 case ANCHOR_END_LINE: fputs("end line", f); break;
06230 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
06231 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
06232
06233 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
06234 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
06235 #ifdef USE_WORD_BEGIN_END
06236 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
06237 case ANCHOR_WORD_END: fputs("word end", f); break;
06238 #endif
06239 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
06240 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
06241 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
06242 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
06243
06244 default:
06245 fprintf(f, "ERROR: undefined anchor type.\n");
06246 break;
06247 }
06248 break;
06249
06250 case NT_BREF:
06251 {
06252 int* p;
06253 BRefNode* br = NBREF(node);
06254 p = BACKREFS_P(br);
06255 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
06256 for (i = 0; i < br->back_num; i++) {
06257 if (i > 0) fputs(", ", f);
06258 fprintf(f, "%d", p[i]);
06259 }
06260 }
06261 break;
06262
06263 #ifdef USE_SUBEXP_CALL
06264 case NT_CALL:
06265 {
06266 CallNode* cn = NCALL(node);
06267 fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
06268 p_string(f, cn->name_end - cn->name, cn->name);
06269 }
06270 break;
06271 #endif
06272
06273 case NT_QTFR:
06274 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
06275 NQTFR(node)->lower, NQTFR(node)->upper,
06276 (NQTFR(node)->greedy ? "" : "?"));
06277 print_indent_tree(f, NQTFR(node)->target, indent + add);
06278 break;
06279
06280 case NT_ENCLOSE:
06281 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
06282 switch (NENCLOSE(node)->type) {
06283 case ENCLOSE_OPTION:
06284 fprintf(f, "option:%d\n", NENCLOSE(node)->option);
06285 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06286 break;
06287 case ENCLOSE_MEMORY:
06288 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
06289 break;
06290 case ENCLOSE_STOP_BACKTRACK:
06291 fprintf(f, "stop-bt");
06292 break;
06293
06294 default:
06295 break;
06296 }
06297 fprintf(f, "\n");
06298 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06299 break;
06300
06301 default:
06302 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
06303 break;
06304 }
06305
06306 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
06307 type != NT_ENCLOSE)
06308 fprintf(f, "\n");
06309
06310 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
06311
06312 fflush(f);
06313 }
06314 #endif
06315
06316 #ifdef ONIG_DEBUG_PARSE_TREE
06317 static void
06318 print_tree(FILE* f, Node* node)
06319 {
06320 print_indent_tree(f, node, 0);
06321 }
06322 #endif
06323