/* Global default case-fold flag, initialised to ONIGENC_CASE_FOLD_MIN. */
33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
/* Returns the current value of the global OnigDefaultCaseFoldFlag. */
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(
void)
38 return OnigDefaultCaseFoldFlag;
/* Overwrites the global OnigDefaultCaseFoldFlag with case_fold_flag. */
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
44 OnigDefaultCaseFoldFlag = case_fold_flag;
/* Byte array of WORD_ALIGNMENT_SIZE elements, compiled only when
 * PLATFORM_UNALIGNED_WORD_ACCESS is not defined. add_multi_byte_cclass()
 * appends bytes from it as padding. */
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
/* Takes the byte range from s to end; its length is computed as the signed
 * pointer difference end - s and kept in len. */
55str_dup(UChar* s, UChar* end)
57 ptrdiff_t
len = end - s;
/* Swaps the contents of *a and *b through the temporary c. Afterwards, for
 * each of the two that is a string node (NT_STR), the string length is
 * measured and sn->end is rebuilt as sn->s + len.
 * NOTE(review): presumably sn->s is re-pointed between measuring len and
 * rebuilding sn->end - confirm. */
71swap_node(Node* a, Node* b)
74 c = *a; *a = *b; *b = c;
76 if (NTYPE(a) == NT_STR) {
79 size_t len = sn->end - sn->s;
81 sn->end = sn->s +
len;
85 if (NTYPE(b) == NT_STR) {
88 size_t len = sn->end - sn->s;
90 sn->end = sn->s +
len;
/* Saturating addition of two distances. The result is
 * ONIG_INFINITE_DISTANCE when either operand already has that value or when
 * the sum would exceed it; otherwise it is d1 + d2. */
96distance_add(OnigDistance d1, OnigDistance d2)
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
/* The range test is written as a subtraction so that d1 + d2 is only
 * evaluated when it fits. */
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2)
return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
/* Multiplies distance d by m. m == 0 yields 0. d is compared against
 * ONIG_INFINITE_DISTANCE / m before multiplying, and
 * ONIG_INFINITE_DISTANCE is the saturated result. */
107distance_multiply(OnigDistance d,
int m)
109 if (m == 0)
return 0;
111 if (d < ONIG_INFINITE_DISTANCE / m)
114 return ONIG_INFINITE_DISTANCE;
/* Scans the BITSET_SIZE elements of bs and returns 0 as soon as a non-zero
 * element is found. */
118bitset_is_empty(BitSetRef bs)
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0)
return 0;
/* Counts in n how many of the SINGLE_BYTE_SIZE positions of bs are reported
 * as set by BITSET_AT. */
129bitset_on_num(BitSetRef bs)
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
/* Visits reg and every regex reachable through reg->chain. When a regex has
 * more capacity than it uses (alloc > used), its buffer is passed to
 * xrealloc with the exact size reg->used and reg->alloc is set to that
 * size. */
143onig_reg_resize(regex_t *reg)
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr =
xrealloc(reg->p, reg->used);
155 reg->alloc = reg->used;
159 }
while ((reg = reg->chain) != 0);
/* Allocates size bytes for buf->p with xmalloc. Returns ONIGERR_MEMORY when
 * the allocation yields NULL; otherwise records the capacity in buf->alloc,
 * narrowed to unsigned int. */
163onig_bbuf_init(BBuf* buf, OnigDistance size)
170 buf->p = (UChar* )
xmalloc(size);
171 if (IS_NULL(buf->p))
return(ONIGERR_MEMORY);
174 buf->alloc = (
unsigned int )size;
/* Unset-address list helpers, compiled only with USE_SUBEXP_CALL.
 * First fragment: once p has passed CHECK_NULL_RETURN_MEMERR, the capacity
 * is stored in uslist->alloc. */
180#ifdef USE_SUBEXP_CALL
188 CHECK_NULL_RETURN_MEMERR(p);
190 uslist->alloc = size;
/* Second fragment: when the list is full (num >= alloc) the capacity is
 * doubled; the (offset, node) pair is then written at index uslist->num. */
207 if (uslist->num >= uslist->alloc) {
208 size = uslist->alloc * 2;
210 CHECK_NULL_RETURN_MEMERR(p);
211 uslist->alloc = size;
215 uslist->us[uslist->num].offset = offset;
216 uslist->us[uslist->num].target = node;
/* Appends opcode to reg's code buffer through BBUF_ADD1. */
224add_opcode(regex_t* reg,
int opcode)
226 BBUF_ADD1(reg, opcode);
/* Compiled only with USE_COMBINATION_EXPLOSION_CHECK. Converts num to
 * StateCheckNumType and appends its SIZE_STATE_CHECK_NUM bytes to reg's code
 * buffer. */
230#ifdef USE_COMBINATION_EXPLOSION_CHECK
232add_state_check_num(regex_t* reg,
int num)
234 StateCheckNumType n = (StateCheckNumType )num;
236 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
/* Converts addr to RelAddrType and appends its SIZE_RELADDR bytes to reg's
 * code buffer. */
242add_rel_addr(regex_t* reg,
int addr)
244 RelAddrType ra = (RelAddrType )addr;
246 BBUF_ADD(reg, &ra, SIZE_RELADDR);
/* Converts addr to AbsAddrType and appends its SIZE_ABSADDR bytes to reg's
 * code buffer. */
251add_abs_addr(regex_t* reg,
int addr)
253 AbsAddrType ra = (AbsAddrType )addr;
255 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
/* Converts len (an OnigDistance) to LengthType and appends its SIZE_LENGTH
 * bytes to reg's code buffer.
 * NOTE(review): the cast may narrow len if LengthType is smaller than
 * OnigDistance - confirm callers keep values in range. */
260add_length(regex_t* reg, OnigDistance
len)
262 LengthType l = (LengthType )
len;
264 BBUF_ADD(reg, &l, SIZE_LENGTH);
/* Converts num to MemNumType and appends its SIZE_MEMNUM bytes to reg's code
 * buffer. */
269add_mem_num(regex_t* reg,
int num)
271 MemNumType n = (MemNumType )num;
273 BBUF_ADD(reg, &n, SIZE_MEMNUM);
/* Converts addr to PointerType and appends its SIZE_POINTER bytes to reg's
 * code buffer. */
279add_pointer(regex_t* reg,
void* addr)
281 PointerType ptr = (PointerType )addr;
283 BBUF_ADD(reg, &ptr, SIZE_POINTER);
/* Appends SIZE_OPTION bytes taken from the option argument to reg's code
 * buffer. */
289add_option(regex_t* reg, OnigOptionType option)
291 BBUF_ADD(reg, &option, SIZE_OPTION);
/* Emits opcode followed by the relative address addr; r receives the status
 * of each of the two steps in turn. */
296add_opcode_rel_addr(regex_t* reg,
int opcode,
int addr)
300 r = add_opcode(reg, opcode);
302 r = add_rel_addr(reg, addr);
/* Appends len bytes starting at bytes to reg's code buffer through
 * BBUF_ADD. */
307add_bytes(regex_t* reg, UChar* bytes, OnigDistance
len)
309 BBUF_ADD(reg, bytes,
len);
/* Appends the SIZE_BITSET bytes of bs to reg's code buffer. */
314add_bitset(regex_t* reg, BitSetRef bs)
316 BBUF_ADD(reg, bs, SIZE_BITSET);
/* Emits opcode followed by the option operand; r receives the status of each
 * of the two steps in turn. */
321add_opcode_option(regex_t* reg,
int opcode, OnigOptionType option)
325 r = add_opcode(reg, opcode);
327 r = add_option(reg, option);
/* Forward declarations: the length pass and the emit pass call each other
 * recursively through the node-specific helpers. */
331static int compile_length_tree(Node* node, regex_t* reg);
332static int compile_tree(Node* node, regex_t* reg);
/* True for OP_EXACTN, OP_EXACTMB2N, OP_EXACTMB3N, OP_EXACTMBN and
 * OP_EXACTN_IC. add_compile_string() writes a length operand for these. */
335#define IS_NEED_STR_LEN_OP_EXACT(op) \
336 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
337 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
/* Selects the exact-string opcode for a string of byte_len bytes whose
 * characters are mb_len bytes each. str_len is roomof(byte_len, mb_len).
 * Three case tables are visible: ignore-case (OP_EXACT1_IC / OP_EXACTN_IC),
 * OP_EXACT1..OP_EXACT5 with OP_EXACTN as the default, and
 * OP_EXACTMB2N1..OP_EXACTMB2N3 with OP_EXACTMB2N as the default.
 * NOTE(review): the case labels presumably switch on str_len, with the table
 * chosen by ignore_case and mb_len - confirm. */
340select_str_opcode(
int mb_len, OnigDistance byte_len,
int ignore_case)
343 OnigDistance str_len = roomof(byte_len, mb_len);
347 case 1: op = OP_EXACT1_IC;
break;
348 default: op = OP_EXACTN_IC;
break;
355 case 1: op = OP_EXACT1;
break;
356 case 2: op = OP_EXACT2;
break;
357 case 3: op = OP_EXACT3;
break;
358 case 4: op = OP_EXACT4;
break;
359 case 5: op = OP_EXACT5;
break;
360 default: op = OP_EXACTN;
break;
366 case 1: op = OP_EXACTMB2N1;
break;
367 case 2: op = OP_EXACTMB2N2;
break;
368 case 3: op = OP_EXACTMB2N3;
break;
369 default: op = OP_EXACTMB2N;
break;
/* Compiles node and, when empty_info != 0, brackets it with
 * OP_NULL_CHECK_START and a matching end opcode. Both carry the same id: the
 * value reg->num_null_check had on entry (kept in saved_num_null_check),
 * after which the counter is incremented. */
386compile_tree_empty_check(Node* node, regex_t* reg,
int empty_info)
389 int saved_num_null_check = reg->num_null_check;
391 if (empty_info != 0) {
392 r = add_opcode(reg, OP_NULL_CHECK_START);
394 r = add_mem_num(reg, reg->num_null_check);
396 reg->num_null_check++;
399 r = compile_tree(node, reg);
/* The end opcode depends on which kind of emptiness was recorded. */
402 if (empty_info != 0) {
403 if (empty_info == NQ_TARGET_IS_EMPTY)
404 r = add_opcode(reg, OP_NULL_CHECK_END);
405 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
406 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
407 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
408 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
411 r = add_mem_num(reg, saved_num_null_check);
/* Compiled only with USE_SUBEXP_CALL. Emits OP_CALL, registers the current
 * buffer offset in node->unset_addr_list, then emits an absolute address
 * operand of 0.
 * NOTE(review): the 0 is presumably a placeholder patched once the target
 * address is known - confirm. */
416#ifdef USE_SUBEXP_CALL
418compile_call(
CallNode* node, regex_t* reg)
422 r = add_opcode(reg, OP_CALL);
424 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
427 r = add_abs_addr(reg, 0 );
/* Compiles node n times in a row by calling compile_tree in a loop. */
433compile_tree_n_times(Node* node,
int n, regex_t* reg)
437 for (i = 0; i < n; i++) {
438 r = compile_tree(node, reg);
/* Length computation for one string segment. The opcode is chosen with
 * select_str_opcode using the same arguments add_compile_string() passes.
 * len gains SIZE_LENGTH for OP_EXACTMBN and the byte_len payload bytes.
 * s and reg are marked ARG_UNUSED. */
445add_compile_string_length(UChar* s ARG_UNUSED,
int mb_len, OnigDistance byte_len,
446 regex_t* reg ARG_UNUSED,
int ignore_case)
449 int op = select_str_opcode(mb_len, byte_len, ignore_case);
453 if (op == OP_EXACTMBN)
len += SIZE_LENGTH;
454 if (IS_NEED_STR_LEN_OP_EXACT(op))
457 len += (int )byte_len;
/* Emits one string segment. For OP_EXACTMBN, mb_len is written as a length
 * operand. For opcodes matching IS_NEED_STR_LEN_OP_EXACT a second length is
 * written: byte_len for OP_EXACTN_IC, otherwise byte_len / mb_len. The
 * byte_len bytes at s follow.
 * NOTE(review): the add_length()/add_bytes() calls shown here discard their
 * return values - confirm whether a failed append can go unnoticed. */
462add_compile_string(UChar* s,
int mb_len, OnigDistance byte_len,
463 regex_t* reg,
int ignore_case)
465 int op = select_str_opcode(mb_len, byte_len, ignore_case);
468 if (op == OP_EXACTMBN)
469 add_length(reg, mb_len);
471 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
472 if (op == OP_EXACTN_IC)
473 add_length(reg, byte_len);
475 add_length(reg, byte_len / mb_len);
478 add_bytes(reg, s, byte_len);
/* Length pass for a string node. Characters are measured with enclen using
 * the regex encoding. The test len == prev_len || ambig decides whether a
 * character stays in the current segment, so a node flagged by
 * NSTRING_IS_AMBIG is never split. Each segment is sized with
 * add_compile_string_length. The case sn->end <= sn->s is tested first. */
484compile_length_string_node(Node* node, regex_t* reg)
486 int rlen, r,
len, prev_len, blen, ambig;
487 OnigEncoding enc = reg->enc;
492 if (sn->end <= sn->s)
495 ambig = NSTRING_IS_AMBIG(node);
498 prev_len = enclen(enc, p, sn->end);
503 for (; p < sn->end; ) {
504 len = enclen(enc, p, sn->end);
505 if (
len == prev_len || ambig) {
509 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
517 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
/* Length pass for a raw string node: the bytes from sn->s to sn->end are
 * sized as one segment with mb_len 1 and ignore_case 0. The case
 * sn->end <= sn->s is tested first. */
523compile_length_string_raw_node(
StrNode* sn, regex_t* reg)
525 if (sn->end <= sn->s)
528 return add_compile_string_length(sn->s, 1 , sn->end - sn->s, reg, 0);
/* Emit pass for a string node. Characters are measured with enclen using
 * the regex encoding. The test len == prev_len || ambig decides whether a
 * character stays in the current segment. Each segment is emitted with
 * add_compile_string, and the last one supplies the return value. The case
 * sn->end <= sn->s is tested first. */
532compile_string_node(Node* node, regex_t* reg)
534 int r,
len, prev_len, blen, ambig;
535 OnigEncoding enc = reg->enc;
536 UChar *p, *prev, *end;
540 if (sn->end <= sn->s)
544 ambig = NSTRING_IS_AMBIG(node);
547 prev_len = enclen(enc, p, end);
552 len = enclen(enc, p, end);
553 if (
len == prev_len || ambig) {
557 r = add_compile_string(prev, prev_len, blen, reg, ambig);
567 return add_compile_string(prev, prev_len, blen, reg, ambig);
/* Emit pass for a raw string node: the bytes from sn->s to sn->end are
 * emitted as one segment with mb_len 1 and ignore_case 0. The case
 * sn->end <= sn->s is tested first. */
571compile_string_raw_node(
StrNode* sn, regex_t* reg)
573 if (sn->end <= sn->s)
576 return add_compile_string(sn->s, 1 , sn->end - sn->s, reg, 0);
/* Emits the multi-byte range data held in mbuf.
 * With PLATFORM_UNALIGNED_WORD_ACCESS: a length operand of mbuf->used, then
 * the bytes themselves.
 * Otherwise: the length operand is mbuf->used + (WORD_ALIGNMENT_SIZE - 1).
 * pad_size bytes of PadBuf go before the data and
 * (WORD_ALIGNMENT_SIZE - 1) - pad_size bytes after it, so the padding always
 * totals WORD_ALIGNMENT_SIZE - 1. pad_size comes from
 * GET_ALIGNMENT_PAD_SIZE applied to the address just past the length
 * operand.
 * NOTE(review): the add_length() calls and the padding add_bytes() calls
 * discard their return values - confirm. */
580add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
582#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
583 add_length(reg, mbuf->used);
584 return add_bytes(reg, mbuf->p, mbuf->used);
587 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
589 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
590 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
591 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
593 r = add_bytes(reg, mbuf->p, mbuf->used);
596 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
597 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
/* Length pass for a character class. Without multi-byte data (cc->mbuf is
 * NULL) the size is SIZE_OPCODE + SIZE_BITSET. With multi-byte data the size
 * also includes SIZE_LENGTH + cc->mbuf->used, plus WORD_ALIGNMENT_SIZE - 1
 * bytes when PLATFORM_UNALIGNED_WORD_ACCESS is not defined. The test on
 * ONIGENC_MBC_MINLEN and bitset_is_empty is the same one compile_cclass_node
 * uses to pick the multi-byte-only form. */
603compile_length_cclass_node(
CClassNode* cc, regex_t* reg)
607 if (IS_NULL(cc->mbuf)) {
608 len = SIZE_OPCODE + SIZE_BITSET;
611 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
615 len = SIZE_OPCODE + SIZE_BITSET;
617#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
618 len += SIZE_LENGTH + cc->mbuf->used;
620 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
/* Emit pass for a character class, in one of three forms:
 *  - no multi-byte data: OP_CCLASS / OP_CCLASS_NOT followed by the bitset;
 *  - multi-byte only (encoding minimum length > 1, or empty bitset):
 *    OP_CCLASS_MB / OP_CCLASS_MB_NOT followed by the multi-byte data;
 *  - mixed: OP_CCLASS_MIX / OP_CCLASS_MIX_NOT, the bitset, then the
 *    multi-byte data.
 * The _NOT opcode is used when IS_NCCLASS_NOT(cc) holds.
 * NOTE(review): the add_opcode() calls shown here discard their return
 * values - confirm. */
628compile_cclass_node(
CClassNode* cc, regex_t* reg)
632 if (IS_NULL(cc->mbuf)) {
633 if (IS_NCCLASS_NOT(cc))
634 add_opcode(reg, OP_CCLASS_NOT);
636 add_opcode(reg, OP_CCLASS);
638 r = add_bitset(reg, cc->bs);
641 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
642 if (IS_NCCLASS_NOT(cc))
643 add_opcode(reg, OP_CCLASS_MB_NOT);
645 add_opcode(reg, OP_CCLASS_MB);
647 r = add_multi_byte_cclass(cc->mbuf, reg);
650 if (IS_NCCLASS_NOT(cc))
651 add_opcode(reg, OP_CCLASS_MIX_NOT);
653 add_opcode(reg, OP_CCLASS_MIX);
655 r = add_bitset(reg, cc->bs);
657 r = add_multi_byte_cclass(cc->mbuf, reg);
/* Records the bounds of repeat number id in reg->repeat_range. The table is
 * created with REPEAT_RANGE_ALLOC (4) entries when repeat_range_alloc is 0,
 * and grown by REPEAT_RANGE_ALLOC entries when repeat_range_alloc <= id. An
 * infinite upper bound is stored as 0x7fffffff.
 * NOTE(review): growth adds a single REPEAT_RANGE_ALLOC step, so p[id] is in
 * range only if id < repeat_range_alloc + REPEAT_RANGE_ALLOC - confirm ids
 * are handed out sequentially. */
665entry_repeat_range(regex_t* reg,
int id,
int lower,
int upper)
667#define REPEAT_RANGE_ALLOC 4
671 if (reg->repeat_range_alloc == 0) {
673 CHECK_NULL_RETURN_MEMERR(p);
674 reg->repeat_range = p;
675 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
677 else if (reg->repeat_range_alloc <=
id) {
679 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
682 CHECK_NULL_RETURN_MEMERR(p);
683 reg->repeat_range = p;
684 reg->repeat_range_alloc = n;
687 p = reg->repeat_range;
691 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
/* Emits a counted repeat. The repeat id is the value of reg->num_repeat on
 * entry. Sequence: OP_REPEAT (OP_REPEAT_NG when not greedy), the id, a
 * relative address of target_len + SIZE_OP_REPEAT_INC, registration of the
 * bounds with entry_repeat_range, the target compiled with its empty check,
 * then an OP_REPEAT_INC variant followed by the id again. */
696compile_range_repeat_node(
QtfrNode* qn,
int target_len,
int empty_info,
700 int num_repeat = reg->num_repeat;
702 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
704 r = add_mem_num(reg, num_repeat);
707 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
710 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
713 r = compile_tree_empty_check(qn->target, reg, empty_info);
/* The _SG increment opcodes are selected when IS_QUANTIFIER_IN_REPEAT(qn)
 * is part of a true condition (USE_SUBEXP_CALL builds only). */
717#ifdef USE_SUBEXP_CALL
720 IS_QUANTIFIER_IN_REPEAT(qn)) {
721 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
724 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
727 r = add_mem_num(reg, num_repeat);
/* Tests whether qn is a greedy quantifier with an infinite upper bound whose
 * target is an any-character node (NT_CANY). */
732is_anychar_star_quantifier(
QtfrNode* qn)
734 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
735 NTYPE(qn->target) == NT_CANY)
/* Code-size threshold (50) that the quantifier compilers compare against
 * before unrolling a repeat inline. */
741#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
/* Expands to a test on a variable named ckn, which must be in scope at the
 * point of use. */
742#define CKN_ON (ckn > 0)
/* Length pass for a quantifier node, in the variant built with
 * USE_COMBINATION_EXPLOSION_CHECK. A negative tlen from compile_length_tree
 * is returned unchanged. ckn is qn->comb_exp_check_num when
 * reg->num_comb_exp_check > 0, otherwise 0. cklen is the extra
 * SIZE_STATE_CHECK_NUM operand counted when CKN_ON. */
744#ifdef USE_COMBINATION_EXPLOSION_CHECK
747compile_length_quantifier_node(
QtfrNode* qn, regex_t* reg)
749 int len, mod_tlen, cklen;
751 int infinite = IS_REPEAT_INFINITE(qn->upper);
752 int empty_info = qn->target_empty_info;
753 int tlen = compile_length_tree(qn->target, reg);
755 if (tlen < 0)
return tlen;
757 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
759 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
/* Greedy unbounded any-char: qn->lower copies of the target plus one
 * ANYCHAR_STAR opcode (the PEEK_NEXT form needs next_head_exact and no state
 * check). */
762 if (NTYPE(qn->target) == NT_CANY) {
763 if (qn->greedy && infinite) {
764 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
765 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
767 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
/* mod_tlen: target length including the null-check start/end opcodes. */
772 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
776 if (infinite && qn->lower <= 1) {
783 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
791 len += mod_tlen + SIZE_OP_PUSH + cklen;
794 else if (qn->upper == 0) {
795 if (qn->is_referred != 0)
796 len = SIZE_OP_JUMP + tlen;
800 else if (qn->upper == 1 && qn->greedy) {
801 if (qn->lower == 0) {
803 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
806 len = SIZE_OP_PUSH + tlen;
813 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
814 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
/* General case: counted repeat (REPEAT opcode with address and id operands,
 * target, REPEAT_INC). */
817 len = SIZE_OP_REPEAT_INC
818 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
820 len += SIZE_OP_STATE_CHECK;
/* Emit pass for a quantifier node, in the variant that uses state-check
 * numbers (ckn). A negative tlen from compile_length_tree is returned
 * unchanged. Visible cases, in order: greedy unbounded any-char; unbounded
 * with lower <= 1; upper == 0; greedy with upper == 1; non-greedy {0,1}; and
 * finally the counted repeat through compile_range_repeat_node. When a state
 * check is active the STATE_CHECK forms of the opcodes are emitted and
 * followed by ckn. */
827compile_quantifier_node(
QtfrNode* qn, regex_t* reg)
831 int infinite = IS_REPEAT_INFINITE(qn->upper);
832 int empty_info = qn->target_empty_info;
833 int tlen = compile_length_tree(qn->target, reg);
835 if (tlen < 0)
return tlen;
837 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
/* Greedy unbounded any-char: the mandatory qn->lower copies first, then a
 * single ANYCHAR_STAR-family opcode (ML forms under IS_MULTILINE). */
839 if (is_anychar_star_quantifier(qn)) {
840 r = compile_tree_n_times(qn->target, qn->lower, reg);
842 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
843 if (IS_MULTILINE(reg->options))
844 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
846 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
849 r = add_state_check_num(reg, ckn);
/* The PEEK_NEXT operand is the first byte of the following exact string. */
853 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
856 if (IS_MULTILINE(reg->options)) {
857 r = add_opcode(reg, (CKN_ON ?
858 OP_STATE_CHECK_ANYCHAR_ML_STAR
859 : OP_ANYCHAR_ML_STAR));
862 r = add_opcode(reg, (CKN_ON ?
863 OP_STATE_CHECK_ANYCHAR_STAR
868 r = add_state_check_num(reg, ckn);
/* mod_tlen: target length including the null-check start/end opcodes. */
875 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
879 if (infinite && qn->lower <= 1) {
/* lower == 1: a leading jump skips the push so the target runs once before
 * the loop test. */
881 if (qn->lower == 1) {
882 r = add_opcode_rel_addr(reg, OP_JUMP,
883 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
888 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
890 r = add_state_check_num(reg, ckn);
892 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
895 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
898 r = compile_tree_empty_check(qn->target, reg, empty_info);
/* Backward jump to the push at the head of the loop. */
900 r = add_opcode_rel_addr(reg, OP_JUMP,
901 -(mod_tlen + (
int )SIZE_OP_JUMP
902 + (
int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
905 if (qn->lower == 0) {
906 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
909 r = compile_tree_empty_check(qn->target, reg, empty_info);
912 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
914 r = add_state_check_num(reg, ckn);
916 r = add_rel_addr(reg,
917 -(mod_tlen + (
int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
920 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (
int )SIZE_OP_PUSH));
/* upper == 0 on a referred target: the target is still emitted, but a jump
 * of tlen is placed in front of it. */
923 else if (qn->upper == 0) {
924 if (qn->is_referred != 0) {
925 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
927 r = compile_tree(qn->target, reg);
932 else if (qn->upper == 1 && qn->greedy) {
933 if (qn->lower == 0) {
935 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
937 r = add_state_check_num(reg, ckn);
939 r = add_rel_addr(reg, tlen);
942 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
947 r = compile_tree(qn->target, reg);
949 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
951 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
953 r = add_state_check_num(reg, ckn);
955 r = add_rel_addr(reg, SIZE_OP_JUMP);
958 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
962 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
964 r = compile_tree(qn->target, reg);
967 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
970 r = add_opcode(reg, OP_STATE_CHECK);
972 r = add_state_check_num(reg, ckn);
/* Length pass for a quantifier node, in the variant without state-check
 * numbers. A negative tlen from compile_length_tree is returned unchanged.
 * Small repeats are sized as inline copies of the target, limited by
 * QUANTIFIER_EXPAND_LIMIT_SIZE; the remaining cases fall back to the counted
 * repeat form.
 * NOTE(review): tlen * qn->lower and (tlen + SIZE_OP_PUSH) * qn->upper are
 * plain int arithmetic - confirm the parser bounds the repeat counts so
 * these cannot overflow. */
981compile_length_quantifier_node(
QtfrNode* qn, regex_t* reg)
984 int infinite = IS_REPEAT_INFINITE(qn->upper);
985 int empty_info = qn->target_empty_info;
986 int tlen = compile_length_tree(qn->target, reg);
988 if (tlen < 0)
return tlen;
/* Greedy unbounded any-char: qn->lower copies of the target plus one
 * ANYCHAR_STAR opcode (PEEK_NEXT form when next_head_exact is set). */
991 if (NTYPE(qn->target) == NT_CANY) {
992 if (qn->greedy && infinite) {
993 if (IS_NOT_NULL(qn->next_head_exact))
994 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
996 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
/* The null-check opcodes are counted only when the target can match empty. */
1000 if (empty_info != 0)
1001 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1006 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1007 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1011 len = tlen * qn->lower;
1015#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1016 if (IS_NOT_NULL(qn->head_exact))
1017 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1020 if (IS_NOT_NULL(qn->next_head_exact))
1021 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1023 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1026 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1028 else if (qn->upper == 0 && qn->is_referred != 0) {
1029 len = SIZE_OP_JUMP + tlen;
/* Bounded greedy repeat small enough to unroll: lower mandatory copies plus
 * (upper - lower) optional copies, each preceded by a push. */
1031 else if (!infinite && qn->greedy &&
1032 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1033 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1034 len = tlen * qn->lower;
1035 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1037 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
1038 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1041 len = SIZE_OP_REPEAT_INC
1042 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
/* Emit pass for a quantifier node, in the variant without state-check
 * numbers. A negative tlen from compile_length_tree is returned unchanged.
 * Visible cases, in order: greedy unbounded any-char; unbounded repeats
 * whose mandatory part is small enough to unroll; upper == 0 on a referred
 * target; bounded greedy repeats small enough to unroll; non-greedy {0,1};
 * and the counted repeat through compile_range_repeat_node. */
1049compile_quantifier_node(
QtfrNode* qn, regex_t* reg)
1052 int infinite = IS_REPEAT_INFINITE(qn->upper);
1053 int empty_info = qn->target_empty_info;
1054 int tlen = compile_length_tree(qn->target, reg);
1056 if (tlen < 0)
return tlen;
/* Greedy unbounded any-char: the mandatory qn->lower copies first, then a
 * single ANYCHAR_STAR-family opcode (ML forms under IS_MULTILINE). */
1058 if (is_anychar_star_quantifier(qn)) {
1059 r = compile_tree_n_times(qn->target, qn->lower, reg);
1061 if (IS_NOT_NULL(qn->next_head_exact)) {
1062 if (IS_MULTILINE(reg->options))
1063 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1065 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
/* The PEEK_NEXT operand is the first byte of the following exact string. */
1067 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1070 if (IS_MULTILINE(reg->options))
1071 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1073 return add_opcode(reg, OP_ANYCHAR_STAR);
/* The null-check opcodes are counted only when the target can match empty. */
1077 if (empty_info != 0)
1078 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1083 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
/* lower == 1 with a large target: rather than duplicating the target, a
 * leading jump skips the first push-type opcode of the loop. */
1084 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1086#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1087 if (IS_NOT_NULL(qn->head_exact))
1088 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1091 if (IS_NOT_NULL(qn->next_head_exact))
1092 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1094 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1097 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1102 r = compile_tree_n_times(qn->target, qn->lower, reg);
/* Loop body, form 1: push guarded by the first byte of the target's own
 * leading exact string. */
1107#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1108 if (IS_NOT_NULL(qn->head_exact)) {
1109 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1110 mod_tlen + SIZE_OP_JUMP);
1112 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1113 r = compile_tree_empty_check(qn->target, reg, empty_info);
1115 r = add_opcode_rel_addr(reg, OP_JUMP,
1116 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
/* Loop body, form 2: push guarded by the first byte of the exact string
 * that follows the quantifier. */
1120 if (IS_NOT_NULL(qn->next_head_exact)) {
1121 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1122 mod_tlen + SIZE_OP_JUMP);
1124 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1125 r = compile_tree_empty_check(qn->target, reg, empty_info);
1127 r = add_opcode_rel_addr(reg, OP_JUMP,
1128 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH_IF_PEEK_NEXT));
/* Loop body, form 3: plain push / target / backward jump. */
1131 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1133 r = compile_tree_empty_check(qn->target, reg, empty_info);
1135 r = add_opcode_rel_addr(reg, OP_JUMP,
1136 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH));
1140 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1142 r = compile_tree_empty_check(qn->target, reg, empty_info);
1144 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (
int )SIZE_OP_PUSH));
/* upper == 0 on a referred target: the target is still emitted, but a jump
 * of tlen is placed in front of it. */
1147 else if (qn->upper == 0 && qn->is_referred != 0) {
1148 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1150 r = compile_tree(qn->target, reg);
1152 else if (!infinite && qn->greedy &&
1153 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1154 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1155 int n = qn->upper - qn->lower;
1157 r = compile_tree_n_times(qn->target, qn->lower, reg);
/* Each optional copy is preceded by a push whose offset covers the
 * remaining (n - i) targets and (n - i - 1) pushes. */
1160 for (i = 0; i < n; i++) {
1161 r = add_opcode_rel_addr(reg, OP_PUSH,
1162 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1164 r = compile_tree(qn->target, reg);
1168 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
1169 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1171 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1173 r = compile_tree(qn->target, reg);
1176 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
/* Length pass for an option group. reg->options is switched to node->option
 * while the target is measured and then restored. A negative tlen is
 * returned unchanged. When the old and new options differ in a dynamic
 * option bit, the size includes the option-switching opcodes: SET_OPTION_PUSH,
 * SET_OPTION and FAIL before the target, SET_OPTION after it. */
1183compile_length_option_node(
EncloseNode* node, regex_t* reg)
1186 OnigOptionType prev = reg->options;
1188 reg->options = node->option;
1189 tlen = compile_length_tree(node->target, reg);
1190 reg->options = prev;
1192 if (tlen < 0)
return tlen;
1194 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1195 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1196 + tlen + SIZE_OP_SET_OPTION;
/* Emit pass for an option group. When the old and new options differ in a
 * dynamic option bit, the target is preceded by OP_SET_OPTION_PUSH (new
 * option), OP_SET_OPTION (previous option) and OP_FAIL, and followed by
 * OP_SET_OPTION (previous option). reg->options is switched to node->option
 * only while the target is compiled. */
1203compile_option_node(
EncloseNode* node, regex_t* reg)
1206 OnigOptionType prev = reg->options;
1208 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1209 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1211 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1213 r = add_opcode(reg, OP_FAIL);
1217 reg->options = node->option;
1218 r = compile_tree(node->target, reg);
1219 reg->options = prev;
1221 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1223 r = add_opcode_option(reg, OP_SET_OPTION, prev);
/* Length pass for an enclose node. ENCLOSE_OPTION is delegated to
 * compile_length_option_node. For the other types the target is measured
 * first (a negative tlen is returned unchanged) and the per-type opcode
 * sizes are added. An unknown type yields ONIGERR_TYPE_BUG. */
1229compile_length_enclose_node(
EncloseNode* node, regex_t* reg)
1234 if (node->type == ENCLOSE_OPTION)
1235 return compile_length_option_node(node, reg);
1238 tlen = compile_length_tree(node->target, reg);
1239 if (tlen < 0)
return tlen;
1244 switch (node->type) {
/* Capture group: the opcodes depend on whether the group is called or
 * recursive, and on its bits in reg->bt_mem_start / reg->bt_mem_end. */
1245 case ENCLOSE_MEMORY:
1246#ifdef USE_SUBEXP_CALL
1247 if (IS_ENCLOSE_CALLED(node)) {
1248 len = SIZE_OP_MEMORY_START_PUSH + tlen
1249 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1250 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1251 len += (IS_ENCLOSE_RECURSION(node)
1252 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1254 len += (IS_ENCLOSE_RECURSION(node)
1255 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1257 else if (IS_ENCLOSE_RECURSION(node)) {
1258 len = SIZE_OP_MEMORY_START_PUSH;
1259 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1260 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1265 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1266 len = SIZE_OP_MEMORY_START_PUSH;
1268 len = SIZE_OP_MEMORY_START;
1270 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1271 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
/* Stop-backtrack group: the simple-repeat form (builds without
 * USE_MATCH_CACHE) is sized as lower copies plus a PUSH/target/POP/JUMP
 * loop; otherwise PUSH_STOP_BT / target / POP_STOP_BT. */
1275 case ENCLOSE_STOP_BACKTRACK:
1279#ifndef USE_MATCH_CACHE
1280 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1281 QtfrNode* qn = NQTFR(node->target);
1282 tlen = compile_length_tree(qn->target, reg);
1283 if (tlen < 0)
return tlen;
1285 len = tlen * qn->lower
1286 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1290 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1291#ifndef USE_MATCH_CACHE
/* Conditional group: the target must be an alternation; a missing branch is
 * ONIGERR_PARSER_BUG and an extra one is
 * ONIGERR_INVALID_CONDITION_PATTERN. */
1296 case ENCLOSE_CONDITION:
1297 len = SIZE_OP_CONDITION;
1298 if (NTYPE(node->target) == NT_ALT) {
1299 Node* x = node->target;
1301 tlen = compile_length_tree(NCAR(x), reg);
1302 if (tlen < 0)
return tlen;
1303 len += tlen + SIZE_OP_JUMP;
1304 if (NCDR(x) == NULL)
return ONIGERR_PARSER_BUG;
1306 tlen = compile_length_tree(NCAR(x), reg);
1307 if (tlen < 0)
return tlen;
1309 if (NCDR(x) != NULL)
return ONIGERR_INVALID_CONDITION_PATTERN;
1312 return ONIGERR_PARSER_BUG;
1316 case ENCLOSE_ABSENT:
1317 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1321 return ONIGERR_TYPE_BUG;
/* Forward declaration; the look-behind cases of compile_anchor_node() call
 * it with an int out-parameter and treat a non-zero result as an error. */
1328static int get_char_length_tree(Node* node, regex_t* reg,
int*
len);
/* Emit pass for an enclose node. ENCLOSE_OPTION is delegated to
 * compile_option_node. The other types emit their opening opcode(s), the
 * compiled target, and the matching closing opcode(s). An unknown type
 * yields ONIGERR_TYPE_BUG. */
1331compile_enclose_node(
EncloseNode* node, regex_t* reg)
1335 if (node->type == ENCLOSE_OPTION)
1336 return compile_option_node(node, reg);
1338 switch (node->type) {
1339 case ENCLOSE_MEMORY:
/* Called group: emit OP_CALL to the group body, which starts right after
 * the address operand and the following jump. The address is recorded in
 * node->call_addr and the node is flagged NST_ADDR_FIXED. A jump then skips
 * over the body (start opcode, target, end opcode, return). */
1340#ifdef USE_SUBEXP_CALL
1341 if (IS_ENCLOSE_CALLED(node)) {
1342 r = add_opcode(reg, OP_CALL);
1344 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1345 node->state |= NST_ADDR_FIXED;
1346 r = add_abs_addr(reg, (
int )node->call_addr);
/* NOTE(review): len is used in the addition below without a len < 0 test;
 * a negative error code from compile_length_tree would be folded into the
 * jump offset - confirm. */
1348 len = compile_length_tree(node->target, reg);
1349 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1350 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1351 len += (IS_ENCLOSE_RECURSION(node)
1352 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1354 len += (IS_ENCLOSE_RECURSION(node)
1355 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1357 r = add_opcode_rel_addr(reg, OP_JUMP,
len);
1361 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1362 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1364 r = add_opcode(reg, OP_MEMORY_START);
1366 r = add_mem_num(reg, node->regnum);
1368 r = compile_tree(node->target, reg);
/* Closing opcode: chosen by called / recursive status and by the group's
 * bit in reg->bt_mem_end; always followed by the group number. */
1370#ifdef USE_SUBEXP_CALL
1371 if (IS_ENCLOSE_CALLED(node)) {
1372 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1373 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1374 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1376 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1377 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1380 r = add_mem_num(reg, node->regnum);
1382 r = add_opcode(reg, OP_RETURN);
1384 else if (IS_ENCLOSE_RECURSION(node)) {
1385 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1386 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1388 r = add_opcode(reg, OP_MEMORY_END_REC);
1390 r = add_mem_num(reg, node->regnum);
1395 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1396 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1398 r = add_opcode(reg, OP_MEMORY_END);
1400 r = add_mem_num(reg, node->regnum);
1404 case ENCLOSE_STOP_BACKTRACK:
/* Simple-repeat form (builds without USE_MATCH_CACHE): the mandatory copies,
 * then a PUSH / target / POP / JUMP-back loop. */
1408#ifndef USE_MATCH_CACHE
1409 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1410 QtfrNode* qn = NQTFR(node->target);
1411 r = compile_tree_n_times(qn->target, qn->lower, reg);
1414 len = compile_length_tree(qn->target, reg);
1417 r = add_opcode_rel_addr(reg, OP_PUSH,
len + SIZE_OP_POP + SIZE_OP_JUMP);
1419 r = compile_tree(qn->target, reg);
1421 r = add_opcode(reg, OP_POP);
1423 r = add_opcode_rel_addr(reg, OP_JUMP,
1424 -((
int )SIZE_OP_PUSH +
len + (
int )SIZE_OP_POP + (
int )SIZE_OP_JUMP));
1428 r = add_opcode(reg, OP_PUSH_STOP_BT);
1430 r = compile_tree(node->target, reg);
1432 r = add_opcode(reg, OP_POP_STOP_BT);
1433#ifndef USE_MATCH_CACHE
/* Conditional group: OP_CONDITION with the tested group number and the
 * offset past the first branch and its jump, then first branch, a jump over
 * the second branch, and the second branch. */
1438 case ENCLOSE_CONDITION:
1439 r = add_opcode(reg, OP_CONDITION);
1441 r = add_mem_num(reg, node->regnum);
1444 if (NTYPE(node->target) == NT_ALT) {
1445 Node* x = node->target;
1448 len = compile_length_tree(NCAR(x), reg);
1450 if (NCDR(x) == NULL)
return ONIGERR_PARSER_BUG;
1452 len2 = compile_length_tree(NCAR(x), reg);
1453 if (len2 < 0)
return len2;
1454 if (NCDR(x) != NULL)
return ONIGERR_INVALID_CONDITION_PATTERN;
1457 r = add_rel_addr(reg,
len + SIZE_OP_JUMP);
1459 r = compile_tree(NCAR(x), reg);
1461 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1464 r = compile_tree(NCAR(x), reg);
1467 return ONIGERR_PARSER_BUG;
1471 case ENCLOSE_ABSENT:
1472 len = compile_length_tree(node->target, reg);
1475 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1477 r = add_opcode_rel_addr(reg, OP_ABSENT,
len + SIZE_OP_ABSENT_END);
1479 r = compile_tree(node->target, reg);
1481 r = add_opcode(reg, OP_ABSENT_END);
1485 return ONIGERR_TYPE_BUG;
/* Length pass for an anchor node. The target is measured first (a negative
 * tlen is returned unchanged). The four look-around types then add their
 * surrounding opcode sizes to tlen. */
1493compile_length_anchor_node(
AnchorNode* node, regex_t* reg)
1499 tlen = compile_length_tree(node->target, reg);
1500 if (tlen < 0)
return tlen;
1503 switch (node->type) {
1504 case ANCHOR_PREC_READ:
1505 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1507 case ANCHOR_PREC_READ_NOT:
1508 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1510 case ANCHOR_LOOK_BEHIND:
1511 len = SIZE_OP_LOOK_BEHIND + tlen;
1513 case ANCHOR_LOOK_BEHIND_NOT:
1514 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
/* Emit pass for an anchor node. Simple anchors map to a single opcode; the
 * word-boundary family selects the ASCII opcode when node->ascii_range is
 * set. The look-around types wrap the compiled target in their opening and
 * closing opcodes. An unknown type yields ONIGERR_TYPE_BUG. */
1526compile_anchor_node(
AnchorNode* node, regex_t* reg)
1530 switch (node->type) {
1531 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF);
break;
1532 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF);
break;
1533 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE);
break;
1534 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE);
break;
1535 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF);
break;
1536 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION);
break;
1538 case ANCHOR_WORD_BOUND:
1539 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1540 else r = add_opcode(reg, OP_WORD_BOUND);
1542 case ANCHOR_NOT_WORD_BOUND:
1543 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1544 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1546#ifdef USE_WORD_BEGIN_END
1547 case ANCHOR_WORD_BEGIN:
1548 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1549 else r = add_opcode(reg, OP_WORD_BEGIN);
1551 case ANCHOR_WORD_END:
1552 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1553 else r = add_opcode(reg, OP_WORD_END);
1556 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP);
break;
/* Positive look-ahead: PUSH_POS / target / POP_POS. */
1558 case ANCHOR_PREC_READ:
1559 r = add_opcode(reg, OP_PUSH_POS);
1561 r = compile_tree(node->target, reg);
1563 r = add_opcode(reg, OP_POP_POS);
/* Negative look-ahead: the push carries the offset past the target and the
 * trailing FAIL_POS. */
1566 case ANCHOR_PREC_READ_NOT:
1567 len = compile_length_tree(node->target, reg);
1569 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT,
len + SIZE_OP_FAIL_POS);
1571 r = compile_tree(node->target, reg);
1573 r = add_opcode(reg, OP_FAIL_POS);
/* Positive look-behind: the operand n is a character length; when
 * node->char_len is negative it is obtained from get_char_length_tree, whose
 * failure is reported as ONIGERR_INVALID_LOOK_BEHIND_PATTERN. */
1576 case ANCHOR_LOOK_BEHIND:
1579 r = add_opcode(reg, OP_LOOK_BEHIND);
1581 if (node->char_len < 0) {
1582 r = get_char_length_tree(node->target, reg, &n);
1583 if (r)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1587 r = add_length(reg, n);
1589 r = compile_tree(node->target, reg);
1593 case ANCHOR_LOOK_BEHIND_NOT:
/* NOTE(review): len is used in the push offset below without a len < 0
 * test - confirm a negative error code cannot reach here. */
1596 len = compile_length_tree(node->target, reg);
1597 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1598 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1600 if (node->char_len < 0) {
1601 r = get_char_length_tree(node->target, reg, &n);
1602 if (r)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1606 r = add_length(reg, n);
1608 r = compile_tree(node->target, reg);
1610 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1615 return ONIGERR_TYPE_BUG;
/* Length pass dispatcher: computes in r the code size for node by node type,
 * recursing through list and alternation members and delegating to the
 * node-specific compile_length_* helpers. A negative result from a
 * recursive call is returned immediately. An unknown node type yields
 * ONIGERR_TYPE_BUG. */
1623compile_length_tree(Node* node, regex_t* reg)
1632 r = compile_length_tree(NCAR(node), reg);
1633 if (r < 0)
return r;
1635 }
while (IS_NOT_NULL(node = NCDR(node)));
1644 r = compile_length_tree(NCAR(node), reg);
1645 if (r < 0)
return r;
1648 }
while (IS_NOT_NULL(node = NCDR(node)));
/* Alternation overhead: one PUSH + JUMP pair per branch except the last. */
1650 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1655 if (NSTRING_IS_RAW(node))
1656 r = compile_length_string_raw_node(NSTR(node), reg);
1658 r = compile_length_string_node(node, reg);
1662 r = compile_length_cclass_node(NCCLASS(node), reg);
/* Back-reference sizes: the nest-level form carries an option, two lengths
 * and the group numbers; a single reference is either a bare opcode or
 * opcode + group number; multiple references carry a count and the group
 * numbers. */
1674#ifdef USE_BACKREF_WITH_LEVEL
1675 if (IS_BACKREF_NEST_LEVEL(br)) {
1676 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1677 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1681 if (br->back_num == 1) {
1682 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1683 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1686 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1691#ifdef USE_SUBEXP_CALL
1698 r = compile_length_quantifier_node(NQTFR(node), reg);
1702 r = compile_length_enclose_node(NENCLOSE(node), reg);
1706 r = compile_length_anchor_node(NANCHOR(node), reg);
1710 return ONIGERR_TYPE_BUG;
/* Emit pass dispatcher: emits the code for node by node type, recursing
 * through list and alternation members and delegating to the node-specific
 * compile_* helpers. */
1718compile_tree(Node* node, regex_t* reg)
/* List: members are compiled in order until one reports a non-zero r. */
1726 r = compile_tree(NCAR(node), reg);
1727 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
/* Alternation, step 1: total length of all branches plus a PUSH + JUMP pair
 * for every branch except the last; pos is the buffer offset just past the
 * whole alternation.
 * NOTE(review): the result of compile_length_tree is added to len without a
 * negative-value test on the visible lines - confirm. */
1735 len += compile_length_tree(NCAR(x), reg);
1736 if (NCDR(x) != NULL) {
1737 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1739 }
while (IS_NOT_NULL(x = NCDR(x)));
1740 pos = reg->used +
len;
/* Alternation, step 2: every branch but the last is preceded by a PUSH that
 * targets the next branch and followed by a JUMP to pos. */
1743 len = compile_length_tree(NCAR(node), reg);
1744 if (IS_NOT_NULL(NCDR(node))) {
1745 r = add_opcode_rel_addr(reg, OP_PUSH,
len + SIZE_OP_JUMP);
1748 r = compile_tree(NCAR(node), reg);
1750 if (IS_NOT_NULL(NCDR(node))) {
1751 len = pos - (reg->used + SIZE_OP_JUMP);
1752 r = add_opcode_rel_addr(reg, OP_JUMP,
len);
1755 }
while (IS_NOT_NULL(node = NCDR(node)));
1760 if (NSTRING_IS_RAW(node))
1761 r = compile_string_raw_node(NSTR(node), reg);
1763 r = compile_string_node(node, reg);
1767 r = compile_cclass_node(NCCLASS(node), reg);
/* Character-type node: only ONIGENC_CTYPE_WORD is visible here; any other
 * ctype reaches ONIGERR_TYPE_BUG. */
1774 switch (NCTYPE(node)->ctype) {
1775 case ONIGENC_CTYPE_WORD:
1776 if (NCTYPE(node)->ascii_range != 0) {
1777 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1778 else op = OP_ASCII_WORD;
1781 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1786 return ONIGERR_TYPE_BUG;
1789 r = add_opcode(reg, op);
1794 if (IS_MULTILINE(reg->options))
1795 r = add_opcode(reg, OP_ANYCHAR_ML);
1797 r = add_opcode(reg, OP_ANYCHAR);
/* Back-reference: nest-level form, single-group forms (BACKREF1, BACKREF2,
 * BACKREFN, BACKREFN_IC) or multi-group forms followed by the group count
 * and the group numbers written from last to first. */
1804#ifdef USE_BACKREF_WITH_LEVEL
1805 if (IS_BACKREF_NEST_LEVEL(br)) {
1806 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1808 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1810 r = add_length(reg, br->nest_level);
1813 goto add_bacref_mems;
1817 if (br->back_num == 1) {
1818 n = br->back_static[0];
1819 if (IS_IGNORECASE(reg->options)) {
1820 r = add_opcode(reg, OP_BACKREFN_IC);
1822 r = add_mem_num(reg, n);
1826 case 1: r = add_opcode(reg, OP_BACKREF1);
break;
1827 case 2: r = add_opcode(reg, OP_BACKREF2);
break;
1829 r = add_opcode(reg, OP_BACKREFN);
1831 r = add_mem_num(reg, n);
1840 if (IS_IGNORECASE(reg->options)) {
1841 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1844 r = add_opcode(reg, OP_BACKREF_MULTI);
1848#ifdef USE_BACKREF_WITH_LEVEL
1851 r = add_length(reg, br->back_num);
1854 for (i = br->back_num - 1; i >= 0; i--) {
1855 r = add_mem_num(reg, p[i]);
1862#ifdef USE_SUBEXP_CALL
1864 r = compile_call(NCALL(node), reg);
1869 r = compile_quantifier_node(NQTFR(node), reg);
1873 r = compile_enclose_node(NENCLOSE(node), reg);
1877 r = compile_anchor_node(NANCHOR(node), reg);
/* Diagnostic written to stderr for a node type with no case. */
1882 fprintf(stderr,
"compile_tree: undefined node type %d\n", NTYPE(node));
/* Compiled only with USE_NAMED_GROUP. Recursively walks the tree reachable
 * from *plink:
 *  - a memory enclose that is a named group gets the number *counter, which
 *    is also recorded in map[old regnum].new_val;
 *  - otherwise a memory enclose with regnum != 0 is unlinked: *plink is
 *    replaced by its target, the enclose node is freed, and the walk
 *    continues on the replacement;
 *  - after recursing into a quantifier target, if the target pointer changed
 *    and the new target is itself a quantifier,
 *    onig_reduce_nested_quantifier is applied. */
1890#ifdef USE_NAMED_GROUP
1893noname_disable_map(Node** plink,
GroupNumRemap* map,
int* counter)
1896 Node* node = *plink;
1898 switch (NTYPE(node)) {
1902 r = noname_disable_map(&(NCAR(node)), map, counter);
1903 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1908 Node** ptarget = &(NQTFR(node)->target);
1909 Node* old = *ptarget;
1910 r = noname_disable_map(ptarget, map, counter);
1911 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1912 onig_reduce_nested_quantifier(node, *ptarget);
1920 if (en->type == ENCLOSE_MEMORY) {
1921 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1923 map[en->regnum].new_val = *counter;
1924 en->regnum = *counter;
1926 else if (en->regnum != 0) {
/* Detach the target before freeing so onig_node_free does not release it. */
1927 *plink = en->target;
1928 en->target = NULL_NODE;
1929 onig_node_free(node);
1930 r = noname_disable_map(plink, map, counter);
1934 r = noname_disable_map(&(en->target), map, counter);
1939 if (NANCHOR(node)->target)
1940 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
/* Translates the group numbers of a back-reference node through map. Only
 * references by name are accepted; anything else yields
 * ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED. The numbers are read from
 * back_static unless back_dynamic is non-NULL. A number greater than num_mem
 * yields ONIGERR_INVALID_BACKREF. */
1951renumber_node_backref(Node* node,
GroupNumRemap* map,
const int num_mem)
1953 int i, pos, n, old_num;
1957 if (! IS_BACKREF_NAME_REF(bn))
1958 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1960 old_num = bn->back_num;
1961 if (IS_NULL(bn->back_dynamic))
1962 backs = bn->back_static;
1964 backs = bn->back_dynamic;
1966 for (i = 0, pos = 0; i < old_num; i++) {
/* Bounds test before backs[i] is used as an index into map. */
1967 if (backs[i] > num_mem)
return ONIGERR_INVALID_BACKREF;
1968 n = map[backs[i]].new_val;
1980renumber_by_map(Node* node,
GroupNumRemap* map,
const int num_mem)
1984 switch (NTYPE(node)) {
1988 r = renumber_by_map(NCAR(node), map, num_mem);
1989 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1992 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1997 if (en->type == ENCLOSE_CONDITION) {
1998 if (en->regnum > num_mem)
return ONIGERR_INVALID_BACKREF;
1999 en->regnum = map[en->regnum].new_val;
2001 r = renumber_by_map(en->target, map, num_mem);
2006 r = renumber_node_backref(node, map, num_mem);
2010 if (NANCHOR(node)->target)
2011 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
2022numbered_ref_check(Node* node)
2026 switch (NTYPE(node)) {
2030 r = numbered_ref_check(NCAR(node));
2031 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2034 r = numbered_ref_check(NQTFR(node)->target);
2037 r = numbered_ref_check(NENCLOSE(node)->target);
2041 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2042 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2046 if (NANCHOR(node)->target)
2047 r = numbered_ref_check(NANCHOR(node)->target);
2058disable_noname_group_capture(Node** root, regex_t* reg,
ScanEnv* env)
2060 int r, i, pos, counter;
2065 CHECK_NULL_RETURN_MEMERR(map);
2066 for (i = 1; i <= env->num_mem; i++) {
2070 r = noname_disable_map(root, map, &counter);
2071 if (r != 0)
return r;
2073 r = renumber_by_map(*root, map, env->num_mem);
2074 if (r != 0)
return r;
2076 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2077 if (map[i].new_val > 0) {
2078 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2083 loc = env->capture_history;
2084 BIT_STATUS_CLEAR(env->capture_history);
2085 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2086 if (BIT_STATUS_AT(loc, i)) {
2087 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2091 env->num_mem = env->num_named;
2092 reg->num_mem = env->num_named;
2094 return onig_renumber_name_table(reg, map);
2098#ifdef USE_SUBEXP_CALL
2106 for (i = 0; i < uslist->num; i++) {
2107 en = NENCLOSE(uslist->us[i].target);
2108 if (! IS_ENCLOSE_ADDR_FIXED(en))
return ONIGERR_PARSER_BUG;
2109 addr = en->call_addr;
2110 offset = uslist->us[i].offset;
2112 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2118#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2120quantifiers_memory_node_info(Node* node)
2124 switch (NTYPE(node)) {
2130 v = quantifiers_memory_node_info(NCAR(node));
2132 }
while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2136# ifdef USE_SUBEXP_CALL
2138 if (IS_CALL_RECURSION(NCALL(node))) {
2139 return NQ_TARGET_IS_EMPTY_REC;
2142 r = quantifiers_memory_node_info(NCALL(node)->target);
2149 if (qn->upper != 0) {
2150 r = quantifiers_memory_node_info(qn->target);
2159 case ENCLOSE_MEMORY:
2160 return NQ_TARGET_IS_EMPTY_MEM;
2163 case ENCLOSE_OPTION:
2164 case ENCLOSE_STOP_BACKTRACK:
2165 case ENCLOSE_CONDITION:
2166 case ENCLOSE_ABSENT:
2167 r = quantifiers_memory_node_info(en->target);
2190get_min_match_length(Node* node, OnigDistance *min,
ScanEnv* env)
2196 switch (NTYPE(node)) {
2201 Node** nodes = SCANENV_MEM_NODES(env);
2203 if (br->state & NST_RECURSION)
break;
2205 backs = BACKREFS_P(br);
2206 if (backs[0] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2207 r = get_min_match_length(nodes[backs[0]], min, env);
2209 for (i = 1; i < br->back_num; i++) {
2210 if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2211 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2213 if (*min > tmin) *min = tmin;
2218#ifdef USE_SUBEXP_CALL
2220 if (IS_CALL_RECURSION(NCALL(node))) {
2222 if (IS_ENCLOSE_MIN_FIXED(en))
2226 r = get_min_match_length(NCALL(node)->target, min, env);
2232 r = get_min_match_length(NCAR(node), &tmin, env);
2233 if (r == 0) *min += tmin;
2234 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2243 r = get_min_match_length(x, &tmin, env);
2245 if (y == node) *min = tmin;
2246 else if (*min > tmin) *min = tmin;
2247 }
while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2254 *min = sn->end - sn->s;
2271 if (qn->lower > 0) {
2272 r = get_min_match_length(qn->target, min, env);
2274 *min = distance_multiply(*min, qn->lower);
2283 case ENCLOSE_MEMORY:
2284 if (IS_ENCLOSE_MIN_FIXED(en))
2287 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2290 SET_ENCLOSE_STATUS(node, NST_MARK1);
2291 r = get_min_match_length(en->target, min, env);
2292 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2295 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2301 case ENCLOSE_OPTION:
2302 case ENCLOSE_STOP_BACKTRACK:
2303 case ENCLOSE_CONDITION:
2304 r = get_min_match_length(en->target, min, env);
2307 case ENCLOSE_ABSENT:
2322get_max_match_length(Node* node, OnigDistance *max,
ScanEnv* env)
2328 switch (NTYPE(node)) {
2331 r = get_max_match_length(NCAR(node), &tmax, env);
2333 *max = distance_add(*max, tmax);
2334 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2339 r = get_max_match_length(NCAR(node), &tmax, env);
2340 if (r == 0 && *max < tmax) *max = tmax;
2341 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2347 *max = sn->end - sn->s;
2352 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2357 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2364 Node** nodes = SCANENV_MEM_NODES(env);
2366 if (br->state & NST_RECURSION) {
2367 *max = ONIG_INFINITE_DISTANCE;
2370 backs = BACKREFS_P(br);
2371 for (i = 0; i < br->back_num; i++) {
2372 if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2373 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2375 if (*max < tmax) *max = tmax;
2380#ifdef USE_SUBEXP_CALL
2382 if (! IS_CALL_RECURSION(NCALL(node)))
2383 r = get_max_match_length(NCALL(node)->target, max, env);
2385 *max = ONIG_INFINITE_DISTANCE;
2393 if (qn->upper != 0) {
2394 r = get_max_match_length(qn->target, max, env);
2395 if (r == 0 && *max != 0) {
2396 if (! IS_REPEAT_INFINITE(qn->upper))
2397 *max = distance_multiply(*max, qn->upper);
2399 *max = ONIG_INFINITE_DISTANCE;
2409 case ENCLOSE_MEMORY:
2410 if (IS_ENCLOSE_MAX_FIXED(en))
2413 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2414 *max = ONIG_INFINITE_DISTANCE;
2416 SET_ENCLOSE_STATUS(node, NST_MARK1);
2417 r = get_max_match_length(en->target, max, env);
2418 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2421 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2427 case ENCLOSE_OPTION:
2428 case ENCLOSE_STOP_BACKTRACK:
2429 case ENCLOSE_CONDITION:
2430 r = get_max_match_length(en->target, max, env);
2433 case ENCLOSE_ABSENT:
2447#define GET_CHAR_LEN_VARLEN -1
2448#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2452get_char_length_tree1(Node* node, regex_t* reg,
int*
len,
int level)
2459 switch (NTYPE(node)) {
2462 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2464 *
len = (int )distance_add(*
len, tlen);
2465 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2473 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2474 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2475 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2484 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2486 r = GET_CHAR_LEN_VARLEN;
2498 while (s < sn->end) {
2499 s += enclen(reg->enc, s, sn->end);
2508 if (qn->lower == qn->upper) {
2509 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2511 *
len = (int )distance_multiply(tlen, qn->lower);
2514 r = GET_CHAR_LEN_VARLEN;
2518#ifdef USE_SUBEXP_CALL
2520 if (! IS_CALL_RECURSION(NCALL(node)))
2521 r = get_char_length_tree1(NCALL(node)->target, reg,
len, level);
2523 r = GET_CHAR_LEN_VARLEN;
2540 case ENCLOSE_MEMORY:
2541#ifdef USE_SUBEXP_CALL
2542 if (IS_ENCLOSE_CLEN_FIXED(en))
2543 *
len = en->char_len;
2545 r = get_char_length_tree1(en->target, reg,
len, level);
2547 en->char_len = *
len;
2548 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2553 case ENCLOSE_OPTION:
2554 case ENCLOSE_STOP_BACKTRACK:
2555 case ENCLOSE_CONDITION:
2556 r = get_char_length_tree1(en->target, reg,
len, level);
2558 case ENCLOSE_ABSENT:
2569 r = GET_CHAR_LEN_VARLEN;
2577get_char_length_tree(Node* node, regex_t* reg,
int*
len)
2579 return get_char_length_tree1(node, reg,
len, 0);
2584is_not_included(Node* x, Node* y, regex_t* reg)
2599 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2600 NCTYPE(y)->not != NCTYPE(x)->not &&
2601 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2611 tmp = x; x = y; y = tmp;
2631 switch (NCTYPE(y)->ctype) {
2632 case ONIGENC_CTYPE_WORD:
2633 if (NCTYPE(y)->not == 0) {
2634 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2635 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2636 if (BITSET_AT(xc->bs, i)) {
2637 if (NCTYPE(y)->ascii_range) {
2638 if (IS_CODE_SB_WORD(reg->enc, i))
return 0;
2641 if (ONIGENC_IS_CODE_WORD(reg->enc, i))
return 0;
2650 if (IS_NOT_NULL(xc->mbuf))
return 0;
2651 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2653 if (NCTYPE(y)->ascii_range)
2654 is_word = IS_CODE_SB_WORD(reg->enc, i);
2656 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2658 if (!IS_NCCLASS_NOT(xc)) {
2659 if (BITSET_AT(xc->bs, i))
2663 if (! BITSET_AT(xc->bs, i))
2682 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2683 v = BITSET_AT(xc->bs, i);
2684 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2685 (v == 0 && IS_NCCLASS_NOT(xc))) {
2686 v = BITSET_AT(yc->bs, i);
2687 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2688 (v == 0 && IS_NCCLASS_NOT(yc)))
2692 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2693 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2712 if (NSTRING_LEN(x) == 0)
2717 switch (NCTYPE(y)->ctype) {
2718 case ONIGENC_CTYPE_WORD:
2719 if (NCTYPE(y)->ascii_range) {
2720 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2721 return NCTYPE(y)->not;
2723 return !(NCTYPE(y)->not);
2726 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2727 return NCTYPE(y)->not;
2729 return !(NCTYPE(y)->not);
2741 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2742 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2743 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2751 len = NSTRING_LEN(x);
2752 if (
len > NSTRING_LEN(y))
len = NSTRING_LEN(y);
2753 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2758 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i <
len; i++, p++, q++) {
2759 if (*p != *q)
return 1;
2779get_head_value_node(Node* node,
int exact, regex_t* reg)
2781 Node* n = NULL_NODE;
2783 switch (NTYPE(node)) {
2787#ifdef USE_SUBEXP_CALL
2800 n = get_head_value_node(NCAR(node), exact, reg);
2807 if (sn->end <= sn->s)
2811 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2822 if (qn->lower > 0) {
2823#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2824 if (IS_NOT_NULL(qn->head_exact))
2828 n = get_head_value_node(qn->target, exact, reg);
2837 case ENCLOSE_OPTION:
2839 OnigOptionType options = reg->options;
2841 reg->options = NENCLOSE(node)->option;
2842 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2843 reg->options = options;
2847 case ENCLOSE_MEMORY:
2848 case ENCLOSE_STOP_BACKTRACK:
2849 case ENCLOSE_CONDITION:
2850 n = get_head_value_node(en->target, exact, reg);
2853 case ENCLOSE_ABSENT:
2860 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2861 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2872check_type_tree(Node* node,
int type_mask,
int enclose_mask,
int anchor_mask)
2877 if ((NTYPE2BIT(type) & type_mask) == 0)
2884 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2886 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2890 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2897 if ((en->type & enclose_mask) == 0)
2900 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2905 type = NANCHOR(node)->type;
2906 if ((type & anchor_mask) == 0)
2909 if (NANCHOR(node)->target)
2910 r = check_type_tree(NANCHOR(node)->target,
2911 type_mask, enclose_mask, anchor_mask);
2920#ifdef USE_SUBEXP_CALL
2922# define RECURSION_EXIST 1
2923# define RECURSION_INFINITE 2
2926subexp_inf_recursive_check(Node* node,
ScanEnv* env,
int head)
2941 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2942 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2945 ret = get_min_match_length(NCAR(x), &min, env);
2946 if (ret != 0)
return ret;
2947 if (min != 0) head = 0;
2949 }
while (IS_NOT_NULL(x = NCDR(x)));
2956 r = RECURSION_EXIST;
2958 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2959 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2961 }
while (IS_NOT_NULL(node = NCDR(node)));
2966 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2967 if (r == RECURSION_EXIST) {
2968 if (NQTFR(node)->lower == 0) r = 0;
2976 case ANCHOR_PREC_READ:
2977 case ANCHOR_PREC_READ_NOT:
2978 case ANCHOR_LOOK_BEHIND:
2979 case ANCHOR_LOOK_BEHIND_NOT:
2980 r = subexp_inf_recursive_check(an->target, env, head);
2987 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2991 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2993 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2994 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2996 SET_ENCLOSE_STATUS(node, NST_MARK2);
2997 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2998 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3010subexp_inf_recursive_check_trav(Node* node,
ScanEnv* env)
3020 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3021 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3025 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3032 case ANCHOR_PREC_READ:
3033 case ANCHOR_PREC_READ_NOT:
3034 case ANCHOR_LOOK_BEHIND:
3035 case ANCHOR_LOOK_BEHIND_NOT:
3036 r = subexp_inf_recursive_check_trav(an->target, env);
3046 if (IS_ENCLOSE_RECURSION(en)) {
3047 SET_ENCLOSE_STATUS(node, NST_MARK1);
3048 r = subexp_inf_recursive_check(en->target, env, 1);
3049 if (r > 0)
return ONIGERR_NEVER_ENDING_RECURSION;
3050 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3052 r = subexp_inf_recursive_check_trav(en->target, env);
3065subexp_recursive_check(Node* node)
3069 switch (NTYPE(node)) {
3073 r |= subexp_recursive_check(NCAR(node));
3074 }
while (IS_NOT_NULL(node = NCDR(node)));
3078 r = subexp_recursive_check(NQTFR(node)->target);
3085 case ANCHOR_PREC_READ:
3086 case ANCHOR_PREC_READ_NOT:
3087 case ANCHOR_LOOK_BEHIND:
3088 case ANCHOR_LOOK_BEHIND_NOT:
3089 r = subexp_recursive_check(an->target);
3096 r = subexp_recursive_check(NCALL(node)->target);
3097 if (r != 0) SET_CALL_RECURSION(node);
3101 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3103 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3106 SET_ENCLOSE_STATUS(node, NST_MARK2);
3107 r = subexp_recursive_check(NENCLOSE(node)->target);
3108 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3121subexp_recursive_check_trav(Node* node,
ScanEnv* env)
3123# define FOUND_CALLED_NODE 1
3135 ret = subexp_recursive_check_trav(NCAR(node), env);
3136 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3137 else if (ret < 0)
return ret;
3138 }
while (IS_NOT_NULL(node = NCDR(node)));
3143 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3144 if (NQTFR(node)->upper == 0) {
3145 if (r == FOUND_CALLED_NODE)
3146 NQTFR(node)->is_referred = 1;
3154 case ANCHOR_PREC_READ:
3155 case ANCHOR_PREC_READ_NOT:
3156 case ANCHOR_LOOK_BEHIND:
3157 case ANCHOR_LOOK_BEHIND_NOT:
3158 r = subexp_recursive_check_trav(an->target, env);
3168 if (! IS_ENCLOSE_RECURSION(en)) {
3169 if (IS_ENCLOSE_CALLED(en)) {
3170 SET_ENCLOSE_STATUS(node, NST_MARK1);
3171 r = subexp_recursive_check(en->target);
3172 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3173 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3176 r = subexp_recursive_check_trav(en->target, env);
3177 if (IS_ENCLOSE_CALLED(en))
3178 r |= FOUND_CALLED_NODE;
3190setup_subexp_call(Node* node,
ScanEnv* env)
3199 r = setup_subexp_call(NCAR(node), env);
3200 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3205 r = setup_subexp_call(NCAR(node), env);
3206 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3210 r = setup_subexp_call(NQTFR(node)->target, env);
3213 r = setup_subexp_call(NENCLOSE(node)->target, env);
3219 Node** nodes = SCANENV_MEM_NODES(env);
3221 if (cn->group_num != 0) {
3222 int gnum = cn->group_num;
3224# ifdef USE_NAMED_GROUP
3225 if (env->num_named > 0 &&
3226 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3227 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3228 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3231 if (gnum > env->num_mem) {
3232 onig_scan_env_set_error_string(env,
3233 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3234 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3237# ifdef USE_NAMED_GROUP
3240 cn->target = nodes[cn->group_num];
3241 if (IS_NULL(cn->target)) {
3242 onig_scan_env_set_error_string(env,
3243 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3244 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3246 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3247 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3248 cn->unset_addr_list = env->unset_addr_list;
3250# ifdef USE_NAMED_GROUP
3251# ifdef USE_PERL_SUBEXP_CALL
3252 else if (cn->name == cn->name_end) {
3259 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3262 onig_scan_env_set_error_string(env,
3263 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3264 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3267 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3268 onig_scan_env_set_error_string(env,
3269 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3270 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3273 cn->group_num = refs[0];
3286 case ANCHOR_PREC_READ:
3287 case ANCHOR_PREC_READ_NOT:
3288 case ANCHOR_LOOK_BEHIND:
3289 case ANCHOR_LOOK_BEHIND_NOT:
3290 r = setup_subexp_call(an->target, env);
3304#define IN_ALT (1<<0)
3305#define IN_NOT (1<<1)
3306#define IN_REPEAT (1<<2)
3307#define IN_VAR_REPEAT (1<<3)
3308#define IN_CALL (1<<4)
3309#define IN_RECCALL (1<<5)
3310#define IN_LOOK_BEHIND (1<<6)
3317divide_look_behind_alternatives(Node* node)
3319 Node *head, *np, *insert_node;
3321 int anc_type = an->type;
3325 swap_node(node, head);
3327 NANCHOR(head)->target = np;
3330 while ((np = NCDR(np)) != NULL_NODE) {
3331 insert_node = onig_node_new_anchor(anc_type);
3332 CHECK_NULL_RETURN_MEMERR(insert_node);
3333 NANCHOR(insert_node)->target = NCAR(np);
3334 NCAR(np) = insert_node;
3337 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3340 SET_NTYPE(np, NT_LIST);
3341 }
while ((np = NCDR(np)) != NULL_NODE);
3347setup_look_behind(Node* node, regex_t* reg,
ScanEnv* env)
3352 r = get_char_length_tree(an->target, reg, &
len);
3355 else if (r == GET_CHAR_LEN_VARLEN)
3356 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3357 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3358 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3359 r = divide_look_behind_alternatives(node);
3361 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3368next_setup(Node* node, Node* next_node, regex_t* reg)
3374 if (type == NT_QTFR) {
3376 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3377#ifdef USE_QTFR_PEEK_NEXT
3378 Node* n = get_head_value_node(next_node, 1, reg);
3380 if (IS_NOT_NULL(n) && NSTR(n)->s[0] !=
'\0') {
3381 qn->next_head_exact = n;
3385 if (qn->lower <= 1) {
3386 int ttype = NTYPE(qn->target);
3387 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3389 x = get_head_value_node(qn->target, 0, reg);
3390 if (IS_NOT_NULL(x)) {
3391 y = get_head_value_node(next_node, 0, reg);
3392 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3393 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3394 CHECK_NULL_RETURN_MEMERR(en);
3395 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3396 swap_node(node, en);
3397 NENCLOSE(node)->target = en;
3404 else if (type == NT_ENCLOSE) {
3406 if (en->type == ENCLOSE_MEMORY && !IS_ENCLOSE_CALLED(en)) {
3416update_string_node_case_fold(regex_t* reg, Node *node)
3418 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3419 UChar *sbuf, *ebuf, *sp;
3421 OnigDistance sbuf_size;
3425 sbuf_size = (end - sn->s) * 2;
3426 sbuf = (UChar* )
xmalloc(sbuf_size);
3427 CHECK_NULL_RETURN_MEMERR(sbuf);
3428 ebuf = sbuf + sbuf_size;
3433 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3434 for (i = 0; i <
len; i++) {
3436 UChar* p = (UChar* )
xrealloc(sbuf, sbuf_size * 2);
3439 return ONIGERR_MEMORY;
3442 sp = sbuf + sbuf_size;
3444 ebuf = sbuf + sbuf_size;
3451 r = onig_node_str_set(node, sbuf, sp);
3458expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3464 node = onig_node_new_str(s, end);
3465 if (IS_NULL(node))
return ONIGERR_MEMORY;
3467 r = update_string_node_case_fold(reg, node);
3469 onig_node_free(node);
3473 NSTRING_SET_AMBIG(node);
3474 NSTRING_SET_DONT_GET_OPT_INFO(node);
3485 for (i = 0; i < item_num; i++) {
3486 if (items[i].byte_len != slen) {
3489 if (items[i].code_len != 1) {
3498 UChar *p,
int slen, UChar *end,
3499 regex_t* reg, Node **rnode)
3501 int r, i, j,
len, varlen;
3502 Node *anode, *var_anode, *snode, *xnode, *an;
3503 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3505 *rnode = var_anode = NULL_NODE;
3508 for (i = 0; i < item_num; i++) {
3509 if (items[i].byte_len != slen) {
3516 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3517 if (IS_NULL(var_anode))
return ONIGERR_MEMORY;
3519 xnode = onig_node_new_list(NULL, NULL);
3520 if (IS_NULL(xnode))
goto mem_err;
3521 NCAR(var_anode) = xnode;
3523 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3524 if (IS_NULL(anode))
goto mem_err;
3525 NCAR(xnode) = anode;
3528 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3529 if (IS_NULL(anode))
return ONIGERR_MEMORY;
3532 snode = onig_node_new_str(p, p + slen);
3533 if (IS_NULL(snode))
goto mem_err;
3535 NCAR(anode) = snode;
3537 for (i = 0; i < item_num; i++) {
3538 snode = onig_node_new_str(NULL, NULL);
3539 if (IS_NULL(snode))
goto mem_err;
3541 for (j = 0; j < items[i].code_len; j++) {
3542 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3548 r = onig_node_str_cat(snode, buf, buf +
len);
3549 if (r != 0)
goto mem_err2;
3552 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3557 if (items[i].byte_len != slen) {
3559 UChar *q = p + items[i].byte_len;
3562 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3568 xnode = onig_node_list_add(NULL_NODE, snode);
3569 if (IS_NULL(xnode)) {
3571 onig_node_free(rem);
3574 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3576 onig_node_free(xnode);
3577 onig_node_free(rem);
3587 NCDR(var_anode) = an;
3600 onig_node_free(snode);
3603 onig_node_free(*rnode);
3605 return ONIGERR_MEMORY;
3608#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3611expand_case_fold_string(Node* node, regex_t* reg,
int state)
3613 int r, n,
len, alt_num;
3615 int is_in_look_behind;
3616 UChar *start, *end, *p;
3617 Node *top_root, *root, *snode, *prev_node;
3621 if (NSTRING_IS_AMBIG(node))
return 0;
3627 if (start >= end)
return 0;
3629 is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3632 top_root = root = prev_node = snode = NULL_NODE;
3636 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3643 len = enclen(reg->enc, p, end);
3645 varlen = is_case_fold_variable_len(n, items,
len);
3646 if (n == 0 || varlen == 0 || is_in_look_behind) {
3647 if (IS_NULL(snode)) {
3648 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3649 onig_node_free(top_root);
3650 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3651 if (IS_NULL(root)) {
3652 onig_node_free(prev_node);
3657 prev_node = snode = onig_node_new_str(NULL, NULL);
3658 if (IS_NULL(snode))
goto mem_err;
3659 if (IS_NOT_NULL(root)) {
3660 if (IS_NULL(onig_node_list_add(root, snode))) {
3661 onig_node_free(snode);
3667 r = onig_node_str_cat(snode, p, p +
len);
3668 if (r != 0)
goto err;
3672 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION)
break;
3674 if (IS_NOT_NULL(snode)) {
3675 r = update_string_node_case_fold(reg, snode);
3677 NSTRING_SET_AMBIG(snode);
3680 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3681 onig_node_free(top_root);
3682 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3683 if (IS_NULL(root)) {
3684 onig_node_free(prev_node);
3689 r = expand_case_fold_string_alt(n, items, p,
len, end, reg, &prev_node);
3690 if (r < 0)
goto mem_err;
3692 if (IS_NULL(root)) {
3693 top_root = prev_node;
3696 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3697 onig_node_free(prev_node);
3702 root = NCAR(prev_node);
3705 if (IS_NOT_NULL(root)) {
3706 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3707 onig_node_free(prev_node);
3718 if (IS_NOT_NULL(snode)) {
3719 r = update_string_node_case_fold(reg, snode);
3721 NSTRING_SET_AMBIG(snode);
3728 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3729 if (r != 0)
goto mem_err;
3731 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3732 onig_node_free(top_root);
3733 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3734 if (IS_NULL(root)) {
3735 onig_node_free(srem);
3736 onig_node_free(prev_node);
3741 if (IS_NULL(root)) {
3745 if (IS_NULL(onig_node_list_add(root, srem))) {
3746 onig_node_free(srem);
3753 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3754 swap_node(node, top_root);
3755 onig_node_free(top_root);
3762 onig_node_free(top_root);
3767#ifdef USE_COMBINATION_EXPLOSION_CHECK
3769# define CEC_THRES_NUM_BIG_REPEAT 512
3770# define CEC_INFINITE_NUM 0x7fffffff
3772# define CEC_IN_INFINITE_REPEAT (1<<0)
3773# define CEC_IN_FINITE_REPEAT (1<<1)
3774# define CEC_CONT_BIG_REPEAT (1<<2)
3777setup_comb_exp_check(Node* node,
int state,
ScanEnv* env)
3787 r = setup_comb_exp_check(NCAR(node), r, env);
3788 }
while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3796 ret = setup_comb_exp_check(NCAR(node), state, env);
3798 }
while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3804 int child_state = state;
3807 Node* target = qn->target;
3810 if (! IS_REPEAT_INFINITE(qn->upper)) {
3811 if (qn->upper > 1) {
3813 child_state |= CEC_IN_FINITE_REPEAT;
3816 if (env->backrefed_mem == 0) {
3817 if (NTYPE(qn->target) == NT_ENCLOSE) {
3819 if (en->type == ENCLOSE_MEMORY) {
3820 if (NTYPE(en->target) == NT_QTFR) {
3822 if (IS_REPEAT_INFINITE(q->upper)
3823 && q->greedy == qn->greedy) {
3824 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3826 child_state = state;
3835 if (state & CEC_IN_FINITE_REPEAT) {
3836 qn->comb_exp_check_num = -1;
3839 if (IS_REPEAT_INFINITE(qn->upper)) {
3840 var_num = CEC_INFINITE_NUM;
3841 child_state |= CEC_IN_INFINITE_REPEAT;
3844 var_num = qn->upper - qn->lower;
3847 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3848 add_state |= CEC_CONT_BIG_REPEAT;
3850 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3851 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3852 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3853 if (qn->comb_exp_check_num == 0) {
3854 env->num_comb_exp_check++;
3855 qn->comb_exp_check_num = env->num_comb_exp_check;
3856 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3857 env->comb_exp_max_regnum = env->curr_max_regnum;
3862 r = setup_comb_exp_check(target, child_state, env);
3872 case ENCLOSE_MEMORY:
3874 if (env->curr_max_regnum < en->regnum)
3875 env->curr_max_regnum = en->regnum;
3877 r = setup_comb_exp_check(en->target, state, env);
3882 r = setup_comb_exp_check(en->target, state, env);
3888# ifdef USE_SUBEXP_CALL
3890 if (IS_CALL_RECURSION(NCALL(node)))
3891 env->has_recursion = 1;
3893 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3914setup_tree(Node* node, regex_t* reg,
int state,
ScanEnv* env)
3924 Node* prev = NULL_NODE;
3926 r = setup_tree(NCAR(node), reg, state, env);
3927 if (IS_NOT_NULL(prev) && r == 0) {
3928 r = next_setup(prev, NCAR(node), reg);
3931 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3937 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3938 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3945 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3946 r = expand_case_fold_string(node, reg, state);
3954#ifdef USE_SUBEXP_CALL
3963 Node** nodes = SCANENV_MEM_NODES(env);
3966 for (i = 0; i < br->back_num; i++) {
3967 if (p[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
3968 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3969 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3970#ifdef USE_BACKREF_WITH_LEVEL
3971 if (IS_BACKREF_NEST_LEVEL(br)) {
3972 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3975 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3984 Node* target = qn->target;
3986 if ((state & IN_REPEAT) != 0) {
3987 qn->state |= NST_IN_REPEAT;
3990 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3991 r = get_min_match_length(target, &d, env);
3994 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3995#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3996 r = quantifiers_memory_node_info(target);
3999 qn->target_empty_info = r;
4003 r = get_max_match_length(target, &d, env);
4004 if (r == 0 && d == 0) {
4007 if (qn->lower > 1) qn->lower = 1;
4008 if (NTYPE(target) == NT_STR) {
4009 qn->upper = qn->lower = 0;
4017 if (qn->lower != qn->upper)
4018 state |= IN_VAR_REPEAT;
4019 r = setup_tree(target, reg, state, env);
4023#define EXPAND_STRING_MAX_LENGTH 100
4024 if (NTYPE(target) == NT_STR) {
4025 if (qn->lower > 1) {
4026 int i, n = qn->lower;
4027 OnigDistance
len = NSTRING_LEN(target);
4031 np = onig_node_new_str(sn->s, sn->end);
4032 if (IS_NULL(np))
return ONIGERR_MEMORY;
4033 NSTR(np)->flag = sn->flag;
4035 for (i = 1; i < n && (i+1) *
len <= EXPAND_STRING_MAX_LENGTH; i++) {
4036 r = onig_node_str_cat(np, sn->s, sn->end);
4042 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4046 if (! IS_REPEAT_INFINITE(qn->upper))
4049 np1 = onig_node_new_list(np, NULL);
4052 return ONIGERR_MEMORY;
4054 swap_node(np1, node);
4055 np2 = onig_node_list_add(node, np1);
4057 onig_node_free(np1);
4058 return ONIGERR_MEMORY;
4062 swap_node(np, node);
4069#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4070 if (qn->greedy && (qn->target_empty_info != 0)) {
4071 if (NTYPE(target) == NT_QTFR) {
4073 if (IS_NOT_NULL(tqn->head_exact)) {
4074 qn->head_exact = tqn->head_exact;
4075 tqn->head_exact = NULL;
4079 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4091 case ENCLOSE_OPTION:
4093 OnigOptionType options = reg->options;
4094 reg->options = NENCLOSE(node)->option;
4095 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4096 reg->options = options;
4100 case ENCLOSE_MEMORY:
4101 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4102 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4105 if (IS_ENCLOSE_CALLED(en))
4107 if (IS_ENCLOSE_RECURSION(en))
4108 state |= IN_RECCALL;
4109 else if ((state & IN_RECCALL) != 0)
4110 SET_CALL_RECURSION(node);
4111 r = setup_tree(en->target, reg, state, env);
4114 case ENCLOSE_STOP_BACKTRACK:
4116 Node* target = en->target;
4117 r = setup_tree(target, reg, state, env);
4118 if (NTYPE(target) == NT_QTFR) {
4120 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4122 int qtype = NTYPE(tqn->target);
4123 if (IS_NODE_TYPE_SIMPLE(qtype))
4124 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4130 case ENCLOSE_CONDITION:
4131#ifdef USE_NAMED_GROUP
4132 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4133 env->num_named > 0 &&
4134 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4135 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4136 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4139 if (NENCLOSE(node)->regnum > env->num_mem)
4140 return ONIGERR_INVALID_BACKREF;
4141 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4144 case ENCLOSE_ABSENT:
4145 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4156 case ANCHOR_PREC_READ:
4157 r = setup_tree(an->target, reg, state, env);
4159 case ANCHOR_PREC_READ_NOT:
4160 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4164#define ALLOWED_TYPE_IN_LB \
4165 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4166 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4168#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4169#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4171#define ALLOWED_ANCHOR_IN_LB \
4172( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4173 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4174 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4175 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4176#define ALLOWED_ANCHOR_IN_LB_NOT \
4177( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4178 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4179 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4180 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4182 case ANCHOR_LOOK_BEHIND:
4184 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4185 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4186 if (r < 0)
return r;
4187 if (r > 0)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4188 if (NTYPE(node) != NT_ANCHOR)
goto restart;
4189 r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
4190 if (r != 0)
return r;
4191 r = setup_look_behind(node, reg, env);
4195 case ANCHOR_LOOK_BEHIND_NOT:
4197 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4198 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4199 if (r < 0)
return r;
4200 if (r > 0)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4201 if (NTYPE(node) != NT_ANCHOR)
goto restart;
4202 r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
4204 if (r != 0)
return r;
4205 r = setup_look_behind(node, reg, env);
4219#ifndef USE_SUNDAY_QUICK_SEARCH
/*
 * set_bm_skip -- variant that follows `#ifndef USE_SUNDAY_QUICK_SEARCH`.
 *
 * Builds the skip table for the literal s..end. Two storage forms are
 * visible:
 *   - len < ONIG_CHAR_TABLE_SIZE: the caller-supplied UChar skip[] is used;
 *   - otherwise: an int table is lazily xmalloc'ed into *int_skip.
 * In both forms every entry defaults to len, and each byte at offset i + j
 * of the characters walked by the loop (which stops before len - 1) is set
 * to len - 1 - i - j.
 *
 * Returns ONIGERR_MEMORY when the int table cannot be allocated and
 * ONIGERR_TYPE_BUG on the `# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE`
 * path.
 *
 * NOTE(review): the assignment of len (presumably end - s), the use of
 * ignore_case and the final return are not visible in this excerpt --
 * confirm against the full file.
 */
4222set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4223 UChar skip[],
int** int_skip,
int ignore_case)
4225 OnigDistance i,
len;
4226 int clen, flen, n, j, k;
4227 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4229 OnigEncoding enc = reg->enc;
/* Short literal: distances are stored in the UChar table. */
4232 if (
len < ONIG_CHAR_TABLE_SIZE) {
4233 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )
len;
4236 for (i = 0; i <
len - 1; i += clen) {
4239 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4241 clen = enclen(enc, p, end);
4243 clen = (int )(end - p);
/* Each fold candidate is checked for a single code point of the same byte
 * length as the source character; the action taken when the check fails is
 * not visible here. */
4245 for (j = 0; j < n; j++) {
4246 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4248 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
/* Record the distance for every byte of the character and of each of its
 * encoded fold candidates. */
4252 for (j = 0; j < clen; j++) {
4253 skip[s[i + j]] = (UChar )(
len - 1 - i - j);
4254 for (k = 0; k < n; k++) {
4255 skip[buf[k][j]] = (UChar )(
len - 1 - i - j);
4261# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4263 return ONIGERR_TYPE_BUG;
/* Long literal: allocate the int table on first use, then fill it the same
 * way as the UChar table above. */
4265 if (IS_NULL(*int_skip)) {
4266 *int_skip = (
int* )
xmalloc(
sizeof(
int) * ONIG_CHAR_TABLE_SIZE);
4267 if (IS_NULL(*int_skip))
return ONIGERR_MEMORY;
4269 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )
len;
4272 for (i = 0; i <
len - 1; i += clen) {
4275 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4277 clen = enclen(enc, p, end);
4279 clen = (int )(end - p);
4281 for (j = 0; j < n; j++) {
4282 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4284 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4288 for (j = 0; j < clen; j++) {
4289 (*int_skip)[s[i + j]] = (int )(
len - 1 - i - j);
4290 for (k = 0; k < n; k++) {
4291 (*int_skip)[buf[k][j]] = (int )(
len - 1 - i - j);
/*
 * set_bm_skip -- second variant in the file (presumably the one used when
 * USE_SUNDAY_QUICK_SEARCH is defined; the `#else` line is not visible).
 *
 * Same structure as the preceding variant, with two visible differences:
 *   - table entries default to len + 1 instead of len;
 *   - the fill loop walks the whole literal (i < len) and stores
 *     len - i - j, so the final character is included.
 *
 * Returns ONIGERR_MEMORY when the int table cannot be allocated and
 * ONIGERR_TYPE_BUG on the `# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE`
 * path.
 *
 * NOTE(review): the assignment of len, the use of ignore_case and the final
 * return are not visible in this excerpt -- confirm against the full file.
 */
4304set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4305 UChar skip[],
int** int_skip,
int ignore_case)
4307 OnigDistance i,
len;
4308 int clen, flen, n, j, k;
4309 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4311 OnigEncoding enc = reg->enc;
/* Short literal: distances are stored in the UChar table. */
4314 if (
len < ONIG_CHAR_TABLE_SIZE) {
4315 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(
len + 1);
4318 for (i = 0; i <
len; i += clen) {
4321 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4323 clen = enclen(enc, p, end);
4325 clen = (int )(end - p);
/* Each fold candidate is checked for a single code point of the same byte
 * length as the source character; the action taken when the check fails is
 * not visible here. */
4327 for (j = 0; j < n; j++) {
4328 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4330 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
/* Record the distance for every byte of the character and of each of its
 * encoded fold candidates. */
4334 for (j = 0; j < clen; j++) {
4335 skip[s[i + j]] = (UChar )(
len - i - j);
4336 for (k = 0; k < n; k++) {
4337 skip[buf[k][j]] = (UChar )(
len - i - j);
4343# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4345 return ONIGERR_TYPE_BUG;
/* Long literal: allocate the int table on first use, then fill it the same
 * way as the UChar table above. */
4347 if (IS_NULL(*int_skip)) {
4348 *int_skip = (
int* )
xmalloc(
sizeof(
int) * ONIG_CHAR_TABLE_SIZE);
4349 if (IS_NULL(*int_skip))
return ONIGERR_MEMORY;
4351 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(
len + 1);
4354 for (i = 0; i <
len; i += clen) {
4357 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4359 clen = enclen(enc, p, end);
4361 clen = (int )(end - p);
4363 for (j = 0; j < n; j++) {
4364 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4366 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4370 for (j = 0; j < clen; j++) {
4371 (*int_skip)[s[i + j]] = (int )(
len - i - j);
4372 for (k = 0; k < n; k++) {
4373 (*int_skip)[buf[k][j]] = (int )(
len - i - j);
/* NOTE(review): the declarations below are struct members whose enclosing
 * struct declarations are not visible in this excerpt -- confirm which type
 * each one belongs to before relying on these notes. */
4391 OnigOptionType options;
4392 OnigCaseFoldType case_fold_flag;
/* Fixed-capacity byte buffer; capacity is OPT_EXACT_MAXLEN. */
4408 UChar s[OPT_EXACT_MAXLEN];
/* Table with ONIG_CHAR_TABLE_SIZE UChar entries. */
4416 UChar map[ONIG_CHAR_TABLE_SIZE];
/*
 * map_position_value: look up a weight for byte value i.
 *
 * The weight comes from ByteValTable (128 entries shown) when i is inside
 * the table. Index 0 is special-cased when the encoding's minimum character
 * length is greater than one byte.
 *
 * NOTE(review): the value returned by that special case and for indices
 * outside the table is not visible in this excerpt.
 */
4432map_position_value(OnigEncoding enc,
int i)
4434 static const short int ByteValTable[] = {
4435 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4436 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4437 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4438 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4439 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4440 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4441 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4442 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4445 if (i < numberof(ByteValTable)) {
4446 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4449 return (
int )ByteValTable[i];
/*
 * Distance weighting (presumably distance_value(); the signature line is
 * not visible in this excerpt).
 *
 * Returns 0 when mm->max is ONIG_INFINITE_DISTANCE. Otherwise the spread
 * d = mm->max - mm->min selects an entry of dist_vals[] when d is inside the
 * table. The 100 table entries fall from 1000 (d == 0) to 10, roughly
 * following 1000 / (d + 1).
 *
 * NOTE(review): the result for a spread outside the table is not visible.
 */
4459 static const short int dist_vals[] = {
4460 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4461 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4462 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4463 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4464 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4465 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4466 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4467 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4468 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4469 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4474 if (mm->max == ONIG_INFINITE_DISTANCE)
return 0;
4476 d = mm->max - mm->min;
4477 if (d < numberof(dist_vals))
4479 return (
int )dist_vals[d];
/*
 * Compare two candidates by weight and distance (presumably
 * comp_distance_value(d1, d2, v1, v2); the signature is not visible).
 *
 * A non-positive v2 yields -1 and a non-positive v1 yields 1 before any
 * scaling. Otherwise each weight is multiplied by distance_value() of its
 * own distance and the products are compared: 1 when v2 is larger, -1 when
 * v1 is larger. Ties are broken on the min distance: 1 when d2->min is the
 * smaller, -1 when d1->min is the smaller.
 *
 * NOTE(review): the result when the min distances are also equal is not
 * visible in this excerpt.
 */
4487 if (v2 <= 0)
return -1;
4488 if (v1 <= 0)
return 1;
4490 v1 *= distance_value(d1);
4491 v2 *= distance_value(d2);
4493 if (v2 > v1)
return 1;
4494 if (v2 < v1)
return -1;
4496 if (d2->min < d1->min)
return 1;
4497 if (d2->min > d1->min)
return -1;
/*
 * MinMaxLen helper fragments. Apart from set_mml, the function signatures
 * are not visible in this excerpt, so each note describes only the
 * statements shown.
 */
/* Equality test: 1 when both min and max match, otherwise 0. */
4504 return (a->min == b->min && a->max == b->max) ? 1 : 0;
/* set_mml: body not visible in this excerpt. */
4509set_mml(
MinMaxLen* mml, OnigDistance min, OnigDistance max)
/* Reset both bounds to zero. */
4518 mml->min = mml->max = 0;
/* Copy both bounds from `from` into `to`. */
4524 to->min = from->min;
4525 to->max = from->max;
/* Add `from` to `to` bound by bound through distance_add(). */
4531 to->min = distance_add(to->min, from->min);
4532 to->max = distance_add(to->max, from->max);
/* Add one length to both bounds through distance_add(). */
4539 to->min = distance_add(to->min,
len);
4540 to->max = distance_add(to->max,
len);
/* Widen `to` so that it also covers `from`: keep the smaller min and the
 * larger max. */
4547 if (to->min > from->min) to->min = from->min;
4548 if (to->max < from->max) to->max = from->max;
/* Reset fragment: both anchor sets are cleared (the signature is not
 * visible; presumably clear_opt_anc_info()). */
4560 anc->left_anchor = 0;
4561 anc->right_anchor = 0;
/*
 * Anchor concatenation (presumably concat_opt_anc_info(); only the tail of
 * the parameter list is visible).
 *
 * `to` is cleared first. Its left anchors come from `left`, plus those of
 * `right` when left_len is 0. Its right anchors come from `right`, plus
 * those of `left` when right_len is 0. The ANCHOR_PREC_READ_NOT bit of
 * left's right anchors is also carried over.
 *
 * NOTE(review): the condition enclosing that last statement is not visible
 * in this excerpt.
 */
4572 OnigDistance left_len, OnigDistance right_len)
4574 clear_opt_anc_info(to);
4576 to->left_anchor = left->left_anchor;
4577 if (left_len == 0) {
4578 to->left_anchor |= right->left_anchor;
4581 to->right_anchor = right->right_anchor;
4582 if (right_len == 0) {
4583 to->right_anchor |= left->right_anchor;
4586 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
/*
 * is_left_anchor: classify an anchor type.
 *
 * The visible test singles out ANCHOR_END_BUF, ANCHOR_SEMI_END_BUF,
 * ANCHOR_END_LINE, ANCHOR_PREC_READ and ANCHOR_PREC_READ_NOT.
 *
 * NOTE(review): the values returned on each side of the test are not
 * visible in this excerpt.
 */
4591is_left_anchor(
int anc)
4593 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4594 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4595 anc == ANCHOR_PREC_READ_NOT)
/*
 * Anchor-set helper fragments. The function signatures are not visible in
 * this excerpt, so each note describes only the statements shown.
 */
/* Membership test: 1 when any bit of anc is set in the left or the right
 * anchor set, otherwise 0. */
4604 if ((to->left_anchor & anc) != 0)
return 1;
4606 return ((to->right_anchor & anc) != 0 ? 1 : 0);
/* Add: the bit goes into the left set when is_left_anchor(anc) is true;
 * the second statement is presumably the `else` arm (line not visible). */
4612 if (is_left_anchor(anc))
4613 to->left_anchor |= anc;
4615 to->right_anchor |= anc;
/* Remove: mirror of the add above, clearing the bit instead. */
4621 if (is_left_anchor(anc))
4622 to->left_anchor &= ~anc;
4624 to->right_anchor &= ~anc;
/* Alternative merge: keep only the anchors present in both operands. */
4630 to->left_anchor &= add->left_anchor;
4631 to->right_anchor &= add->right_anchor;
/* Fullness test: 1 once ex->len has reached OPT_EXACT_MAXLEN (the signature
 * is not visible; is_full_opt_exact_info() is called elsewhere with the
 * same kind of argument). */
4637 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
/* Reset fragment (presumably clear_opt_exact_info()): clears the distance
 * and anchor info and marks ignore_case as unset (-1). Other fields may be
 * reset on lines not visible here. */
4643 clear_mml(&ex->mmd);
4644 clear_opt_anc_info(&ex->anc);
4646 ex->ignore_case = -1;
/*
 * Append the exact string of `add` to `to` (presumably
 * concat_opt_exact_info(); the signature is not visible).
 *
 * ignore_case is inherited from `add` while `to` has none (< 0); the
 * handling of a mismatch is not visible in this excerpt.
 */
4664 if (to->ignore_case < 0)
4665 to->ignore_case = add->ignore_case;
4666 else if (to->ignore_case != add->ignore_case)
/* Copy whole characters only: stop before a character that would push the
 * length past OPT_EXACT_MAXLEN. */
4671 for (i = to->len; p < end; ) {
4672 len = enclen(enc, p, end);
4673 if (i +
len > OPT_EXACT_MAXLEN)
break;
4674 for (j = 0; j <
len && p < end; j++)
/* reach_end is inherited only when all of add was consumed. */
4679 to->reach_end = (p == end ? add->reach_end : 0);
/* Merge the anchors; right anchors are dropped unless the end is reached. */
4681 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4682 if (! to->reach_end) tanc.right_anchor = 0;
4683 copy_opt_anc_info(&to->anc, &tanc);
/*
 * concat_opt_exact_info_str: append the bytes s..end to to->s.
 *
 * Copying proceeds character by character (enclen) and stops when the
 * input is exhausted, the buffer already holds OPT_EXACT_MAXLEN bytes, or
 * the next character would not fit. `raw` is marked ARG_UNUSED.
 *
 * NOTE(review): the statements that store the bytes and update to->len are
 * not visible in this excerpt.
 */
4687concat_opt_exact_info_str(
OptExactInfo* to, UChar* s, UChar* end,
4688 int raw ARG_UNUSED, OnigEncoding enc)
4693 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4694 len = enclen(enc, p, end);
4695 if (i +
len > OPT_EXACT_MAXLEN)
break;
4696 for (j = 0; j <
len && p < end; j++)
/*
 * Merge the exact-string info of two alternatives into `to` (presumably
 * alt_merge_opt_exact_info(to, add, env); the signature is not visible).
 *
 * The result is cleared when either side is empty or when the two sides
 * sit at different distances.
 */
4708 if (add->len == 0 || to->len == 0) {
4709 clear_opt_exact_info(to);
4713 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4714 clear_opt_exact_info(to);
/* Find the common prefix, comparing whole characters: the first byte is
 * compared here and the remaining bytes of the character in the inner
 * loop. */
4718 for (i = 0; i < to->len && i < add->len; ) {
4719 if (to->s[i] != add->s[i])
break;
4720 len = enclen(env->enc, to->s + i, to->s + to->len);
4722 for (j = 1; j <
len; j++) {
4723 if (to->s[i+j] != add->s[i+j])
break;
/* True when the common prefix does not cover both strings to their end;
 * the statement guarded by this test is not visible in this excerpt. */
4729 if (! add->reach_end || i < add->
len || i < to->
len) {
/* ignore_case: inherit when unset, otherwise OR the two flags. */
4733 if (to->ignore_case < 0)
4734 to->ignore_case = add->ignore_case;
4735 else if (add->ignore_case >= 0)
4736 to->ignore_case |= add->ignore_case;
/* Keep only shared anchors; right anchors need reach_end. */
4738 alt_merge_opt_anc_info(&to->anc, &add->anc);
4739 if (! to->reach_end) to->anc.right_anchor = 0;
/*
 * Choose between two exact-string candidates, replacing `now` with `alt`
 * when `alt` wins (presumably select_opt_exact_info(enc, now, alt); the
 * signature and the initial assignment of v1/v2 are not visible).
 */
4754 copy_opt_exact_info(now, alt);
/* Both candidates are very short: weight them by their first byte.
 * NOTE(review): v2 is taken from now->s[0] and v1 from alt->s[0], i.e.
 * crossed relative to the v1/now, v2/alt pairing used below -- this looks
 * deliberate, but confirm against the full file. */
4757 else if (v1 <= 2 && v2 <= 2) {
4759 v2 = map_position_value(enc, now->s[0]);
4760 v1 = map_position_value(enc, alt->s[0]);
4762 if (now->len > 1) v1 += 5;
4763 if (alt->len > 1) v2 += 5;
/* A candidate that is not case-insensitive counts double. */
4766 if (now->ignore_case <= 0) v1 *= 2;
4767 if (alt->ignore_case <= 0) v2 *= 2;
4769 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4770 copy_opt_exact_info(now, alt);
/*
 * Reset an OptMapInfo (presumably clear_opt_map_info(); the signature and
 * the opening of the clean_info initializer are not visible).
 *
 * The rows below are zero initializers (16 rows of 16 values) belonging to
 * the clean_info template, which is then copied over `map` in one
 * sizeof(OptMapInfo) memcpy.
 */
4779 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4780 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4781 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4782 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4783 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4784 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4785 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4786 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4787 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4788 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4789 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4790 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4791 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4792 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4793 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4794 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4798 xmemcpy(map, &clean_info,
sizeof(
OptMapInfo));
/*
 * add_char_opt_map_info: register byte c in the map.
 *
 * Only a byte whose map entry is still 0 contributes: its
 * map_position_value() weight is added to map->value.
 *
 * NOTE(review): the statement that marks map->map[c] as set is not visible
 * in this excerpt.
 */
4808add_char_opt_map_info(
OptMapInfo* map, UChar c, OnigEncoding enc)
4810 if (map->map[c] == 0) {
4812 map->value += map_position_value(enc, c);
/*
 * add_char_amb_opt_map_info: register the first byte of the character at p
 * together with the first byte of each of its case-fold alternatives.
 *
 * Multi-character folds are disabled on the flag before the alternatives
 * are requested. A negative count from the encoding is returned to the
 * caller unchanged.
 *
 * NOTE(review): the return on the success path is not visible in this
 * excerpt.
 */
4817add_char_amb_opt_map_info(
OptMapInfo* map, UChar* p, UChar* end,
4818 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4821 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4824 add_char_opt_map_info(map, p[0], enc);
4826 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4827 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4828 if (n < 0)
return n;
/* Encode each alternative and register its leading byte. */
4830 for (i = 0; i < n; i++) {
4831 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4832 add_char_opt_map_info(map, buf[0], enc);
/*
 * Choose between two map candidates, replacing `now` with `alt` when `alt`
 * wins (presumably select_opt_map_info(now, alt); the signature is not
 * visible).
 *
 * An alt with value 0 never wins; a now with value 0 is replaced by alt.
 * Otherwise the values are inverted against z (1 << 15), so a smaller map
 * value gives a larger weight, and comp_distance_value() decides.
 */
4841 const int z = 1<<15;
4845 if (alt->value == 0) return ;
4846 if (now->value == 0) {
4847 copy_opt_map_info(now, alt);
4851 v1 = z / now->value;
4852 v2 = z / alt->value;
4853 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4854 copy_opt_map_info(now, alt);
/*
 * Compare an exact-string candidate `e` with a map candidate `m`
 * (presumably comp_opt_exact_or_map_info(); the signature is not visible).
 *
 * A map with a non-positive value gives -1. Otherwise both are scaled by
 * COMP_EM_BASE: the exact weight grows with its length and is doubled when
 * it is not case-insensitive, the map weight shrinks as m->value grows.
 * The result is that of comp_distance_value().
 */
4860#define COMP_EM_BASE 20
4863 if (m->value <= 0)
return -1;
4865 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4866 vm = COMP_EM_BASE * 5 * 2 / m->value;
4867 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
/*
 * Merge the map info of two alternatives into `to` (presumably
 * alt_merge_opt_map_info(enc, to, add); the signature is not visible).
 *
 * Nothing happens when `to` is already empty. `to` is cleared when `add` is
 * empty or when to's max distance is below add's min distance. Otherwise
 * the distances are widened, the table is walked to recompute the value
 * from map_position_value(), and only shared anchors are kept.
 *
 * NOTE(review): the body of the table loop is mostly not visible here.
 */
4876 if (to->value == 0) return ;
4877 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4878 clear_opt_map_info(to);
4882 alt_merge_mml(&to->mmd, &add->mmd);
4885 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4890 val += map_position_value(enc, i);
4894 alt_merge_opt_anc_info(&to->anc, &add->anc);
/* Stamp the distance `mmd` on the exb, expr and map parts of a node's
 * optimization info (presumably set_bound_node_opt_info(); the signature
 * and any further lines are not visible). */
4900 copy_mml(&(opt->exb.mmd), mmd);
4901 copy_mml(&(opt->expr.mmd), mmd);
4902 copy_mml(&(opt->map.mmd), mmd);
/* Reset every part of a node's optimization info: length, anchors, the
 * three exact-string slots (exb, exm, expr) and the map (presumably
 * clear_node_opt_info(); the signature is not visible). */
4908 clear_mml(&opt->len);
4909 clear_opt_anc_info(&opt->anc);
4910 clear_opt_exact_info(&opt->exb);
4911 clear_opt_exact_info(&opt->exm);
4912 clear_opt_exact_info(&opt->expr);
4913 clear_opt_map_info(&opt->map);
/*
 * Append the optimization info of a following node (`add`) to the info
 * accumulated so far (`to`) (presumably concat_left_node_opt_info(enc, to,
 * add); the signature is not visible). Note that `add` is modified too.
 */
4925 int exb_reach, exm_reach;
/* Combine the anchors of both sides, using their max lengths. */
4928 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4929 copy_opt_anc_info(&to->anc, &tanc);
/* While nothing has been consumed yet (to->len.max == 0), the anchors seen
 * so far also apply to add's leading exact string and to its map. */
4931 if (add->exb.len > 0 && to->len.max == 0) {
4932 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4933 to->len.max, add->len.max);
4934 copy_opt_anc_info(&add->exb.anc, &tanc);
4937 if (add->map.value > 0 && to->len.max == 0) {
4938 if (add->map.mmd.max == 0)
4939 add->map.anc.left_anchor |= to->anc.left_anchor;
/* Remember whether the strings reached the end before the flags are cleared
 * for a non-empty `add`. */
4942 exb_reach = to->exb.reach_end;
4943 exm_reach = to->exm.reach_end;
4945 if (add->len.max != 0)
4946 to->exb.reach_end = to->exm.reach_end = 0;
/* add's leading exact string is appended to exb or, failing that, to exm
 * when exm reached the end; the condition selecting the first arm is not
 * visible here. The appended string is then cleared in `add`. */
4948 if (add->exb.len > 0) {
4950 concat_opt_exact_info(&to->exb, &add->exb, enc);
4951 clear_opt_exact_info(&add->exb);
4953 else if (exm_reach) {
4954 concat_opt_exact_info(&to->exm, &add->exb, enc);
4955 clear_opt_exact_info(&add->exb);
/* Keep the better middle candidate. */
4958 select_opt_exact_info(enc, &to->exm, &add->exb);
4959 select_opt_exact_info(enc, &to->exm, &add->exm);
/* A pending expr candidate is capped to add's max length and then offered
 * to exb (when it sits at distance 0) or to exm. */
4961 if (to->expr.len > 0) {
4962 if (add->len.max > 0) {
4963 if (to->expr.len > (
int )add->len.max)
4964 to->expr.len = (int )add->len.max;
4966 if (to->expr.mmd.max == 0)
4967 select_opt_exact_info(enc, &to->exb, &to->expr);
4969 select_opt_exact_info(enc, &to->exm, &to->expr);
4972 else if (add->expr.len > 0) {
4973 copy_opt_exact_info(&to->expr, &add->expr);
4976 select_opt_map_info(&to->map, &add->map);
/* Finally extend the accumulated length. */
4978 add_mml(&to->len, &add->len);
/* Merge the optimization info of an alternative branch into `to`, part by
 * part: anchors, the three exact-string slots, the map and the length
 * (presumably alt_merge_node_opt_info(to, add, env); the signature is not
 * visible). */
4984 alt_merge_opt_anc_info (&to->anc, &add->anc);
4985 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4986 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4987 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4988 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4990 alt_merge_mml(&to->len, &add->len);
/* Threshold compared against en->opt_count in the ENCLOSE_MEMORY handling
 * below; past it only the length bounds are set. */
4994#define MAX_NODE_OPT_INFO_REF_COUNT 5
/*
 * optimize_node_left (name taken from the recursive calls and the
 * diagnostic string below; the signature and the node-type `case` labels
 * are not visible in this excerpt).
 *
 * Computes the optimization info `opt` for `node` by dispatching on the
 * node kind and recursing into children. The result starts from a cleared
 * state bounded by env->mmd. An unknown node type ends in ONIGERR_TYPE_BUG.
 */
5002 clear_node_opt_info(opt);
5003 set_bound_node_opt_info(opt, &env->mmd);
/* Sequence of nodes: each element is optimized with a private copy of the
 * environment whose distance advances by the element's length, and its
 * info is concatenated onto the result. */
5013 copy_opt_env(&nenv, env);
5015 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
5017 add_mml(&nenv.mmd, &nopt.len);
5018 concat_left_node_opt_info(env->enc, opt, &nopt);
5020 }
while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
/* Alternatives: the first branch seeds the result, later branches are
 * merged into it. */
5030 r = optimize_node_left(NCAR(nd), &nopt, env);
5032 if (nd == node) copy_node_opt_info(opt, &nopt);
5033 else alt_merge_node_opt_info(opt, &nopt, env);
5035 }
while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
/* String node: a non-ambiguous string is recorded as a case-sensitive
 * exact string with fixed length slen. */
5042 OnigDistance slen = sn->end - sn->s;
5043 int is_raw = NSTRING_IS_RAW(node);
5045 if (! NSTRING_IS_AMBIG(node)) {
5046 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5048 opt->exb.ignore_case = 0;
5050 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5052 set_mml(&opt->len, slen, slen);
/* Ambiguous string: the max length is computed from the character count
 * when the node opts out of optimization info; the other arm records a
 * case-insensitive exact string and the fold-aware map entry. */
5057 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
5058 int n = onigenc_strlen(env->enc, sn->s, sn->end);
5059 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
5062 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5064 opt->exb.ignore_case = 1;
5067 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5068 env->enc, env->case_fold_flag);
5075 set_mml(&opt->len, slen, max);
/* The exact string reaches the end when all of the node was stored. */
5078 if ((OnigDistance )opt->exb.len == slen)
5079 opt->exb.reach_end = 1;
/* Character class: with a multibyte buffer or a negated class only the
 * length bounds are set; the other statements fill the map from the bitset
 * with length exactly 1. */
5090 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5091 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5092 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5094 set_mml(&opt->len, min, max);
5097 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5098 z = BITSET_AT(cc->bs, i);
5099 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5100 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5103 set_mml(&opt->len, 1, 1);
/* Character type: for ONIGENC_CTYPE_WORD the map receives the word bytes
 * (or, when negated, the non-word bytes plus everything at or above
 * maxcode). maxcode is 0x80 when the type is limited to the ASCII range. */
5113 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5118 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5119 switch (NCTYPE(node)->ctype) {
5120 case ONIGENC_CTYPE_WORD:
5121 if (NCTYPE(node)->not != 0) {
5122 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5123 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5124 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5129 for (i = 0; i < maxcode; i++) {
5130 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5131 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5139 min = ONIGENC_MBC_MINLEN(env->enc);
5141 set_mml(&opt->len, min, max);
/* Any single character: only the encoding's length bounds are known. */
5147 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5148 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5149 set_mml(&opt->len, min, max);
/* Anchors: the listed types are recorded directly in opt->anc. */
5154 switch (NANCHOR(node)->type) {
5155 case ANCHOR_BEGIN_BUF:
5156 case ANCHOR_BEGIN_POSITION:
5157 case ANCHOR_BEGIN_LINE:
5158 case ANCHOR_END_BUF:
5159 case ANCHOR_SEMI_END_BUF:
5160 case ANCHOR_END_LINE:
5161 case ANCHOR_LOOK_BEHIND:
5162 case ANCHOR_PREC_READ_NOT:
5163 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
/* Look-ahead: the target's best exact string becomes the expr candidate
 * (never marked as reaching the end) and its map is adopted as well. */
5166 case ANCHOR_PREC_READ:
5170 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5172 if (nopt.exb.len > 0)
5173 copy_opt_exact_info(&opt->expr, &nopt.exb);
5174 else if (nopt.exm.len > 0)
5175 copy_opt_exact_info(&opt->expr, &nopt.exm);
5177 opt->expr.reach_end = 0;
5179 if (nopt.map.value > 0)
5180 copy_opt_map_info(&opt->map, &nopt.map);
5185 case ANCHOR_LOOK_BEHIND_NOT:
/* Back reference: a recursive reference gets an unbounded length; otherwise
 * the bounds are the smallest min and largest max over all referenced
 * groups. */
5194 OnigDistance min, max, tmin, tmax;
5195 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5198 if (br->state & NST_RECURSION) {
5199 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5202 backs = BACKREFS_P(br);
5203 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5205 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5207 for (i = 1; i < br->back_num; i++) {
5208 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5210 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5212 if (min > tmin) min = tmin;
5213 if (max < tmax) max = tmax;
5215 if (r == 0) set_mml(&opt->len, min, max);
/* Subexpression call: a recursive call gets an unbounded length; otherwise
 * the target is optimized under its own options, which are restored
 * afterwards. */
5219#ifdef USE_SUBEXP_CALL
5221 if (IS_CALL_RECURSION(NCALL(node)))
5222 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5224 OnigOptionType save = env->options;
5225 env->options = NENCLOSE(NCALL(node)->target)->option;
5226 r = optimize_node_left(NCALL(node)->target, opt, env);
5227 env->options = save;
/* Quantifier: start from the target's info. */
5235 OnigDistance min, max;
5239 r = optimize_node_left(qn->target, &nopt, env);
/* A greedy unbounded any-char repeat at distance 0 is recorded as an
 * implicit anchor; the multiline test selects between the two variants. */
5242 if ( IS_REPEAT_INFINITE(qn->upper)) {
5243 if (env->mmd.max == 0 &&
5244 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5245 if (IS_MULTILINE(env->options))
5247 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5249 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
/* With a mandatory first iteration the target's info is kept, and its exact
 * string is repeated up to `lower` times while the buffer has room. */
5253 if (qn->lower > 0) {
5254 copy_node_opt_info(opt, &nopt);
5255 if (nopt.exb.len > 0) {
5256 if (nopt.exb.reach_end) {
5257 for (i = 2; i <= qn->lower &&
5258 ! is_full_opt_exact_info(&opt->exb); i++) {
5259 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5261 if (i < qn->lower) {
5262 opt->exb.reach_end = 0;
/* A variable repeat count means the strings can no longer be known to reach
 * the end. */
5267 if (qn->lower != qn->upper) {
5268 opt->exb.reach_end = 0;
5269 opt->exm.reach_end = 0;
5272 opt->exm.reach_end = 0;
/* Length bounds: min * lower; max is unbounded for an infinite repeat of a
 * non-empty target, otherwise max * upper. */
5276 min = distance_multiply(nopt.len.min, qn->lower);
5277 if (IS_REPEAT_INFINITE(qn->upper))
5278 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5280 max = distance_multiply(nopt.len.max, qn->upper);
5282 set_mml(&opt->len, min, max);
/* Option group: optimize the target under the group's options, then restore
 * the caller's options. */
5291 case ENCLOSE_OPTION:
5293 OnigOptionType save = env->options;
5295 env->options = en->option;
5296 r = optimize_node_left(en->target, opt, env);
5297 env->options = save;
/* Capture group: past MAX_NODE_OPT_INFO_REF_COUNT visits only the length
 * bounds are set, using the group's fixed min/max when known. */
5301 case ENCLOSE_MEMORY:
5302#ifdef USE_SUBEXP_CALL
5304 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5305 OnigDistance min, max;
5308 max = ONIG_INFINITE_DISTANCE;
5309 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5310 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5311 set_mml(&opt->len, min, max);
5316 r = optimize_node_left(en->target, opt, env);
/* The implicit any-char-star anchor is withdrawn when the group is the
 * target of a back reference. */
5318 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5319 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5320 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5325 case ENCLOSE_STOP_BACKTRACK:
5326 case ENCLOSE_CONDITION:
5327 r = optimize_node_left(en->target, opt, env);
5330 case ENCLOSE_ABSENT:
5331 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
/* Unknown node type: report it (under a condition not visible here) and
 * fail. */
5339 fprintf(stderr,
"optimize_node_left: undefined node type %d\n",
5342 r = ONIGERR_TYPE_BUG;
/*
 * Install an exact-string optimization on `reg` (presumably
 * set_optimize_exact_info(reg, e); the signature is not visible).
 *
 * An empty string is a no-op returning 0. Otherwise the string is copied
 * into a freshly allocated reg->exact (memory error on failure) and a
 * search strategy is chosen:
 *   - length >= 3, or >= 2 when reverse matching is allowed: a skip table
 *     is built with set_bm_skip() and a BM strategy selected, using the
 *     *_NOT_REV flavour when reverse matching is not allowed;
 *   - otherwise plain EXACT / EXACT_IC.
 * The case-insensitive strategies are used when e->ignore_case > 0.
 *
 * NOTE(review): how a non-zero result from set_bm_skip() is handled is not
 * visible in this excerpt.
 */
5355 if (e->len == 0)
return 0;
5357 reg->exact = (UChar* )
xmalloc(e->len);
5358 CHECK_NULL_RETURN_MEMERR(reg->exact);
5359 xmemcpy(reg->exact, e->s, e->len);
5360 reg->exact_end = reg->exact + e->len;
5363 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5365 if (e->ignore_case > 0) {
5366 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5367 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5368 reg->map, &(reg->int_map), 1);
5370 reg->optimize = (allow_reverse != 0
5371 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5374 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5378 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5382 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5383 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5384 reg->map, &(reg->int_map), 0);
5386 reg->optimize = (allow_reverse != 0
5387 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5390 reg->optimize = ONIG_OPTIMIZE_EXACT;
5394 reg->optimize = ONIG_OPTIMIZE_EXACT;
/* Record where the string may start and, for a finite dmin, the shortest
 * subject length worth searching: dmin plus the string length. */
5398 reg->dmin = e->mmd.min;
5399 reg->dmax = e->mmd.max;
5401 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5402 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
/*
 * set_optimize_map_info: install a map optimization on `reg`.
 *
 * Copies the ONIG_CHAR_TABLE_SIZE map entries, selects ONIG_OPTIMIZE_MAP
 * and records the distance range. For a finite dmin the threshold length is
 * dmin + 1.
 */
5409set_optimize_map_info(regex_t* reg,
OptMapInfo* m)
5413 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5414 reg->map[i] = m->map[i];
5416 reg->optimize = ONIG_OPTIMIZE_MAP;
5417 reg->dmin = m->mmd.min;
5418 reg->dmax = m->mmd.max;
5420 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5421 reg->threshold_len = (int )(reg->dmin + 1);
/* Add to reg->sub_anchor the begin-line bit of the left anchors and the
 * end-line bit of the right anchors (presumably set_sub_anchor(reg, anc);
 * the signature is not visible). */
5428 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5429 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5432#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5433static void print_optimize_info(FILE* f, regex_t* reg);
/*
 * set_optimize_info_from_tree: analyse the parse tree and store the
 * resulting search optimization in `reg`.
 *
 * The tree is analysed with optimize_node_left() starting at distance 0.
 * From the result:
 *   - reg->anchor receives the begin-side and end-side anchors listed
 *     below;
 *   - for an end-anchored pattern the overall length range is kept in
 *     anchor_dmin / anchor_dmax;
 *   - an exact string (the better of exb and exm) or a character map is
 *     installed, together with its sub-anchors.
 *
 * NOTE(review): error handling after optimize_node_left(), the branch taken
 * when the map beats the exact string, and the return value are not visible
 * in this excerpt.
 */
5437set_optimize_info_from_tree(Node* node, regex_t* reg,
ScanEnv* scan_env)
5445 env.options = reg->options;
5446 env.case_fold_flag = reg->case_fold_flag;
5447 env.scan_env = scan_env;
5448 clear_mml(&env.mmd);
5450 r = optimize_node_left(node, &opt, &env);
5453 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5454 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5455 ANCHOR_LOOK_BEHIND);
/* The multiline any-char-star anchor is dropped when a look-behind or a
 * negative look-ahead is among the left anchors. */
5457 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5458 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5460 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5461 ANCHOR_PREC_READ_NOT);
5463 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5464 reg->anchor_dmin = opt.len.min;
5465 reg->anchor_dmax = opt.len.max;
/* Prefer an exact string unless the map compares as better. */
5468 if (opt.exb.len > 0 || opt.exm.len > 0) {
5469 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5470 if (opt.map.value > 0 &&
5471 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5475 r = set_optimize_exact_info(reg, &opt.exb);
5476 set_sub_anchor(reg, &opt.exb.anc);
5479 else if (opt.map.value > 0) {
5481 set_optimize_map_info(reg, &opt.map);
5482 set_sub_anchor(reg, &opt.map.anc);
/* Sub-anchors taken from the overall anchors; the end-line bit is used only
 * when the pattern's max length is 0. */
5485 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5486 if (opt.len.max == 0)
5487 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5490#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5491 print_optimize_info(stderr, reg);
/*
 * clear_optimize_info: reset the optimization fields of `reg`.
 *
 * The strategy becomes ONIG_OPTIMIZE_NONE, the anchor distances, the
 * sub-anchor and the threshold length become 0, and the exact-string
 * pointers become NULL.
 *
 * NOTE(review): lines 5500 and 5506 are not visible in this excerpt;
 * whether reg->exact is released before being cleared cannot be confirmed
 * from here.
 */
5497clear_optimize_info(regex_t* reg)
5499 reg->optimize = ONIG_OPTIMIZE_NONE;
5501 reg->anchor_dmin = 0;
5502 reg->anchor_dmax = 0;
5503 reg->sub_anchor = 0;
5504 reg->exact_end = (UChar* )NULL;
5505 reg->threshold_len = 0;
5507 reg->exact = (UChar* )NULL;
/*
 * print_enc_string: debug helper that writes the pattern s..end to fp
 * between "\nPATTERN: /" and "/ (<encoding name>)\n".
 *
 * For encodings whose minimum character length exceeds one byte the
 * pattern is walked code point by code point; a code point is written
 * either as " 0x%04x " or as a raw character (the test choosing between the
 * two is not visible here). For other encodings the bytes are written
 * directly.
 */
5512static void print_enc_string(FILE* fp, OnigEncoding enc,
5513 const UChar *s,
const UChar *end)
5515 fprintf(fp,
"\nPATTERN: /");
5517 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5523 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5525 fprintf(fp,
" 0x%04x ", (
int )code);
5528 fputc((
int )code, fp);
5531 p += enclen(enc, p, end);
5536 fputc((
int )*s, fp);
5541 fprintf(fp,
"/ (%s)\n", enc->name);
5545#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
/*
 * print_distance_range: debug helper that writes the distances a and b to
 * f.
 *
 * Each finite value is printed as "(<value>)" with PRIuPTR.
 * ONIG_INFINITE_DISTANCE is handled separately; what is printed for it and
 * between the two values is not visible in this excerpt.
 */
5547print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5549 if (a == ONIG_INFINITE_DISTANCE)
5552 fprintf(f,
"(%"PRIuPTR
")", a);
5556 if (b == ONIG_INFINITE_DISTANCE)
5559 fprintf(f,
"(%"PRIuPTR
")", b);
/*
 * print_anchor: debug helper that writes the names of the anchor bits set
 * in `anchor` to f, in a fixed order: begin-buf, begin-line, begin-pos,
 * end-buf, semi-end-buf, end-line, anychar-star, anychar-star-ml.
 *
 * From the second name on, ", " is written first when q is non-zero.
 *
 * NOTE(review): the declaration and updates of q, and any surrounding
 * bracket output, are not visible in this excerpt.
 */
5563print_anchor(FILE* f,
int anchor)
5569 if (anchor & ANCHOR_BEGIN_BUF) {
5570 fprintf(f,
"begin-buf");
5573 if (anchor & ANCHOR_BEGIN_LINE) {
5574 if (q) fprintf(f,
", ");
5576 fprintf(f,
"begin-line");
5578 if (anchor & ANCHOR_BEGIN_POSITION) {
5579 if (q) fprintf(f,
", ");
5581 fprintf(f,
"begin-pos");
5583 if (anchor & ANCHOR_END_BUF) {
5584 if (q) fprintf(f,
", ");
5586 fprintf(f,
"end-buf");
5588 if (anchor & ANCHOR_SEMI_END_BUF) {
5589 if (q) fprintf(f,
", ");
5591 fprintf(f,
"semi-end-buf");
5593 if (anchor & ANCHOR_END_LINE) {
5594 if (q) fprintf(f,
", ");
5596 fprintf(f,
"end-line");
5598 if (anchor & ANCHOR_ANYCHAR_STAR) {
5599 if (q) fprintf(f,
", ");
5601 fprintf(f,
"anychar-star");
5603 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5604 if (q) fprintf(f,
", ");
5605 fprintf(f,
"anychar-star-ml");
/*
 * print_optimize_info: debug helper that dumps the optimization state of
 * `reg` to f.
 *
 * Output: the strategy name (looked up in on[] by reg->optimize), the
 * anchors, the anchor distance range when an end-buffer anchor is set, and,
 * when an optimization is active, the sub-anchors followed by either the
 * exact string with its length or the map with its entry count and members.
 *
 * NOTE(review): line 5615 of the on[] initializer is not visible, so the
 * name table shown here may be incomplete.
 */
5612print_optimize_info(FILE* f, regex_t* reg)
5614 static const char* on[] = {
"NONE",
"EXACT",
"EXACT_BM",
"EXACT_BM_NOT_REV",
5616 "EXACT_BM_IC",
"EXACT_BM_NOT_REV_IC" };
5618 fprintf(f,
"optimize: %s\n", on[reg->optimize]);
5619 fprintf(f,
" anchor: "); print_anchor(f, reg->anchor);
5620 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5621 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5624 if (reg->optimize) {
5625 fprintf(f,
" sub anchor: "); print_anchor(f, reg->sub_anchor);
/* Exact string: its bytes, then its length. */
5632 fprintf(f,
"exact: [");
5633 for (p = reg->exact; p < reg->exact_end; p++) {
5636 fprintf(f,
"]: length: %"PRIdPTR
"\n", (reg->exact_end - reg->exact));
/* Map: count the non-zero entries, then list them; the print test applies
 * only to single-byte encodings, other entries are written as numbers. */
5638 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5641 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5642 if (reg->map[i]) n++;
5644 fprintf(f,
"map: n=%d\n", n);
5648 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5649 if (reg->map[i] != 0) {
5650 if (c > 0) fputs(
", ", f);
5652 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5653 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5656 fprintf(f,
"%d", i);
/*
 * onig_free_body: release what a regex owns, without freeing `reg` itself.
 *
 * A NULL reg is ignored. The visible lines free the two int maps and the
 * repeat-range table, release the chained regex through onig_free() and,
 * with USE_NAMED_GROUP, the name table.
 *
 * NOTE(review): lines 5670-5671 are not visible; other buffers are
 * presumably released there.
 */
5667onig_free_body(regex_t* reg)
5669 if (IS_NOT_NULL(reg)) {
5672 xfree(reg->int_map);
5673 xfree(reg->int_map_backward);
5674 xfree(reg->repeat_range);
5675 onig_free(reg->chain);
5677#ifdef USE_NAMED_GROUP
5678 onig_names_free(reg);
/*
 * onig_free: release a regex. A NULL reg is ignored; otherwise its contents
 * are released with onig_free_body().
 *
 * NOTE(review): the statement freeing `reg` itself is not visible in this
 * excerpt.
 */
5684onig_free(regex_t* reg)
5686 if (IS_NOT_NULL(reg)) {
5687 onig_free_body(reg);
/*
 * dup_copy: duplicate `size` bytes starting at ptr into a new buffer.
 *
 * The bytes are copied only when the new buffer is non-NULL.
 *
 * NOTE(review): the allocation of newptr and the return statement are not
 * visible in this excerpt; callers test the result for NULL.
 */
5693dup_copy(
const void *ptr,
size_t size)
5696 if (IS_NOT_NULL(newptr)) {
5697 memcpy(newptr, ptr, size);
/*
 * onig_reg_copy: deep-copy the regex `oreg` into a newly allocated regex_t
 * stored through `nreg`.
 *
 * Nothing is copied for a NULL oreg. Returns ONIGERR_MEMORY when the
 * regex_t itself or any owned buffer cannot be allocated; on such a failure
 * the buffers duplicated so far are released in reverse order through the
 * err_* labels.
 *
 * COPY_FAILED(mem, size) replaces reg->mem with a private duplicate of
 * itself and is true when that duplication fails.
 *
 * NOTE(review): the statement that first copies *oreg into *reg, some of
 * the goto targets and the success return are not visible in this excerpt.
 */
5703onig_reg_copy(regex_t** nreg, regex_t* oreg)
5705 if (IS_NOT_NULL(oreg)) {
5706 regex_t *reg = *nreg = (regex_t* )
xmalloc(
sizeof(regex_t));
5707 if (IS_NULL(reg))
return ONIGERR_MEMORY;
5711# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
/* The exact string: exact_end must be re-based onto the new buffer. */
5713 if (IS_NOT_NULL(reg->exact)) {
5714 size_t exact_size = reg->exact_end - reg->exact;
5715 if (COPY_FAILED(exact, exact_size))
5717 (reg)->exact_end = (reg)->exact + exact_size;
5720 if (IS_NOT_NULL(reg->int_map)) {
5721 if (COPY_FAILED(int_map,
sizeof(
int) * ONIG_CHAR_TABLE_SIZE))
5724 if (IS_NOT_NULL(reg->int_map_backward)) {
5725 if (COPY_FAILED(int_map_backward,
sizeof(
int) * ONIG_CHAR_TABLE_SIZE))
5726 goto err_int_map_backward;
5728 if (IS_NOT_NULL(reg->p)) {
5729 if (COPY_FAILED(p, reg->alloc))
5732 if (IS_NOT_NULL(reg->repeat_range)) {
5733 if (COPY_FAILED(repeat_range, reg->repeat_range_alloc *
sizeof(
OnigRepeatRange)))
5734 goto err_repeat_range;
5736 if (IS_NOT_NULL(reg->name_table)) {
5737 if (onig_names_copy(reg, oreg))
5738 goto err_name_table;
/* Copy the chained regex recursively. The callee stores the new copy
 * through its first argument, so the address of the chain slot is passed. */
5740 if (IS_NOT_NULL(reg->chain)) {
5741 if (onig_reg_copy(&reg->chain, reg->chain))
/* Failure path: undo the duplications in reverse order. */
5748 onig_names_free(reg);
5750 xfree(reg->repeat_range);
5754 xfree(reg->int_map_backward);
5755 err_int_map_backward:
5756 xfree(reg->int_map);
5761 return ONIGERR_MEMORY;
/*
 * onig_memsize: estimate the memory held by a regex.
 *
 * Returns 0 for a NULL reg. Otherwise the total starts at sizeof(regex_t)
 * and adds the compiled buffer (reg->alloc), the exact string, both int
 * maps, the repeat-range table and, recursively, the chained regex.
 *
 * NOTE(review): the final return is not visible in this excerpt.
 */
5768onig_memsize(
const regex_t *reg)
5770 size_t size =
sizeof(regex_t);
5771 if (IS_NULL(reg))
return 0;
5772 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5773 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5774 if (IS_NOT_NULL(reg->int_map)) size +=
sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5775 if (IS_NOT_NULL(reg->int_map_backward)) size +=
sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5776 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc *
sizeof(
OnigRepeatRange);
5777 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
/*
 * onig_region_memsize: estimate the memory held by a match region.
 *
 * Returns 0 for a NULL regs. Otherwise the total is the size of the region
 * struct plus, per allocated slot, one beg element and one end element.
 *
 * NOTE(review): the final return is not visible in this excerpt.
 */
5783onig_region_memsize(
const OnigRegion *regs)
5785 size_t size =
sizeof(*regs);
5786 if (IS_NULL(regs))
return 0;
5787 size += regs->allocated * (
sizeof(*regs->beg) +
sizeof(*regs->end));
/* REGEX_TRANSFER: release what `to` currently owns, then overwrite it with
 * a bitwise copy of `from`. The rest of the macro is not visible in this
 * excerpt. */
5792#define REGEX_TRANSFER(to,from) do {\
5793 onig_free_body(to);\
5794 xmemcpy(to, from, sizeof(regex_t));\
/* onig_transfer: function wrapper around REGEX_TRANSFER. */
5800onig_transfer(regex_t* to, regex_t* from)
5802 REGEX_TRANSFER(to, from);
5806#ifdef ONIG_DEBUG_COMPILE
5807static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5809#ifdef ONIG_DEBUG_PARSE_TREE
5810static void print_tree(FILE* f, Node* node);
/*
 * onig_compile: compile a pattern without source-location information.
 *
 * Forwards to onig_compile_ruby() with a NULL source file and line 0.
 */
5815onig_compile(regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5818 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
/*
 * onig_compile_ruby / onig_compile: compile `pattern` into `reg`.
 *
 * Two alternative signature lines are shown; the preprocessor condition
 * selecting between them is not visible in this excerpt. The visible
 * stages are:
 *   1. size the code buffer from the pattern length on first use;
 *   2. parse the pattern into a tree;
 *   3. with named groups, either disable unnamed captures or check numbered
 *      references;
 *   4. resolve subexpression calls and check their recursion;
 *   5. run setup_tree() and derive the backtrack memory flags;
 *   6. optionally run the combination-explosion check;
 *   7. compute search optimizations, compile the tree, append OP_END and
 *      fix up call addresses;
 *   8. choose the stack pop level, free the tree and shrink the buffer.
 * On failure the scan error range is reported through einfo and the tree
 * and dynamic memory-node table are released.
 *
 * NOTE(review): labels, several `else` lines and the return statements are
 * not visible in this excerpt.
 */
5824onig_compile_ruby(regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5825 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
5828onig_compile(regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5832#define COMPILE_INIT_SIZE 20
5835 OnigDistance init_size;
5838#ifdef USE_SUBEXP_CALL
5842 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5845 scan_env.sourcefile = sourcefile;
5846 scan_env.sourceline = sourceline;
5850 print_enc_string(stderr, reg->enc, pattern, pattern_end);
/* First use of this regex: start with twice the pattern length, or
 * COMPILE_INIT_SIZE for an empty pattern. */
5853 if (reg->alloc == 0) {
5854 init_size = (pattern_end - pattern) * 2;
5855 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5856 r = BBUF_INIT(reg, init_size);
5857 if (r != 0)
goto end;
5863 reg->num_repeat = 0;
5864 reg->num_null_check = 0;
5865 reg->repeat_range_alloc = 0;
5867#ifdef USE_COMBINATION_EXPLOSION_CHECK
5868 reg->num_comb_exp_check = 0;
5871 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5872 if (r != 0)
goto err;
5874#ifdef ONIG_DEBUG_PARSE_TREE
5876 fprintf(stderr,
"ORIGINAL PARSE TREE:\n");
5877 print_tree(stderr, root);
/* Named groups present, the syntax captures only named groups and the
 * capture-group option is off: unnamed groups stop capturing when the
 * counts differ; the other visible arm checks numbered references. */
5881#ifdef USE_NAMED_GROUP
5883 if (scan_env.num_named > 0 &&
5884 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5885 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5886 if (scan_env.num_named != scan_env.num_mem)
5887 r = disable_noname_group_capture(&root, reg, &scan_env);
5889 r = numbered_ref_check(root);
5891 if (r != 0)
goto err;
/* Subexpression calls: prepare the unset-address list, bind the calls and
 * check recursion. Only a negative result of the first recursion check is
 * treated as an error. */
5895#ifdef USE_SUBEXP_CALL
5896 if (scan_env.num_call > 0) {
5897 r = unset_addr_list_init(&uslist, scan_env.num_call);
5898 if (r != 0)
goto err;
5899 scan_env.unset_addr_list = &uslist;
5900 r = setup_subexp_call(root, &scan_env);
5901 if (r != 0)
goto err_unset;
5902 r = subexp_recursive_check_trav(root, &scan_env);
5903 if (r < 0)
goto err_unset;
5904 r = subexp_inf_recursive_check_trav(root, &scan_env);
5905 if (r != 0)
goto err_unset;
5907 reg->num_call = scan_env.num_call;
5913 r = setup_tree(root, reg, 0, &scan_env);
5914 if (r != 0)
goto err_unset;
5916#ifdef ONIG_DEBUG_PARSE_TREE
5917 print_tree(stderr, root);
/* Backtrack memory flags: capture-history groups are always included. */
5920 reg->capture_history = scan_env.capture_history;
5921 reg->bt_mem_start = scan_env.bt_mem_start;
5922 reg->bt_mem_start |= reg->capture_history;
5923 if (IS_FIND_CONDITION(reg->options))
5924 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5926 reg->bt_mem_end = scan_env.bt_mem_end;
5927 reg->bt_mem_end |= reg->capture_history;
/* Combination-explosion check: the count is reset to 0 when recursion is
 * present or when a group within comb_exp_max_regnum is back-referenced. */
5930#ifdef USE_COMBINATION_EXPLOSION_CHECK
5931 if (scan_env.backrefed_mem == 0
5932# ifdef USE_SUBEXP_CALL
5933 || scan_env.num_call == 0
5936 setup_comb_exp_check(root, 0, &scan_env);
5937# ifdef USE_SUBEXP_CALL
5938 if (scan_env.has_recursion != 0) {
5939 scan_env.num_comb_exp_check = 0;
5943 if (scan_env.comb_exp_max_regnum > 0) {
5945 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5946 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5947 scan_env.num_comb_exp_check = 0;
5954 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5957 clear_optimize_info(reg);
5958#ifndef ONIG_DONT_OPTIMIZE
5959 r = set_optimize_info_from_tree(root, reg, &scan_env);
5960 if (r != 0)
goto err_unset;
/* The dynamic memory-node table is released here and the pointer cleared. */
5963 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5964 xfree(scan_env.mem_nodes_dynamic);
5965 scan_env.mem_nodes_dynamic = (Node** )NULL;
5968 r = compile_tree(root, reg);
5970 r = add_opcode(reg, OP_END);
5971#ifdef USE_SUBEXP_CALL
5972 if (scan_env.num_call > 0) {
5973 r = unset_addr_list_fix(&uslist, reg);
5974 unset_addr_list_end(&uslist);
/* Stack pop level: ALL when repeats or end-memory tracking exist, MEM_START
 * when only start-memory tracking exists, FREE otherwise (the `else` lines
 * are not visible). */
5979 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5980 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5982 if (reg->bt_mem_start != 0)
5983 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5985 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5988#ifdef USE_SUBEXP_CALL
5989 else if (scan_env.num_call > 0) {
5990 unset_addr_list_end(&uslist);
5993 onig_node_free(root);
5995#ifdef ONIG_DEBUG_COMPILE
5996# ifdef USE_NAMED_GROUP
5997 onig_print_names(stderr, reg);
5999 print_compiled_byte_code_list(stderr, reg);
6003 onig_reg_resize(reg);
/* Error path: release the call list, report the offending pattern range and
 * free the tree and memory-node table. */
6007#ifdef USE_SUBEXP_CALL
6008 if (scan_env.num_call > 0) {
6009 unset_addr_list_end(&uslist);
6013 if (IS_NOT_NULL(scan_env.error)) {
6014 if (IS_NOT_NULL(einfo)) {
6015 einfo->enc = scan_env.enc;
6016 einfo->par = scan_env.error;
6017 einfo->par_end = scan_env.error_end;
6021 onig_node_free(root);
6022 xfree(scan_env.mem_nodes_dynamic);
6027static int onig_inited = 0;
/*
 * onig_reg_init: validate the arguments and put `reg` into its initial
 * state.
 *
 * Visible error returns:
 *   - ONIGERR_INVALID_ARGUMENT (its condition is not visible here);
 *   - ONIGERR_DEFAULT_ENCODING_IS_NOT_SET for an undefined encoding;
 *   - ONIGERR_INVALID_COMBINATION_OF_OPTIONS when both
 *     ONIG_OPTION_DONT_CAPTURE_GROUP and ONIG_OPTION_CAPTURE_GROUP are set.
 *
 * The syntax's own options are OR-ed into `option`; with
 * ONIG_OPTION_NEGATE_SINGLELINE the ONIG_OPTION_SINGLELINE bit is cleared
 * afterwards. All owned pointers start as NULL.
 *
 * NOTE(review): the rest of the parameter list and the success return are
 * not visible in this excerpt.
 */
6030onig_reg_init(regex_t* reg, OnigOptionType option,
6031 OnigCaseFoldType case_fold_flag,
6038 return ONIGERR_INVALID_ARGUMENT;
6040 if (ONIGENC_IS_UNDEF(enc))
6041 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
6043 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
6044 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
6045 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
6048 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
6049 option |= syntax->options;
6050 option &= ~ONIG_OPTION_SINGLELINE;
6053 option |= syntax->options;
6056 (reg)->options = option;
6057 (reg)->syntax = syntax;
6058 (reg)->optimize = 0;
6059 (reg)->exact = (UChar* )NULL;
6060 (reg)->int_map = (
int* )NULL;
6061 (reg)->int_map_backward = (
int* )NULL;
6062 (reg)->chain = (regex_t* )NULL;
6064 (reg)->p = (UChar* )NULL;
6067 (reg)->name_table = (
void* )NULL;
6069 (reg)->case_fold_flag = case_fold_flag;
6071 (reg)->timelimit = 0;
/*
 * onig_new_without_alloc: initialise the caller-provided `reg` with
 * ONIGENC_CASE_FOLD_DEFAULT and then compile the pattern into it.
 *
 * NOTE(review): the check between the two calls and the return statement
 * are not visible in this excerpt.
 */
6077onig_new_without_alloc(regex_t* reg,
const UChar* pattern,
6078 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
6083 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6086 r = onig_compile(reg, pattern, pattern_end, einfo);
/*
 * onig_new: allocate a regex_t with xmalloc(), store it in *reg, and
 * delegate initialization and compilation to onig_new_without_alloc().
 * Returns ONIGERR_MEMORY when the allocation fails.
 * NOTE(review): what happens to *reg when `r` reports an error is on lines
 * not shown in this listing.
 */
6091onig_new(regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
6092 OnigOptionType option, OnigEncoding enc,
const OnigSyntaxType* syntax,
6095 *reg = (regex_t* )
xmalloc(
sizeof(regex_t));
6096 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
6098 int r = onig_new_without_alloc(*reg, pattern, pattern_end, option, enc, syntax, einfo);
/*
 * onig_initialize: library initialization entry point.
 * Both parameters are tagged ARG_UNUSED. The function tests onig_inited
 * first (the statement guarded by that test is not shown in this listing).
 */
6108onig_initialize(OnigEncoding encodings[] ARG_UNUSED,
int n ARG_UNUSED)
6116 if (onig_inited != 0)
/* MSVC builds with ONIG_DEBUG_MEMLEAK: set the CRT debug-heap flags
 * _CRTDBG_ALLOC_MEM_DF and _CRTDBG_LEAK_CHECK_DF. */
6121#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6122 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
/* Statistics builds: initialize the statistics module. */
6128#ifdef ONIG_DEBUG_STATISTICS
6129 onig_statistics_init();
/* Head of the end-call list: onig_add_end_call() points a new item's `next`
 * at it, and exec_end_call_list() walks it until it is null. */
6136static OnigEndCallListItemType* EndCallTop;
/*
 * onig_add_end_call: register `func` on the end-call list.
 * Allocates a list item with xmalloc(); if the allocation fails the function
 * returns without registering anything and without reporting an error.
 * The new item's `next` is set to the current EndCallTop.
 * NOTE(review): storing `func` in the item and updating EndCallTop are
 * presumably done on lines not shown in this listing.
 */
6138extern void onig_add_end_call(
void (*func)(
void))
6140 OnigEndCallListItemType* item;
6142 item = (OnigEndCallListItemType* )
xmalloc(
sizeof(*item));
6143 if (item == 0) return ;
6145 item->next = EndCallTop;
/*
 * exec_end_call_list: consume the end-call list.
 * While EndCallTop is non-null, fetch the head item's `func` and advance
 * EndCallTop to the next item, so the list is empty on exit.
 * NOTE(review): the call through `func` and the use of `prev` are on lines
 * not shown in this listing.
 */
6152exec_end_call_list(
void)
6154 OnigEndCallListItemType* prev;
6157 while (EndCallTop != 0) {
6158 func = EndCallTop->func;
6162 EndCallTop = EndCallTop->next;
6170 exec_end_call_list();
6172#ifdef ONIG_DEBUG_STATISTICS
6173 onig_print_statistics(stderr);
6176#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6177 _CrtDumpMemoryLeaks();
/*
 * onig_is_in_code_range: test whether `code` is covered by the range data
 * stored at `p`.
 *
 * The entry count `n` is read from `p` with GET_CODE_POINT, and the buffer is
 * then viewed as an array of OnigCodePoint. Entries are addressed as pairs:
 * data[i * 2] and data[i * 2 + 1] (low and high bound of range i, judging by
 * the comparisons below).
 *
 * Returns 1 when the search ends on an entry (low < n) whose first value is
 * <= code, otherwise 0.
 *
 * NOTE(review): the statements that move `low`/`high` inside the loop, and
 * any adjustment of `data` past the count word, are on lines not shown in
 * this listing.
 */
6186onig_is_in_code_range(
const UChar* p, OnigCodePoint code)
6188 OnigCodePoint n, *data;
6189 OnigCodePoint low, high, x;
6191 GET_CODE_POINT(n, p);
6192 data = (OnigCodePoint* )p;
/* Probe the midpoint of [low, high) and compare against that pair's second value. */
6195 for (low = 0, high = n; low < high; ) {
6196 x = (low + high) >> 1;
6197 if (code > data[x * 2 + 1])
6203 return ((low < n && code >= data[low * 2]) ? 1 : 0);
/*
 * onig_is_code_in_cc_len: character-class membership test for `code`, given
 * its encoded length `elen`.
 *
 * When elen > 1 or code >= SINGLE_BYTE_SIZE, the class's multibyte range
 * buffer (cc->mbuf) is consulted via onig_is_in_code_range(); a null mbuf is
 * handled separately (that branch body is not shown). The bitset lookup on
 * cc->bs at line 6220 is presumably the alternative for single-byte codes.
 * IS_NCCLASS_NOT(cc) is checked last; presumably it inverts `found` for a
 * negated class -- the guarded statement is not shown in this listing.
 */
6207onig_is_code_in_cc_len(
int elen, OnigCodePoint code,
CClassNode* cc)
6211 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6212 if (IS_NULL(cc->mbuf)) {
6216 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6220 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6223 if (IS_NCCLASS_NOT(cc))
/*
 * onig_is_code_in_cc: character-class membership test for `code` under
 * encoding `enc`. Determines an encoded length for the code point and
 * forwards to onig_is_code_in_cc_len().
 * Encodings whose minimum character length exceeds 1 take a separate branch
 * (its body is not shown in this listing); line 6238 asks the encoding for
 * the length via ONIGENC_CODE_TO_MBCLEN.
 */
6230onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code,
CClassNode* cc)
6234 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6238 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6240 return onig_is_code_in_cc_len(
len, code, cc);
/*
 * Operand-kind codes used in the third column of OnigOpInfo and returned by
 * op2arg_type(). onig_print_compiled_byte_code() decodes any opcode whose
 * kind is not ARG_SPECIAL generically, and ARG_SPECIAL opcodes one by one.
 * ARG_NON is used by the table as well; its definition is not shown in this
 * listing.
 */
6247# define ARG_SPECIAL -1
6249# define ARG_RELADDR 1
6250# define ARG_ABSADDR 2
6251# define ARG_LENGTH 3
6252# define ARG_MEMNUM 4
6253# define ARG_OPTION 5
6254# define ARG_STATE_CHECK 6
/*
 * OnigOpInfo: debug table with one entry per bytecode opcode, giving the
 * opcode value, its printable name and its operand kind (an ARG_* code).
 * The lookup loops over this table run while entry.opcode >= 0, so a
 * terminating entry with a negative opcode is presumably present on a line
 * not shown in this listing.
 */
6256OnigOpInfoType OnigOpInfo[] = {
6257 { OP_FINISH,
"finish", ARG_NON },
6258 { OP_END,
"end", ARG_NON },
6259 { OP_EXACT1,
"exact1", ARG_SPECIAL },
6260 { OP_EXACT2,
"exact2", ARG_SPECIAL },
6261 { OP_EXACT3,
"exact3", ARG_SPECIAL },
6262 { OP_EXACT4,
"exact4", ARG_SPECIAL },
6263 { OP_EXACT5,
"exact5", ARG_SPECIAL },
6264 { OP_EXACTN,
"exactn", ARG_SPECIAL },
6265 { OP_EXACTMB2N1,
"exactmb2-n1", ARG_SPECIAL },
6266 { OP_EXACTMB2N2,
"exactmb2-n2", ARG_SPECIAL },
6267 { OP_EXACTMB2N3,
"exactmb2-n3", ARG_SPECIAL },
6268 { OP_EXACTMB2N,
"exactmb2-n", ARG_SPECIAL },
6269 { OP_EXACTMB3N,
"exactmb3n" , ARG_SPECIAL },
6270 { OP_EXACTMBN,
"exactmbn", ARG_SPECIAL },
6271 { OP_EXACT1_IC,
"exact1-ic", ARG_SPECIAL },
6272 { OP_EXACTN_IC,
"exactn-ic", ARG_SPECIAL },
6273 { OP_CCLASS,
"cclass", ARG_SPECIAL },
6274 { OP_CCLASS_MB,
"cclass-mb", ARG_SPECIAL },
6275 { OP_CCLASS_MIX,
"cclass-mix", ARG_SPECIAL },
6276 { OP_CCLASS_NOT,
"cclass-not", ARG_SPECIAL },
6277 { OP_CCLASS_MB_NOT,
"cclass-mb-not", ARG_SPECIAL },
6278 { OP_CCLASS_MIX_NOT,
"cclass-mix-not", ARG_SPECIAL },
6279 { OP_ANYCHAR,
"anychar", ARG_NON },
6280 { OP_ANYCHAR_ML,
"anychar-ml", ARG_NON },
6281 { OP_ANYCHAR_STAR,
"anychar*", ARG_NON },
6282 { OP_ANYCHAR_ML_STAR,
"anychar-ml*", ARG_NON },
6283 { OP_ANYCHAR_STAR_PEEK_NEXT,
"anychar*-peek-next", ARG_SPECIAL },
6284 { OP_ANYCHAR_ML_STAR_PEEK_NEXT,
"anychar-ml*-peek-next", ARG_SPECIAL },
6285 { OP_WORD,
"word", ARG_NON },
6286 { OP_NOT_WORD,
"not-word", ARG_NON },
6287 { OP_WORD_BOUND,
"word-bound", ARG_NON },
6288 { OP_NOT_WORD_BOUND,
"not-word-bound", ARG_NON },
6289 { OP_WORD_BEGIN,
"word-begin", ARG_NON },
6290 { OP_WORD_END,
"word-end", ARG_NON },
6291 { OP_ASCII_WORD,
"ascii-word", ARG_NON },
6292 { OP_NOT_ASCII_WORD,
"not-ascii-word", ARG_NON },
6293 { OP_ASCII_WORD_BOUND,
"ascii-word-bound", ARG_NON },
6294 { OP_NOT_ASCII_WORD_BOUND,
"not-ascii-word-bound", ARG_NON },
6295 { OP_ASCII_WORD_BEGIN,
"ascii-word-begin", ARG_NON },
6296 { OP_ASCII_WORD_END,
"ascii-word-end", ARG_NON },
6297 { OP_BEGIN_BUF,
"begin-buf", ARG_NON },
6298 { OP_END_BUF,
"end-buf", ARG_NON },
6299 { OP_BEGIN_LINE,
"begin-line", ARG_NON },
6300 { OP_END_LINE,
"end-line", ARG_NON },
6301 { OP_SEMI_END_BUF,
"semi-end-buf", ARG_NON },
6302 { OP_BEGIN_POSITION,
"begin-position", ARG_NON },
6303 { OP_BACKREF1,
"backref1", ARG_NON },
6304 { OP_BACKREF2,
"backref2", ARG_NON },
6305 { OP_BACKREFN,
"backrefn", ARG_MEMNUM },
6306 { OP_BACKREFN_IC,
"backrefn-ic", ARG_SPECIAL },
6307 { OP_BACKREF_MULTI,
"backref_multi", ARG_SPECIAL },
6308 { OP_BACKREF_MULTI_IC,
"backref_multi-ic", ARG_SPECIAL },
6309 { OP_BACKREF_WITH_LEVEL,
"backref_at_level", ARG_SPECIAL },
6310 { OP_MEMORY_START_PUSH,
"mem-start-push", ARG_MEMNUM },
6311 { OP_MEMORY_START,
"mem-start", ARG_MEMNUM },
6312 { OP_MEMORY_END_PUSH,
"mem-end-push", ARG_MEMNUM },
6313 { OP_MEMORY_END_PUSH_REC,
"mem-end-push-rec", ARG_MEMNUM },
6314 { OP_MEMORY_END,
"mem-end", ARG_MEMNUM },
6315 { OP_MEMORY_END_REC,
"mem-end-rec", ARG_MEMNUM },
6316 { OP_SET_OPTION_PUSH,
"set-option-push", ARG_OPTION },
6317 { OP_SET_OPTION,
"set-option", ARG_OPTION },
6318 { OP_KEEP,
"keep", ARG_NON },
6319 { OP_FAIL,
"fail", ARG_NON },
6320 { OP_JUMP,
"jump", ARG_RELADDR },
6321 { OP_PUSH,
"push", ARG_RELADDR },
6322 { OP_POP,
"pop", ARG_NON },
6323 { OP_PUSH_OR_JUMP_EXACT1,
"push-or-jump-e1", ARG_SPECIAL },
6324 { OP_PUSH_IF_PEEK_NEXT,
"push-if-peek-next", ARG_SPECIAL },
6325 { OP_REPEAT,
"repeat", ARG_SPECIAL },
6326 { OP_REPEAT_NG,
"repeat-ng", ARG_SPECIAL },
6327 { OP_REPEAT_INC,
"repeat-inc", ARG_MEMNUM },
6328 { OP_REPEAT_INC_NG,
"repeat-inc-ng", ARG_MEMNUM },
6329 { OP_REPEAT_INC_SG,
"repeat-inc-sg", ARG_MEMNUM },
6330 { OP_REPEAT_INC_NG_SG,
"repeat-inc-ng-sg", ARG_MEMNUM },
6331 { OP_NULL_CHECK_START,
"null-check-start", ARG_MEMNUM },
6332 { OP_NULL_CHECK_END,
"null-check-end", ARG_MEMNUM },
6333 { OP_NULL_CHECK_END_MEMST,
"null-check-end-memst", ARG_MEMNUM },
6334 { OP_NULL_CHECK_END_MEMST_PUSH,
"null-check-end-memst-push", ARG_MEMNUM },
6335 { OP_PUSH_POS,
"push-pos", ARG_NON },
6336 { OP_POP_POS,
"pop-pos", ARG_NON },
6337 { OP_PUSH_POS_NOT,
"push-pos-not", ARG_RELADDR },
6338 { OP_FAIL_POS,
"fail-pos", ARG_NON },
6339 { OP_PUSH_STOP_BT,
"push-stop-bt", ARG_NON },
6340 { OP_POP_STOP_BT,
"pop-stop-bt", ARG_NON },
6341 { OP_LOOK_BEHIND,
"look-behind", ARG_SPECIAL },
6342 { OP_PUSH_LOOK_BEHIND_NOT,
"push-look-behind-not", ARG_SPECIAL },
6343 { OP_FAIL_LOOK_BEHIND_NOT,
"fail-look-behind-not", ARG_NON },
6344 { OP_PUSH_ABSENT_POS,
"push-absent-pos", ARG_NON },
6345 { OP_ABSENT,
"absent", ARG_RELADDR },
6346 { OP_ABSENT_END,
"absent-end", ARG_NON },
6347 { OP_CALL,
"call", ARG_ABSADDR },
6348 { OP_RETURN,
"return", ARG_NON },
6349 { OP_CONDITION,
"condition", ARG_SPECIAL },
6350 { OP_STATE_CHECK_PUSH,
"state-check-push", ARG_SPECIAL },
6351 { OP_STATE_CHECK_PUSH_OR_JUMP,
"state-check-push-or-jump", ARG_SPECIAL },
6352 { OP_STATE_CHECK,
"state-check", ARG_STATE_CHECK },
6353 { OP_STATE_CHECK_ANYCHAR_STAR,
"state-check-anychar*", ARG_STATE_CHECK },
6354 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6355 "state-check-anychar-ml*", ARG_STATE_CHECK },
6364 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6365 if (opcode == OnigOpInfo[i].opcode)
6366 return OnigOpInfo[i].name;
/*
 * op2arg_type: look up the operand kind (ARG_* code) of `opcode`.
 * Scans OnigOpInfo linearly until an entry with a negative opcode and returns
 * the arg_type of the first matching entry.
 * NOTE(review): the value returned when no entry matches is on a line not
 * shown in this listing.
 */
6372op2arg_type(
int opcode)
6376 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6377 if (opcode == OnigOpInfo[i].opcode)
6378 return OnigOpInfo[i].arg_type;
6383# ifdef ONIG_DEBUG_PARSE_TREE
/* Indent: write `indent` space characters to `f` (nothing when indent <= 0). */
6385Indent(FILE* f,
int indent)
6388 for (i = 0; i < indent; i++) putc(
' ', f);
/*
 * p_string: write `len` bytes starting at `s` to `f`, one byte at a time.
 * A non-positive `len` writes no bytes from `s`.
 * NOTE(review): any prefix output before the loop is on a line not shown in
 * this listing.
 */
6393p_string(FILE* f, ptrdiff_t
len, UChar* s)
6396 while (
len-- > 0) { fputc(*s++, f); }
/*
 * p_len_string: print a length-prefixed string operand.
 * Writes ":<len>:" and then len * mb_len bytes from `s`, i.e. `len`
 * characters of `mb_len` bytes each.
 */
6400p_len_string(FILE* f, LengthType
len,
int mb_len, UChar* s)
6402 int x =
len * mb_len;
6404 fprintf(f,
":%d:",
len);
6405 while (x-- > 0) { fputc(*s++, f); }
/*
 * onig_print_compiled_byte_code: print one bytecode instruction located at
 * `bp` to `f` in the form "[name<operands>".
 *
 * The opcode name comes from op2name() and the operand kind from
 * op2arg_type(). Opcodes whose kind is not ARG_SPECIAL are decoded
 * generically by kind; ARG_SPECIAL opcodes are decoded one opcode at a time.
 * When `nextp` is non-null it receives the position following the decoded
 * instruction.
 *
 * NOTE(review): this listing omits many original lines (the remaining
 * parameter, local declarations, most `case` labels, pointer advances and
 * closing braces), so the comments below describe only what is visible.
 *
 * NOTE(review): some operands are loaded through a casted pointer, e.g.
 * *((MemNumType* )bp), while others go through the GET_*_INC macros. If `bp`
 * can be unaligned for those types, the casted loads are a portability risk
 * -- confirm against the macro definitions.
 */
6409onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6416 StateCheckNumType scn;
6420 fprintf(f,
"[%s", op2name(*bp));
6421 arg_type = op2arg_type(*bp);
/* Generic decoding, selected by operand kind. */
6422 if (arg_type != ARG_SPECIAL) {
/* Relative address: printed with an explicit "+" when non-negative. */
6428 GET_RELADDR_INC(addr, bp);
6429 fprintf(f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6432 GET_ABSADDR_INC(addr, bp);
6433 fprintf(f,
":(%d)", addr);
6436 GET_LENGTH_INC(
len, bp);
6437 fprintf(f,
":%d",
len);
6440 mem = *((MemNumType* )bp);
6442 fprintf(f,
":%d", mem);
6446 OnigOptionType option = *((OnigOptionType* )bp);
6448 fprintf(f,
":%d", option);
6452 case ARG_STATE_CHECK:
6453 scn = *((StateCheckNumType* )bp);
6454 bp += SIZE_STATE_CHECK_NUM;
6455 fprintf(f,
":%d", scn);
/* Per-opcode decoding (presumably the ARG_SPECIAL branch; the branch keyword
 * and switch header are not shown). Fixed-size literals are printed with
 * p_string, length-prefixed ones with p_len_string. */
6462 case OP_ANYCHAR_STAR_PEEK_NEXT:
6463 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6464 p_string(f, 1, bp++);
break;
6466 p_string(f, 2, bp); bp += 2;
break;
6468 p_string(f, 3, bp); bp += 3;
break;
6470 p_string(f, 4, bp); bp += 4;
break;
6472 p_string(f, 5, bp); bp += 5;
break;
6474 GET_LENGTH_INC(
len, bp);
6475 p_len_string(f,
len, 1, bp);
6480 p_string(f, 2, bp); bp += 2;
break;
6482 p_string(f, 4, bp); bp += 4;
break;
6484 p_string(f, 6, bp); bp += 6;
break;
6486 GET_LENGTH_INC(
len, bp);
6487 p_len_string(f,
len, 2, bp);
6491 GET_LENGTH_INC(
len, bp);
6492 p_len_string(f,
len, 3, bp);
/* Operand carrying both a per-character byte length and a character count. */
6499 GET_LENGTH_INC(mb_len, bp);
6500 GET_LENGTH_INC(
len, bp);
6501 fprintf(f,
":%d:%d:", mb_len,
len);
6503 while (n-- > 0) { fputc(*bp++, f); }
/* Single character whose byte length is taken from the encoding. */
6508 len = enclen(enc, bp, bpend);
6509 p_string(f,
len, bp);
6513 GET_LENGTH_INC(
len, bp);
6514 p_len_string(f,
len, 1, bp);
/* Character-class operands: only the count from bitset_on_num() is printed
 * for the bitset, not its contents. */
6519 n = bitset_on_num((BitSetRef )bp);
6521 fprintf(f,
":%d", n);
6525 n = bitset_on_num((BitSetRef )bp);
6527 fprintf(f,
":%d", n);
6531 case OP_CCLASS_MB_NOT:
6532 GET_LENGTH_INC(
len, bp);
6534# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6537 GET_CODE_POINT(code, q);
6539 fprintf(f,
":%d:%d", (
int )code,
len);
6543 case OP_CCLASS_MIX_NOT:
6544 n = bitset_on_num((BitSetRef )bp);
6546 GET_LENGTH_INC(
len, bp);
6548# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6551 GET_CODE_POINT(code, q);
6553 fprintf(f,
":%d:%d:%d", n, (
int )code,
len);
6556 case OP_BACKREFN_IC:
6557 mem = *((MemNumType* )bp);
6559 fprintf(f,
":%d", mem);
/* Multi-target back-references: a count followed by that many group numbers,
 * printed comma-separated. */
6562 case OP_BACKREF_MULTI_IC:
6563 case OP_BACKREF_MULTI:
6565 GET_LENGTH_INC(
len, bp);
6566 for (i = 0; i <
len; i++) {
6567 GET_MEMNUM_INC(mem, bp);
6568 if (i > 0) fputs(
", ", f);
6569 fprintf(f,
"%d", mem);
6573 case OP_BACKREF_WITH_LEVEL:
6575 OnigOptionType option;
6578 GET_OPTION_INC(option, bp);
6579 fprintf(f,
":%d", option);
6580 GET_LENGTH_INC(level, bp);
6581 fprintf(f,
":%d", level);
6584 GET_LENGTH_INC(
len, bp);
6585 for (i = 0; i <
len; i++) {
6586 GET_MEMNUM_INC(mem, bp);
6587 if (i > 0) fputs(
", ", f);
6588 fprintf(f,
"%d", mem);
6596 mem = *((MemNumType* )bp);
6598 addr = *((RelAddrType* )bp);
6600 fprintf(f,
":%d:%d", mem, addr);
6604 case OP_PUSH_OR_JUMP_EXACT1:
6605 case OP_PUSH_IF_PEEK_NEXT:
6606 addr = *((RelAddrType* )bp);
6608 fprintf(f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6613 case OP_LOOK_BEHIND:
6614 GET_LENGTH_INC(
len, bp);
6615 fprintf(f,
":%d",
len);
6618 case OP_PUSH_LOOK_BEHIND_NOT:
6619 GET_RELADDR_INC(addr, bp);
6620 GET_LENGTH_INC(
len, bp);
6621 fprintf(f,
":%d:(%s%d)",
len, (addr >= 0) ?
"+" :
"", addr);
6624 case OP_STATE_CHECK_PUSH:
6625 case OP_STATE_CHECK_PUSH_OR_JUMP:
6626 scn = *((StateCheckNumType* )bp);
6627 bp += SIZE_STATE_CHECK_NUM;
6628 addr = *((RelAddrType* )bp);
6630 fprintf(f,
":%d:(%s%d)", scn, (addr >= 0) ?
"+" :
"", addr);
6634 GET_MEMNUM_INC(mem, bp);
6635 GET_RELADDR_INC(addr, bp);
6636 fprintf(f,
":%d:(%s%d)", mem, (addr >= 0) ?
"+" :
"", addr);
/* Diagnostic for an unhandled opcode; it goes to stderr rather than to `f`. */
6640 fprintf(stderr,
"onig_print_compiled_byte_code: undefined code %d\n",
/* Report where the next instruction begins. */
6645 if (nextp) *nextp = bp;
6648# ifdef ONIG_DEBUG_COMPILE
/*
 * print_compiled_byte_code_list: dump the compiled bytecode of `reg` to `f`.
 * Prints the code length (reg->used), then for each instruction its byte
 * offset from reg->p followed by the instruction itself via
 * onig_print_compiled_byte_code(), which also advances `bp`.
 * The offset is printed either after a newline or after a space; the
 * condition choosing between the two is on lines not shown in this listing.
 *
 * NOTE(review): `bp - reg->p` is a pointer difference (ptrdiff_t) printed
 * with "%ld". That matches only where ptrdiff_t is `long`; "%td" is the
 * portable conversion.
 */
6650print_compiled_byte_code_list(FILE* f, regex_t* reg)
6654 UChar* end = reg->p + reg->used;
6656 fprintf(f,
"code length: %d", reg->used);
6662 fprintf(f,
"\n%ld:", bp - reg->p);
6664 fprintf(f,
" %ld:", bp - reg->p);
6665 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6672# ifdef ONIG_DEBUG_PARSE_TREE
/*
 * print_indent_tree: recursively print a debugging dump of the parse tree
 * rooted at `node` to `f`. Children are printed with `indent + add`.
 * Each node is shown as "<kind:ADDRESS>" followed by kind-specific details.
 *
 * NOTE(review): this listing omits many original lines (local declarations,
 * the switch header and most `case` labels, `break`s and closing braces), so
 * the comments below describe only what is visible.
 *
 * NOTE(review): node addresses are printed with PRIxPTR, which is specified
 * for uintptr_t, but the argument is cast to intptr_t. A cast to uintptr_t
 * would match the conversion exactly.
 */
6674print_indent_tree(FILE* f, Node* node,
int indent)
6676 int i,
type, container_p = 0;
/* A null node is reported in the output. */
6681 if (IS_NULL(node)) {
6682 fprintf(f,
"ERROR: null node!!!\n");
/* List / alternation: print the header, then every element (NCAR), following
 * the NCDR chain. A chained node of a different type is reported as an error. */
6690 if (NTYPE(node) == NT_LIST)
6691 fprintf(f,
"<list:%"PRIxPTR
">\n", (intptr_t )node);
6693 fprintf(f,
"<alt:%"PRIxPTR
">\n", (intptr_t )node);
6695 print_indent_tree(f, NCAR(node), indent + add);
6696 while (IS_NOT_NULL(node = NCDR(node))) {
6697 if (NTYPE(node) != type) {
6698 fprintf(f,
"ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6701 print_indent_tree(f, NCAR(node), indent + add);
/* String node: bytes in 0x20..0x7e are tested separately from the rest,
 * which are printed as " 0xNN". */
6706 fprintf(f,
"<string%s:%"PRIxPTR
">",
6707 (NSTRING_IS_RAW(node) ?
"-raw" :
""), (intptr_t )node);
6708 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6709 if (*p >= 0x20 && *p < 0x7f)
6712 fprintf(f,
" 0x%02x", *p);
/* Character class: "not " for a negated class; when a multibyte buffer is
 * present, its first code point (the count) is printed and the rest is
 * printed as pairs "from-to". */
6718 fprintf(f,
"<cclass:%"PRIxPTR
">", (intptr_t )node);
6719 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(
"not ", f);
6720 if (NCCLASS(node)->mbuf) {
6721 BBuf* bbuf = NCCLASS(node)->mbuf;
6722 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6723 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6724 fprintf(f,
"%d", *data++);
6725 for (; data < end; data+=2) {
6727 fprintf(f,
"%04x-%04x", data[0], data[1]);
6733 fprintf(f,
"<ctype:%"PRIxPTR
"> ", (intptr_t )node);
6734 switch (NCTYPE(node)->ctype) {
6735 case ONIGENC_CTYPE_WORD:
6736 if (NCTYPE(node)->not != 0)
6737 fputs(
"not word", f);
6743 fprintf(f,
"ERROR: undefined ctype.\n");
6749 fprintf(f,
"<anychar:%"PRIxPTR
">", (intptr_t )node);
/* Anchor: print the anchor kind. The prec-read and look-behind kinds also
 * set container_p so that their target is printed at the end. */
6753 fprintf(f,
"<anchor:%"PRIxPTR
"> ", (intptr_t )node);
6754 switch (NANCHOR(node)->type) {
6755 case ANCHOR_BEGIN_BUF: fputs(
"begin buf", f);
break;
6756 case ANCHOR_END_BUF: fputs(
"end buf", f);
break;
6757 case ANCHOR_BEGIN_LINE: fputs(
"begin line", f);
break;
6758 case ANCHOR_END_LINE: fputs(
"end line", f);
break;
6759 case ANCHOR_SEMI_END_BUF: fputs(
"semi end buf", f);
break;
6760 case ANCHOR_BEGIN_POSITION: fputs(
"begin position", f);
break;
6762 case ANCHOR_WORD_BOUND: fputs(
"word bound", f);
break;
6763 case ANCHOR_NOT_WORD_BOUND: fputs(
"not word bound", f);
break;
6764# ifdef USE_WORD_BEGIN_END
6765 case ANCHOR_WORD_BEGIN: fputs(
"word begin", f);
break;
6766 case ANCHOR_WORD_END: fputs(
"word end", f);
break;
6768 case ANCHOR_PREC_READ: fputs(
"prec read", f); container_p = TRUE;
break;
6769 case ANCHOR_PREC_READ_NOT: fputs(
"prec read not", f); container_p = TRUE;
break;
6770 case ANCHOR_LOOK_BEHIND: fputs(
"look_behind", f); container_p = TRUE;
break;
6771 case ANCHOR_LOOK_BEHIND_NOT: fputs(
"look_behind_not",f); container_p = TRUE;
break;
6772 case ANCHOR_KEEP: fputs(
"keep",f);
break;
6775 fprintf(f,
"ERROR: undefined anchor type.\n");
/* Back-reference: comma-separated list of the br->back_num referenced entries. */
6785 fprintf(f,
"<backref:%"PRIxPTR
">", (intptr_t )node);
6786 for (i = 0; i < br->back_num; i++) {
6787 if (i > 0) fputs(
", ", f);
6788 fprintf(f,
"%d", p[i]);
6793# ifdef USE_SUBEXP_CALL
6797 fprintf(f,
"<call:%"PRIxPTR
">", (intptr_t )node);
6798 p_string(f, cn->name_end - cn->name, cn->name);
/* Quantifier: "{lower,upper}" with a trailing "?" when not greedy, then the
 * quantified target as a child. */
6804 fprintf(f,
"<quantifier:%"PRIxPTR
">{%d,%d}%s\n", (intptr_t )node,
6805 NQTFR(node)->lower, NQTFR(node)->upper,
6806 (NQTFR(node)->greedy ?
"" :
"?"));
6807 print_indent_tree(f, NQTFR(node)->target, indent + add);
/* Enclosure: print the enclosure kind and its parameter, then the target as
 * a child. */
6811 fprintf(f,
"<enclose:%"PRIxPTR
"> ", (intptr_t )node);
6812 switch (NENCLOSE(node)->type) {
6813 case ENCLOSE_OPTION:
6814 fprintf(f,
"option:%d", NENCLOSE(node)->option);
6816 case ENCLOSE_MEMORY:
6817 fprintf(f,
"memory:%d", NENCLOSE(node)->regnum);
6819 case ENCLOSE_STOP_BACKTRACK:
6820 fprintf(f,
"stop-bt");
6822 case ENCLOSE_CONDITION:
6823 fprintf(f,
"condition:%d", NENCLOSE(node)->regnum);
6825 case ENCLOSE_ABSENT:
6826 fprintf(f,
"absent");
6833 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6837 fprintf(f,
"print_indent_tree: undefined node type %d\n", NTYPE(node));
6841 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
/* Container anchors: print the anchored sub-tree as a child. */
6845 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
/* print_tree: dump the parse tree rooted at `node` to `f`, starting at indent 0. */
6851print_tree(FILE* f, Node* node)
6853 print_indent_tree(f, node, 0);
/*
 * Reference notes for names used in this listing:
 *
 *   #define xfree        Old name of ruby_xfree.
 *   #define xrealloc     Old name of ruby_xrealloc.
 *   #define xmalloc      Old name of ruby_xmalloc.
 *   int len              Length of the buffer.
 *   VALUE type(ANYARGS)  ANYARGS-ed function type.
 */