00001 #ifndef ONIGURUMA_REGPARSE_H
00002 #define ONIGURUMA_REGPARSE_H
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include "regint.h"
00033
00034 #if defined __GNUC__ && __GNUC__ >= 4
00035 #pragma GCC visibility push(default)
00036 #endif
00037
00038
00039 #define NT_STR 0
00040 #define NT_CCLASS 1
00041 #define NT_CTYPE 2
00042 #define NT_CANY 3
00043 #define NT_BREF 4
00044 #define NT_QTFR 5
00045 #define NT_ENCLOSE 6
00046 #define NT_ANCHOR 7
00047 #define NT_LIST 8
00048 #define NT_ALT 9
00049 #define NT_CALL 10
00050
00051
00052 #define NTYPE2BIT(type) (1<<(type))
00053
00054 #define BIT_NT_STR NTYPE2BIT(NT_STR)
00055 #define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
00056 #define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
00057 #define BIT_NT_CANY NTYPE2BIT(NT_CANY)
00058 #define BIT_NT_BREF NTYPE2BIT(NT_BREF)
00059 #define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
00060 #define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
00061 #define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
00062 #define BIT_NT_LIST NTYPE2BIT(NT_LIST)
00063 #define BIT_NT_ALT NTYPE2BIT(NT_ALT)
00064 #define BIT_NT_CALL NTYPE2BIT(NT_CALL)
00065
00066 #define IS_NODE_TYPE_SIMPLE(type) \
00067 ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
00068 BIT_NT_CANY | BIT_NT_BREF)) != 0)
00069
00070 #define NTYPE(node) ((node)->u.base.type)
00071 #define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype)
00072
00073 #define NSTR(node) (&((node)->u.str))
00074 #define NCCLASS(node) (&((node)->u.cclass))
00075 #define NCTYPE(node) (&((node)->u.ctype))
00076 #define NBREF(node) (&((node)->u.bref))
00077 #define NQTFR(node) (&((node)->u.qtfr))
00078 #define NENCLOSE(node) (&((node)->u.enclose))
00079 #define NANCHOR(node) (&((node)->u.anchor))
00080 #define NCONS(node) (&((node)->u.cons))
00081 #define NCALL(node) (&((node)->u.call))
00082
00083 #define NCAR(node) (NCONS(node)->car)
00084 #define NCDR(node) (NCONS(node)->cdr)
00085
00086
00087
00088 #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
00089 #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
00090
00091 #define ENCLOSE_MEMORY (1<<0)
00092 #define ENCLOSE_OPTION (1<<1)
00093 #define ENCLOSE_STOP_BACKTRACK (1<<2)
00094
00095 #define NODE_STR_MARGIN 16
00096 #define NODE_STR_BUF_SIZE 24
00097 #define NODE_BACKREFS_SIZE 6
00098
00099 #define NSTR_RAW (1<<0)
00100 #define NSTR_AMBIG (1<<1)
00101 #define NSTR_DONT_GET_OPT_INFO (1<<2)
00102
00103 #define NSTRING_LEN(node) (OnigDistance)((node)->u.str.end - (node)->u.str.s)
00104 #define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
00105 #define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
00106 #define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
00107 #define NSTRING_SET_DONT_GET_OPT_INFO(node) \
00108 (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
00109 #define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
00110 #define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
00111 #define NSTRING_IS_DONT_GET_OPT_INFO(node) \
00112 (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
00113
00114 #define BACKREFS_P(br) \
00115 (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
00116
00117 #define NQ_TARGET_ISNOT_EMPTY 0
00118 #define NQ_TARGET_IS_EMPTY 1
00119 #define NQ_TARGET_IS_EMPTY_MEM 2
00120 #define NQ_TARGET_IS_EMPTY_REC 3
00121
00122
00123 #define NST_MIN_FIXED (1<<0)
00124 #define NST_MAX_FIXED (1<<1)
00125 #define NST_CLEN_FIXED (1<<2)
00126 #define NST_MARK1 (1<<3)
00127 #define NST_MARK2 (1<<4)
00128 #define NST_MEM_BACKREFED (1<<5)
00129 #define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
00130 #define NST_RECURSION (1<<7)
00131 #define NST_CALLED (1<<8)
00132 #define NST_ADDR_FIXED (1<<9)
00133 #define NST_NAMED_GROUP (1<<10)
00134 #define NST_NAME_REF (1<<11)
00135 #define NST_IN_REPEAT (1<<12)
00136 #define NST_NEST_LEVEL (1<<13)
00137 #define NST_BY_NUMBER (1<<14)
00138
00139 #define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f)
00140 #define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f)
00141
00142 #define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
00143 #define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
00144 #define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
00145 #define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
00146 #define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
00147 #define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
00148 #define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
00149 #define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
00150 #define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
00151 (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
00152 #define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
00153
00154 #define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
00155 #define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
00156 #define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
00157 #define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
00158 #define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
00159 #define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
00160 #define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
00161
00162 #define CALLNODE_REFNUM_UNDEF -1
00163
00164 typedef struct {
00165 NodeBase base;
00166 UChar* s;
00167 UChar* end;
00168 unsigned int flag;
00169 int capa;
00170 UChar buf[NODE_STR_BUF_SIZE];
00171 } StrNode;
00172
00173 typedef struct {
00174 NodeBase base;
00175 int state;
00176 struct _Node* target;
00177 int lower;
00178 int upper;
00179 int greedy;
00180 int target_empty_info;
00181 struct _Node* head_exact;
00182 struct _Node* next_head_exact;
00183 int is_refered;
00184 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00185 int comb_exp_check_num;
00186 #endif
00187 } QtfrNode;
00188
00189 typedef struct {
00190 NodeBase base;
00191 int state;
00192 int type;
00193 int regnum;
00194 OnigOptionType option;
00195 struct _Node* target;
00196 AbsAddrType call_addr;
00197
00198 OnigDistance min_len;
00199 OnigDistance max_len;
00200 int char_len;
00201 int opt_count;
00202 } EncloseNode;
00203
00204 #ifdef USE_SUBEXP_CALL
00205
00206 typedef struct {
00207 int offset;
00208 struct _Node* target;
00209 } UnsetAddr;
00210
00211 typedef struct {
00212 int num;
00213 int alloc;
00214 UnsetAddr* us;
00215 } UnsetAddrList;
00216
00217 typedef struct {
00218 NodeBase base;
00219 int state;
00220 int group_num;
00221 UChar* name;
00222 UChar* name_end;
00223 struct _Node* target;
00224 UnsetAddrList* unset_addr_list;
00225 } CallNode;
00226
00227 #endif
00228
00229 typedef struct {
00230 NodeBase base;
00231 int state;
00232 int back_num;
00233 int back_static[NODE_BACKREFS_SIZE];
00234 int* back_dynamic;
00235 int nest_level;
00236 } BRefNode;
00237
00238 typedef struct {
00239 NodeBase base;
00240 int type;
00241 struct _Node* target;
00242 int char_len;
00243 } AnchorNode;
00244
00245 typedef struct {
00246 NodeBase base;
00247 struct _Node* car;
00248 struct _Node* cdr;
00249 } ConsAltNode;
00250
00251 typedef struct {
00252 NodeBase base;
00253 int ctype;
00254 int not;
00255 } CtypeNode;
00256
00257 typedef struct _Node {
00258 union {
00259 NodeBase base;
00260 StrNode str;
00261 CClassNode cclass;
00262 QtfrNode qtfr;
00263 EncloseNode enclose;
00264 BRefNode bref;
00265 AnchorNode anchor;
00266 ConsAltNode cons;
00267 CtypeNode ctype;
00268 #ifdef USE_SUBEXP_CALL
00269 CallNode call;
00270 #endif
00271 } u;
00272 } Node;
00273
00274
00275 #define NULL_NODE ((Node* )0)
00276
00277 #define SCANENV_MEMNODES_SIZE 8
00278 #define SCANENV_MEM_NODES(senv) \
00279 (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
00280 (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
00281
00282 typedef struct {
00283 OnigOptionType option;
00284 OnigCaseFoldType case_fold_flag;
00285 OnigEncoding enc;
00286 const OnigSyntaxType* syntax;
00287 BitStatusType capture_history;
00288 BitStatusType bt_mem_start;
00289 BitStatusType bt_mem_end;
00290 BitStatusType backrefed_mem;
00291 UChar* pattern;
00292 UChar* pattern_end;
00293 UChar* error;
00294 UChar* error_end;
00295 regex_t* reg;
00296 int num_call;
00297 #ifdef USE_SUBEXP_CALL
00298 UnsetAddrList* unset_addr_list;
00299 #endif
00300 int num_mem;
00301 #ifdef USE_NAMED_GROUP
00302 int num_named;
00303 #endif
00304 int mem_alloc;
00305 Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
00306 Node** mem_nodes_dynamic;
00307 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00308 int num_comb_exp_check;
00309 int comb_exp_max_regnum;
00310 int curr_max_regnum;
00311 int has_recursion;
00312 #endif
00313 int warnings_flag;
00314 const char* sourcefile;
00315 int sourceline;
00316 } ScanEnv;
00317
00318
00319 #define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
00320 #define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
00321 #define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
00322
00323 #ifdef USE_NAMED_GROUP
00324 typedef struct {
00325 int new_val;
00326 } GroupNumRemap;
00327
00328 extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
00329 #endif
00330
00331 extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
00332 extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
00333 extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
00334 extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
00335 extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
00336 extern void onig_node_conv_to_str_node P_((Node* node, int raw));
00337 extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
00338 extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
00339 extern void onig_node_free P_((Node* node));
00340 extern Node* onig_node_new_enclose P_((int type));
00341 extern Node* onig_node_new_anchor P_((int type));
00342 extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
00343 extern Node* onig_node_new_list P_((Node* left, Node* right));
00344 extern Node* onig_node_list_add P_((Node* list, Node* x));
00345 extern Node* onig_node_new_alt P_((Node* left, Node* right));
00346 extern void onig_node_str_clear P_((Node* node));
00347 extern int onig_free_node_list P_((void));
00348 extern int onig_names_free P_((regex_t* reg));
00349 extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
00350 extern int onig_free_shared_cclass_table P_((void));
00351
00352 #ifdef ONIG_DEBUG
00353 #ifdef USE_NAMED_GROUP
00354 extern int onig_print_names(FILE*, regex_t*);
00355 #endif
00356 #endif
00357
00358 #if defined __GNUC__ && __GNUC__ >= 4
00359 #pragma GCC visibility pop
00360 #endif
00361
00362 #endif
00363