00001
00009 #include "dkcRegex.h"
00010
00011 #include "dkutil_cstd/string.h"
00012
00013 #include "dkcStack.h"
00014
00015
00016 typedef unsigned char byte;
00017 typedef unsigned short dbyte;
00018 typedef unsigned long qbyte;
00019 typedef DKC_UNICODE unicode;
00020
00021
00022 typedef unsigned char uchar;
00023 typedef unsigned short ushort;
00024 typedef unsigned int uint;
00025 typedef unsigned long ulong;
00026
00027
00028 #define my_lstrlenW dkcstd_wcslen
00029
00030 #define my_lstrcpyW dkcstd_wcsncpy
00031
00032
00034
00035
00036
00037
00038
00039
00040
00041
00043
00044
00045
00046
00047
00048
00049
00050
00051 static DKC_INLINE unicode ignore_map( unicode c )
00052 {
00053 return (unicode)(L'a'<=c && c<=L'z' ? c-L'a'+L'A' : c);
00054 }
00055 static DKC_INLINE BOOL ignore_not_equal( unicode c1, unicode c2 )
00056 {
00057 return ignore_map(c1)!=ignore_map(c2);
00058 }
00059 static DKC_INLINE void *malloc_adapter(size_t s){
00060
00061 void *p=dkcAllocateFast(s);
00062 return p;
00063 }
00064 static DKC_INLINE void free_adapter(void *p){
00065
00066 dkcFree(&p);
00067 }
00068
00069
00070
00071 static DKC_INLINE long Min(long x,long y) { return (x<y ? x : y); }
00072 static DKC_INLINE long Max(long x,long y) { return (y<x ? x : y); }
00073
00074 DKC_REPLACE *WINAPI dkcAllocReplace(){
00075
00076 DKC_REPLACE *p = dkcAllocate(sizeof(DKC_REPLACE));
00077 if(NULL==p){
00078 return NULL;
00079 }
00080 p->mStream = dkcAllocStream(edkcStreamInitMemory,NULL,5012,NULL,NULL);
00081 if(NULL==p->mStream){
00082 goto Error;
00083 }
00084 return p;
00085 Error:
00086 dkcFreeStream(&(p->mStream));
00087 dkcFree((void **)&p);
00088 return NULL;
00089 }
00090
00091
00092 int WINAPI dkcFreeReplace(DKC_REPLACE **pp){
00093 if(NULL==pp){
00094 return edk_ArgumentException;
00095 }
00096 dkcFreeStream(&(*pp)->mStream);
00097 return dkcFree((void **)pp);
00098 }
00099
00100
00101 int WINAPI dkcReplaceRun( const BYTE *src,size_t srcsize,
00102 const BYTE *target_data,size_t target_data_size,
00103 const BYTE *replace_data,size_t replace_data_size
00104 ){
00105 return edk_SUCCEEDED;
00106 }
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00145 typedef struct RegLexer_
00146 {
00147 const wchar_t* pat_;
00148 const wchar_t* end_;
00149 const wchar_t* sub_;
00150 wchar_t chr_;
00151 }RegLexer;
00152
00153 void RegLexerInit( RegLexer *p,const wchar_t* pat, ulong len )
00154 {
00155
00156 p->pat_ = pat;
00157 p->end_ = pat + len;
00158 p->sub_ = L"";
00159 }
00160
00161 static DKC_INLINE int RegLexerGetTokenImpl(RegLexer *p){
00162
00163
00164 const wchar_t *x = L"\0";
00165 int r = R_Char;
00166 if( p->sub_ == p->end_ )
00167 { r = R_End; goto End;}
00168
00169 if(*p->sub_)
00170 x = p->sub_;
00171 else
00172 x = p->pat_;
00173
00174 dkcmFORCE_NOT_ASSERT(x > p->end_);
00175
00176 switch( *x++ )
00177 {
00178 case L'.':
00179 r = R_Any;break;
00180 case L'[':
00181 r = R_Lcl;break;
00182 case L']':
00183 r = R_Rcl;break;
00184 case L'^':
00185 r = R_Ncl;break;
00186 case L'-':
00187 r = R_Range;break;
00188 case L'(':
00189 r = R_Lbr;break;
00190 case L')':
00191 r = R_Rbr;break;
00192 case L'|':
00193 r = R_Bar;break;
00194 case L'*':
00195 r = R_Star;break;
00196 case L'+':
00197 r = R_Plus;break;
00198 case L'?':
00199 r = R_Quest;break;
00200 case L'\\':
00201 if( x==p->end_ ){ r = R_End;break;}
00202 switch( *x++ )
00203 {
00204 case L't': p->chr_=L'\t';
00205 r = R_Char;break;
00206 case L'w': p->sub_=L"[0-9a-zA-Z_]";
00207 r = RegLexerGetTokenImpl(p);break;
00208 case L'W': p->sub_=L"[^0-9a-zA-Z_]";
00209 r = RegLexerGetTokenImpl(p);break;
00210 case L'd': p->sub_=L"[0-9]";
00211 r = RegLexerGetTokenImpl(p);break;
00212 case L'D': p->sub_=L"[^0-9]";
00213 r = RegLexerGetTokenImpl(p);break;
00214 case L's': p->sub_=L"[\t ]";
00215 r = RegLexerGetTokenImpl(p);break;
00216 case L'S': p->sub_=L"[^\t ]";
00217 r = RegLexerGetTokenImpl(p);break;
00218 }
00219 default:
00220 p->chr_ = *(x-1);
00221 r = R_Char;
00222 }
00223 End:
00224
00225 p->sub_ = x;
00226 return r;
00227
00228 }
00229
00230 int RegLexerGetToken(RegLexer *p)
00231 {
00232 return RegLexerGetTokenImpl(p);
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269 }
00270 wchar_t RegLexerGetChar(RegLexer *p) {
00271 return p->chr_;
00272 }
00273
00274
00275
00276 typedef struct OneRange_
00277 {
00278 wchar_t stt;
00279 wchar_t end;
00280 }OneRange;
00281
00282 typedef struct RegClass_
00283 {
00284
00285 OneRange range;
00286 struct RegClass_ *next;
00287 }RegClass;
00288
00289 void RegClassInit( RegClass *p,wchar_t s, wchar_t e, RegClass* n )
00290 {
00291 p->range.stt=s;
00292 p->range.end=e;
00293 p->next=n;
00294 }
00295
00296 RegClass *alloc_RegClass(wchar_t s, wchar_t e, RegClass* n){
00297 RegClass *p = malloc_adapter(sizeof(RegClass));
00298 RegClassInit(p,s,e,n);
00299 return p;
00300 }
00301
00302 void free_RegClass(RegClass *p){
00303 free_adapter(p);
00304 }
00305
00306
00307
00308 typedef struct RegNode_
00309 {
00310
00311 int type;
00312 wchar_t ch;
00313 RegClass* cls;
00314 BOOL cmpcls;
00315 struct RegNode_* left;
00316 struct RegNode_* right;
00317 }RegNode;
00318
00319 RegNode *alloc_RegNode(){
00320 RegNode *p = (RegNode *)malloc_adapter(sizeof(RegNode));
00321
00322
00323
00324 memset(p,0,sizeof(RegNode));
00325
00326 return p;
00327 }
00328 void free_RegNode(RegNode *p){
00329 if(p->cls){
00330 free_RegClass(p->cls);
00331 }
00332 free_adapter(p);
00333 }
00334
00335
00337 typedef struct RegParser_
00338 {
00339 BOOL err_;
00340 BOOL isHeadType_;
00341 BOOL isTailType_;
00342 RegNode* root_;
00343
00344 RegLexer lex_;
00345
00346 int nextToken_;
00347 }RegParser;
00348
00349 RegNode* RegParser_make_empty_leaf(RegParser *);
00350 RegNode* RegParser_make_char_leaf(RegParser *, wchar_t c );
00351 RegNode* RegParser_make_node(RegParser *, int t, RegNode* lft, RegNode* rht );
00352 void RegParser_eat_token(RegParser *);
00353 RegNode* RegParser_expr(RegParser *);
00354 RegNode* RegParser_term(RegParser *);
00355 RegNode* RegParser_factor(RegParser *);
00356 RegNode* RegParser_primary(RegParser *);
00357 RegNode* RegParser_reclass(RegParser *);
00358
00359 void RegParser_Construct(RegParser *, const unicode* pat );
00360
00361 RegNode* RegParser_root(RegParser *p) { return p->root_; }
00362 BOOL RegParser_err(RegParser *p) { return p->err_; }
00363 BOOL RegParser_isHeadType(RegParser *p) { return p->isHeadType_; }
00364 BOOL RegParser_isTailType(RegParser *p) { return p->isTailType_; }
00365
00366
00368
00370
00371
00373 static int tmp;
00374
00375
00376
00377 void RegParser_Construct( RegParser *p,const unicode* pat )
00378
00379
00380 {
00381 p->err_ = FALSE;
00382 p->isHeadType_ = (*pat==L'^');
00383
00384 tmp=my_lstrlenW(pat);
00385
00386 p->isTailType_ = ( tmp && pat[tmp-1]==L'$' );
00387
00388 RegLexerInit(&p->lex_,
00389 (p->isHeadType_ ? pat+1 : pat),
00390 (my_lstrlenW(pat) - (p->isHeadType_ ? 1 : 0)
00391 - (p->isTailType_ ? 1 : 0))
00392 );
00393
00394 RegParser_eat_token(p);
00395 p->root_ = RegParser_expr(p);
00396 }
00397
00398 static DKC_INLINE void free_RegParser(RegParser *p,RegNode *next){
00399 if(next->left){
00400 free_RegParser(p,next->left);
00401 }
00402 if(next->right){
00403 free_RegParser(p,next->right);
00404 }
00405 free_RegNode(next);
00406 }
00407 void RegParser_Destruct(RegParser *p){
00408 free_RegParser(p,p->root_);
00409 }
00410
00411 void RegParser_eat_token(RegParser *p)
00412 {
00413
00414 p->nextToken_ = RegLexerGetToken(&p->lex_);
00415 }
00416
00417 RegNode* RegParser_make_empty_leaf(RegParser *p)
00418 {
00419
00420 RegNode *node = alloc_RegNode();
00421 node->type = N_Empty;
00422 return node;
00423 }
00424
00425 RegNode* RegParser_make_char_leaf( RegParser *p,wchar_t c )
00426 {
00427
00428 RegNode *node = alloc_RegNode();
00429 node->type = N_Char;
00430 node->ch = c;
00431 return node;
00432 }
00433
00434 RegNode* RegParser_make_node( RegParser *p,int t, RegNode* lft, RegNode* rht )
00435 {
00436
00437 RegNode *node = alloc_RegNode();
00438 node->type = t;
00439 node->left = lft;
00440 node->right= rht;
00441 return node;
00442 }
00443
00444 RegNode* RegParser_reclass(RegParser *p)
00445 {
00446
00447
00448 BOOL neg = FALSE;
00449 RegNode* node;
00450 RegClass* cls;
00451
00452 if( p->nextToken_ == R_Ncl ){
00453 neg=TRUE;
00454 RegParser_eat_token(p);
00455 }
00456
00457 cls = NULL;
00458 while( p->nextToken_ == R_Char )
00459 {
00460
00461 wchar_t ch = RegLexerGetChar(&p->lex_);
00462 RegParser_eat_token(p);
00463 if( p->nextToken_ == R_Range )
00464 {
00465 RegParser_eat_token(p);
00466 if( p->nextToken_ != R_Char )
00467 p->err_ = TRUE;
00468 else
00469 {
00470 wchar_t ch2 = RegLexerGetChar(&p->lex_);
00471
00472 cls = alloc_RegClass((unicode)Min(ch,ch2),(unicode) Max(ch,ch2), cls);
00473 RegParser_eat_token(p);
00474 }
00475 }
00476 else
00477 {
00478
00479 cls = alloc_RegClass( ch, ch, cls );
00480 }
00481 }
00482
00483
00484 node = alloc_RegNode();
00485 node->type = N_Class;
00486 node->cls = cls;
00487 node->cmpcls = neg;
00488 return node;
00489 }
00490
00491 RegNode* RegParser_primary(RegParser *p)
00492 {
00493
00494
00495
00496
00497
00498 RegNode* node;
00499 switch( p->nextToken_ )
00500 {
00501 case R_Char:
00502 node = RegParser_make_char_leaf( p,RegLexerGetChar(&p->lex_) );
00503 RegParser_eat_token(p);
00504 break;
00505 case R_Any:
00506
00507 node = alloc_RegNode();
00508 node->type = N_Class;
00509
00510 node->cls = alloc_RegClass(0, 65535, NULL);
00511 node->cmpcls = FALSE;
00512 RegParser_eat_token(p);
00513 break;
00514 case R_Lcl:
00515 RegParser_eat_token(p);
00516 node = RegParser_reclass(p);
00517 if( p->nextToken_ == R_Rcl )
00518 RegParser_eat_token(p);
00519 else
00520 p->err_ = TRUE;
00521 break;
00522 case R_Lbr:
00523 RegParser_eat_token(p);
00524 node = RegParser_expr(p);
00525 if( p->nextToken_ == R_Rbr )
00526 RegParser_eat_token(p);
00527 else
00528 p->err_ = TRUE;
00529 break;
00530 default:
00531 node = RegParser_make_empty_leaf(p);
00532 p->err_ = TRUE;
00533 break;
00534 }
00535 return node;
00536 }
00537
00538 RegNode* RegParser_factor(RegParser *p)
00539 {
00540
00541
00542
00543
00544
00545 RegNode* node = RegParser_primary(p);
00546 switch( p->nextToken_ )
00547 {
00548 case R_Star:
00549 node=RegParser_make_node(p,N_Closure,node,NULL);
00550 RegParser_eat_token(p);
00551 break;
00552 case R_Plus:
00553 node=RegParser_make_node(p,N_Closure1,node,NULL);
00554 RegParser_eat_token(p);
00555 break;
00556 case R_Quest:
00557 node=RegParser_make_node(p,N_01,node,NULL );
00558 RegParser_eat_token(p);
00559 break;
00560 }
00561
00562 return node;
00563 }
00564
00565 RegNode* RegParser_term(RegParser *p)
00566 {
00567
00568
00569 RegNode* node;
00570 if( p->nextToken_ == R_End )
00571 return RegParser_make_empty_leaf(p);
00572
00573 node = RegParser_factor(p);
00574 if( p->nextToken_==R_Lbr || p->nextToken_==R_Lcl
00575 || p->nextToken_==R_Char|| p->nextToken_==R_Any )
00576 node = RegParser_make_node(p, N_Concat, node, RegParser_term(p) );
00577 return node;
00578 }
00579
00580 RegNode* RegParser_expr(RegParser *p)
00581 {
00582
00583
00584
00585 RegNode* node = RegParser_term(p);
00586 if( p->nextToken_ == R_Bar )
00587 {
00588 RegParser_eat_token(p);
00589 node = RegParser_make_node(p, N_Or, node, RegParser_expr(p) );
00590 }
00591 return node;
00592 }
00593
00594
00595
00596
00598 typedef struct RegTrans_
00599 {
00600
00601 RegClass *cls;
00602
00603 BOOL cmpcls;
00604 int to;
00605
00606 struct RegTrans_ *next;
00607
00608 int type;
00609 }RegTrans;
00610
00611 #define alloc_RegTrans() malloc_adapter(sizeof(RegTrans))
00612 #define free_RegTrans(p) free_adapter(p)
00613
00615 enum {
00616 eEpsilon,
00617 eClass,
00618 eChar,
00619 };
00620
00621
00622 BOOL RegTrans_match_c(RegTrans *pt, wchar_t c )
00623 {
00624 RegClass* p = pt->cls;
00625 for( ; p; p=p->next )
00626 if( p->range.stt<=c && c<=p->range.end )
00627 return TRUE;
00628 return FALSE;
00629 }
00630
00631 BOOL RegTrans_match_i(RegTrans *pt, wchar_t c )
00632 {
00633 RegClass* p;
00634 c = ignore_map(c);
00635 for( p=pt->cls; p; p=p->next )
00636 if( ignore_map(p->range.stt)<=c
00637 && c<=ignore_map(p->range.end) )
00638 return TRUE;
00639 return FALSE;
00640 }
00641
00642 BOOL RegTrans_match(RegTrans *pt, wchar_t c, BOOL caseS )
00643 {
00644 BOOL m = caseS ? RegTrans_match_c(pt, c ) : RegTrans_match_i(pt, c );
00645 return pt->cmpcls ? !m : m;
00646 }
00647
00648
00649
00651
00652 typedef struct RegNFA_
00653 {
00654 const wchar_t* str_;
00655 int len_;
00656 int matchpos_;
00657 BOOL caseS_;
00658
00660 DKC_STACK *pstack;
00661
00662 RegParser parser;
00664 DKC_STACK *st;
00665 int start, final;
00666 }RegNFA;
00667
00668
00669
00670
00671
00672 BOOL RegNFA_check_and_push_stack(RegNFA *,int curSt, int pos);
00673 void RegNFA_pop_stack(RegNFA *);
00674
00675 typedef struct st_ele_ {
00676 int st;
00677 int ps;
00678 }st_ele;
00679
00680 void RegNFA_add_transition_a4(RegNFA *, int from, wchar_t ch, int to );
00681 void RegNFA_add_transition_a5(RegNFA *, int from, RegClass *cls, BOOL cmp, int to );
00682 void RegNFA_add_e_transition(RegNFA *, int from, int to );
00683 int RegNFA_gen_state(RegNFA *);
00684 void RegNFA_gen_nfa(RegNFA *, int entry, RegNode* t, int exit );
00685
00686 BOOL RegNFA_Construct(RegNFA *, const wchar_t* pat );
00687 void RegNFA_Destruct(RegNFA *);
00688
00689
00690 BOOL RegNFA_isHeadType(RegNFA *p) {
00691 return RegParser_isHeadType(&p->parser);
00692 }
00693 BOOL RegNFA_isTailType(RegNFA *p) {
00694 return RegParser_isTailType(&p->parser);
00695 }
00696
00697
00698 void RegNFA_match_a3(RegNFA *, int curSt, int pos );
00699 int RegNFA_match_a4(RegNFA *, const wchar_t* str, int len, BOOL caseS );
00700
00701 RegNFA *alloc_RegNFA(const unicode *key){
00702 RegNFA *p = dkcAllocate(sizeof(RegNFA));
00703 if(NULL==p){
00704 return NULL;
00705 }
00706 if(FALSE==RegNFA_Construct(p,key)){
00707 goto Error;
00708 }
00709 return p;
00710 Error:
00711 dkcFree(&p);
00712 return NULL;
00713 }
00714
00715 void free_RegNFA(RegNFA *p){
00716 RegNFA_Destruct(p);
00717 dkcFree(&p);
00718 }
00719
00720 BOOL RegNFA_Construct(RegNFA *p, const wchar_t* pat )
00721 {
00722
00723 p->pstack = dkcAllocStack(10,sizeof(st_ele));
00724 if(NULL==p->pstack){
00725 return FALSE;
00726 }
00727 p->st = dkcAllocStack(10,sizeof(RegTrans *));
00728 if(NULL==p->st)
00729 goto Error;
00730
00731 RegParser_Construct( &p->parser,pat );
00732
00733 p->start = RegNFA_gen_state(p);
00734 p->final = RegNFA_gen_state(p);
00735 RegNFA_gen_nfa( p,p->start, RegParser_root(&p->parser), p->final );
00736 return TRUE;
00737 Error:
00738 dkcFreeStack(&p->pstack);
00739 return FALSE;
00740 }
00741
00742 void RegNFA_Destruct(RegNFA *p)
00743 {
00744 RegTrans *t;
00745 ulong i=0;
00746 ulong e = dkcStackSize(p->st);
00747
00748 for(;i<e;i++){
00749 dkcStackTop(p->st,(void *)&t);
00750 free_RegTrans(t);
00751 dkcStackPop(p->st);
00752 }
00753 RegParser_Destruct(&p->parser);
00754 dkcFreeStack(&p->st);
00755 dkcFreeStack(&p->pstack);
00756 }
00757
00758 void RegNFA_add_transition_a5 ( RegNFA *p, int from, RegClass* cls, BOOL cmp, int to )
00759 {
00760 RegTrans **t;
00761
00762 RegTrans *x=alloc_RegTrans();
00763
00764
00765 t = (RegTrans **)dkcStackPointer(p->st);
00766
00767 x->next = t[from];
00768 x->to = to;
00769 x->type = eClass;
00770 x->cls = cls;
00771 x->cmpcls= cmp;
00772 t[from] = x;
00773 }
00774
00775 DKC_INLINE void RegNFA_add_transition_a4(RegNFA *p, int from, wchar_t ch, int to )
00776 {
00777 RegNFA_add_transition_a5( p,from,
00778 alloc_RegClass(ch,ch,NULL),
00779 FALSE, to );
00780 }
00781
00782 DKC_INLINE void RegNFA_add_e_transition(RegNFA *p, int from, int to )
00783 {
00784 RegTrans **t;
00785
00786 RegTrans* x = alloc_RegTrans();
00787
00788
00789 t = (RegTrans **)dkcStackPointer(p->st);
00790 x->next = t[from];
00791 x->to = to;
00792 x->type = eEpsilon;
00793 t[from] = x;
00794 }
00795
00796 DKC_INLINE int RegNFA_gen_state(RegNFA *p)
00797 {
00798 RegTrans *temp = NULL;
00799 dkcStackDynamicPush(p->st,(void *)&temp);
00800 return dkcStackSize(p->st) - 1;
00801 }
00802
00803 void RegNFA_gen_nfa(RegNFA *p, int entry, RegNode* t, int exit )
00804 {
00805 switch( t->type )
00806 {
00807 case N_Char:
00808
00809
00810 RegNFA_add_transition_a4( p,entry, t->ch, exit );
00811 break;
00812 case N_Class:
00813
00814
00815 RegNFA_add_transition_a5( p,entry, t->cls, t->cmpcls, exit );
00816 break;
00817 case N_Concat: {
00818
00819
00820 int step = RegNFA_gen_state(p);
00821 RegNFA_gen_nfa(p, entry, t->left, step );
00822 RegNFA_gen_nfa(p, step, t->right, exit );
00823 } break;
00824 case N_Or:
00825
00826
00827
00828
00829 RegNFA_gen_nfa(p, entry, t->left, exit );
00830 RegNFA_gen_nfa(p, entry, t->right, exit );
00831 break;
00832 case N_Closure:
00833
00834
00835
00836
00837
00838
00839 case N_Closure1: {
00840
00841
00842
00843
00844 int before = RegNFA_gen_state(p);
00845 int after = RegNFA_gen_state(p);
00846 RegNFA_add_e_transition(p, entry, before );
00847 RegNFA_add_e_transition(p, after, exit );
00848 RegNFA_add_e_transition(p, after, before );
00849 RegNFA_gen_nfa(p, before, t->left, after );
00850 if( t->type != N_Closure1 )
00851 RegNFA_add_e_transition(p, entry, exit );
00852 } break;
00853 case N_01:
00854
00855
00856
00857
00858 RegNFA_add_e_transition(p, entry, exit );
00859 RegNFA_gen_nfa(p, entry, t->left, exit );
00860 break;
00861 case N_Empty:
00862
00863
00864 RegNFA_add_e_transition(p, entry, exit );
00865 break;
00866 }
00867 }
00868
00869
00870
00871
00873
00875
00876
00877 BOOL RegNFA_check_and_push_stack(RegNFA *p,int curSt, int pos)
00878 {
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889 int i;
00890 st_ele *pt;
00891 st_ele nw;
00892
00893
00894 i = (int)(dkcStackSize(p->pstack) - 1);
00895 pt = dkcStackPointer(p->pstack);
00896
00897 for(;i >= 0;--i){
00898 if( pt[i].ps != pos )
00899 break;
00900 else if( pt[i].st == curSt )
00901 return FALSE;
00902 }
00903 nw.st = curSt;
00904 nw.ps = pos;
00905 dkcStackDynamicPush(p->pstack, &nw );
00906 return TRUE;
00907 }
00908
00909 void RegNFA_pop_stack(RegNFA *p)
00910 {
00911
00912 dkcStackPop(p->pstack);
00913 }
00914
00915
00916 int RegNFA_match_a4(RegNFA *p, const wchar_t* str, int len, BOOL caseS )
00917 {
00918
00919 if( RegParser_err(&p->parser) )
00920 return -1;
00921
00922 p->matchpos_ = -1;
00923 p->caseS_ = caseS;
00924 p->str_ = str;
00925 p->len_ = len;
00926
00927 {
00928 st_ele nw = {0,0};
00929 dkcStackDynamicPush(p->pstack, &nw );
00930 RegNFA_match_a3(p, 0, 0 );
00931 }
00932 return p->matchpos_ ;
00933 }
00934
00935
00936 void RegNFA_match_a3(RegNFA *p, int curSt, int pos )
00937 {
00938 if( curSt == 1 )
00939 {
00940
00941 if( p->matchpos_ < pos )
00942 p->matchpos_ = pos;
00943 }
00944
00945 if( p->matchpos_ < p->len_ )
00946 {
00947 RegTrans* tr;
00948 RegTrans **sst;
00949 sst = (RegTrans **)dkcStackPointer(p->st);
00950
00951 tr=sst[curSt];
00952 for( ; tr!=NULL; tr=tr->next )
00953 {
00954 if( tr->type == eEpsilon )
00955 {
00956
00957 if( RegNFA_check_and_push_stack(p,tr->to, pos) )
00958 {
00959 RegNFA_match_a3(p, tr->to, pos );
00960 RegNFA_pop_stack(p);
00961 }
00962 }
00963
00964 else if(pos<p->len_ && RegTrans_match(tr, p->str_[pos], p->caseS_ ) )
00965 {
00966 if( p->str_[pos] == L'i' )
00967 p->str_ = p->str_;
00968 RegNFA_match_a3(p, tr->to, pos+1 );
00969 }
00970 }
00971 }
00972 }
00973
00974
00975
00976
00978
00980
00981
00982 DKC_REGEX *WINAPI dkcAllocRegex( const unicode* key, BOOL caseS, BOOL down )
00983 {
00984 DKC_REGEX *p = dkcAllocate(sizeof(DKC_REGEX));
00985 p->re_ = alloc_RegNFA(key);
00986 p->caseS_ = caseS;
00987 p->down_ = down;
00988 return p;
00989 }
00990
00991
00992
00993 int WINAPI dkcFreeRegex(DKC_REGEX **p){
00994 free_RegNFA((*p)->re_);
00995 return dkcFree(p);
00996 }
00997
00998 BOOL WINAPI dkcRegexSearch(DKC_REGEX *p,
00999 const unicode* str, ulong len, ulong stt, ulong* mbg, ulong* med )
01000 {
01001
01002
01003
01004
01005 if( p->down_ && RegNFA_isHeadType(p->re_) && stt>0 )
01006 return FALSE;
01007 {
01008
01009 const int d = (p->down_ ? 1 : -1);
01010 int s = (!p->down_ && RegNFA_isHeadType(p->re_) ? 0 : stt);
01011 const int e = (p->down_ ? (RegNFA_isHeadType(p->re_) ? 1 : len) : -1);
01012
01013 for( ; s!=e; s+=d )
01014 {
01015
01016 int L = RegNFA_match_a4( p->re_,str+s, len-s, p->caseS_ );
01017 if( L > 0 )
01018 {
01019 if( RegNFA_isTailType(p->re_) && L!=(int)(len-s) )
01020 continue;
01021 *mbg = (ulong)(s);
01022 *med = (ulong)(s+L);
01023 return TRUE;
01024 }
01025 }
01026 }
01027
01028 return FALSE;
01029 }
01030
01031
01032 BOOL WINAPI dkcRegularExpressionSearch( const DKC_UNICODE* key, BOOL caseS, BOOL down,const unicode* str, ulong len, ulong stt,ulong* mbg, ulong* med )
01033 {
01034 DKC_REGEX *p;
01035 BOOL r;
01036
01037 p = dkcAllocRegex(key,caseS,down);
01038 if(NULL==p)
01039 return FALSE;
01040
01041 r = dkcRegexSearch(p,str,len,stt,mbg,med);
01042
01043
01044
01045 dkcFreeRegex(&p);
01046 return r;
01047
01048
01049 }
01050
01051
01052
01054
01055
01056
01057
01058
01059
01060
01061
01062 BOOL WINAPI dkcRegularExpressionMatch( const DKC_UNICODE* pat, const wchar_t* str, BOOL caseS )
01063 {
01064 int len = my_lstrlenW(str);
01065 RegNFA re;
01066 BOOL r;
01067 if(FALSE==RegNFA_Construct(&re,pat))
01068 return FALSE;
01069
01070 r = (len == RegNFA_match_a4(&re, str, len, caseS ));
01071
01072 RegNFA_Destruct(&re);
01073
01074 return r;
01075 }
01076