メインページ | アルファベット順一覧 | 構成 | ファイル一覧 | 構成メンバ | ファイルメンバ | 関連ページ

dkcRegex.c

説明を見る。
00001 
00009 #include "dkcRegex.h"
00010 //#include <wchar.h>
00011 #include "dkutil_cstd/string.h"
00012 
00013 #include "dkcStack.h"
00014 
00015 // 変数のサイズを明示的に指示するときに使う名前
00016 typedef unsigned char  byte;
00017 typedef unsigned short dbyte;
00018 typedef unsigned long  qbyte;
00019 typedef DKC_UNICODE unicode;
00020 
00021 // unsigned って毎回打つの面倒
00022 typedef unsigned char  uchar;
00023 typedef unsigned short ushort;
00024 typedef unsigned int   uint;
00025 typedef unsigned long  ulong;
00026 
00027 
00028 #define my_lstrlenW dkcstd_wcslen
00029 
00030 #define my_lstrcpyW dkcstd_wcsncpy
00031 
00032 /*
00034 struct CaseSensitive
00035 {
00036     static unicode map( unicode c )
00037         { return c; }
00038     static bool not_equal( unicode c1, unicode c2 )
00039         { return c1!=c2; }
00040 };
00041 
00043 struct IgnoreCase
00044 {
00045     static unicode map( unicode c )
00046         { return (L'a'<=c && c<=L'z' ? c-L'a'+L'A' : c); }
00047     static bool not_equal( unicode c1, unicode c2 )
00048         { return map(c1)!=map(c2); }
00049 };
00050 */
00051 static DKC_INLINE unicode ignore_map( unicode c )
00052 { 
00053     return (unicode)(L'a'<=c && c<=L'z' ? c-L'a'+L'A' : c); 
00054 }
00055 static DKC_INLINE BOOL ignore_not_equal( unicode c1, unicode c2 )
00056 {
00057     return ignore_map(c1)!=ignore_map(c2);
00058 }
00059 static DKC_INLINE void *malloc_adapter(size_t s){
00060     //return malloc(s);
00061     void *p=dkcAllocateFast(s);
00062     return p;
00063 }
00064 static DKC_INLINE void free_adapter(void *p){
00065     //free(p);
00066     dkcFree(&p);
00067 }
00068 
00069 //template<typename T> DKC_INLINE T Min(T x,T y) { return (x<y ? x : y); }
00070 //template<typename T> DKC_INLINE T Max(T x,T y) { return (y<x ? x : y); }
00071 static DKC_INLINE long Min(long x,long y) { return (x<y ? x : y); }
00072 static DKC_INLINE long Max(long x,long y) { return (y<x ? x : y); }
00073 
00074 DKC_REPLACE *WINAPI dkcAllocReplace(){
00075 
00076     DKC_REPLACE *p = dkcAllocate(sizeof(DKC_REPLACE));
00077     if(NULL==p){
00078         return NULL;
00079     }
00080     p->mStream = dkcAllocStream(edkcStreamInitMemory,NULL,5012,NULL,NULL);
00081     if(NULL==p->mStream){
00082         goto Error;
00083     }
00084     return p;
00085 Error:
00086     dkcFreeStream(&(p->mStream));
00087     dkcFree((void **)&p);
00088     return NULL;
00089 }
00090 
00091 
00092 int WINAPI dkcFreeReplace(DKC_REPLACE **pp){
00093     if(NULL==pp){
00094         return edk_ArgumentException;
00095     }
00096     dkcFreeStream(&(*pp)->mStream);
00097     return dkcFree((void **)pp);
00098 }
00099 
00100 
00101 int WINAPI dkcReplaceRun(    const BYTE *src,size_t srcsize,
00102                                                                          const BYTE *target_data,size_t target_data_size,
00103                                                                          const BYTE *replace_data,size_t replace_data_size
00104                                                                          ){
00105     return edk_SUCCEEDED;
00106 }
00107 
00108 /*
00109 @param
00110 @note
00111 target_data_size < replace_data_sizeの時はエラー
00112 */
00113 /*
00114 DKC_EXTERN int WINAPI dkcDataReplaceStuff(BYTE *dest,size_t destsize,
00115                                                                          const BYTE *src,size_t srcsize,
00116                                                                          const BYTE *target_data,size_t target_data_size,
00117                                                                          const BYTE *replace_data,size_t replace_data_size)
00118 {
00119     
00120     size_t i;
00121     
00122     if(target_data_size < replace_data_size){
00123         return edk_ArgumentException;
00124     }
00125 
00126 
00127 
00128     //まずは置換場所を探す
00129     for(i=0;i<srcsize;i++){
00130         memcmp(&src[i],
00131 
00132 
00133 
00134 
00135 
00136 }
00137 */
00138 
00139 
00145 typedef struct RegLexer_
00146 {
00147     const wchar_t* pat_;
00148     const wchar_t* end_;
00149     const wchar_t* sub_;
00150     wchar_t        chr_;
00151 }RegLexer;
00152 
00153 void RegLexerInit( RegLexer *p,const wchar_t* pat, ulong len )
00154 {
00155     //memset(p,0,sizeof(*p));
00156     p->pat_ = pat;
00157     p->end_ = pat + len;
00158     p->sub_ =  L"";
00159 }
00160 
00161 static DKC_INLINE int RegLexerGetTokenImpl(RegLexer *p){
00162 
00163     //const wchar_t* x = (*p->sub_ ? p->sub_ : p->pat_);
00164     const wchar_t *x = L"\0";
00165     int r = R_Char;
00166     if( p->sub_ == p->end_ )
00167         { r =  R_End;   goto End;}
00168 
00169     if(*p->sub_)
00170         x = p->sub_;
00171     else
00172         x = p->pat_;
00173 
00174     dkcmFORCE_NOT_ASSERT(x > p->end_);
00175     
00176     switch( *x++ )
00177     {
00178     case L'.':
00179         r =  R_Any;break;
00180     case L'[':
00181         r =  R_Lcl;break;
00182     case L']':
00183         r =  R_Rcl;break;
00184     case L'^':
00185         r =  R_Ncl;break;
00186     case L'-':
00187         r =  R_Range;break;
00188     case L'(':
00189         r =  R_Lbr;break;
00190     case L')':
00191         r =  R_Rbr;break;
00192     case L'|':
00193         r =  R_Bar;break;
00194     case L'*':
00195         r =  R_Star;break;
00196     case L'+':
00197         r =  R_Plus;break;
00198     case L'?':
00199         r =  R_Quest;break;
00200     case L'\\': 
00201         if( x==p->end_ ){ r =  R_End;break;}
00202         switch( *x++ ) 
00203         {
00204         case L't': p->chr_=L'\t'; 
00205             r =  R_Char;break;
00206         case L'w': p->sub_=L"[0-9a-zA-Z_]";
00207             r =  RegLexerGetTokenImpl(p);break;
00208         case L'W': p->sub_=L"[^0-9a-zA-Z_]";
00209             r =  RegLexerGetTokenImpl(p);break;
00210         case L'd': p->sub_=L"[0-9]";
00211             r =  RegLexerGetTokenImpl(p);break;
00212         case L'D': p->sub_=L"[^0-9]";
00213             r =  RegLexerGetTokenImpl(p);break;
00214         case L's': p->sub_=L"[\t ]";
00215             r =  RegLexerGetTokenImpl(p);break;
00216         case L'S': p->sub_=L"[^\t ]";
00217             r =  RegLexerGetTokenImpl(p);break;
00218         } // fall through...
00219     default:
00220         p->chr_ = *(x-1);
00221         r =  R_Char;
00222     }
00223 End:
00224     //update
00225     p->sub_ = x;
00226     return r;
00227 
00228 }
00229 //edkcRegToken RegLexerGetToken(RegLexer *p)
00230 int RegLexerGetToken(RegLexer *p)
00231 {
00232     return RegLexerGetTokenImpl(p);
00233     //d金魚改:ここをC言語で表現するのは骨が折れる…
00234     //const wchar_t*& x = (*p->sub_ ? p->sub_ : p->pat_);
00235     /*
00236     const wchar_t* x = (*p->sub_ ? p->sub_ : p->pat_);
00237     if( x == p->end_ ) return R_End;
00238     switch( *x++ )
00239     {
00240     case L'.': return R_Any;
00241     case L'[': return R_Lcl;
00242     case L']': return R_Rcl;
00243     case L'^': return R_Ncl;
00244     case L'-': return R_Range;
00245     case L'(': return R_Lbr;
00246     case L')': return R_Rbr;
00247     case L'|': return R_Bar;
00248     case L'*': return R_Star;
00249     case L'+': return R_Plus;
00250     case L'?': return R_Quest;
00251     case L'\\': 
00252         if( x==p->end_ ) return R_End;
00253         switch( *x++ ) 
00254         {
00255         case L't': p->chr_=L'\t';            return R_Char;
00256         case L'w': p->sub_=L"[0-9a-zA-Z_]";  return RegLexerGetToken(p);
00257         case L'W': p->sub_=L"[^0-9a-zA-Z_]"; return RegLexerGetToken(p);
00258         case L'd': p->sub_=L"[0-9]";         return RegLexerGetToken(p);
00259         case L'D': p->sub_=L"[^0-9]";        return RegLexerGetToken(p);
00260         case L's': p->sub_=L"[\t ]";         return RegLexerGetToken(p);
00261         case L'S': p->sub_=L"[^\t ]";        return RegLexerGetToken(p);
00262         } // fall through...
00263     default:
00264         p->chr_ = *(x-1);
00265         return R_Char;
00266     }
00267     */
00268 
00269 }
00270 wchar_t  RegLexerGetChar(RegLexer *p) {
00271     return p->chr_; 
00272 }
00273 
00274 //**********************************************************
00275 
00276 typedef struct OneRange_
00277 {
00278     wchar_t stt;
00279     wchar_t end;
00280 }OneRange;
00281 
00282 typedef struct RegClass_
00283 {
00284 
00285     OneRange range;
00286     struct RegClass_ *next;
00287 }RegClass;
00288 
00289 void RegClassInit( RegClass *p,wchar_t s, wchar_t e, RegClass* n )
00290 { 
00291     p->range.stt=s;
00292     p->range.end=e;
00293     p->next=n;
00294 }
00295 
00296 RegClass *alloc_RegClass(wchar_t s, wchar_t e, RegClass* n){
00297     RegClass *p = malloc_adapter(sizeof(RegClass));
00298     RegClassInit(p,s,e,n);
00299     return p;
00300 }
00301 
00302 void free_RegClass(RegClass *p){
00303     free_adapter(p);
00304 }
00305 
00306 
00307 
00308 typedef struct RegNode_
00309 {
00310 //  /*edkcRegType*/int           type; // このノードの種類
00311     int                     type;
00312     wchar_t       ch; // 文字
00313     RegClass*     cls; // 文字集合
00314     BOOL          cmpcls; // ↑補集合かどうか
00315     struct RegNode_*            left; // 左の子
00316     struct RegNode_*            right; // 右の子
00317 }RegNode;
00318 
00319 RegNode *alloc_RegNode(){
00320     RegNode *p = (RegNode *)malloc_adapter(sizeof(RegNode));
00321     /*p->left = NULL;
00322     p->right = NULL;
00323     p->cls = NULL;*/
00324     memset(p,0,sizeof(RegNode));
00325 
00326     return p;
00327 }
00328 void free_RegNode(RegNode *p){
00329     if(p->cls){
00330         free_RegClass(p->cls);
00331     }
00332     free_adapter(p);
00333 }
00334 //**********************************************************
00335 
00337 typedef struct RegParser_
00338 {
00339     BOOL    err_;
00340     BOOL    isHeadType_;
00341     BOOL    isTailType_;
00342     RegNode* root_;
00343 
00344     RegLexer lex_;
00345     //edkcRegToken nextToken_;
00346     int nextToken_;
00347 }RegParser;
00348 
00349 RegNode* RegParser_make_empty_leaf(RegParser *);
00350 RegNode* RegParser_make_char_leaf(RegParser *, wchar_t c );
00351 RegNode* RegParser_make_node(RegParser *, /*edkcRegType*/int t, RegNode* lft, RegNode* rht );
00352 void RegParser_eat_token(RegParser *);
00353 RegNode* RegParser_expr(RegParser *);
00354 RegNode* RegParser_term(RegParser *);
00355 RegNode* RegParser_factor(RegParser *);
00356 RegNode* RegParser_primary(RegParser *);
00357 RegNode* RegParser_reclass(RegParser *);
00358 
00359 void RegParser_Construct(RegParser *, const unicode* pat );
00360 
00361 RegNode* RegParser_root(RegParser *p) { return p->root_; }
00362 BOOL RegParser_err(RegParser *p) { return p->err_; }
00363 BOOL RegParser_isHeadType(RegParser *p) { return p->isHeadType_; }
00364 BOOL RegParser_isTailType(RegParser *p) { return p->isTailType_; }
00365 
00366 //=========================================================================
00368 //  構文木作成:実装
00370 //=========================================================================
00371 
00373 static int tmp; 
00374 
00375 
00376 
00377 void RegParser_Construct( RegParser *p,const unicode* pat )
00378 
00379     
00380 {
00381     p->err_ = FALSE;
00382     p->isHeadType_ = (*pat==L'^');
00383 
00384     tmp=my_lstrlenW(pat);
00385 
00386     p->isTailType_ = ( tmp && pat[tmp-1]==L'$' );
00387 
00388     RegLexerInit(&p->lex_,
00389         (p->isHeadType_ ? pat+1 : pat),
00390         (my_lstrlenW(pat) - (p->isHeadType_ ? 1 : 0)
00391                           - (p->isTailType_ ? 1 : 0)) 
00392     );
00393 
00394     RegParser_eat_token(p);
00395     p->root_ = RegParser_expr(p);
00396 }
00397 
00398 static DKC_INLINE void free_RegParser(RegParser *p,RegNode *next){
00399     if(next->left){
00400         free_RegParser(p,next->left);
00401     }
00402     if(next->right){
00403         free_RegParser(p,next->right);
00404     }
00405     free_RegNode(next);
00406 }
00407 void RegParser_Destruct(RegParser *p){
00408     free_RegParser(p,p->root_);
00409 }
00410 
00411 void RegParser_eat_token(RegParser *p)
00412 {
00413     //p->nextToken_ = p->lex_.GetToken();
00414     p->nextToken_ = RegLexerGetToken(&p->lex_);
00415 }
00416 
00417 RegNode* RegParser_make_empty_leaf(RegParser *p)
00418 {
00419     //RegNode* node = new RegNode;
00420     RegNode *node = alloc_RegNode();
00421     node->type = N_Empty;
00422     return node;
00423 }
00424 
00425 RegNode* RegParser_make_char_leaf( RegParser *p,wchar_t c )
00426 {
00427     //RegNode* node = new RegNode;
00428     RegNode *node = alloc_RegNode();
00429     node->type = N_Char;
00430     node->ch   = c;
00431     return node;
00432 }
00433 
00434 RegNode* RegParser_make_node( RegParser *p,/*edkcRegType*/int t, RegNode* lft, RegNode* rht )
00435 {
00436     //RegNode* node = new RegNode;
00437     RegNode *node = alloc_RegNode();
00438     node->type = t;
00439     node->left = lft;
00440     node->right= rht;
00441     return node;
00442 }
00443 
00444 RegNode* RegParser_reclass(RegParser *p)
00445 {
00446 //  CLASS   ::= '^'? CHAR (CHAR | -CHAR)*
00447 
00448     BOOL neg = FALSE;
00449     RegNode* node;
00450     RegClass* cls;
00451 
00452     if( p->nextToken_ == R_Ncl ){
00453         neg=TRUE;
00454         RegParser_eat_token(p);
00455     }
00456 
00457     cls = NULL;
00458     while(  p->nextToken_ == R_Char )
00459     {
00460         //wchar_t ch =  p->lex_.GetChar();
00461         wchar_t ch = RegLexerGetChar(&p->lex_);
00462         RegParser_eat_token(p);
00463         if(  p->nextToken_ == R_Range )
00464         {
00465             RegParser_eat_token(p);
00466             if(  p->nextToken_ != R_Char )
00467                  p->err_ = TRUE;
00468             else
00469             {
00470                 wchar_t ch2 =  RegLexerGetChar(&p->lex_);
00471                 //cls = new RegClass( Min(ch,ch2), Max(ch,ch2), cls );
00472                 cls = alloc_RegClass((unicode)Min(ch,ch2),(unicode) Max(ch,ch2), cls);
00473                 RegParser_eat_token(p);
00474             }
00475         }
00476         else
00477         {
00478             //cls = new RegClass( ch, ch, cls );
00479             cls = alloc_RegClass( ch, ch, cls );
00480         }
00481     }
00482 
00483     //RegNode* node = new RegNode;
00484     node = alloc_RegNode();
00485     node->type   = N_Class;
00486     node->cls    = cls;//aptr<RegClass>(cls);
00487     node->cmpcls = neg;
00488     return node;
00489 }
00490 
00491 RegNode* RegParser_primary(RegParser *p)
00492 {
00493 //  PRIMARY ::= CHAR
00494 //              '.'
00495 //              '[' CLASS ']'
00496 //              '(' REGEXP ')'
00497 
00498     RegNode* node;
00499     switch( p->nextToken_ )
00500     {
00501     case R_Char:
00502         node = RegParser_make_char_leaf( p,RegLexerGetChar(&p->lex_) );
00503         RegParser_eat_token(p);
00504         break;
00505     case R_Any:
00506         //node         = new RegNode;
00507         node = alloc_RegNode();
00508         node->type   = N_Class;
00509         //node->cls    = aptr<RegClass>(new RegClass( 0, 65535, NULL ));
00510         node->cls = alloc_RegClass(0, 65535, NULL);
00511         node->cmpcls = FALSE;
00512         RegParser_eat_token(p);
00513         break;
00514     case R_Lcl:
00515         RegParser_eat_token(p);
00516         node = RegParser_reclass(p);
00517         if( p->nextToken_ == R_Rcl )
00518             RegParser_eat_token(p);
00519         else
00520             p->err_ = TRUE;
00521         break;
00522     case R_Lbr:
00523         RegParser_eat_token(p);
00524         node = RegParser_expr(p);
00525         if( p->nextToken_ == R_Rbr )
00526             RegParser_eat_token(p);
00527         else
00528             p->err_ = TRUE;
00529         break;
00530     default:
00531         node = RegParser_make_empty_leaf(p);
00532         p->err_ = TRUE;
00533         break;
00534     }
00535     return node;
00536 }
00537 
00538 RegNode* RegParser_factor(RegParser *p)
00539 {
00540 //  FACTOR  ::= PRIMARY
00541 //              PRIMARY '*'
00542 //              PRIMARY '+'
00543 //              PRIMARY '?'
00544 
00545     RegNode* node = RegParser_primary(p);
00546     switch( p->nextToken_ )
00547     {
00548     case R_Star:
00549         node=RegParser_make_node(p,N_Closure,node,NULL);
00550         RegParser_eat_token(p);
00551         break;
00552     case R_Plus:
00553         node=RegParser_make_node(p,N_Closure1,node,NULL);
00554         RegParser_eat_token(p);
00555         break;
00556     case R_Quest:
00557         node=RegParser_make_node(p,N_01,node,NULL );
00558         RegParser_eat_token(p);
00559         break;
00560     }
00561     //RegParser_eat_token(p);
00562     return node;
00563 }
00564 
00565 RegNode* RegParser_term(RegParser *p)
00566 {
00567 //  TERM    ::= EMPTY
00568 //              FACTOR TERM
00569     RegNode* node;
00570     if( p->nextToken_ == R_End )
00571         return RegParser_make_empty_leaf(p);
00572 
00573     node = RegParser_factor(p);
00574     if( p->nextToken_==R_Lbr || p->nextToken_==R_Lcl
00575      || p->nextToken_==R_Char|| p->nextToken_==R_Any )
00576         node = RegParser_make_node(p, N_Concat, node, RegParser_term(p) );
00577     return node;
00578 }
00579 
00580 RegNode* RegParser_expr(RegParser *p)
00581 {
00582 //  REGEXP  ::= TERM
00583 //              TERM '|' REGEXP
00584 
00585     RegNode* node = RegParser_term(p);
00586     if( p->nextToken_ == R_Bar )
00587     {
00588         RegParser_eat_token(p);
00589         node = RegParser_make_node(p, N_Or, node, RegParser_expr(p) );
00590     }
00591     return node;
00592 }
00593 
00594 
00595 
00596 
00598 typedef struct RegTrans_
00599 {
00600     //aptr<RegClass>  cls; // この文字集合
00601     RegClass *cls;
00602                          // orEpsilon が来たら
00603     BOOL         cmpcls;
00604     int              to; // 状態番号toの状態へ遷移
00605     //aptr<RegTrans> next; // 連結リスト
00606     struct RegTrans_ *next;
00607     
00608     int type;
00609 }RegTrans;
00610 
00611 #define alloc_RegTrans() malloc_adapter(sizeof(RegTrans))
00612 #define free_RegTrans(p) free_adapter(p)
00613 
00615 enum {
00616     eEpsilon,
00617     eClass,
00618     eChar,
00619 };
00620 
00621 //RegClass *とRegTrans *互換性が無いよ○| ̄|_
00622 BOOL RegTrans_match_c(RegTrans *pt, wchar_t c )
00623 {
00624     RegClass* p = pt->cls;
00625     for( ; p; p=p->next )
00626         if( p->range.stt<=c && c<=p->range.end )
00627             return TRUE;
00628     return FALSE;
00629 }
00630 
00631 BOOL RegTrans_match_i(RegTrans *pt, wchar_t c )
00632 {
00633     RegClass* p;
00634     c = ignore_map(c);
00635     for( p=pt->cls; p; p=p->next )
00636         if( ignore_map(p->range.stt)<=c
00637          && c<=ignore_map(p->range.end) )
00638             return TRUE;
00639     return FALSE;
00640 }
00641 
00642 BOOL RegTrans_match(RegTrans *pt, wchar_t c, BOOL caseS )
00643 {
00644     BOOL m = caseS ? RegTrans_match_c(pt, c ) : RegTrans_match_i(pt, c );
00645     return pt->cmpcls ? !m : m;
00646 }
00647 
00648 //**********************************************************
00649 
00651 
00652 typedef struct RegNFA_
00653 {
00654     const wchar_t* str_;
00655     int len_;
00656     int matchpos_;
00657     BOOL caseS_;
00658 
00660     DKC_STACK *pstack;
00661 
00662     RegParser      parser;
00664     DKC_STACK *st;
00665     int      start, final;
00666 }RegNFA;
00667 
00668 
00669 
00670 //#define free_RegNFA(a) free_adapter(a)
00671 
00672 BOOL RegNFA_check_and_push_stack(RegNFA *,int curSt, int pos);
00673 void RegNFA_pop_stack(RegNFA *);
00674 
00675 typedef struct st_ele_ {
00676     int st;
00677     int ps; 
00678 }st_ele;
00679 
00680 void RegNFA_add_transition_a4(RegNFA *, int from, wchar_t ch, int to );
00681 void RegNFA_add_transition_a5(RegNFA *, int from, RegClass *cls, BOOL cmp, int to );
00682 void RegNFA_add_e_transition(RegNFA *, int from, int to );
00683 int RegNFA_gen_state(RegNFA *);
00684 void RegNFA_gen_nfa(RegNFA *, int entry, RegNode* t, int exit );
00685 
00686 BOOL RegNFA_Construct(RegNFA *, const wchar_t* pat );
00687 void RegNFA_Destruct(RegNFA *);
00688 
00689 
00690 BOOL RegNFA_isHeadType(RegNFA *p) { 
00691     return RegParser_isHeadType(&p->parser); 
00692 }
00693 BOOL RegNFA_isTailType(RegNFA *p) {
00694     return RegParser_isTailType(&p->parser); 
00695 }
00696 
00697 // マッチング処理
00698 void RegNFA_match_a3(RegNFA *, int curSt, int pos );
00699 int RegNFA_match_a4(RegNFA *, const wchar_t* str, int len, BOOL caseS );
00700 
00701 RegNFA *alloc_RegNFA(const unicode *key){
00702     RegNFA *p = dkcAllocate(sizeof(RegNFA));
00703     if(NULL==p){
00704         return NULL;
00705     }
00706     if(FALSE==RegNFA_Construct(p,key)){
00707         goto Error;
00708     }
00709     return p;
00710 Error:
00711     dkcFree(&p);
00712     return NULL;
00713 }
00714 
00715 void free_RegNFA(RegNFA *p){
00716     RegNFA_Destruct(p);
00717     dkcFree(&p);
00718 }
00719 
00720 BOOL RegNFA_Construct(RegNFA *p, const wchar_t* pat )
00721 {
00722         
00723     p->pstack = dkcAllocStack(10,sizeof(st_ele));
00724     if(NULL==p->pstack){
00725         return FALSE;
00726     }
00727     p->st = dkcAllocStack(10,sizeof(RegTrans *));
00728     if(NULL==p->st)
00729         goto Error;
00730 
00731     RegParser_Construct( &p->parser,pat );
00732 
00733     p->start = RegNFA_gen_state(p);
00734     p->final = RegNFA_gen_state(p);
00735     RegNFA_gen_nfa( p,p->start, RegParser_root(&p->parser), p->final );
00736     return TRUE;
00737 Error:
00738     dkcFreeStack(&p->pstack);
00739     return FALSE;
00740 }
00741 
00742 void RegNFA_Destruct(RegNFA *p)
00743 {
00744     RegTrans *t;
00745     ulong i=0;
00746     ulong e = dkcStackSize(p->st);
00747 
00748     for(;i<e;i++){//ここにバグがあるかも?
00749         dkcStackTop(p->st,(void *)&t);
00750         free_RegTrans(t);
00751         dkcStackPop(p->st);
00752     }
00753     RegParser_Destruct(&p->parser);
00754     dkcFreeStack(&p->st);
00755     dkcFreeStack(&p->pstack);
00756 }
00757 
00758 void RegNFA_add_transition_a5   ( RegNFA *p, int from, RegClass* cls, BOOL cmp, int to )
00759 {
00760     RegTrans **t;
00761     //RegTrans* x = new RegTrans;
00762     RegTrans *x=alloc_RegTrans();
00763     
00764     //x->next  = aptr<RegTrans>( p->st[from] );
00765     t = (RegTrans **)dkcStackPointer(p->st);
00766     
00767     x->next  = t[from];
00768     x->to    = to;
00769     x->type  = eClass;//type.eClass;
00770     x->cls   = cls;
00771     x->cmpcls= cmp;
00772     t[from] = x;
00773 }
00774 
00775 DKC_INLINE void RegNFA_add_transition_a4(RegNFA *p, int from, wchar_t ch, int to )
00776 {
00777     RegNFA_add_transition_a5( p,from,
00778         alloc_RegClass(ch,ch,NULL),
00779         FALSE, to );
00780 }
00781 
00782 DKC_INLINE void RegNFA_add_e_transition(RegNFA *p, int from, int to )
00783 {
00784     RegTrans **t;
00785     //RegTrans* x = new RegTrans;
00786     RegTrans* x = alloc_RegTrans();
00787     //x->next  = aptr<RegTrans>( st[from] );
00788 
00789     t = (RegTrans **)dkcStackPointer(p->st);
00790     x->next  = t[from];
00791     x->to    = to;
00792     x->type  = eEpsilon;
00793     t[from] = x;
00794 }
00795 
00796 DKC_INLINE int RegNFA_gen_state(RegNFA *p)
00797 {
00798     RegTrans *temp = NULL;
00799     dkcStackDynamicPush(p->st,(void *)&temp);
00800     return dkcStackSize(p->st) - 1;
00801 }
00802 
00803 void RegNFA_gen_nfa(RegNFA *p, int entry, RegNode* t, int exit )
00804 {
00805     switch( t->type )
00806     {
00807     case N_Char:
00808         //         ch
00809         //  entry ----> exit
00810         RegNFA_add_transition_a4( p,entry, t->ch, exit );
00811         break;
00812     case N_Class:
00813         //         cls
00814         //  entry -----> exit
00815         RegNFA_add_transition_a5( p,entry, t->cls, t->cmpcls, exit );
00816         break;
00817     case N_Concat: {
00818         //         left         right
00819         //  entry ------> step -------> exit
00820         int step = RegNFA_gen_state(p);
00821         RegNFA_gen_nfa(p, entry, t->left, step );
00822         RegNFA_gen_nfa(p, step, t->right, exit );
00823         } break;
00824     case N_Or:
00825         //          left
00826         //         ------>
00827         //  entry ------->--> exit
00828         //          right
00829         RegNFA_gen_nfa(p, entry, t->left, exit );
00830         RegNFA_gen_nfa(p, entry, t->right, exit );
00831         break;
00832     case N_Closure:
00833         //                       e
00834         //         e          <------        e
00835         //  entry ---> before ------> after ---> exit
00836         //    |                left                ^
00837         //    >------->------------------->------>-|
00838         //                      e
00839     case N_Closure1: {
00840         //                       e
00841         //         e          <------        e
00842         //  entry ---> before ------> after ---> exit
00843         //                     left
00844         int before = RegNFA_gen_state(p);
00845         int after = RegNFA_gen_state(p);
00846         RegNFA_add_e_transition(p, entry, before );
00847         RegNFA_add_e_transition(p, after, exit );
00848         RegNFA_add_e_transition(p, after, before );
00849         RegNFA_gen_nfa(p, before, t->left, after );
00850         if( t->type != N_Closure1 )
00851             RegNFA_add_e_transition(p, entry, exit );
00852         } break;
00853     case N_01:
00854         //           e
00855         //        ------>
00856         //  entry ------> exit
00857         //         left
00858         RegNFA_add_e_transition(p, entry, exit );
00859         RegNFA_gen_nfa(p, entry, t->left, exit );
00860         break;
00861     case N_Empty:
00862         //         e
00863         //  entry ---> exit
00864         RegNFA_add_e_transition(p, entry, exit );
00865         break;
00866     }
00867 }
00868 
00869 
00870 
00871 //=========================================================================
00873 //  マッチング
00875 //=========================================================================
00876 
00877 BOOL RegNFA_check_and_push_stack(RegNFA *p,int curSt, int pos)
00878 {
00879     /*for( int i=stack_.size()-1; i>=0; --i )
00880         if( stack_[i].ps != pos )
00881             break;
00882         else if( stack_[i].st == curSt )
00883             return FALSE;
00884 
00885     st_ele nw = {curSt,pos};
00886     stack_.Add( nw );
00887     return TRUE;
00888     */
00889     int i;
00890     st_ele *pt;
00891     st_ele nw;
00892 
00893 
00894     i = (int)(dkcStackSize(p->pstack) - 1);
00895     pt = dkcStackPointer(p->pstack);
00896 
00897     for(;i >= 0;--i){
00898         if( pt[i].ps != pos )
00899             break;
00900         else if( pt[i].st == curSt )
00901             return FALSE;
00902     }
00903     nw.st = curSt;
00904     nw.ps = pos;
00905     dkcStackDynamicPush(p->pstack, &nw );
00906     return TRUE;
00907 }
00908 
00909 void RegNFA_pop_stack(RegNFA *p)
00910 {
00911     //stack_.ForceSize( stack_.size()-1 );
00912     dkcStackPop(p->pstack);
00913 }
00914 
00915 
00916 int RegNFA_match_a4(RegNFA *p, const wchar_t* str, int len, BOOL caseS )
00917 {
00918 
00919     if( RegParser_err(&p->parser) )
00920         return -1; // エラー
00921 
00922     p->matchpos_ = -1;
00923     p->caseS_ = caseS;
00924     p->str_ = str;
00925     p->len_ = len; // 作業変数にコピー
00926 
00927     {
00928         st_ele nw = {0,0};
00929         dkcStackDynamicPush(p->pstack, &nw );
00930         RegNFA_match_a3(p, 0, 0 ); // 0:始状態
00931     }
00932     return p->matchpos_ ;
00933 }
00934 
00935 // 暫定
00936 void RegNFA_match_a3(RegNFA *p, int curSt, int pos )
00937 {
00938     if( curSt == 1 ) // 1==終状態
00939     {
00940         // マッチ成功を記録
00941         if( p->matchpos_ < pos )
00942             p->matchpos_ = pos;
00943     }
00944 
00945     if( p->matchpos_ < p->len_ )
00946     {
00947         RegTrans* tr;
00948         RegTrans **sst;
00949         sst = (RegTrans **)dkcStackPointer(p->st);
00950 
00951         tr=sst[curSt];
00952         for( ; tr!=NULL; tr=tr->next )
00953         {
00954             if( tr->type == eEpsilon )
00955             {
00956                 // ε無限ループを防止策。同じ状態には戻らないように…
00957                 if( RegNFA_check_and_push_stack(p,tr->to, pos) )
00958                 {
00959                     RegNFA_match_a3(p, tr->to, pos );
00960                     RegNFA_pop_stack(p);
00961                 }
00962             }
00963             //else if( pos<len_ && tr->match( str_[pos], caseS_ ) )
00964             else if(pos<p->len_ && RegTrans_match(tr, p->str_[pos], p->caseS_ ) )
00965             {
00966                 if( p->str_[pos] == L'i' )
00967                     p->str_ = p->str_;
00968                 RegNFA_match_a3(p, tr->to, pos+1 );
00969             }
00970         }
00971     }
00972 }
00973 
00974 
00975 
00976 //=========================================================================
00978 //  GreenPad用検索オブジェクト
00980 //=========================================================================
00981 
00982 DKC_REGEX *WINAPI dkcAllocRegex( const unicode* key, BOOL caseS, BOOL down )
00983 {
00984     DKC_REGEX *p = dkcAllocate(sizeof(DKC_REGEX));
00985     p->re_ = alloc_RegNFA(key);
00986     p->caseS_ = caseS;
00987     p->down_ = down;
00988     return p;
00989 }
00990 
00991 
00992 
00993 int WINAPI dkcFreeRegex(DKC_REGEX **p){
00994     free_RegNFA((*p)->re_);
00995     return dkcFree(p);
00996 }
00997 
00998 BOOL WINAPI dkcRegexSearch(DKC_REGEX *p,
00999     const unicode* str, ulong len, ulong stt, ulong* mbg, ulong* med )
01000 {
01001     /*
01002     const int d;
01003     int s;
01004     const int e;*/
01005     if( p->down_ && RegNFA_isHeadType(p->re_) && stt>0 )
01006         return FALSE;
01007     {
01008 
01009         const int d = (p->down_ ? 1 : -1);
01010                     int s = (!p->down_ && RegNFA_isHeadType(p->re_) ? 0 : stt);
01011         const int e = (p->down_ ? (RegNFA_isHeadType(p->re_) ? 1 : len) : -1);
01012 
01013         for( ; s!=e; s+=d )
01014         {
01015             //const int L = re_->match( str+s, len-s, caseS_ );
01016             int L = RegNFA_match_a4( p->re_,str+s, len-s, p->caseS_ );
01017             if( L > 0 )
01018             {
01019                 if( RegNFA_isTailType(p->re_) && L!=(int)(len-s) )
01020                     continue;
01021                 *mbg = (ulong)(s);
01022                 *med = (ulong)(s+L);
01023                 return TRUE;
01024             }
01025         }
01026     }
01027 
01028     return FALSE;
01029 }
01030 
01031 
01032 BOOL WINAPI dkcRegularExpressionSearch( const DKC_UNICODE* key, BOOL caseS, BOOL down,const unicode* str, ulong len, ulong stt,ulong* mbg, ulong* med )
01033 {
01034     DKC_REGEX *p;
01035     BOOL r;
01036 
01037     p  = dkcAllocRegex(key,caseS,down);
01038     if(NULL==p)
01039         return FALSE;
01040 
01041     r = dkcRegexSearch(p,str,len,stt,mbg,med);
01042     /*if(FALSE==r)
01043         return FALSE;*/
01044 
01045     dkcFreeRegex(&p);
01046     return r;
01047 
01048 
01049 }
01050 
01051         
01052         
01054 /*
01055 BOOL reg_match(RegNFA *p, const wchar_t* pat, const wchar_t* str, BOOL caseS )
01056 {
01057     int len = my_lstrlenW(str);
01058 
01059     RegNFA re( pat );
01060     return len == re.match( str, len, caseS );
01061 }*/
01062 BOOL WINAPI dkcRegularExpressionMatch( const DKC_UNICODE* pat, const wchar_t* str, BOOL caseS )
01063 {
01064     int len = my_lstrlenW(str);
01065     RegNFA re;
01066     BOOL r;
01067     if(FALSE==RegNFA_Construct(&re,pat))
01068         return FALSE;
01069     
01070     r = (len == RegNFA_match_a4(&re, str, len, caseS ));
01071 
01072     RegNFA_Destruct(&re);
01073 
01074     return r;
01075 }
01076 

dkutil_cに対してSat Sep 10 09:23:56 2005に生成されました。  doxygen 1.4.4