00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "ruby/ruby.h"
00012 #include "ruby/re.h"
00013 #include "ruby/encoding.h"
00014
/* Extension version string; Init_strscan exposes it as StringScanner::Version. */
#define STRSCAN_VERSION "0.7.0"

/*
 * Formatting shims for the rb_sprintf() calls in strscan_inspect.
 * When the Ruby headers do not provide PRIsVALUE, fall back to a plain
 * "%s" conversion fed with C strings; otherwise pass VALUEs directly.
 */
#ifndef PRIsVALUE
# define PRIsVALUE "s"
# define RB_OBJ_CLASSNAME(obj) rb_obj_classname(obj)
# define RB_OBJ_STRING(obj) StringValueCStr(obj)
#else
# define RB_OBJ_CLASSNAME(obj) rb_obj_class(obj)
# define RB_OBJ_STRING(obj) (obj)
#endif
00025
00026
00027
00028
00029
00030 static VALUE StringScanner;
00031 static VALUE ScanError;
00032
00033 struct strscanner
00034 {
00035
00036 unsigned long flags;
00037 #define FLAG_MATCHED (1 << 0)
00038
00039
00040 VALUE str;
00041
00042
00043 long prev;
00044 long curr;
00045
00046
00047 struct re_registers regs;
00048 };
00049
/*
 * Match-status helpers working on the FLAG_MATCHED bit of (s)->flags.
 *   MATCHED_P(s)           - non-zero when the bit is set
 *   MATCHED(s)             - sets the bit
 *   CLEAR_MATCH_STATUS(s)  - clears the bit
 * Every expansion is fully parenthesized so the macros stay safe when
 * used inside a larger expression.
 */
#define MATCHED_P(s)          ((s)->flags & FLAG_MATCHED)
#define MATCHED(s)            ((s)->flags |= FLAG_MATCHED)
#define CLEAR_MATCH_STATUS(s) ((s)->flags &= ~FLAG_MATCHED)
00053
/*
 * Accessors for the scanned string of scanner (s):
 *   S_PBEG    - pointer to the first byte
 *   S_LEN     - total length
 *   S_PEND    - pointer one past the last byte
 *   CURPTR    - pointer to the current scan position
 *   S_RESTLEN - length from the current position to the end
 */
#define S_PBEG(s)    (RSTRING_PTR((s)->str))
#define S_LEN(s)     (RSTRING_LEN((s)->str))
#define S_PEND(s)    (RSTRING_PTR((s)->str) + RSTRING_LEN((s)->str))
#define CURPTR(s)    (RSTRING_PTR((s)->str) + (s)->curr)
#define S_RESTLEN(s) (RSTRING_LEN((s)->str) - (s)->curr)
00059
/*
 * True when the scan position of (s) is at or beyond the end of its string.
 * The length is read through the macro argument, so the macro works for a
 * scanner variable of any name.
 */
#define EOS_P(s) ((s)->curr >= RSTRING_LEN((s)->str))
00061
/*
 * Unwraps the struct strscanner held by Ruby object (obj) into (var) and
 * raises ArgumentError when the scanner has no string yet (str is nil).
 */
#define GET_SCANNER(obj,var) do {\
    Data_Get_Struct((obj), struct strscanner, (var));\
    if (NIL_P((var)->str)) {\
        rb_raise(rb_eArgError, "uninitialized StringScanner object");\
    }\
} while (0)
00066
00067
00068
00069
00070
00071 static VALUE infect _((VALUE str, struct strscanner *p));
00072 static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i));
00073 static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len));
00074
00075 void check_strscan _((VALUE obj));
00076 static void strscan_mark _((struct strscanner *p));
00077 static void strscan_free _((struct strscanner *p));
00078 static VALUE strscan_s_allocate _((VALUE klass));
00079 static VALUE strscan_initialize _((int argc, VALUE *argv, VALUE self));
00080 static VALUE strscan_init_copy _((VALUE vself, VALUE vorig));
00081
00082 static VALUE strscan_s_mustc _((VALUE self));
00083 static VALUE strscan_terminate _((VALUE self));
00084 static VALUE strscan_clear _((VALUE self));
00085 static VALUE strscan_get_string _((VALUE self));
00086 static VALUE strscan_set_string _((VALUE self, VALUE str));
00087 static VALUE strscan_concat _((VALUE self, VALUE str));
00088 static VALUE strscan_get_pos _((VALUE self));
00089 static VALUE strscan_set_pos _((VALUE self, VALUE pos));
00090 static VALUE strscan_do_scan _((VALUE self, VALUE regex,
00091 int succptr, int getstr, int headonly));
00092 static VALUE strscan_scan _((VALUE self, VALUE re));
00093 static VALUE strscan_match_p _((VALUE self, VALUE re));
00094 static VALUE strscan_skip _((VALUE self, VALUE re));
00095 static VALUE strscan_check _((VALUE self, VALUE re));
00096 static VALUE strscan_scan_full _((VALUE self, VALUE re,
00097 VALUE succp, VALUE getp));
00098 static VALUE strscan_scan_until _((VALUE self, VALUE re));
00099 static VALUE strscan_skip_until _((VALUE self, VALUE re));
00100 static VALUE strscan_check_until _((VALUE self, VALUE re));
00101 static VALUE strscan_search_full _((VALUE self, VALUE re,
00102 VALUE succp, VALUE getp));
00103 static void adjust_registers_to_matched _((struct strscanner *p));
00104 static VALUE strscan_getch _((VALUE self));
00105 static VALUE strscan_get_byte _((VALUE self));
00106 static VALUE strscan_getbyte _((VALUE self));
00107 static VALUE strscan_peek _((VALUE self, VALUE len));
00108 static VALUE strscan_peep _((VALUE self, VALUE len));
00109 static VALUE strscan_unscan _((VALUE self));
00110 static VALUE strscan_bol_p _((VALUE self));
00111 static VALUE strscan_eos_p _((VALUE self));
00112 static VALUE strscan_empty_p _((VALUE self));
00113 static VALUE strscan_rest_p _((VALUE self));
00114 static VALUE strscan_matched_p _((VALUE self));
00115 static VALUE strscan_matched _((VALUE self));
00116 static VALUE strscan_matched_size _((VALUE self));
00117 static VALUE strscan_aref _((VALUE self, VALUE idx));
00118 static VALUE strscan_pre_match _((VALUE self));
00119 static VALUE strscan_post_match _((VALUE self));
00120 static VALUE strscan_rest _((VALUE self));
00121 static VALUE strscan_rest_size _((VALUE self));
00122
00123 static VALUE strscan_inspect _((VALUE self));
00124 static VALUE inspect1 _((struct strscanner *p));
00125 static VALUE inspect2 _((struct strscanner *p));
00126
00127
00128
00129
00130
00131 static VALUE
00132 infect(VALUE str, struct strscanner *p)
00133 {
00134 OBJ_INFECT(str, p->str);
00135 return str;
00136 }
00137
00138 static VALUE
00139 str_new(struct strscanner *p, const char *ptr, long len)
00140 {
00141 VALUE str = rb_str_new(ptr, len);
00142 rb_enc_copy(str, p->str);
00143 return str;
00144 }
00145
00146 static VALUE
00147 extract_range(struct strscanner *p, long beg_i, long end_i)
00148 {
00149 if (beg_i > S_LEN(p)) return Qnil;
00150 if (end_i > S_LEN(p))
00151 end_i = S_LEN(p);
00152 return infect(str_new(p, S_PBEG(p) + beg_i, end_i - beg_i), p);
00153 }
00154
00155 static VALUE
00156 extract_beg_len(struct strscanner *p, long beg_i, long len)
00157 {
00158 if (beg_i > S_LEN(p)) return Qnil;
00159 if (beg_i + len > S_LEN(p))
00160 len = S_LEN(p) - beg_i;
00161 return infect(str_new(p, S_PBEG(p) + beg_i, len), p);
00162 }
00163
00164
00165
00166
00167
00168 static void
00169 strscan_mark(struct strscanner *p)
00170 {
00171 rb_gc_mark(p->str);
00172 }
00173
00174 static void
00175 strscan_free(struct strscanner *p)
00176 {
00177 onig_region_free(&(p->regs), 0);
00178 ruby_xfree(p);
00179 }
00180
00181 static VALUE
00182 strscan_s_allocate(VALUE klass)
00183 {
00184 struct strscanner *p;
00185
00186 p = ALLOC(struct strscanner);
00187 MEMZERO(p, struct strscanner, 1);
00188 CLEAR_MATCH_STATUS(p);
00189 onig_region_init(&(p->regs));
00190 p->str = Qnil;
00191 return Data_Wrap_Struct(klass, strscan_mark, strscan_free, p);
00192 }
00193
00194
00195
00196
00197
00198
00199
00200 static VALUE
00201 strscan_initialize(int argc, VALUE *argv, VALUE self)
00202 {
00203 struct strscanner *p;
00204 VALUE str, need_dup;
00205
00206 Data_Get_Struct(self, struct strscanner, p);
00207 rb_scan_args(argc, argv, "11", &str, &need_dup);
00208 StringValue(str);
00209 p->str = str;
00210
00211 return self;
00212 }
00213
00214 void
00215 check_strscan(VALUE obj)
00216 {
00217 if (TYPE(obj) != T_DATA || RDATA(obj)->dmark != (RUBY_DATA_FUNC)strscan_mark) {
00218 rb_raise(rb_eTypeError,
00219 "wrong argument type %s (expected StringScanner)",
00220 rb_obj_classname(obj));
00221 }
00222 }
00223
00224
00225
00226
00227
00228
00229
00230
00231 static VALUE
00232 strscan_init_copy(VALUE vself, VALUE vorig)
00233 {
00234 struct strscanner *self, *orig;
00235
00236 Data_Get_Struct(vself, struct strscanner, self);
00237 check_strscan(vorig);
00238 Data_Get_Struct(vorig, struct strscanner, orig);
00239 if (self != orig) {
00240 self->flags = orig->flags;
00241 self->str = orig->str;
00242 self->prev = orig->prev;
00243 self->curr = orig->curr;
00244 onig_region_copy(&self->regs, &orig->regs);
00245 }
00246
00247 return vself;
00248 }
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259 static VALUE
00260 strscan_s_mustc(VALUE self)
00261 {
00262 return self;
00263 }
00264
00265
00266
00267
00268 static VALUE
00269 strscan_reset(VALUE self)
00270 {
00271 struct strscanner *p;
00272
00273 GET_SCANNER(self, p);
00274 p->curr = 0;
00275 CLEAR_MATCH_STATUS(p);
00276 return self;
00277 }
00278
00279
00280
00281
00282
00283
00284
00285
00286 static VALUE
00287 strscan_terminate(VALUE self)
00288 {
00289 struct strscanner *p;
00290
00291 GET_SCANNER(self, p);
00292 p->curr = S_LEN(p);
00293 CLEAR_MATCH_STATUS(p);
00294 return self;
00295 }
00296
00297
00298
00299
00300
00301 static VALUE
00302 strscan_clear(VALUE self)
00303 {
00304 rb_warning("StringScanner#clear is obsolete; use #terminate instead");
00305 return strscan_terminate(self);
00306 }
00307
00308
00309
00310
00311 static VALUE
00312 strscan_get_string(VALUE self)
00313 {
00314 struct strscanner *p;
00315
00316 GET_SCANNER(self, p);
00317 return p->str;
00318 }
00319
00320
00321
00322
00323
00324
00325
00326 static VALUE
00327 strscan_set_string(VALUE self, VALUE str)
00328 {
00329 struct strscanner *p;
00330
00331 Data_Get_Struct(self, struct strscanner, p);
00332 StringValue(str);
00333 p->str = str;
00334 p->curr = 0;
00335 CLEAR_MATCH_STATUS(p);
00336 return str;
00337 }
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353 static VALUE
00354 strscan_concat(VALUE self, VALUE str)
00355 {
00356 struct strscanner *p;
00357
00358 GET_SCANNER(self, p);
00359 StringValue(str);
00360 rb_str_append(p->str, str);
00361 return self;
00362 }
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378 static VALUE
00379 strscan_get_pos(VALUE self)
00380 {
00381 struct strscanner *p;
00382
00383 GET_SCANNER(self, p);
00384 return INT2FIX(p->curr);
00385 }
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396 static VALUE
00397 strscan_set_pos(VALUE self, VALUE v)
00398 {
00399 struct strscanner *p;
00400 long i;
00401
00402 GET_SCANNER(self, p);
00403 i = NUM2INT(v);
00404 if (i < 0) i += S_LEN(p);
00405 if (i < 0) rb_raise(rb_eRangeError, "index out of range");
00406 if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
00407 p->curr = i;
00408 return INT2NUM(i);
00409 }
00410
00411 static VALUE
00412 strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
00413 {
00414 regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
00415 struct strscanner *p;
00416 regex_t *re;
00417 long ret;
00418 int tmpreg;
00419
00420 Check_Type(regex, T_REGEXP);
00421 GET_SCANNER(self, p);
00422
00423 CLEAR_MATCH_STATUS(p);
00424 if (S_RESTLEN(p) < 0) {
00425 return Qnil;
00426 }
00427 re = rb_reg_prepare_re(regex, p->str);
00428 tmpreg = re != RREGEXP(regex)->ptr;
00429 if (!tmpreg) RREGEXP(regex)->usecnt++;
00430
00431 if (headonly) {
00432 ret = onig_match(re, (UChar* )CURPTR(p),
00433 (UChar* )(CURPTR(p) + S_RESTLEN(p)),
00434 (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
00435 }
00436 else {
00437 ret = onig_search(re,
00438 (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
00439 (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
00440 &(p->regs), ONIG_OPTION_NONE);
00441 }
00442 if (!tmpreg) RREGEXP(regex)->usecnt--;
00443 if (tmpreg) {
00444 if (RREGEXP(regex)->usecnt) {
00445 onig_free(re);
00446 }
00447 else {
00448 onig_free(RREGEXP(regex)->ptr);
00449 RREGEXP(regex)->ptr = re;
00450 }
00451 }
00452
00453 if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
00454 if (ret < 0) {
00455
00456 return Qnil;
00457 }
00458
00459 MATCHED(p);
00460 p->prev = p->curr;
00461 if (succptr) {
00462 p->curr += p->regs.end[0];
00463 }
00464 if (getstr) {
00465 return extract_beg_len(p, p->prev, p->regs.end[0]);
00466 }
00467 else {
00468 return INT2FIX(p->regs.end[0]);
00469 }
00470 }
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487 static VALUE
00488 strscan_scan(VALUE self, VALUE re)
00489 {
00490 return strscan_do_scan(self, re, 1, 1, 1);
00491 }
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504 static VALUE
00505 strscan_match_p(VALUE self, VALUE re)
00506 {
00507 return strscan_do_scan(self, re, 0, 0, 1);
00508 }
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527 static VALUE
00528 strscan_skip(VALUE self, VALUE re)
00529 {
00530 return strscan_do_scan(self, re, 1, 0, 1);
00531 }
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548 static VALUE
00549 strscan_check(VALUE self, VALUE re)
00550 {
00551 return strscan_do_scan(self, re, 0, 1, 1);
00552 }
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564 static VALUE
00565 strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
00566 {
00567 return strscan_do_scan(self, re, RTEST(s), RTEST(f), 1);
00568 }
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582 static VALUE
00583 strscan_scan_until(VALUE self, VALUE re)
00584 {
00585 return strscan_do_scan(self, re, 1, 1, 0);
00586 }
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601 static VALUE
00602 strscan_exist_p(VALUE self, VALUE re)
00603 {
00604 return strscan_do_scan(self, re, 0, 0, 0);
00605 }
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618
00619
00620
00621
00622
00623 static VALUE
00624 strscan_skip_until(VALUE self, VALUE re)
00625 {
00626 return strscan_do_scan(self, re, 1, 0, 0);
00627 }
00628
00629
00630
00631
00632
00633
00634
00635
00636
00637
00638
00639
00640
00641
00642 static VALUE
00643 strscan_check_until(VALUE self, VALUE re)
00644 {
00645 return strscan_do_scan(self, re, 0, 1, 0);
00646 }
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657 static VALUE
00658 strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
00659 {
00660 return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0);
00661 }
00662
00663 static void
00664 adjust_registers_to_matched(struct strscanner *p)
00665 {
00666 onig_region_clear(&(p->regs));
00667 onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
00668 }
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680
00681
00682
00683
00684 static VALUE
00685 strscan_getch(VALUE self)
00686 {
00687 struct strscanner *p;
00688 long len;
00689
00690 GET_SCANNER(self, p);
00691 CLEAR_MATCH_STATUS(p);
00692 if (EOS_P(p))
00693 return Qnil;
00694
00695 len = rb_enc_mbclen(CURPTR(p), S_PEND(p), rb_enc_get(p->str));
00696 if (p->curr + len > S_LEN(p)) {
00697 len = S_LEN(p) - p->curr;
00698 }
00699 p->prev = p->curr;
00700 p->curr += len;
00701 MATCHED(p);
00702 adjust_registers_to_matched(p);
00703 return extract_range(p, p->prev + p->regs.beg[0],
00704 p->prev + p->regs.end[0]);
00705 }
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717
00718
00719
00720
00721
00722
00723 static VALUE
00724 strscan_get_byte(VALUE self)
00725 {
00726 struct strscanner *p;
00727
00728 GET_SCANNER(self, p);
00729 CLEAR_MATCH_STATUS(p);
00730 if (EOS_P(p))
00731 return Qnil;
00732
00733 p->prev = p->curr;
00734 p->curr++;
00735 MATCHED(p);
00736 adjust_registers_to_matched(p);
00737 return extract_range(p, p->prev + p->regs.beg[0],
00738 p->prev + p->regs.end[0]);
00739 }
00740
00741
00742
00743
00744
00745 static VALUE
00746 strscan_getbyte(VALUE self)
00747 {
00748 rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead");
00749 return strscan_get_byte(self);
00750 }
00751
00752
00753
00754
00755
00756
00757
00758
00759
00760
00761
00762
00763 static VALUE
00764 strscan_peek(VALUE self, VALUE vlen)
00765 {
00766 struct strscanner *p;
00767 long len;
00768
00769 GET_SCANNER(self, p);
00770
00771 len = NUM2LONG(vlen);
00772 if (EOS_P(p))
00773 return infect(str_new(p, "", 0), p);
00774
00775 if (p->curr + len > S_LEN(p))
00776 len = S_LEN(p) - p->curr;
00777 return extract_beg_len(p, p->curr, len);
00778 }
00779
00780
00781
00782
00783
00784 static VALUE
00785 strscan_peep(VALUE self, VALUE vlen)
00786 {
00787 rb_warning("StringScanner#peep is obsolete; use #peek instead");
00788 return strscan_peek(self, vlen);
00789 }
00790
00791
00792
00793
00794
00795
00796
00797
00798
00799
00800
00801
00802 static VALUE
00803 strscan_unscan(VALUE self)
00804 {
00805 struct strscanner *p;
00806
00807 GET_SCANNER(self, p);
00808 if (! MATCHED_P(p))
00809 rb_raise(ScanError, "unscan failed: previous match record not exist");
00810 p->curr = p->prev;
00811 CLEAR_MATCH_STATUS(p);
00812 return self;
00813 }
00814
00815
00816
00817
00818
00819
00820
00821
00822
00823
00824
00825
00826
00827 static VALUE
00828 strscan_bol_p(VALUE self)
00829 {
00830 struct strscanner *p;
00831
00832 GET_SCANNER(self, p);
00833 if (CURPTR(p) > S_PEND(p)) return Qnil;
00834 if (p->curr == 0) return Qtrue;
00835 return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
00836 }
00837
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848 static VALUE
00849 strscan_eos_p(VALUE self)
00850 {
00851 struct strscanner *p;
00852
00853 GET_SCANNER(self, p);
00854 return EOS_P(p) ? Qtrue : Qfalse;
00855 }
00856
00857
00858
00859
00860
00861 static VALUE
00862 strscan_empty_p(VALUE self)
00863 {
00864 rb_warning("StringScanner#empty? is obsolete; use #eos? instead");
00865 return strscan_eos_p(self);
00866 }
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876 static VALUE
00877 strscan_rest_p(VALUE self)
00878 {
00879 struct strscanner *p;
00880
00881 GET_SCANNER(self, p);
00882 return EOS_P(p) ? Qfalse : Qtrue;
00883 }
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894 static VALUE
00895 strscan_matched_p(VALUE self)
00896 {
00897 struct strscanner *p;
00898
00899 GET_SCANNER(self, p);
00900 return MATCHED_P(p) ? Qtrue : Qfalse;
00901 }
00902
00903
00904
00905
00906
00907
00908
00909
00910 static VALUE
00911 strscan_matched(VALUE self)
00912 {
00913 struct strscanner *p;
00914
00915 GET_SCANNER(self, p);
00916 if (! MATCHED_P(p)) return Qnil;
00917 return extract_range(p, p->prev + p->regs.beg[0],
00918 p->prev + p->regs.end[0]);
00919 }
00920
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931 static VALUE
00932 strscan_matched_size(VALUE self)
00933 {
00934 struct strscanner *p;
00935
00936 GET_SCANNER(self, p);
00937 if (! MATCHED_P(p)) return Qnil;
00938 return INT2NUM(p->regs.end[0] - p->regs.beg[0]);
00939 }
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955 static VALUE
00956 strscan_aref(VALUE self, VALUE idx)
00957 {
00958 struct strscanner *p;
00959 long i;
00960
00961 GET_SCANNER(self, p);
00962 if (! MATCHED_P(p)) return Qnil;
00963
00964 i = NUM2LONG(idx);
00965 if (i < 0)
00966 i += p->regs.num_regs;
00967 if (i < 0) return Qnil;
00968 if (i >= p->regs.num_regs) return Qnil;
00969 if (p->regs.beg[i] == -1) return Qnil;
00970
00971 return extract_range(p, p->prev + p->regs.beg[i],
00972 p->prev + p->regs.end[i]);
00973 }
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984 static VALUE
00985 strscan_pre_match(VALUE self)
00986 {
00987 struct strscanner *p;
00988
00989 GET_SCANNER(self, p);
00990 if (! MATCHED_P(p)) return Qnil;
00991 return extract_range(p, 0, p->prev + p->regs.beg[0]);
00992 }
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003 static VALUE
01004 strscan_post_match(VALUE self)
01005 {
01006 struct strscanner *p;
01007
01008 GET_SCANNER(self, p);
01009 if (! MATCHED_P(p)) return Qnil;
01010 return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
01011 }
01012
01013
01014
01015
01016
01017 static VALUE
01018 strscan_rest(VALUE self)
01019 {
01020 struct strscanner *p;
01021
01022 GET_SCANNER(self, p);
01023 if (EOS_P(p)) {
01024 return infect(str_new(p, "", 0), p);
01025 }
01026 return extract_range(p, p->curr, S_LEN(p));
01027 }
01028
01029
01030
01031
01032 static VALUE
01033 strscan_rest_size(VALUE self)
01034 {
01035 struct strscanner *p;
01036 long i;
01037
01038 GET_SCANNER(self, p);
01039 if (EOS_P(p)) {
01040 return INT2FIX(0);
01041 }
01042 i = S_LEN(p) - p->curr;
01043 return INT2FIX(i);
01044 }
01045
01046
01047
01048
01049
01050 static VALUE
01051 strscan_restsize(VALUE self)
01052 {
01053 rb_warning("StringScanner#restsize is obsolete; use #rest_size instead");
01054 return strscan_rest_size(self);
01055 }
01056
/* Number of bytes shown on each side of the scan position by #inspect. */
#define INSPECT_LENGTH  5
/* Size of the stack buffer inspect1 assembles its excerpt in. */
#define BUFSIZE         256
01059
01060
01061
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071 static VALUE
01072 strscan_inspect(VALUE self)
01073 {
01074 struct strscanner *p;
01075 VALUE a, b;
01076
01077 Data_Get_Struct(self, struct strscanner, p);
01078 if (NIL_P(p->str)) {
01079 a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", RB_OBJ_CLASSNAME(self));
01080 return infect(a, p);
01081 }
01082 if (EOS_P(p)) {
01083 a = rb_sprintf("#<%"PRIsVALUE" fin>", RB_OBJ_CLASSNAME(self));
01084 return infect(a, p);
01085 }
01086 if (p->curr == 0) {
01087 b = inspect2(p);
01088 a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld @ %"PRIsVALUE">",
01089 RB_OBJ_CLASSNAME(self),
01090 p->curr, S_LEN(p),
01091 RB_OBJ_STRING(b));
01092 return infect(a, p);
01093 }
01094 a = inspect1(p);
01095 b = inspect2(p);
01096 a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld %"PRIsVALUE" @ %"PRIsVALUE">",
01097 RB_OBJ_CLASSNAME(self),
01098 p->curr, S_LEN(p),
01099 RB_OBJ_STRING(a), RB_OBJ_STRING(b));
01100 return infect(a, p);
01101 }
01102
01103 static VALUE
01104 inspect1(struct strscanner *p)
01105 {
01106 char buf[BUFSIZE];
01107 char *bp = buf;
01108 long len;
01109
01110 if (p->curr == 0) return rb_str_new2("");
01111 if (p->curr > INSPECT_LENGTH) {
01112 strcpy(bp, "..."); bp += 3;
01113 len = INSPECT_LENGTH;
01114 }
01115 else {
01116 len = p->curr;
01117 }
01118 memcpy(bp, CURPTR(p) - len, len); bp += len;
01119 return rb_str_dump(rb_str_new(buf, bp - buf));
01120 }
01121
01122 static VALUE
01123 inspect2(struct strscanner *p)
01124 {
01125 VALUE str;
01126 long len;
01127
01128 if (EOS_P(p)) return rb_str_new2("");
01129 len = S_LEN(p) - p->curr;
01130 if (len > INSPECT_LENGTH) {
01131 str = rb_str_new(CURPTR(p), INSPECT_LENGTH);
01132 rb_str_cat2(str, "...");
01133 }
01134 else {
01135 str = rb_str_new(CURPTR(p), len);
01136 }
01137 return rb_str_dump(str);
01138 }
01139
01140
01141
01142
01143
01144
01145
01146
01147
01148
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247 void
01248 Init_strscan()
01249 {
01250 ID id_scanerr = rb_intern("ScanError");
01251 VALUE tmp;
01252
01253 StringScanner = rb_define_class("StringScanner", rb_cObject);
01254 ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
01255 if (!rb_const_defined(rb_cObject, id_scanerr)) {
01256 rb_const_set(rb_cObject, id_scanerr, ScanError);
01257 }
01258 tmp = rb_str_new2(STRSCAN_VERSION);
01259 rb_obj_freeze(tmp);
01260 rb_const_set(StringScanner, rb_intern("Version"), tmp);
01261 tmp = rb_str_new2("$Id: strscan.c 44754 2014-01-30 03:49:07Z usa $");
01262 rb_obj_freeze(tmp);
01263 rb_const_set(StringScanner, rb_intern("Id"), tmp);
01264
01265 rb_define_alloc_func(StringScanner, strscan_s_allocate);
01266 rb_define_private_method(StringScanner, "initialize", strscan_initialize, -1);
01267 rb_define_private_method(StringScanner, "initialize_copy", strscan_init_copy, 1);
01268 rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0);
01269 rb_define_method(StringScanner, "reset", strscan_reset, 0);
01270 rb_define_method(StringScanner, "terminate", strscan_terminate, 0);
01271 rb_define_method(StringScanner, "clear", strscan_clear, 0);
01272 rb_define_method(StringScanner, "string", strscan_get_string, 0);
01273 rb_define_method(StringScanner, "string=", strscan_set_string, 1);
01274 rb_define_method(StringScanner, "concat", strscan_concat, 1);
01275 rb_define_method(StringScanner, "<<", strscan_concat, 1);
01276 rb_define_method(StringScanner, "pos", strscan_get_pos, 0);
01277 rb_define_method(StringScanner, "pos=", strscan_set_pos, 1);
01278 rb_define_method(StringScanner, "pointer", strscan_get_pos, 0);
01279 rb_define_method(StringScanner, "pointer=", strscan_set_pos, 1);
01280
01281 rb_define_method(StringScanner, "scan", strscan_scan, 1);
01282 rb_define_method(StringScanner, "skip", strscan_skip, 1);
01283 rb_define_method(StringScanner, "match?", strscan_match_p, 1);
01284 rb_define_method(StringScanner, "check", strscan_check, 1);
01285 rb_define_method(StringScanner, "scan_full", strscan_scan_full, 3);
01286
01287 rb_define_method(StringScanner, "scan_until", strscan_scan_until, 1);
01288 rb_define_method(StringScanner, "skip_until", strscan_skip_until, 1);
01289 rb_define_method(StringScanner, "exist?", strscan_exist_p, 1);
01290 rb_define_method(StringScanner, "check_until", strscan_check_until, 1);
01291 rb_define_method(StringScanner, "search_full", strscan_search_full, 3);
01292
01293 rb_define_method(StringScanner, "getch", strscan_getch, 0);
01294 rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
01295 rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
01296 rb_define_method(StringScanner, "peek", strscan_peek, 1);
01297 rb_define_method(StringScanner, "peep", strscan_peep, 1);
01298
01299 rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
01300
01301 rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
01302 rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?"));
01303 rb_define_method(StringScanner, "eos?", strscan_eos_p, 0);
01304 rb_define_method(StringScanner, "empty?", strscan_empty_p, 0);
01305 rb_define_method(StringScanner, "rest?", strscan_rest_p, 0);
01306
01307 rb_define_method(StringScanner, "matched?", strscan_matched_p, 0);
01308 rb_define_method(StringScanner, "matched", strscan_matched, 0);
01309 rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0);
01310 rb_define_method(StringScanner, "[]", strscan_aref, 1);
01311 rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0);
01312 rb_define_method(StringScanner, "post_match", strscan_post_match, 0);
01313
01314 rb_define_method(StringScanner, "rest", strscan_rest, 0);
01315 rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0);
01316 rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
01317
01318 rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
01319 }
01320