00001
00002
00003
00004
00005 #include "ruby.h"
00006 #include "ruby/encoding.h"
00007 #include "ruby/re.h"
00008 #include <ctype.h>
00009
/*
 * Weekday names accepted by %A / %a: the seven full names first, then the
 * seven abbreviations in the same order, so that (index % 7) is the weekday
 * number with Sunday == 0.  Both the pointers and the strings are read-only.
 */
static const char *const day_names[] = {
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday",
    "Sun", "Mon", "Tue", "Wed",
    "Thu", "Fri", "Sat"
};
00016
/*
 * Month names accepted by %B / %b / %h: the twelve full names first, then
 * the twelve abbreviations in the same order, so that (index % 12) + 1 is
 * the month number.  Full names come first so that e.g. "March" is consumed
 * whole instead of stopping after "Mar".
 */
static const char *const month_names[] = {
    "January", "February", "March", "April",
    "May", "June", "July", "August", "September",
    "October", "November", "December",
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
00024
/*
 * Meridian indicators accepted by %P / %p.  Even indexes are the "am"
 * spellings and odd indexes the "pm" spellings; the parser relies on that
 * parity to choose the hour offset.
 */
static const char *const merid_names[] = {
    "am", "pm",
    "a.m.", "p.m."
};
00029
/*
 * Extended time-zone directives (the text that may follow '%').  The entry
 * at index i has i + 1 colons before the 'z'; the parser uses the index to
 * skip the extra colons.
 */
static const char *const extz_pats[] = {
    ":z",
    "::z",
    ":::z"
};
00035
/*
 * Number of elements in the array `o`.  Only meaningful for a real array
 * (not a pointer).  The argument is parenthesized so that any array-valued
 * expression can be passed safely.
 */
#define sizeof_array(o) (sizeof(o) / sizeof((o)[0]))

/*
 * Thin wrappers that invoke Ruby arithmetic methods on VALUEs.  Methods
 * whose name is a single character are called by passing that character
 * constant as the method ID; the others are looked up with rb_intern().
 */
#define f_negate(x) rb_funcall(x, rb_intern("-@"), 0)
#define f_add(x,y) rb_funcall(x, '+', 1, y)
#define f_sub(x,y) rb_funcall(x, '-', 1, y)
#define f_mul(x,y) rb_funcall(x, '*', 1, y)
#define f_div(x,y) rb_funcall(x, '/', 1, y)
#define f_idiv(x,y) rb_funcall(x, rb_intern("div"), 1, y)
#define f_mod(x,y) rb_funcall(x, '%', 1, y)
#define f_expt(x,y) rb_funcall(x, rb_intern("**"), 1, y)

/* Comparison wrappers; each yields the VALUE returned by the Ruby method. */
#define f_lt_p(x,y) rb_funcall(x, '<', 1, y)
#define f_gt_p(x,y) rb_funcall(x, '>', 1, y)
#define f_le_p(x,y) rb_funcall(x, rb_intern("<="), 1, y)
#define f_ge_p(x,y) rb_funcall(x, rb_intern(">="), 1, y)

/* Wrappers for the `match`, `[]` and `end` methods. */
#define f_match(r,s) rb_funcall(r, rb_intern("match"), 1, s)
#define f_aref(o,i) rb_funcall(o, rb_intern("[]"), 1, i)
#define f_end(o,i) rb_funcall(o, rb_intern("end"), 1, i)

/* True when the character `c` is an explicit sign ('-' or '+'). */
#define issign(c) ((c) == '-' || (c) == '+')
00057
/*
 * Report whether the format text at `s` begins with something that will
 * consume digits from the input: either a literal digit, or a '%' directive
 * (optionally carrying an 'E' or 'O' modifier) whose conversion character is
 * one of the numeric conversions listed below or a digit.
 *
 * Returns 1 if so, 0 otherwise (including for an empty string or a lone '%').
 *
 * The bytes are cast to unsigned char before being handed to isdigit():
 * passing a negative plain char to a <ctype.h> function is undefined
 * behaviour.
 */
static int
num_pattern_p(const char *s)
{
    if (isdigit((unsigned char)*s))
        return 1;
    if (*s == '%') {
        s++;
        if (*s == 'E' || *s == 'O')
            s++;
        /* The '\0' test keeps strchr() from matching its own terminator. */
        if (*s &&
            (strchr("CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy", *s) ||
             isdigit((unsigned char)*s)))
            return 1;
    }
    return 0;
}

/*
 * True when the format text right after the current directive character
 * (fmt[fi]) is numeric.  Requires `fmt` and `fi` to be in scope.
 */
#define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1])
00075
00076 static long
00077 read_digits(const char *s, VALUE *n, size_t width)
00078 {
00079 size_t l;
00080
00081 l = strspn(s, "0123456789");
00082
00083 if (l == 0)
00084 return 0;
00085
00086 if (width < l)
00087 l = width;
00088
00089 if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) {
00090 const char *os = s;
00091 long v;
00092
00093 v = 0;
00094 while ((size_t)(s - os) < l) {
00095 v *= 10;
00096 v += *s - '0';
00097 s++;
00098 }
00099 if (os == s)
00100 return 0;
00101 *n = LONG2NUM(v);
00102 return l;
00103 }
00104 else {
00105 char *s2 = ALLOCA_N(char, l + 1);
00106 memcpy(s2, s, l);
00107 s2[l] = '\0';
00108 *n = rb_cstr_to_inum(s2, 10, 0);
00109 return l;
00110 }
00111 }
00112
/*
 * Accessors for the result hash, keyed by the symbol named `k`.  They expand
 * to code that refers to a variable called `hash`, so they can only be used
 * where such a variable is in scope.
 */
00113 #define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), v)
00114 #define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k)))
00115 #define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k)))
00116
/*
 * Abort parsing: store true under :_fail in the hash and return 0 from the
 * ENCLOSING function.  Expands to a braced block, not an expression.
 */
00117 #define fail() \
00118 { \
00119 set_hash("_fail", Qtrue); \
00120 return 0; \
00121 }
00122
/* True once :_fail has been stored in the hash. */
00123 #define fail_p() (!NIL_P(ref_hash("_fail")))
00124
/*
 * Read at most `w` digits at str[si] into `n` and advance `si` past them;
 * when no digit could be read, fail() (which returns from the enclosing
 * function).  Requires `str` and `si` in scope.  Because this expands to a
 * braced block, the parser writes it without a trailing semicolon when an
 * `else` follows directly.
 */
00125 #define READ_DIGITS(n,w) \
00126 { \
00127 size_t l; \
00128 l = read_digits(&str[si], &n, w); \
00129 if (l == 0) \
00130 fail(); \
00131 si += l; \
00132 }
00133
/* Same as READ_DIGITS, with LONG_MAX as the width limit. */
00134 #define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX)
00135
00136 static int
00137 valid_range_p(VALUE v, int a, int b)
00138 {
00139 if (FIXNUM_P(v)) {
00140 int vi = FIX2INT(v);
00141 return !(vi < a || vi > b);
00142 }
00143 return !(f_lt_p(v, INT2NUM(a)) || f_gt_p(v, INT2NUM(b)));
00144 }
00145
/*
 * Parse the remainder of the input (from str[si]) against the nested format
 * `fmt` by calling date__strptime_internal() recursively, then advance `si`
 * by the amount that call consumed.  If the hash carries :_fail afterwards,
 * return 0 from the ENCLOSING function.
 *
 * The length is computed as `sizeof fmt - 1`, so `fmt` must be a string
 * literal (or a char array), not a pointer.  Requires `str`, `slen`, `si`
 * and `hash` in scope.
 */
00146 #define recur(fmt) \
00147 { \
00148 size_t l; \
00149 l = date__strptime_internal(&str[si], slen - si, \
00150 fmt, sizeof fmt - 1, hash); \
00151 if (fail_p()) \
00152 return 0; \
00153 si += l; \
00154 }
00155
/*
 * Defined elsewhere.  The %Z / %z handler passes it the matched zone text
 * and stores the result under :offset.
 */
00156 VALUE date_zone_to_diff(VALUE);
00157
00158 static size_t
00159 date__strptime_internal(const char *str, size_t slen,
00160 const char *fmt, size_t flen, VALUE hash)
00161 {
00162 size_t si, fi;
00163 int c;
00164
00165 si = fi = 0;
00166
00167 while (fi < flen) {
00168
00169 switch (fmt[fi]) {
00170 case '%':
00171
00172 again:
00173 fi++;
00174 c = fmt[fi];
00175
00176 switch (c) {
00177 case 'E':
00178 if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1]))
00179 goto again;
00180 fi--;
00181 goto ordinal;
00182 case 'O':
00183 if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1]))
00184 goto again;
00185 fi--;
00186 goto ordinal;
00187 case ':':
00188 {
00189 int i;
00190
00191 for (i = 0; i < (int)sizeof_array(extz_pats); i++)
00192 if (strncmp(extz_pats[i], &fmt[fi],
00193 strlen(extz_pats[i])) == 0) {
00194 fi += i;
00195 goto again;
00196 }
00197 fail();
00198 }
00199
00200 case 'A':
00201 case 'a':
00202 {
00203 int i;
00204
00205 for (i = 0; i < (int)sizeof_array(day_names); i++) {
00206 size_t l = strlen(day_names[i]);
00207 if (strncasecmp(day_names[i], &str[si], l) == 0) {
00208 si += l;
00209 set_hash("wday", INT2FIX(i % 7));
00210 goto matched;
00211 }
00212 }
00213 fail();
00214 }
00215 case 'B':
00216 case 'b':
00217 case 'h':
00218 {
00219 int i;
00220
00221 for (i = 0; i < (int)sizeof_array(month_names); i++) {
00222 size_t l = strlen(month_names[i]);
00223 if (strncasecmp(month_names[i], &str[si], l) == 0) {
00224 si += l;
00225 set_hash("mon", INT2FIX((i % 12) + 1));
00226 goto matched;
00227 }
00228 }
00229 fail();
00230 }
00231
00232 case 'C':
00233 {
00234 VALUE n;
00235
00236 if (NUM_PATTERN_P())
00237 READ_DIGITS(n, 2)
00238 else
00239 READ_DIGITS_MAX(n)
00240 set_hash("_cent", n);
00241 goto matched;
00242 }
00243
00244 case 'c':
00245 recur("%a %b %e %H:%M:%S %Y");
00246 goto matched;
00247
00248 case 'D':
00249 recur("%m/%d/%y");
00250 goto matched;
00251
00252 case 'd':
00253 case 'e':
00254 {
00255 VALUE n;
00256
00257 if (str[si] == ' ') {
00258 si++;
00259 READ_DIGITS(n, 1);
00260 } else {
00261 READ_DIGITS(n, 2);
00262 }
00263 if (!valid_range_p(n, 1, 31))
00264 fail();
00265 set_hash("mday", n);
00266 goto matched;
00267 }
00268
00269 case 'F':
00270 recur("%Y-%m-%d");
00271 goto matched;
00272
00273 case 'G':
00274 {
00275 VALUE n;
00276
00277 if (NUM_PATTERN_P())
00278 READ_DIGITS(n, 4)
00279 else
00280 READ_DIGITS_MAX(n)
00281 set_hash("cwyear", n);
00282 goto matched;
00283 }
00284
00285 case 'g':
00286 {
00287 VALUE n;
00288
00289 READ_DIGITS(n, 2);
00290 if (!valid_range_p(n, 0, 99))
00291 fail();
00292 set_hash("cwyear",n);
00293 set_hash("_cent",
00294 INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
00295 goto matched;
00296 }
00297
00298 case 'H':
00299 case 'k':
00300 {
00301 VALUE n;
00302
00303 if (str[si] == ' ') {
00304 si++;
00305 READ_DIGITS(n, 1);
00306 } else {
00307 READ_DIGITS(n, 2);
00308 }
00309 if (!valid_range_p(n, 0, 24))
00310 fail();
00311 set_hash("hour", n);
00312 goto matched;
00313 }
00314
00315 case 'I':
00316 case 'l':
00317 {
00318 VALUE n;
00319
00320 if (str[si] == ' ') {
00321 si++;
00322 READ_DIGITS(n, 1);
00323 } else {
00324 READ_DIGITS(n, 2);
00325 }
00326 if (!valid_range_p(n, 1, 12))
00327 fail();
00328 set_hash("hour", n);
00329 goto matched;
00330 }
00331
00332 case 'j':
00333 {
00334 VALUE n;
00335
00336 READ_DIGITS(n, 3);
00337 if (!valid_range_p(n, 1, 366))
00338 fail();
00339 set_hash("yday", n);
00340 goto matched;
00341 }
00342
00343 case 'L':
00344 case 'N':
00345 {
00346 VALUE n;
00347 int sign = 1;
00348 size_t osi;
00349
00350 if (issign(str[si])) {
00351 if (str[si] == '-')
00352 sign = -1;
00353 si++;
00354 }
00355 osi = si;
00356 if (NUM_PATTERN_P())
00357 READ_DIGITS(n, c == 'L' ? 3 : 9)
00358 else
00359 READ_DIGITS_MAX(n)
00360 if (sign == -1)
00361 n = f_negate(n);
00362 set_hash("sec_fraction",
00363 rb_rational_new2(n,
00364 f_expt(INT2FIX(10),
00365 ULONG2NUM(si - osi))));
00366 goto matched;
00367 }
00368
00369 case 'M':
00370 {
00371 VALUE n;
00372
00373 READ_DIGITS(n, 2);
00374 if (!valid_range_p(n, 0, 59))
00375 fail();
00376 set_hash("min", n);
00377 goto matched;
00378 }
00379
00380 case 'm':
00381 {
00382 VALUE n;
00383
00384 READ_DIGITS(n, 2);
00385 if (!valid_range_p(n, 1, 12))
00386 fail();
00387 set_hash("mon", n);
00388 goto matched;
00389 }
00390
00391 case 'n':
00392 case 't':
00393 recur(" ");
00394 goto matched;
00395
00396 case 'P':
00397 case 'p':
00398 {
00399 int i;
00400
00401 for (i = 0; i < 4; i++) {
00402 size_t l = strlen(merid_names[i]);
00403 if (strncasecmp(merid_names[i], &str[si], l) == 0) {
00404 si += l;
00405 set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12));
00406 goto matched;
00407 }
00408 }
00409 fail();
00410 }
00411
00412 case 'Q':
00413 {
00414 VALUE n;
00415 int sign = 1;
00416
00417 if (str[si] == '-') {
00418 sign = -1;
00419 si++;
00420 }
00421 READ_DIGITS_MAX(n);
00422 if (sign == -1)
00423 n = f_negate(n);
00424 set_hash("seconds",
00425 rb_rational_new2(n,
00426 f_expt(INT2FIX(10),
00427 INT2FIX(3))));
00428 goto matched;
00429 }
00430
00431 case 'R':
00432 recur("%H:%M");
00433 goto matched;
00434
00435 case 'r':
00436 recur("%I:%M:%S %p");
00437 goto matched;
00438
00439 case 'S':
00440 {
00441 VALUE n;
00442
00443 READ_DIGITS(n, 2);
00444 if (!valid_range_p(n, 0, 60))
00445 fail();
00446 set_hash("sec", n);
00447 goto matched;
00448 }
00449
00450 case 's':
00451 {
00452 VALUE n;
00453 int sign = 1;
00454
00455 if (str[si] == '-') {
00456 sign = -1;
00457 si++;
00458 }
00459 READ_DIGITS_MAX(n);
00460 if (sign == -1)
00461 n = f_negate(n);
00462 set_hash("seconds", n);
00463 goto matched;
00464 }
00465
00466 case 'T':
00467 recur("%H:%M:%S");
00468 goto matched;
00469
00470 case 'U':
00471 case 'W':
00472 {
00473 VALUE n;
00474
00475 READ_DIGITS(n, 2);
00476 if (!valid_range_p(n, 0, 53))
00477 fail();
00478 set_hash(c == 'U' ? "wnum0" : "wnum1", n);
00479 goto matched;
00480 }
00481
00482 case 'u':
00483 {
00484 VALUE n;
00485
00486 READ_DIGITS(n, 1);
00487 if (!valid_range_p(n, 1, 7))
00488 fail();
00489 set_hash("cwday", n);
00490 goto matched;
00491 }
00492
00493 case 'V':
00494 {
00495 VALUE n;
00496
00497 READ_DIGITS(n, 2);
00498 if (!valid_range_p(n, 1, 53))
00499 fail();
00500 set_hash("cweek", n);
00501 goto matched;
00502 }
00503
00504 case 'v':
00505 recur("%e-%b-%Y");
00506 goto matched;
00507
00508 case 'w':
00509 {
00510 VALUE n;
00511
00512 READ_DIGITS(n, 1);
00513 if (!valid_range_p(n, 0, 6))
00514 fail();
00515 set_hash("wday", n);
00516 goto matched;
00517 }
00518
00519 case 'X':
00520 recur("%H:%M:%S");
00521 goto matched;
00522
00523 case 'x':
00524 recur("%m/%d/%y");
00525 goto matched;
00526
00527 case 'Y':
00528 {
00529 VALUE n;
00530 int sign = 1;
00531
00532 if (issign(str[si])) {
00533 if (str[si] == '-')
00534 sign = -1;
00535 si++;
00536 }
00537 if (NUM_PATTERN_P())
00538 READ_DIGITS(n, 4)
00539 else
00540 READ_DIGITS_MAX(n)
00541 if (sign == -1)
00542 n = f_negate(n);
00543 set_hash("year", n);
00544 goto matched;
00545 }
00546
00547 case 'y':
00548 {
00549 VALUE n;
00550 int sign = 1;
00551
00552 READ_DIGITS(n, 2);
00553 if (!valid_range_p(n, 0, 99))
00554 fail();
00555 if (sign == -1)
00556 n = f_negate(n);
00557 set_hash("year", n);
00558 set_hash("_cent",
00559 INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
00560 goto matched;
00561 }
00562
00563 case 'Z':
00564 case 'z':
00565 {
00566 static const char pat_source[] =
00567 "\\A("
00568 "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?"
00569 "|(?-i:[[:alpha:].\\s]+)(?:standard|daylight)\\s+time\\b"
00570 "|(?-i:[[:alpha:]]+)(?:\\s+dst)?\\b"
00571 ")";
00572 static VALUE pat = Qnil;
00573 VALUE m, b;
00574
00575 if (NIL_P(pat)) {
00576 pat = rb_reg_new(pat_source, sizeof pat_source - 1,
00577 ONIG_OPTION_IGNORECASE);
00578 rb_gc_register_mark_object(pat);
00579 }
00580
00581 b = rb_backref_get();
00582 rb_match_busy(b);
00583 m = f_match(pat, rb_usascii_str_new2(&str[si]));
00584
00585 if (!NIL_P(m)) {
00586 VALUE s, l, o;
00587
00588 s = rb_reg_nth_match(1, m);
00589 l = f_end(m, INT2FIX(0));
00590 o = date_zone_to_diff(s);
00591 si += NUM2LONG(l);
00592 set_hash("zone", s);
00593 set_hash("offset", o);
00594 rb_backref_set(b);
00595 goto matched;
00596 }
00597 rb_backref_set(b);
00598 fail();
00599 }
00600
00601 case '%':
00602 if (str[si] != '%')
00603 fail();
00604 si++;
00605 goto matched;
00606
00607 case '+':
00608 recur("%a %b %e %H:%M:%S %Z %Y");
00609 goto matched;
00610
00611 default:
00612 if (str[si] != '%')
00613 fail();
00614 si++;
00615 if (fi < flen)
00616 if (str[si] != fmt[fi])
00617 fail();
00618 si++;
00619 goto matched;
00620 }
00621 case ' ':
00622 case '\t':
00623 case '\n':
00624 case '\v':
00625 case '\f':
00626 case '\r':
00627 while (isspace(str[si]))
00628 si++;
00629 fi++;
00630 break;
00631 default:
00632 ordinal:
00633 if (str[si] != fmt[fi])
00634 fail();
00635 si++;
00636 fi++;
00637 break;
00638 matched:
00639 fi++;
00640 break;
00641 }
00642 }
00643
00644 return si;
00645 }
00646
00647 VALUE
00648 date__strptime(const char *str, size_t slen,
00649 const char *fmt, size_t flen, VALUE hash)
00650 {
00651 size_t si;
00652 VALUE cent, merid;
00653
00654 si = date__strptime_internal(str, slen, fmt, flen, hash);
00655
00656 if (slen > si) {
00657 VALUE s;
00658
00659 s = rb_usascii_str_new(&str[si], slen - si);
00660 set_hash("leftover", s);
00661 }
00662
00663 if (fail_p())
00664 return Qnil;
00665
00666 cent = ref_hash("_cent");
00667 if (!NIL_P(cent)) {
00668 VALUE year;
00669
00670 year = ref_hash("cwyear");
00671 if (!NIL_P(year))
00672 set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100))));
00673 year = ref_hash("year");
00674 if (!NIL_P(year))
00675 set_hash("year", f_add(year, f_mul(cent, INT2FIX(100))));
00676 del_hash("_cent");
00677 }
00678
00679 merid = ref_hash("_merid");
00680 if (!NIL_P(merid)) {
00681 VALUE hour;
00682
00683 hour = ref_hash("hour");
00684 if (!NIL_P(hour)) {
00685 hour = f_mod(hour, INT2FIX(12));
00686 set_hash("hour", f_add(hour, merid));
00687 }
00688 del_hash("_merid");
00689 }
00690
00691 return hash;
00692 }
00693
00694
00695
00696
00697
00698
00699