00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "ruby/ruby.h"
00013 #include "ruby/encoding.h"
00014 #include "internal.h"
00015 #include "regenc.h"
00016 #include <ctype.h>
00017 #ifndef NO_LOCALE_CHARMAP
00018 #ifdef __CYGWIN__
00019 #include <windows.h>
00020 #endif
00021 #ifdef HAVE_LANGINFO_H
00022 #include <langinfo.h>
00023 #endif
00024 #endif
00025 #include "ruby/util.h"
00026
/* With GCC 4 or newer, declare the encoding-database registration entry
 * points under default symbol visibility; the push/pop pair confines the
 * visibility override to these prototypes. */
00027 #if defined __GNUC__ && __GNUC__ >= 4
00028 #pragma GCC visibility push(default)
00029 int rb_enc_register(const char *name, rb_encoding *encoding);
00030 void rb_enc_set_base(const char *name, const char *orig);
00031 void rb_encdb_declare(const char *name);
00032 int rb_encdb_replicate(const char *name, const char *orig);
00033 int rb_encdb_dummy(const char *name);
00034 int rb_encdb_alias(const char *alias, const char *orig);
00035 #pragma GCC visibility pop
00036 #endif
00037
/* Cached ID for the "encoding" name; filled in by rb_id_encoding(). */
00038 static ID id_encoding;
/* The Encoding class object. */
00039 VALUE rb_cEncoding;
/* Array of Encoding objects indexed by encoding index; it is consulted by
 * rb_enc_from_encoding_index() and filled by enc_register_at(). */
00040 static VALUE rb_encoding_list;
00041
/* One slot of the encoding table. */
00042 struct rb_encoding_entry {
/* Name owned by the slot (a strdup'ed copy made in enc_register_at()). */
00043 const char *name;
/* The encoding itself; allocated by enc_register_at(). */
00044 rb_encoding *enc;
/* Encoding this one was replicated from, set by set_base_encoding(). */
00045 rb_encoding *base;
00046 };
00047
/* Process-wide encoding table. */
00048 static struct {
/* Slot array, grown with realloc() by enc_table_expand(). */
00049 struct rb_encoding_entry *list;
/* Number of slots in use. */
00050 int count;
/* Number of slots allocated. */
00051 int size;
/* Maps encoding names and aliases to slot indexes. */
00052 st_table *names;
00053 } enc_table;
00054
/* Forward declaration; the definition appears later in this file. */
00055 void rb_enc_init(void);
00056
/* Number of built-in encodings (alias of ENCINDEX_BUILTIN_MAX). */
00057 #define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
/* Sentinel index stored in the name table when a default encoding is set
 * to nil (see enc_set_default_encoding()). */
00058 #define UNSPECIFIED_ENCODING INT_MAX
00059
/* Longest accepted encoding name, not counting the terminating NUL. */
00060 #define ENCODING_NAMELEN_MAX 63
/* A name is valid when it is non-null and at most ENCODING_NAMELEN_MAX long. */
00061 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
00062
/* True when rb_enc_mbmaxlen() of the encoding is zero; callers in this file
 * respond by invoking enc_autoload(). */
00063 #define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
00064
/* Forward declaration; the definition appears later in this file. */
00065 static int load_encoding(const char *name);
00066
/* Size callback for the Encoding typed-data descriptor: always reports 0. */
static size_t
enc_memsize(const void *p)
{
    (void)p;
    return 0;
}
00072
/* Typed-data descriptor for Encoding objects.  Of the three function slots
 * only the third (enc_memsize) is set; the first two are null. */
00073 static const rb_data_type_t encoding_data_type = {
00074 "encoding",
00075 {0, 0, enc_memsize,},
00076 };
00077
/* True when obj is typed data whose descriptor is encoding_data_type. */
00078 #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
00079
00080 static VALUE
00081 enc_new(rb_encoding *encoding)
00082 {
00083 return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, encoding);
00084 }
00085
00086 static VALUE
00087 rb_enc_from_encoding_index(int idx)
00088 {
00089 VALUE list, enc;
00090
00091 if (!(list = rb_encoding_list)) {
00092 rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx);
00093 }
00094 enc = rb_ary_entry(list, idx);
00095 if (NIL_P(enc)) {
00096 rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
00097 }
00098 return enc;
00099 }
00100
00101 VALUE
00102 rb_enc_from_encoding(rb_encoding *encoding)
00103 {
00104 int idx;
00105 if (!encoding) return Qnil;
00106 idx = ENC_TO_ENCINDEX(encoding);
00107 return rb_enc_from_encoding_index(idx);
00108 }
00109
00110 static int enc_autoload(rb_encoding *);
00111
00112 static int
00113 check_encoding(rb_encoding *enc)
00114 {
00115 int index = rb_enc_to_index(enc);
00116 if (rb_enc_from_index(index) != enc)
00117 return -1;
00118 if (enc_autoload_p(enc)) {
00119 index = enc_autoload(enc);
00120 }
00121 return index;
00122 }
00123
00124 static int
00125 enc_check_encoding(VALUE obj)
00126 {
00127 if (SPECIAL_CONST_P(obj) || !rb_typeddata_is_kind_of(obj, &encoding_data_type)) {
00128 return -1;
00129 }
00130 return check_encoding(RDATA(obj)->data);
00131 }
00132
00133 static int
00134 must_encoding(VALUE enc)
00135 {
00136 int index = enc_check_encoding(enc);
00137 if (index < 0) {
00138 rb_raise(rb_eTypeError, "wrong argument type %s (expected Encoding)",
00139 rb_obj_classname(enc));
00140 }
00141 return index;
00142 }
00143
00144 int
00145 rb_to_encoding_index(VALUE enc)
00146 {
00147 int idx;
00148
00149 idx = enc_check_encoding(enc);
00150 if (idx >= 0) {
00151 return idx;
00152 }
00153 else if (NIL_P(enc = rb_check_string_type(enc))) {
00154 return -1;
00155 }
00156 if (!rb_enc_asciicompat(rb_enc_get(enc))) {
00157 return -1;
00158 }
00159 return rb_enc_find_index(StringValueCStr(enc));
00160 }
00161
00162 static rb_encoding *
00163 to_encoding(VALUE enc)
00164 {
00165 int idx;
00166
00167 StringValue(enc);
00168 if (!rb_enc_asciicompat(rb_enc_get(enc))) {
00169 rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
00170 }
00171 idx = rb_enc_find_index(StringValueCStr(enc));
00172 if (idx < 0) {
00173 rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
00174 }
00175 return rb_enc_from_index(idx);
00176 }
00177
00178 rb_encoding *
00179 rb_to_encoding(VALUE enc)
00180 {
00181 if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
00182 return to_encoding(enc);
00183 }
00184
/* GC mark hook for encodings; intentionally does nothing. */
void
rb_gc_mark_encodings(void)
{
    /* nothing to mark */
}
00189
00190 static int
00191 enc_table_expand(int newsize)
00192 {
00193 struct rb_encoding_entry *ent;
00194 int count = newsize;
00195
00196 if (enc_table.size >= newsize) return newsize;
00197 newsize = (newsize + 7) / 8 * 8;
00198 ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
00199 if (!ent) return -1;
00200 memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
00201 enc_table.list = ent;
00202 enc_table.size = newsize;
00203 return count;
00204 }
00205
00206 static int
00207 enc_register_at(int index, const char *name, rb_encoding *encoding)
00208 {
00209 struct rb_encoding_entry *ent = &enc_table.list[index];
00210 VALUE list;
00211
00212 if (!valid_encoding_name_p(name)) return -1;
00213 if (!ent->name) {
00214 ent->name = name = strdup(name);
00215 }
00216 else if (STRCASECMP(name, ent->name)) {
00217 return -1;
00218 }
00219 if (!ent->enc) {
00220 ent->enc = xmalloc(sizeof(rb_encoding));
00221 }
00222 if (encoding) {
00223 *ent->enc = *encoding;
00224 }
00225 else {
00226 memset(ent->enc, 0, sizeof(*ent->enc));
00227 }
00228 encoding = ent->enc;
00229 encoding->name = name;
00230 encoding->ruby_encoding_index = index;
00231 st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
00232 list = rb_encoding_list;
00233 if (list && NIL_P(rb_ary_entry(list, index))) {
00234
00235 rb_ary_store(list, index, enc_new(encoding));
00236 }
00237 return index;
00238 }
00239
00240 static int
00241 enc_register(const char *name, rb_encoding *encoding)
00242 {
00243 int index = enc_table.count;
00244
00245 if ((index = enc_table_expand(index + 1)) < 0) return -1;
00246 enc_table.count = index;
00247 return enc_register_at(index - 1, name, encoding);
00248 }
00249
00250 static void set_encoding_const(const char *, rb_encoding *);
00251 int rb_enc_registered(const char *name);
00252
00253 int
00254 rb_enc_register(const char *name, rb_encoding *encoding)
00255 {
00256 int index = rb_enc_registered(name);
00257
00258 if (index >= 0) {
00259 rb_encoding *oldenc = rb_enc_from_index(index);
00260 if (STRCASECMP(name, rb_enc_name(oldenc))) {
00261 index = enc_register(name, encoding);
00262 }
00263 else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
00264 enc_register_at(index, name, encoding);
00265 }
00266 else {
00267 rb_raise(rb_eArgError, "encoding %s is already registered", name);
00268 }
00269 }
00270 else {
00271 index = enc_register(name, encoding);
00272 set_encoding_const(name, rb_enc_from_index(index));
00273 }
00274 return index;
00275 }
00276
/*
 * Declare an encoding by name: register a placeholder slot when the name is
 * not known yet, then define the Encoding constants for it.  Nothing is
 * defined when the placeholder cannot be registered.
 */
void
rb_encdb_declare(const char *name)
{
    int idx = rb_enc_registered(name);

    if (idx < 0) {
        idx = enc_register(name, 0);
        /* invalid name or allocation failure: there is no encoding to name */
        if (idx < 0) return;
    }
    set_encoding_const(name, rb_enc_from_index(idx));
}
00286
00287 static void
00288 enc_check_duplication(const char *name)
00289 {
00290 if (rb_enc_registered(name) >= 0) {
00291 rb_raise(rb_eArgError, "encoding %s is already registered", name);
00292 }
00293 }
00294
00295 static rb_encoding*
00296 set_base_encoding(int index, rb_encoding *base)
00297 {
00298 rb_encoding *enc = enc_table.list[index].enc;
00299
00300 enc_table.list[index].base = base;
00301 if (rb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
00302 return enc;
00303 }
00304
00305
00306
00307
00308
00309 void
00310 rb_enc_set_base(const char *name, const char *orig)
00311 {
00312 int idx = rb_enc_registered(name);
00313 int origidx = rb_enc_registered(orig);
00314 set_base_encoding(idx, rb_enc_from_index(origidx));
00315 }
00316
00317 int
00318 rb_enc_replicate(const char *name, rb_encoding *encoding)
00319 {
00320 int idx;
00321
00322 enc_check_duplication(name);
00323 idx = enc_register(name, encoding);
00324 set_base_encoding(idx, encoding);
00325 set_encoding_const(name, rb_enc_from_index(idx));
00326 return idx;
00327 }
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338 static VALUE
00339 enc_replicate(VALUE encoding, VALUE name)
00340 {
00341 return rb_enc_from_encoding_index(
00342 rb_enc_replicate(StringValueCStr(name),
00343 rb_to_encoding(encoding)));
00344 }
00345
00346 static int
00347 enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
00348 {
00349 if (idx < 0) {
00350 idx = enc_register(name, origenc);
00351 }
00352 else {
00353 idx = enc_register_at(idx, name, origenc);
00354 }
00355 if (idx >= 0) {
00356 set_base_encoding(idx, origenc);
00357 set_encoding_const(name, rb_enc_from_index(idx));
00358 }
00359 return idx;
00360 }
00361
/* Encoding-database entry point: replicate `orig` as `name`, registering a
 * placeholder for `orig` first when it is not known yet.  Both names are
 * looked up before anything is registered. */
int
rb_encdb_replicate(const char *name, const char *orig)
{
    int base_index = rb_enc_registered(orig);
    const int own_index = rb_enc_registered(name);

    if (base_index < 0) {
        base_index = enc_register(orig, 0);
    }
    return enc_replicate_with_index(name, rb_enc_from_index(base_index), own_index);
}
00373
00374 int
00375 rb_define_dummy_encoding(const char *name)
00376 {
00377 int index = rb_enc_replicate(name, rb_ascii8bit_encoding());
00378 rb_encoding *enc = enc_table.list[index].enc;
00379
00380 ENC_SET_DUMMY(enc);
00381 return index;
00382 }
00383
00384 int
00385 rb_encdb_dummy(const char *name)
00386 {
00387 int index = enc_replicate_with_index(name, rb_ascii8bit_encoding(),
00388 rb_enc_registered(name));
00389 rb_encoding *enc = enc_table.list[index].enc;
00390
00391 ENC_SET_DUMMY(enc);
00392 return index;
00393 }
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408 static VALUE
00409 enc_dummy_p(VALUE enc)
00410 {
00411 return ENC_DUMMY_P(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse;
00412 }
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424 static VALUE
00425 enc_ascii_compatible_p(VALUE enc)
00426 {
00427 return rb_enc_asciicompat(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse;
00428 }
00429
00430
00431
00432
00433 int
00434 rb_enc_unicode_p(rb_encoding *enc)
00435 {
00436 const char *name = rb_enc_name(enc);
00437 return name[0] == 'U' && name[1] == 'T' && name[2] == 'F' && name[4] != '7';
00438 }
00439
00440
00441
00442
00443
00444 static int
00445 enc_alias_internal(const char *alias, int idx)
00446 {
00447 return st_insert2(enc_table.names, (st_data_t)alias, (st_data_t)idx,
00448 (st_data_t(*)(st_data_t))strdup);
00449 }
00450
/* Add alias for the encoding at idx.  Returns -1 for an invalid alias name,
 * idx otherwise.  The Encoding constants are defined only when
 * enc_alias_internal() returns zero. */
static int
enc_alias(const char *alias, int idx)
{
    int status;

    if (!valid_encoding_name_p(alias)) return -1;

    status = enc_alias_internal(alias, idx);
    if (status == 0) {
        set_encoding_const(alias, rb_enc_from_index(idx));
    }
    return idx;
}
00459
00460 int
00461 rb_enc_alias(const char *alias, const char *orig)
00462 {
00463 int idx;
00464
00465 enc_check_duplication(alias);
00466 if (!enc_table.list) {
00467 rb_enc_init();
00468 }
00469 if ((idx = rb_enc_find_index(orig)) < 0) {
00470 return -1;
00471 }
00472 return enc_alias(alias, idx);
00473 }
00474
/* Encoding-database entry point: alias `alias` to `orig`, registering a
 * placeholder for `orig` first when it is not known yet. */
int
rb_encdb_alias(const char *alias, const char *orig)
{
    int target = rb_enc_registered(orig);

    if (target < 0) {
        target = enc_register(orig, 0);
    }
    return enc_alias(alias, target);
}
00485
/* Fixed table indexes of the built-in encodings registered by
 * rb_enc_init(); ENCINDEX_BUILTIN_MAX is their count. */
00486 enum {
00487 ENCINDEX_ASCII,
00488 ENCINDEX_UTF_8,
00489 ENCINDEX_US_ASCII,
00490 ENCINDEX_BUILTIN_MAX
00491 };
00492
/* Encoding definitions provided by other translation units. */
00493 extern rb_encoding OnigEncodingUTF_8;
00494 extern rb_encoding OnigEncodingUS_ASCII;
00495
00496 void
00497 rb_enc_init(void)
00498 {
00499 enc_table_expand(ENCODING_COUNT + 1);
00500 if (!enc_table.names) {
00501 enc_table.names = st_init_strcasetable();
00502 }
00503 #define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
00504 ENC_REGISTER(ASCII);
00505 ENC_REGISTER(UTF_8);
00506 ENC_REGISTER(US_ASCII);
00507 #undef ENC_REGISTER
00508 enc_table.count = ENCINDEX_BUILTIN_MAX;
00509 }
00510
00511 rb_encoding *
00512 rb_enc_from_index(int index)
00513 {
00514 if (!enc_table.list) {
00515 rb_enc_init();
00516 }
00517 if (index < 0 || enc_table.count <= index) {
00518 return 0;
00519 }
00520 return enc_table.list[index].enc;
00521 }
00522
00523 int
00524 rb_enc_registered(const char *name)
00525 {
00526 st_data_t idx = 0;
00527
00528 if (!name) return -1;
00529 if (!enc_table.list) return -1;
00530 if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
00531 return (int)idx;
00532 }
00533 return -1;
00534 }
00535
00536 static VALUE
00537 require_enc(VALUE enclib)
00538 {
00539 int safe = rb_safe_level();
00540 return rb_require_safe(enclib, safe > 3 ? 3 : safe);
00541 }
00542
00543 static int
00544 load_encoding(const char *name)
00545 {
00546 VALUE enclib = rb_sprintf("enc/%s.so", name);
00547 VALUE verbose = ruby_verbose;
00548 VALUE debug = ruby_debug;
00549 VALUE loaded;
00550 char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
00551 int idx;
00552
00553 while (s < e) {
00554 if (!ISALNUM(*s)) *s = '_';
00555 else if (ISUPPER(*s)) *s = TOLOWER(*s);
00556 ++s;
00557 }
00558 FL_UNSET(enclib, FL_TAINT|FL_UNTRUSTED);
00559 OBJ_FREEZE(enclib);
00560 ruby_verbose = Qfalse;
00561 ruby_debug = Qfalse;
00562 loaded = rb_protect(require_enc, enclib, 0);
00563 ruby_verbose = verbose;
00564 ruby_debug = debug;
00565 rb_set_errinfo(Qnil);
00566 if (NIL_P(loaded)) return -1;
00567 if ((idx = rb_enc_registered(name)) < 0) return -1;
00568 if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
00569 return idx;
00570 }
00571
00572 static int
00573 enc_autoload(rb_encoding *enc)
00574 {
00575 int i;
00576 rb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
00577
00578 if (base) {
00579 i = 0;
00580 do {
00581 if (i >= enc_table.count) return -1;
00582 } while (enc_table.list[i].enc != base && (++i, 1));
00583 if (enc_autoload_p(base)) {
00584 if (enc_autoload(base) < 0) return -1;
00585 }
00586 i = ENC_TO_ENCINDEX(enc);
00587 enc_register_at(i, rb_enc_name(enc), base);
00588 }
00589 else {
00590 i = load_encoding(rb_enc_name(enc));
00591 }
00592 return i;
00593 }
00594
00595 int
00596 rb_enc_find_index(const char *name)
00597 {
00598 int i = rb_enc_registered(name);
00599 rb_encoding *enc;
00600
00601 if (i < 0) {
00602 i = load_encoding(name);
00603 }
00604 else if (!(enc = rb_enc_from_index(i))) {
00605 if (i != UNSPECIFIED_ENCODING) {
00606 rb_raise(rb_eArgError, "encoding %s is not registered", name);
00607 }
00608 }
00609 else if (enc_autoload_p(enc)) {
00610 if (enc_autoload(enc) < 0) {
00611 rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
00612 name);
00613 return 0;
00614 }
00615 }
00616 return i;
00617 }
00618
00619 rb_encoding *
00620 rb_enc_find(const char *name)
00621 {
00622 int idx = rb_enc_find_index(name);
00623 if (idx < 0) idx = 0;
00624 return rb_enc_from_index(idx);
00625 }
00626
00627 static inline int
00628 enc_capable(VALUE obj)
00629 {
00630 if (SPECIAL_CONST_P(obj)) return SYMBOL_P(obj);
00631 switch (BUILTIN_TYPE(obj)) {
00632 case T_STRING:
00633 case T_REGEXP:
00634 case T_FILE:
00635 return TRUE;
00636 case T_DATA:
00637 if (is_data_encoding(obj)) return TRUE;
00638 default:
00639 return FALSE;
00640 }
00641 }
00642
00643 ID
00644 rb_id_encoding(void)
00645 {
00646 CONST_ID(id_encoding, "encoding");
00647 return id_encoding;
00648 }
00649
00650 int
00651 rb_enc_get_index(VALUE obj)
00652 {
00653 int i = -1;
00654 VALUE tmp;
00655
00656 if (SPECIAL_CONST_P(obj)) {
00657 if (!SYMBOL_P(obj)) return -1;
00658 obj = rb_id2str(SYM2ID(obj));
00659 }
00660 switch (BUILTIN_TYPE(obj)) {
00661 as_default:
00662 default:
00663 case T_STRING:
00664 case T_REGEXP:
00665 i = ENCODING_GET_INLINED(obj);
00666 if (i == ENCODING_INLINE_MAX) {
00667 VALUE iv;
00668
00669 iv = rb_ivar_get(obj, rb_id_encoding());
00670 i = NUM2INT(iv);
00671 }
00672 break;
00673 case T_FILE:
00674 tmp = rb_funcall(obj, rb_intern("internal_encoding"), 0, 0);
00675 if (NIL_P(tmp)) obj = rb_funcall(obj, rb_intern("external_encoding"), 0, 0);
00676 else obj = tmp;
00677 if (NIL_P(obj)) break;
00678 case T_DATA:
00679 if (is_data_encoding(obj)) {
00680 i = enc_check_encoding(obj);
00681 }
00682 else {
00683 goto as_default;
00684 }
00685 break;
00686 }
00687 return i;
00688 }
00689
00690 static void
00691 enc_set_index(VALUE obj, int idx)
00692 {
00693 if (idx < ENCODING_INLINE_MAX) {
00694 ENCODING_SET_INLINED(obj, idx);
00695 return;
00696 }
00697 ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX);
00698 rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
00699 }
00700
00701 void
00702 rb_enc_set_index(VALUE obj, int idx)
00703 {
00704 rb_check_frozen(obj);
00705 enc_set_index(obj, idx);
00706 }
00707
00708 VALUE
00709 rb_enc_associate_index(VALUE obj, int idx)
00710 {
00711
00712 rb_check_frozen(obj);
00713 if (rb_enc_get_index(obj) == idx)
00714 return obj;
00715 if (SPECIAL_CONST_P(obj)) {
00716 rb_raise(rb_eArgError, "cannot set encoding");
00717 }
00718 if (!ENC_CODERANGE_ASCIIONLY(obj) ||
00719 !rb_enc_asciicompat(rb_enc_from_index(idx))) {
00720 ENC_CODERANGE_CLEAR(obj);
00721 }
00722 enc_set_index(obj, idx);
00723 return obj;
00724 }
00725
00726 VALUE
00727 rb_enc_associate(VALUE obj, rb_encoding *enc)
00728 {
00729 return rb_enc_associate_index(obj, rb_enc_to_index(enc));
00730 }
00731
00732 rb_encoding*
00733 rb_enc_get(VALUE obj)
00734 {
00735 return rb_enc_from_index(rb_enc_get_index(obj));
00736 }
00737
00738 rb_encoding*
00739 rb_enc_check(VALUE str1, VALUE str2)
00740 {
00741 rb_encoding *enc = rb_enc_compatible(str1, str2);
00742 if (!enc)
00743 rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
00744 rb_enc_name(rb_enc_get(str1)),
00745 rb_enc_name(rb_enc_get(str2)));
00746 return enc;
00747 }
00748
00749 rb_encoding*
00750 rb_enc_compatible(VALUE str1, VALUE str2)
00751 {
00752 int idx1, idx2;
00753 rb_encoding *enc1, *enc2;
00754 int isstr1, isstr2;
00755
00756 idx1 = rb_enc_get_index(str1);
00757 idx2 = rb_enc_get_index(str2);
00758
00759 if (idx1 < 0 || idx2 < 0)
00760 return 0;
00761
00762 if (idx1 == idx2) {
00763 return rb_enc_from_index(idx1);
00764 }
00765 enc1 = rb_enc_from_index(idx1);
00766 enc2 = rb_enc_from_index(idx2);
00767
00768 isstr2 = RB_TYPE_P(str2, T_STRING);
00769 if (isstr2 && RSTRING_LEN(str2) == 0)
00770 return enc1;
00771 isstr1 = RB_TYPE_P(str1, T_STRING);
00772 if (isstr1 && RSTRING_LEN(str1) == 0)
00773 return (rb_enc_asciicompat(enc1) && rb_enc_str_asciionly_p(str2)) ? enc1 : enc2;
00774 if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
00775 return 0;
00776 }
00777
00778
00779 if (!isstr2 && idx2 == ENCINDEX_US_ASCII)
00780 return enc1;
00781 if (!isstr1 && idx1 == ENCINDEX_US_ASCII)
00782 return enc2;
00783
00784 if (!isstr1) {
00785 VALUE tmp = str1;
00786 int idx0 = idx1;
00787 str1 = str2;
00788 str2 = tmp;
00789 idx1 = idx2;
00790 idx2 = idx0;
00791 idx0 = isstr1;
00792 isstr1 = isstr2;
00793 isstr2 = idx0;
00794 }
00795 if (isstr1) {
00796 int cr1, cr2;
00797
00798 cr1 = rb_enc_str_coderange(str1);
00799 if (isstr2) {
00800 cr2 = rb_enc_str_coderange(str2);
00801 if (cr1 != cr2) {
00802
00803 if (cr1 == ENC_CODERANGE_7BIT) return enc2;
00804 if (cr2 == ENC_CODERANGE_7BIT) return enc1;
00805 }
00806 if (cr2 == ENC_CODERANGE_7BIT) {
00807 return enc1;
00808 }
00809 }
00810 if (cr1 == ENC_CODERANGE_7BIT)
00811 return enc2;
00812 }
00813 return 0;
00814 }
00815
00816 void
00817 rb_enc_copy(VALUE obj1, VALUE obj2)
00818 {
00819 rb_enc_associate_index(obj1, rb_enc_get_index(obj2));
00820 }
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830 VALUE
00831 rb_obj_encoding(VALUE obj)
00832 {
00833 rb_encoding *enc = rb_enc_get(obj);
00834 if (!enc) {
00835 rb_raise(rb_eTypeError, "unknown encoding");
00836 }
00837 return rb_enc_from_encoding(enc);
00838 }
00839
00840 int
00841 rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
00842 {
00843 return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
00844 }
00845
00846 int
00847 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
00848 {
00849 int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
00850 if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
00851 return MBCLEN_CHARFOUND_LEN(n);
00852 else {
00853 int min = rb_enc_mbminlen(enc);
00854 return min <= e-p ? min : (int)(e-p);
00855 }
00856 }
00857
00858 int
00859 rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
00860 {
00861 int n;
00862 if (e <= p)
00863 return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
00864 n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
00865 if (e-p < n)
00866 return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
00867 return n;
00868 }
00869
00870 int
00871 rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
00872 {
00873 unsigned int c, l;
00874 if (e <= p)
00875 return -1;
00876 if (rb_enc_asciicompat(enc)) {
00877 c = (unsigned char)*p;
00878 if (!ISASCII(c))
00879 return -1;
00880 if (len) *len = 1;
00881 return c;
00882 }
00883 l = rb_enc_precise_mbclen(p, e, enc);
00884 if (!MBCLEN_CHARFOUND_P(l))
00885 return -1;
00886 c = rb_enc_mbc_to_codepoint(p, e, enc);
00887 if (!rb_enc_isascii(c, enc))
00888 return -1;
00889 if (len) *len = l;
00890 return c;
00891 }
00892
00893 unsigned int
00894 rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
00895 {
00896 int r;
00897 if (e <= p)
00898 rb_raise(rb_eArgError, "empty string");
00899 r = rb_enc_precise_mbclen(p, e, enc);
00900 if (MBCLEN_CHARFOUND_P(r)) {
00901 if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
00902 return rb_enc_mbc_to_codepoint(p, e, enc);
00903 }
00904 else
00905 rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
00906 }
00907
00908 #undef rb_enc_codepoint
00909 unsigned int
00910 rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
00911 {
00912 return rb_enc_codepoint_len(p, e, 0, enc);
00913 }
00914
00915 int
00916 rb_enc_codelen(int c, rb_encoding *enc)
00917 {
00918 int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
00919 if (n == 0) {
00920 rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc));
00921 }
00922 return n;
00923 }
00924
00925 int
00926 rb_enc_toupper(int c, rb_encoding *enc)
00927 {
00928 return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_UPPER_CASE(c):(c));
00929 }
00930
00931 int
00932 rb_enc_tolower(int c, rb_encoding *enc)
00933 {
00934 return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c):(c));
00935 }
00936
00937
00938
00939
00940
00941
00942
00943
00944
00945
00946 static VALUE
00947 enc_inspect(VALUE self)
00948 {
00949 VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self),
00950 rb_enc_name((rb_encoding*)DATA_PTR(self)),
00951 (enc_dummy_p(self) ? " (dummy)" : ""));
00952 ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
00953 return str;
00954 }
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964 static VALUE
00965 enc_name(VALUE self)
00966 {
00967 return rb_usascii_str_new2(rb_enc_name((rb_encoding*)DATA_PTR(self)));
00968 }
00969
00970 static int
00971 enc_names_i(st_data_t name, st_data_t idx, st_data_t args)
00972 {
00973 VALUE *arg = (VALUE *)args;
00974
00975 if ((int)idx == (int)arg[0]) {
00976 VALUE str = rb_usascii_str_new2((char *)name);
00977 OBJ_FREEZE(str);
00978 rb_ary_push(arg[1], str);
00979 }
00980 return ST_CONTINUE;
00981 }
00982
00983
00984
00985
00986
00987
00988
00989
00990
00991 static VALUE
00992 enc_names(VALUE self)
00993 {
00994 VALUE args[2];
00995
00996 args[0] = (VALUE)rb_to_encoding_index(self);
00997 args[1] = rb_ary_new2(0);
00998 st_foreach(enc_table.names, enc_names_i, (st_data_t)args);
00999 return args[1];
01000 }
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020 static VALUE
01021 enc_list(VALUE klass)
01022 {
01023 VALUE ary = rb_ary_new2(0);
01024 rb_ary_replace(ary, rb_encoding_list);
01025 return ary;
01026 }
01027
01028
01029
01030
01031
01032
01033
01034
01035
01036
01037
01038
01039
01040
01041
01042
01043
01044
01045
01046
01047
01048
01049
01050
01051
01052 static VALUE
01053 enc_find(VALUE klass, VALUE enc)
01054 {
01055 return rb_enc_from_encoding(rb_to_encoding(enc));
01056 }
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076
01077
01078
01079
01080
01081
01082 static VALUE
01083 enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
01084 {
01085 rb_encoding *enc;
01086
01087 if (!enc_capable(str1)) return Qnil;
01088 if (!enc_capable(str2)) return Qnil;
01089 enc = rb_enc_compatible(str1, str2);
01090 if (!enc) return Qnil;
01091 return rb_enc_from_encoding(enc);
01092 }
01093
01094
01095 static VALUE
01096 enc_dump(int argc, VALUE *argv, VALUE self)
01097 {
01098 rb_scan_args(argc, argv, "01", 0);
01099 return enc_name(self);
01100 }
01101
01102
01103 static VALUE
01104 enc_load(VALUE klass, VALUE str)
01105 {
01106 return enc_find(klass, str);
01107 }
01108
01109 rb_encoding *
01110 rb_ascii8bit_encoding(void)
01111 {
01112 if (!enc_table.list) {
01113 rb_enc_init();
01114 }
01115 return enc_table.list[ENCINDEX_ASCII].enc;
01116 }
01117
01118 int
01119 rb_ascii8bit_encindex(void)
01120 {
01121 return ENCINDEX_ASCII;
01122 }
01123
01124 rb_encoding *
01125 rb_utf8_encoding(void)
01126 {
01127 if (!enc_table.list) {
01128 rb_enc_init();
01129 }
01130 return enc_table.list[ENCINDEX_UTF_8].enc;
01131 }
01132
01133 int
01134 rb_utf8_encindex(void)
01135 {
01136 return ENCINDEX_UTF_8;
01137 }
01138
01139 rb_encoding *
01140 rb_usascii_encoding(void)
01141 {
01142 if (!enc_table.list) {
01143 rb_enc_init();
01144 }
01145 return enc_table.list[ENCINDEX_US_ASCII].enc;
01146 }
01147
01148 int
01149 rb_usascii_encindex(void)
01150 {
01151 return ENCINDEX_US_ASCII;
01152 }
01153
01154 int
01155 rb_locale_encindex(void)
01156 {
01157 VALUE charmap = rb_locale_charmap(rb_cEncoding);
01158 int idx;
01159
01160 if (NIL_P(charmap))
01161 idx = rb_usascii_encindex();
01162 else if ((idx = rb_enc_find_index(StringValueCStr(charmap))) < 0)
01163 idx = rb_ascii8bit_encindex();
01164
01165 if (rb_enc_registered("locale") < 0) enc_alias_internal("locale", idx);
01166
01167 return idx;
01168 }
01169
01170 rb_encoding *
01171 rb_locale_encoding(void)
01172 {
01173 return rb_enc_from_index(rb_locale_encindex());
01174 }
01175
/*
 * Pick the filesystem encoding and record it under the "filesystem" alias.
 * On Windows/Cygwin builds with locale charmap support this is the code
 * page "CP<n>" (ANSI or OEM, depending on AreFileApisANSI()), falling back
 * to ASCII-8BIT when that code page is unknown; everywhere else it is the
 * default external encoding.  Returns the chosen index.
 */
static int
enc_set_filesystem_encoding(void)
{
    int idx;
#if !defined NO_LOCALE_CHARMAP && (defined _WIN32 || defined __CYGWIN__)
    char cp[sizeof(int) * 8 / 3 + 4];

    snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? GetACP() : GetOEMCP());
    idx = rb_enc_find_index(cp);
    if (idx < 0) idx = rb_ascii8bit_encindex();
#else
    idx = rb_enc_to_index(rb_default_external_encoding());
#endif

    enc_alias_internal("filesystem", idx);
    return idx;
}
01194
/* Index registered under the "filesystem" name, or ASCII-8BIT's index when
 * that name is not registered. */
int
rb_filesystem_encindex(void)
{
    int idx = rb_enc_registered("filesystem");

    return (idx < 0) ? rb_ascii8bit_encindex() : idx;
}
01203
01204 rb_encoding *
01205 rb_filesystem_encoding(void)
01206 {
01207 return rb_enc_from_index(rb_filesystem_encindex());
01208 }
01209
/* State of one default encoding (external or internal). */
01210 struct default_encoding {
/* Encoding index; enc_set_default_encoding() stores -1 when set to nil and
 * compares against -2 to detect a previous setting. */
01211 int index;
/* Lazily resolved encoding pointer; reset to null whenever index changes. */
01212 rb_encoding *enc;
01213 };
01214
/* Default external encoding; starts at index 0 with no cached pointer. */
01215 static struct default_encoding default_external = {0};
01216
01217 static int
01218 enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
01219 {
01220 int overridden = FALSE;
01221
01222 if (def->index != -2)
01223
01224 overridden = TRUE;
01225
01226 if (NIL_P(encoding)) {
01227 def->index = -1;
01228 def->enc = 0;
01229 st_insert(enc_table.names, (st_data_t)strdup(name),
01230 (st_data_t)UNSPECIFIED_ENCODING);
01231 }
01232 else {
01233 def->index = rb_enc_to_index(rb_to_encoding(encoding));
01234 def->enc = 0;
01235 enc_alias_internal(name, def->index);
01236 }
01237
01238 if (def == &default_external)
01239 enc_set_filesystem_encoding();
01240
01241 return overridden;
01242 }
01243
01244 rb_encoding *
01245 rb_default_external_encoding(void)
01246 {
01247 if (default_external.enc) return default_external.enc;
01248
01249 if (default_external.index >= 0) {
01250 default_external.enc = rb_enc_from_index(default_external.index);
01251 return default_external.enc;
01252 }
01253 else {
01254 return rb_locale_encoding();
01255 }
01256 }
01257
01258 VALUE
01259 rb_enc_default_external(void)
01260 {
01261 return rb_enc_from_encoding(rb_default_external_encoding());
01262 }
01263
01264
01265
01266
01267
01268
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290 static VALUE
01291 get_default_external(VALUE klass)
01292 {
01293 return rb_enc_default_external();
01294 }
01295
01296 void
01297 rb_enc_set_default_external(VALUE encoding)
01298 {
01299 if (NIL_P(encoding)) {
01300 rb_raise(rb_eArgError, "default external can not be nil");
01301 }
01302 enc_set_default_encoding(&default_external, encoding,
01303 "external");
01304 }
01305
01306
01307
01308
01309
01310
01311
01312
01313
01314
01315
01316
01317
01318
01319 static VALUE
01320 set_default_external(VALUE klass, VALUE encoding)
01321 {
01322 rb_warning("setting Encoding.default_external");
01323 rb_enc_set_default_external(encoding);
01324 return encoding;
01325 }
01326
/* Default internal encoding; index starts at -2, the value
 * enc_set_default_encoding() treats as "not set before". */
01327 static struct default_encoding default_internal = {-2};
01328
01329 rb_encoding *
01330 rb_default_internal_encoding(void)
01331 {
01332 if (!default_internal.enc && default_internal.index >= 0) {
01333 default_internal.enc = rb_enc_from_index(default_internal.index);
01334 }
01335 return default_internal.enc;
01336 }
01337
01338 VALUE
01339 rb_enc_default_internal(void)
01340 {
01341
01342 return rb_enc_from_encoding(rb_default_internal_encoding());
01343 }
01344
01345
01346
01347
01348
01349
01350
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374
01375
01376 static VALUE
01377 get_default_internal(VALUE klass)
01378 {
01379 return rb_enc_default_internal();
01380 }
01381
01382 void
01383 rb_enc_set_default_internal(VALUE encoding)
01384 {
01385 enc_set_default_encoding(&default_internal, encoding,
01386 "internal");
01387 }
01388
01389
01390
01391
01392
01393
01394
01395
01396
01397
01398
01399
01400
01401
01402 static VALUE
01403 set_default_internal(VALUE klass, VALUE encoding)
01404 {
01405 rb_warning("setting Encoding.default_internal");
01406 rb_enc_set_default_internal(encoding);
01407 return encoding;
01408 }
01409
01410
01411
01412
01413
01414
01415
01416
01417
01418
01419
01420
01421
01422
01423
01424
01425
01426
01427
01428
01429
01430
01431
01432
01433
01434
01435 VALUE
01436 rb_locale_charmap(VALUE klass)
01437 {
01438 #if defined NO_LOCALE_CHARMAP
01439 return rb_usascii_str_new2("ASCII-8BIT");
01440 #elif defined _WIN32 || defined __CYGWIN__
01441 const char *nl_langinfo_codeset(void);
01442 const char *codeset = nl_langinfo_codeset();
01443 char cp[sizeof(int) * 3 + 4];
01444 if (!codeset) {
01445 UINT codepage = GetConsoleCP();
01446 if(!codepage) codepage = GetACP();
01447 snprintf(cp, sizeof(cp), "CP%d", codepage);
01448 codeset = cp;
01449 }
01450 return rb_usascii_str_new2(codeset);
01451 #elif defined HAVE_LANGINFO_H
01452 char *codeset;
01453 codeset = nl_langinfo(CODESET);
01454 return rb_usascii_str_new2(codeset);
01455 #else
01456 return Qnil;
01457 #endif
01458 }
01459
01460 static void
01461 set_encoding_const(const char *name, rb_encoding *enc)
01462 {
01463 VALUE encoding = rb_enc_from_encoding(enc);
01464 char *s = (char *)name;
01465 int haslower = 0, hasupper = 0, valid = 0;
01466
01467 if (ISDIGIT(*s)) return;
01468 if (ISUPPER(*s)) {
01469 hasupper = 1;
01470 while (*++s && (ISALNUM(*s) || *s == '_')) {
01471 if (ISLOWER(*s)) haslower = 1;
01472 }
01473 }
01474 if (!*s) {
01475 if (s - name > ENCODING_NAMELEN_MAX) return;
01476 valid = 1;
01477 rb_define_const(rb_cEncoding, name, encoding);
01478 }
01479 if (!valid || haslower) {
01480 size_t len = s - name;
01481 if (len > ENCODING_NAMELEN_MAX) return;
01482 if (!haslower || !hasupper) {
01483 do {
01484 if (ISLOWER(*s)) haslower = 1;
01485 if (ISUPPER(*s)) hasupper = 1;
01486 } while (*++s && (!haslower || !hasupper));
01487 len = s - name;
01488 }
01489 len += strlen(s);
01490 if (len++ > ENCODING_NAMELEN_MAX) return;
01491 MEMCPY(s = ALLOCA_N(char, len), name, char, len);
01492 name = s;
01493 if (!valid) {
01494 if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
01495 for (; *s; ++s) {
01496 if (!ISALNUM(*s)) *s = '_';
01497 }
01498 if (hasupper) {
01499 rb_define_const(rb_cEncoding, name, encoding);
01500 }
01501 }
01502 if (haslower) {
01503 for (s = (char *)name; *s; ++s) {
01504 if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
01505 }
01506 rb_define_const(rb_cEncoding, name, encoding);
01507 }
01508 }
01509 }
01510
01511 static int
01512 rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
01513 {
01514 VALUE ary = (VALUE)arg;
01515 VALUE str = rb_usascii_str_new2((char *)name);
01516 OBJ_FREEZE(str);
01517 rb_ary_push(ary, str);
01518 return ST_CONTINUE;
01519 }
01520
01521
01522
01523
01524
01525
01526
01527
01528
01529
01530
01531
01532
01533
01534
01535 static VALUE
01536 rb_enc_name_list(VALUE klass)
01537 {
01538 VALUE ary = rb_ary_new2(enc_table.names->num_entries);
01539 st_foreach(enc_table.names, rb_enc_name_list_i, (st_data_t)ary);
01540 return ary;
01541 }
01542
01543 static int
01544 rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
01545 {
01546 VALUE *p = (VALUE *)arg;
01547 VALUE aliases = p[0], ary = p[1];
01548 int idx = (int)orig;
01549 VALUE key, str = rb_ary_entry(ary, idx);
01550
01551 if (NIL_P(str)) {
01552 rb_encoding *enc = rb_enc_from_index(idx);
01553
01554 if (!enc) return ST_CONTINUE;
01555 if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) {
01556 return ST_CONTINUE;
01557 }
01558 str = rb_usascii_str_new2(rb_enc_name(enc));
01559 OBJ_FREEZE(str);
01560 rb_ary_store(ary, idx, str);
01561 }
01562 key = rb_usascii_str_new2((char *)name);
01563 OBJ_FREEZE(key);
01564 rb_hash_aset(aliases, key, str);
01565 return ST_CONTINUE;
01566 }
01567
01568
01569
01570
01571
01572
01573
01574
01575
01576
01577
01578
01579
01580 static VALUE
01581 rb_enc_aliases(VALUE klass)
01582 {
01583 VALUE aliases[2];
01584 aliases[0] = rb_hash_new();
01585 aliases[1] = rb_ary_new();
01586 st_foreach(enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases);
01587 return aliases[0];
01588 }
01589
01590 void
01591 Init_Encoding(void)
01592 {
01593 #undef rb_intern
01594 #define rb_intern(str) rb_intern_const(str)
01595 VALUE list;
01596 int i;
01597
01598 rb_cEncoding = rb_define_class("Encoding", rb_cObject);
01599 rb_undef_alloc_func(rb_cEncoding);
01600 rb_undef_method(CLASS_OF(rb_cEncoding), "new");
01601 rb_define_method(rb_cEncoding, "to_s", enc_name, 0);
01602 rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0);
01603 rb_define_method(rb_cEncoding, "name", enc_name, 0);
01604 rb_define_method(rb_cEncoding, "names", enc_names, 0);
01605 rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0);
01606 rb_define_method(rb_cEncoding, "ascii_compatible?", enc_ascii_compatible_p, 0);
01607 rb_define_method(rb_cEncoding, "replicate", enc_replicate, 1);
01608 rb_define_singleton_method(rb_cEncoding, "list", enc_list, 0);
01609 rb_define_singleton_method(rb_cEncoding, "name_list", rb_enc_name_list, 0);
01610 rb_define_singleton_method(rb_cEncoding, "aliases", rb_enc_aliases, 0);
01611 rb_define_singleton_method(rb_cEncoding, "find", enc_find, 1);
01612 rb_define_singleton_method(rb_cEncoding, "compatible?", enc_compatible_p, 2);
01613
01614 rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
01615 rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
01616
01617 rb_define_singleton_method(rb_cEncoding, "default_external", get_default_external, 0);
01618 rb_define_singleton_method(rb_cEncoding, "default_external=", set_default_external, 1);
01619 rb_define_singleton_method(rb_cEncoding, "default_internal", get_default_internal, 0);
01620 rb_define_singleton_method(rb_cEncoding, "default_internal=", set_default_internal, 1);
01621 rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
01622
01623 list = rb_ary_new2(enc_table.count);
01624 RBASIC(list)->klass = 0;
01625 rb_encoding_list = list;
01626 rb_gc_register_mark_object(list);
01627
01628 for (i = 0; i < enc_table.count; ++i) {
01629 rb_ary_push(list, enc_new(enc_table.list[i].enc));
01630 }
01631 }
01632
01633
01634
01635 #define ctype_test(c, ctype) \
01636 (rb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), (ctype)))
01637
01638 int rb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); }
01639 int rb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); }
01640 int rb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); }
01641 int rb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); }
01642 int rb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); }
01643 int rb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); }
01644 int rb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); }
01645 int rb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); }
01646 int rb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); }
01647 int rb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); }
01648 int rb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); }
01649 int rb_isxdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_XDIGIT); }
01650
/*
 * Lowercases c using the ASCII case mapping.
 *
 * Codes that rb_isascii() rejects are returned unchanged; all others go
 * through ONIGENC_ASCII_CODE_TO_LOWER_CASE().
 */
int
rb_tolower(int c)
{
    if (!rb_isascii(c)) {
        return c;
    }
    return ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
}
01656
/*
 * Uppercases c using the ASCII case mapping.
 *
 * Codes that rb_isascii() rejects are returned unchanged; all others go
 * through ONIGENC_ASCII_CODE_TO_UPPER_CASE().
 */
int
rb_toupper(int c)
{
    if (!rb_isascii(c)) {
        return c;
    }
    return ONIGENC_ASCII_CODE_TO_UPPER_CASE(c);
}
01662
01663