00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include "ruby/ruby.h"
00017 #include <errno.h>
00018 #include <iconv.h>
00019 #include <assert.h>
00020 #include "ruby/st.h"
00021 #include "ruby/encoding.h"
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
/*
 * Conversions between an iconv_t descriptor and the VALUE stored in
 * DATA_PTR.  Both XOR the bits with -1 (all bits set), so the VALUE 0
 * (a NULL DATA_PTR) corresponds to (iconv_t)-1, which iconv_convert
 * rejects as "closed iconv".
 */
00079 #define VALUE2ICONV(v) ((iconv_t)((VALUE)(v) ^ -1))
00080 #define ICONV2VALUE(c) ((VALUE)(c) ^ -1)
00081
/*
 * State shared by the class-level converters (iconv_s_iconv, iconv_s_conv)
 * with iconv_s_convert and, on failure, with iconv_fail.
 */
00082 struct iconv_env_t
00083 {
/* conversion descriptor returned by iconv_create */
00084 iconv_t cd;
/* number of input arguments not yet converted (decremented by iconv_s_convert) */
00085 int argc;
/* pointer to the next input argument (advanced by iconv_s_convert) */
00086 VALUE *argv;
/* accumulated result: an Array for Iconv.iconv, a String for Iconv.conv */
00087 VALUE ret;
/* encoding index of the target charset from rb_enc_find_index; applied only when >= 0 */
00088 int toidx;
/* appends one converted piece to ret (rb_ary_push or rb_str_append) */
00089 VALUE (*append)_((VALUE, VALUE));
00090 };
00091
/*
 * Options parsed by get_iconv_opt from the optional third argument of
 * Iconv.new / Iconv.open.  Each field is Qundef when the option was not
 * given; iconv_create applies only the fields that are not Qundef.
 */
00092 struct rb_iconv_opt_t
00093 {
00094 VALUE transliterate;
00095 VALUE discard_ilseq;
00096 };
00097
/* IDs of the option names recognised by get_iconv_opt_i; interned in Init_iconv. */
00098 static ID id_transliterate, id_discard_ilseq;
00099
/* Exception classes and the Failure mixin module; all created in Init_iconv. */
00100 static VALUE rb_eIconvInvalidEncoding;
00101 static VALUE rb_eIconvFailure;
00102 static VALUE rb_eIconvIllegalSeq;
00103 static VALUE rb_eIconvInvalidChar;
00104 static VALUE rb_eIconvOutOfRange;
00105 static VALUE rb_eIconvBrokenLibrary;
00106
/* IDs under which a Failure stores its success/failed values (see iconv_failure_initialize). */
00107 static ID rb_success, rb_failed;
/* Forward declarations of the exception helpers. */
00108 static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
00109 static VALUE iconv_fail_retry _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
00110 static VALUE iconv_failure_initialize _((VALUE error, VALUE mesg, VALUE success, VALUE failed));
00111 static VALUE iconv_failure_success _((VALUE self));
00112 static VALUE iconv_failure_failed _((VALUE self));
00113
/* Forward declarations of the descriptor management and conversion functions. */
00114 static iconv_t iconv_create _((VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx));
00115 static void iconv_dfree _((void *cd));
00116 static VALUE iconv_free _((VALUE cd));
00117 static VALUE iconv_try _((iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen));
00118 static VALUE rb_str_derive _((VALUE str, const char* ptr, long len));
00119 static VALUE iconv_convert _((iconv_t cd, VALUE str, long start, long length, int toidx,
00120 struct iconv_env_t* env));
00121 static VALUE iconv_s_allocate _((VALUE klass));
00122 static VALUE iconv_initialize _((int argc, VALUE *argv, VALUE self));
00123 static VALUE iconv_s_open _((int argc, VALUE *argv, VALUE self));
00124 static VALUE iconv_s_convert _((struct iconv_env_t* env));
00125 static VALUE iconv_s_iconv _((int argc, VALUE *argv, VALUE self));
00126 static VALUE iconv_init_state _((VALUE cd));
00127 static VALUE iconv_finish _((VALUE self));
00128 static VALUE iconv_iconv _((int argc, VALUE *argv, VALUE self));
00129 static VALUE iconv_conv _((int argc, VALUE *argv, VALUE self));
00130
/* Hash consulted by map_charset (keys are looked up by downcased name); created in Init_iconv. */
00131 static VALUE charset_map;
00132
00133
00134
00135
00136
00137
00138
00139 static VALUE
00140 charset_map_get(void)
00141 {
00142 return charset_map;
00143 }
00144
00145 static VALUE
00146 strip_glibc_option(VALUE *code)
00147 {
00148 VALUE val = StringValue(*code);
00149 const char *ptr = RSTRING_PTR(val), *pend = RSTRING_END(val);
00150 const char *slash = memchr(ptr, '/', pend - ptr);
00151
00152 if (slash && slash < pend - 1 && slash[1] == '/') {
00153 VALUE opt = rb_str_subseq(val, slash - ptr, pend - slash);
00154 val = rb_str_subseq(val, 0, slash - ptr);
00155 *code = val;
00156 return opt;
00157 }
00158 return 0;
00159 }
00160
00161 static char *
00162 map_charset(VALUE *code)
00163 {
00164 VALUE val = StringValue(*code);
00165
00166 if (RHASH_SIZE(charset_map)) {
00167 st_data_t data;
00168 VALUE key = rb_funcall2(val, rb_intern("downcase"), 0, 0);
00169 StringValuePtr(key);
00170 if (st_lookup(RHASH_TBL(charset_map), key, &data)) {
00171 *code = (VALUE)data;
00172 }
00173 }
00174 return StringValuePtr(*code);
00175 }
00176
00177 NORETURN(static void rb_iconv_sys_fail(const char *s));
00178 static void
00179 rb_iconv_sys_fail(const char *s)
00180 {
00181 if (errno == 0) {
00182 rb_exc_raise(iconv_fail(rb_eIconvBrokenLibrary, Qnil, Qnil, NULL, s));
00183 }
00184 rb_sys_fail(s);
00185 }
00186
00187 #define rb_sys_fail(s) rb_iconv_sys_fail(s)
00188
00189 static iconv_t
00190 iconv_create(VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx)
00191 {
00192 VALUE toopt = strip_glibc_option(&to);
00193 VALUE fromopt = strip_glibc_option(&from);
00194 VALUE toenc = 0, fromenc = 0;
00195 const char* tocode = map_charset(&to);
00196 const char* fromcode = map_charset(&from);
00197 iconv_t cd;
00198 int retry = 0;
00199
00200 *idx = rb_enc_find_index(tocode);
00201
00202 if (toopt) {
00203 toenc = rb_str_plus(to, toopt);
00204 tocode = RSTRING_PTR(toenc);
00205 }
00206 if (fromopt) {
00207 fromenc = rb_str_plus(from, fromopt);
00208 fromcode = RSTRING_PTR(fromenc);
00209 }
00210 while ((cd = iconv_open(tocode, fromcode)) == (iconv_t)-1) {
00211 int inval = 0;
00212 switch (errno) {
00213 case EMFILE:
00214 case ENFILE:
00215 case ENOMEM:
00216 if (!retry++) {
00217 rb_gc();
00218 continue;
00219 }
00220 break;
00221 case EINVAL:
00222 retry = 0;
00223 inval = 1;
00224 if (toenc) {
00225 tocode = RSTRING_PTR(to);
00226 rb_str_resize(toenc, 0);
00227 toenc = 0;
00228 continue;
00229 }
00230 if (fromenc) {
00231 fromcode = RSTRING_PTR(from);
00232 rb_str_resize(fromenc, 0);
00233 fromenc = 0;
00234 continue;
00235 }
00236 break;
00237 }
00238 {
00239 const char *s = inval ? "invalid encoding " : "iconv";
00240 volatile VALUE msg = rb_str_new(0, strlen(s) + RSTRING_LEN(to) +
00241 RSTRING_LEN(from) + 8);
00242
00243 sprintf(RSTRING_PTR(msg), "%s(\"%s\", \"%s\")",
00244 s, RSTRING_PTR(to), RSTRING_PTR(from));
00245 s = RSTRING_PTR(msg);
00246 rb_str_set_len(msg, strlen(s));
00247 if (!inval) rb_sys_fail(s);
00248 rb_exc_raise(iconv_fail(rb_eIconvInvalidEncoding, Qnil,
00249 rb_ary_new3(2, to, from), NULL, s));
00250 }
00251 }
00252
00253 if (toopt || fromopt) {
00254 if (toopt && fromopt && RTEST(rb_str_equal(toopt, fromopt))) {
00255 fromopt = 0;
00256 }
00257 if (toopt && fromopt) {
00258 rb_warning("encoding option isn't portable: %s, %s",
00259 RSTRING_PTR(toopt) + 2, RSTRING_PTR(fromopt) + 2);
00260 }
00261 else {
00262 rb_warning("encoding option isn't portable: %s",
00263 (toopt ? RSTRING_PTR(toopt) : RSTRING_PTR(fromopt)) + 2);
00264 }
00265 }
00266
00267 if (opt) {
00268 #ifdef ICONV_SET_TRANSLITERATE
00269 if (opt->transliterate != Qundef) {
00270 int flag = RTEST(opt->transliterate);
00271 rb_warning("encoding option isn't portable: transliterate");
00272 if (iconvctl(cd, ICONV_SET_TRANSLITERATE, (void *)&flag))
00273 rb_sys_fail("ICONV_SET_TRANSLITERATE");
00274 }
00275 #endif
00276 #ifdef ICONV_SET_DISCARD_ILSEQ
00277 if (opt->discard_ilseq != Qundef) {
00278 int flag = RTEST(opt->discard_ilseq);
00279 rb_warning("encoding option isn't portable: discard_ilseq");
00280 if (iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void *)&flag))
00281 rb_sys_fail("ICONV_SET_DISCARD_ILSEQ");
00282 }
00283 #endif
00284 }
00285
00286 return cd;
00287 }
00288
/*
 * Data-object free function: closes the descriptor encoded in the wrapped
 * pointer.  The result of iconv_close() is ignored.
 */
static void
iconv_dfree(void *cd)
{
    iconv_t handle = VALUE2ICONV(cd);

    iconv_close(handle);
}

/* Free function that marks a T_DATA object as an Iconv (see check_iconv). */
#define ICONV_FREE iconv_dfree
00296
00297 static VALUE
00298 iconv_free(VALUE cd)
00299 {
00300 if (cd && iconv_close(VALUE2ICONV(cd)) == -1)
00301 rb_sys_fail("iconv_close");
00302 return Qnil;
00303 }
00304
00305 static VALUE
00306 check_iconv(VALUE obj)
00307 {
00308 Check_Type(obj, T_DATA);
00309 if (RDATA(obj)->dfree != ICONV_FREE) {
00310 rb_raise(rb_eArgError, "Iconv expected (%s)", rb_class2name(CLASS_OF(obj)));
00311 }
00312 return (VALUE)DATA_PTR(obj);
00313 }
00314
00315 static VALUE
00316 iconv_try(iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen)
00317 {
00318 #ifdef ICONV_INPTR_CONST
00319 #define ICONV_INPTR_CAST
00320 #else
00321 #define ICONV_INPTR_CAST (char **)
00322 #endif
00323 size_t ret;
00324
00325 errno = 0;
00326 ret = iconv(cd, ICONV_INPTR_CAST inptr, inlen, outptr, outlen);
00327 if (ret == (size_t)-1) {
00328 if (!*inlen)
00329 return Qfalse;
00330 switch (errno) {
00331 case E2BIG:
00332
00333 break;
00334 case EILSEQ:
00335 return rb_eIconvIllegalSeq;
00336 case EINVAL:
00337 return rb_eIconvInvalidChar;
00338 case 0:
00339 return rb_eIconvBrokenLibrary;
00340 default:
00341 rb_sys_fail("iconv");
00342 }
00343 }
00344 else if (*inlen > 0) {
00345
00346 return rb_eIconvIllegalSeq;
00347 }
00348 else if (ret) {
00349 return Qnil;
00350 }
00351 return Qfalse;
00352 }
00353
/* Length passed to rb_str_substr in iconv_fail: longer failed strings are cut to this and suffixed with "...". */
00354 #define FAILED_MAXLEN 16
00355
00356 static VALUE
00357 iconv_failure_initialize(VALUE error, VALUE mesg, VALUE success, VALUE failed)
00358 {
00359 rb_call_super(1, &mesg);
00360 rb_ivar_set(error, rb_success, success);
00361 rb_ivar_set(error, rb_failed, failed);
00362 return error;
00363 }
00364
00365 static VALUE
00366 iconv_fail(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
00367 {
00368 VALUE args[3];
00369
00370 if (mesg && *mesg) {
00371 args[0] = rb_str_new2(mesg);
00372 }
00373 else if (TYPE(failed) != T_STRING || RSTRING_LEN(failed) < FAILED_MAXLEN) {
00374 args[0] = rb_inspect(failed);
00375 }
00376 else {
00377 args[0] = rb_inspect(rb_str_substr(failed, 0, FAILED_MAXLEN));
00378 rb_str_cat2(args[0], "...");
00379 }
00380 args[1] = success;
00381 args[2] = failed;
00382 if (env) {
00383 args[1] = env->append(rb_obj_dup(env->ret), success);
00384 if (env->argc > 0) {
00385 *(env->argv) = failed;
00386 args[2] = rb_ary_new4(env->argc, env->argv);
00387 }
00388 }
00389 return rb_class_new_instance(3, args, error);
00390 }
00391
00392 static VALUE
00393 iconv_fail_retry(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg)
00394 {
00395 error = iconv_fail(error, success, failed, env, mesg);
00396 if (!rb_block_given_p()) rb_exc_raise(error);
00397 rb_set_errinfo(error);
00398 return rb_yield(failed);
00399 }
00400
00401 static VALUE
00402 rb_str_derive(VALUE str, const char* ptr, long len)
00403 {
00404 VALUE ret;
00405
00406 if (NIL_P(str))
00407 return rb_str_new(ptr, len);
00408 if (RSTRING_PTR(str) + RSTRING_LEN(str) == ptr + len)
00409 ret = rb_str_subseq(str, ptr - RSTRING_PTR(str), len);
00410 else
00411 ret = rb_str_new(ptr, len);
00412 OBJ_INFECT(ret, str);
00413 return ret;
00414 }
00415
00416 static VALUE
00417 iconv_convert(iconv_t cd, VALUE str, long start, long length, int toidx, struct iconv_env_t* env)
00418 {
00419 VALUE ret = Qfalse;
00420 VALUE error = Qfalse;
00421 VALUE rescue;
00422 const char *inptr, *instart;
00423 size_t inlen;
00424
00425 char buffer[BUFSIZ];
00426 char *outptr;
00427 size_t outlen;
00428
00429 if (cd == (iconv_t)-1)
00430 rb_raise(rb_eArgError, "closed iconv");
00431
00432 if (NIL_P(str)) {
00433
00434 inptr = "";
00435 inlen = 0;
00436 outptr = buffer;
00437 outlen = sizeof(buffer);
00438 error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
00439 if (RTEST(error)) {
00440 unsigned int i;
00441 rescue = iconv_fail_retry(error, Qnil, Qnil, env, 0);
00442 if (TYPE(rescue) == T_ARRAY) {
00443 str = RARRAY_LEN(rescue) > 0 ? RARRAY_PTR(rescue)[0] : Qnil;
00444 }
00445 if (FIXNUM_P(str) && (i = FIX2INT(str)) <= 0xff) {
00446 char c = i;
00447 str = rb_str_new(&c, 1);
00448 }
00449 else if (!NIL_P(str)) {
00450 StringValue(str);
00451 }
00452 }
00453
00454 inptr = NULL;
00455 length = 0;
00456 }
00457 else {
00458 long slen;
00459
00460 StringValue(str);
00461 slen = RSTRING_LEN(str);
00462 inptr = RSTRING_PTR(str);
00463
00464 inptr += start;
00465 if (length < 0 || length > start + slen)
00466 length = slen - start;
00467 }
00468 instart = inptr;
00469 inlen = length;
00470
00471 do {
00472 char errmsg[50];
00473 const char *tmpstart = inptr;
00474 outptr = buffer;
00475 outlen = sizeof(buffer);
00476
00477 errmsg[0] = 0;
00478 error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen);
00479
00480 if (
00481 #if SIGNEDNESS_OF_SIZE_T < 0
00482 0 <= outlen &&
00483 #endif
00484 outlen <= sizeof(buffer)) {
00485 outlen = sizeof(buffer) - outlen;
00486 if (NIL_P(error) ||
00487 outlen > (size_t)(inptr - tmpstart) ||
00488 (outlen < (size_t)(inptr - tmpstart) && inlen > 0) ||
00489 memcmp(buffer, tmpstart, outlen))
00490 {
00491 if (NIL_P(str)) {
00492 ret = rb_str_new(buffer, outlen);
00493 if (toidx >= 0) rb_enc_associate_index(ret, toidx);
00494 }
00495 else {
00496 if (ret) {
00497 ret = rb_str_buf_cat(ret, instart, tmpstart - instart);
00498 }
00499 else {
00500 ret = rb_str_new(instart, tmpstart - instart);
00501 if (toidx >= 0) rb_enc_associate_index(ret, toidx);
00502 OBJ_INFECT(ret, str);
00503 }
00504 ret = rb_str_buf_cat(ret, buffer, outlen);
00505 instart = inptr;
00506 }
00507 }
00508 else if (!inlen) {
00509 inptr = tmpstart + outlen;
00510 }
00511 }
00512 else {
00513
00514 sprintf(errmsg, "bug?(output length = %ld)", (long)(sizeof(buffer) - outlen));
00515 error = rb_eIconvOutOfRange;
00516 }
00517
00518 if (RTEST(error)) {
00519 long len = 0;
00520
00521 if (!ret) {
00522 ret = rb_str_derive(str, instart, inptr - instart);
00523 if (toidx >= 0) rb_enc_associate_index(ret, toidx);
00524 }
00525 else if (inptr > instart) {
00526 rb_str_cat(ret, instart, inptr - instart);
00527 }
00528 str = rb_str_derive(str, inptr, inlen);
00529 rescue = iconv_fail_retry(error, ret, str, env, errmsg);
00530 if (TYPE(rescue) == T_ARRAY) {
00531 if ((len = RARRAY_LEN(rescue)) > 0)
00532 rb_str_concat(ret, RARRAY_PTR(rescue)[0]);
00533 if (len > 1 && !NIL_P(str = RARRAY_PTR(rescue)[1])) {
00534 StringValue(str);
00535 inlen = length = RSTRING_LEN(str);
00536 instart = inptr = RSTRING_PTR(str);
00537 continue;
00538 }
00539 }
00540 else if (!NIL_P(rescue)) {
00541 rb_str_concat(ret, rescue);
00542 }
00543 break;
00544 }
00545 } while (inlen > 0);
00546
00547 if (!ret) {
00548 ret = rb_str_derive(str, instart, inptr - instart);
00549 if (toidx >= 0) rb_enc_associate_index(ret, toidx);
00550 }
00551 else if (inptr > instart) {
00552 rb_str_cat(ret, instart, inptr - instart);
00553 }
00554 return ret;
00555 }
00556
00557 static VALUE
00558 iconv_s_allocate(VALUE klass)
00559 {
00560 return Data_Wrap_Struct(klass, 0, ICONV_FREE, 0);
00561 }
00562
00563 static VALUE
00564 get_iconv_opt_i(VALUE i, VALUE arg)
00565 {
00566 struct rb_iconv_opt_t *opt = (struct rb_iconv_opt_t *)arg;
00567 VALUE name, val;
00568
00569 (void)opt;
00570 i = rb_Array(i);
00571 name = rb_ary_entry(i, 0);
00572 val = rb_ary_entry(i, 1);
00573 do {
00574 if (SYMBOL_P(name)) {
00575 ID id = SYM2ID(name);
00576 if (id == id_transliterate) {
00577 #ifdef ICONV_SET_TRANSLITERATE
00578 opt->transliterate = val;
00579 #else
00580 rb_notimplement();
00581 #endif
00582 break;
00583 }
00584 if (id == id_discard_ilseq) {
00585 #ifdef ICONV_SET_DISCARD_ILSEQ
00586 opt->discard_ilseq = val;
00587 #else
00588 rb_notimplement();
00589 #endif
00590 break;
00591 }
00592 }
00593 else {
00594 const char *s = StringValueCStr(name);
00595 if (strcmp(s, "transliterate") == 0) {
00596 #ifdef ICONV_SET_TRANSLITERATE
00597 opt->transliterate = val;
00598 #else
00599 rb_notimplement();
00600 #endif
00601 break;
00602 }
00603 if (strcmp(s, "discard_ilseq") == 0) {
00604 #ifdef ICONV_SET_DISCARD_ILSEQ
00605 opt->discard_ilseq = val;
00606 #else
00607 rb_notimplement();
00608 #endif
00609 break;
00610 }
00611 }
00612 name = rb_inspect(name);
00613 rb_raise(rb_eArgError, "unknown option - %s", StringValueCStr(name));
00614 } while (0);
00615 return Qnil;
00616 }
00617
00618 static void
00619 get_iconv_opt(struct rb_iconv_opt_t *opt, VALUE options)
00620 {
00621 opt->transliterate = Qundef;
00622 opt->discard_ilseq = Qundef;
00623 if (!NIL_P(options)) {
00624 rb_block_call(options, rb_intern("each"), 0, 0, get_iconv_opt_i, (VALUE)opt);
00625 }
00626 }
00627
/*
 * Issues the iconvctl() request `func` on the descriptor wrapped by `self`
 * (validated by check_iconv), passing the address of `val`.  A non-zero
 * result raises through rb_sys_fail with the request name as the message.
 */
00628 #define iconv_ctl(self, func, val) (\
00629 iconvctl(VALUE2ICONV(check_iconv(self)), func, (void *)&(val)) ? \
00630 rb_sys_fail(#func) : (void)0)
00631
00632
00633
00634
00635
00636
00637
00638
00639
00640
00641
00642
00643
00644
00645
00646
00647
00648
00649
00650
00651 static VALUE
00652 iconv_initialize(int argc, VALUE *argv, VALUE self)
00653 {
00654 VALUE to, from, options;
00655 struct rb_iconv_opt_t opt;
00656 int idx;
00657
00658 rb_scan_args(argc, argv, "21", &to, &from, &options);
00659 get_iconv_opt(&opt, options);
00660 iconv_free(check_iconv(self));
00661 DATA_PTR(self) = NULL;
00662 DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from, &opt, &idx));
00663 if (idx >= 0) ENCODING_SET(self, idx);
00664 return self;
00665 }
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675 static VALUE
00676 iconv_s_open(int argc, VALUE *argv, VALUE self)
00677 {
00678 VALUE to, from, options, cd;
00679 struct rb_iconv_opt_t opt;
00680 int idx;
00681
00682 rb_scan_args(argc, argv, "21", &to, &from, &options);
00683 get_iconv_opt(&opt, options);
00684 cd = ICONV2VALUE(iconv_create(to, from, &opt, &idx));
00685
00686 self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)cd);
00687 if (idx >= 0) ENCODING_SET(self, idx);
00688
00689 if (rb_block_given_p()) {
00690 return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self);
00691 }
00692 else {
00693 return self;
00694 }
00695 }
00696
00697 static VALUE
00698 iconv_s_convert(struct iconv_env_t* env)
00699 {
00700 VALUE last = 0;
00701
00702 for (; env->argc > 0; --env->argc, ++env->argv) {
00703 VALUE s = iconv_convert(env->cd, last = *(env->argv),
00704 0, -1, env->toidx, env);
00705 env->append(env->ret, s);
00706 }
00707
00708 if (!NIL_P(last)) {
00709 VALUE s = iconv_convert(env->cd, Qnil, 0, 0, env->toidx, env);
00710 if (RSTRING_LEN(s))
00711 env->append(env->ret, s);
00712 }
00713
00714 return env->ret;
00715 }
00716
00717
00718
00719
00720
00721
00722
00723
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734
00735 static VALUE
00736 iconv_s_iconv(int argc, VALUE *argv, VALUE self)
00737 {
00738 struct iconv_env_t arg;
00739
00740 if (argc < 2)
00741 rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2);
00742
00743 arg.argc = argc -= 2;
00744 arg.argv = argv + 2;
00745 arg.append = rb_ary_push;
00746 arg.ret = rb_ary_new2(argc);
00747 arg.cd = iconv_create(argv[0], argv[1], NULL, &arg.toidx);
00748 return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
00749 }
00750
00751
00752
00753
00754
00755
00756
00757
00758
00759 static VALUE
00760 iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str)
00761 {
00762 struct iconv_env_t arg;
00763
00764 arg.argc = 1;
00765 arg.argv = &str;
00766 arg.append = rb_str_append;
00767 arg.ret = rb_str_new(0, 0);
00768 arg.cd = iconv_create(to, from, NULL, &arg.toidx);
00769 return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd));
00770 }
00771
00772
00773
00774
00775
00776
00777
00778
00779 #ifdef HAVE_ICONVLIST
/* Arguments handed from list_iconv to list_iconv_i through rb_protect. */
00780 struct iconv_name_list
00781 {
/* number of entries in names */
00782 unsigned int namescount;
/* the names passed to the iconvlist() callback for one encoding */
00783 const char *const *names;
/* Array collecting the results, or 0 when each group is yielded instead */
00784 VALUE array;
00785 };
00786
00787 static VALUE
00788 list_iconv_i(VALUE ptr)
00789 {
00790 struct iconv_name_list *p = (struct iconv_name_list *)ptr;
00791 unsigned int i, namescount = p->namescount;
00792 const char *const *names = p->names;
00793 VALUE ary = rb_ary_new2(namescount);
00794
00795 for (i = 0; i < namescount; i++) {
00796 rb_ary_push(ary, rb_str_new2(names[i]));
00797 }
00798 if (p->array) {
00799 return rb_ary_push(p->array, ary);
00800 }
00801 return rb_yield(ary);
00802 }
00803
00804 static int
00805 list_iconv(unsigned int namescount, const char *const *names, void *data)
00806 {
00807 int *state = data;
00808 struct iconv_name_list list;
00809
00810 list.namescount = namescount;
00811 list.names = names;
00812 list.array = ((VALUE *)data)[1];
00813 rb_protect(list_iconv_i, (VALUE)&list, state);
00814 return *state;
00815 }
00816 #endif
00817
#if defined(HAVE_ICONVLIST) || defined(HAVE___ICONV_FREE_LIST)
/*
 * Iconv.list [{ |*aliases| ... }]
 *
 * Enumerates the encoding names known to the iconv library.
 *
 * With iconvlist(): each group of names is yielded as an Array when a
 * block is given (returning nil), otherwise an Array of such Arrays is
 * returned.  With __iconv_get_list(): the names are returned as a flat
 * Array, or yielded one by one when a block is given.
 *
 * The function is registered with arity 0, so Ruby calls it with the
 * receiver as its only argument; klass is declared to match and unused.
 */
static VALUE
iconv_s_list(VALUE klass)
{
#ifdef HAVE_ICONVLIST
    int state;
    VALUE args[2];

    (void)klass;
    /*
     * Slot 0 is written by list_iconv with the rb_protect state.  Clear it
     * first so that a library reporting no encodings (callback never
     * invoked) does not leave an indeterminate state to jump to.
     */
    args[0] = 0;
    args[1] = rb_block_given_p() ? 0 : rb_ary_new();
    iconvlist(list_iconv, args);
    state = *(int *)args;
    if (state) rb_jump_tag(state);
    if (args[1]) return args[1];
#elif defined(HAVE___ICONV_FREE_LIST)
    char **list;
    size_t sz, i;
    VALUE ary;

    (void)klass;
    if (__iconv_get_list(&list, &sz)) return Qnil;

    ary = rb_ary_new2(sz);
    for (i = 0; i < sz; i++) {
        rb_ary_push(ary, rb_str_new2(list[i]));
    }
    __iconv_free_list(list, sz);

    if (!rb_block_given_p())
        return ary;
    /* re-read the length each time: the block may modify the array */
    for (i = 0; i < (size_t)RARRAY_LEN(ary); i++) {
        rb_yield(RARRAY_PTR(ary)[i]);
    }
#endif
    return Qnil;
}
#else
#define iconv_s_list rb_f_notimplement
#endif
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867 static VALUE
00868 iconv_init_state(VALUE self)
00869 {
00870 iconv_t cd = VALUE2ICONV((VALUE)DATA_PTR(self));
00871 DATA_PTR(self) = NULL;
00872 return iconv_convert(cd, Qnil, 0, 0, ENCODING_GET(self), NULL);
00873 }
00874
00875 static VALUE
00876 iconv_finish(VALUE self)
00877 {
00878 VALUE cd = check_iconv(self);
00879
00880 if (!cd) return Qnil;
00881 return rb_ensure(iconv_init_state, self, iconv_free, cd);
00882 }
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911 static VALUE
00912 iconv_iconv(int argc, VALUE *argv, VALUE self)
00913 {
00914 VALUE str, n1, n2;
00915 VALUE cd = check_iconv(self);
00916 long start = 0, length = 0, slen = 0;
00917
00918 rb_scan_args(argc, argv, "12", &str, &n1, &n2);
00919 if (!NIL_P(str)) {
00920 VALUE n = rb_str_length(StringValue(str));
00921 slen = NUM2LONG(n);
00922 }
00923 if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) {
00924 if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) {
00925 length = NIL_P(n2) ? -1 : NUM2LONG(n2);
00926 }
00927 }
00928 if (start > 0 || length > 0) {
00929 rb_encoding *enc = rb_enc_get(str);
00930 const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
00931 const char *ps = s;
00932 if (start > 0) {
00933 start = (ps = rb_enc_nth(s, e, start, enc)) - s;
00934 }
00935 if (length > 0) {
00936 length = rb_enc_nth(ps, e, length, enc) - ps;
00937 }
00938 }
00939
00940 return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL);
00941 }
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951 static VALUE
00952 iconv_conv(int argc, VALUE *argv, VALUE self)
00953 {
00954 iconv_t cd = VALUE2ICONV(check_iconv(self));
00955 VALUE str, s;
00956 int toidx = ENCODING_GET(self);
00957
00958 str = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
00959 if (argc > 0) {
00960 do {
00961 s = iconv_convert(cd, *argv++, 0, -1, toidx, NULL);
00962 if (RSTRING_LEN(s))
00963 rb_str_buf_append(str, s);
00964 } while (--argc);
00965 s = iconv_convert(cd, Qnil, 0, 0, toidx, NULL);
00966 if (RSTRING_LEN(s))
00967 rb_str_buf_append(str, s);
00968 }
00969
00970 return str;
00971 }
00972
#ifdef ICONV_TRIVIALP
/*
 * Iconv#trivial?
 *
 * Queries the descriptor with the ICONV_TRIVIALP request and returns true
 * when the library reports a non-zero value, false otherwise.
 */
static VALUE
iconv_trivialp(VALUE self)
{
    int trivial = 0;

    iconv_ctl(self, ICONV_TRIVIALP, trivial);
    return trivial ? Qtrue : Qfalse;
}
#else
#define iconv_trivialp rb_f_notimplement
#endif
00991
#ifdef ICONV_GET_TRANSLITERATE
/*
 * Iconv#transliterate?
 *
 * Queries the descriptor with the ICONV_GET_TRANSLITERATE request and
 * returns true when the reported flag is non-zero, false otherwise.
 */
static VALUE
iconv_get_transliterate(VALUE self)
{
    int enabled = 0;

    iconv_ctl(self, ICONV_GET_TRANSLITERATE, enabled);
    return enabled ? Qtrue : Qfalse;
}
#else
#define iconv_get_transliterate rb_f_notimplement
#endif
01010
#ifdef ICONV_SET_TRANSLITERATE
/*
 * Iconv#transliterate = flag
 *
 * Sends the truthiness of flag to the descriptor with the
 * ICONV_SET_TRANSLITERATE request.  Returns self.
 */
static VALUE
iconv_set_transliterate(VALUE self, VALUE transliterate)
{
    int enable = RTEST(transliterate);

    iconv_ctl(self, ICONV_SET_TRANSLITERATE, enable);
    return self;
}
#else
#define iconv_set_transliterate rb_f_notimplement
#endif
01028
#ifdef ICONV_GET_DISCARD_ILSEQ
/*
 * Iconv#discard_ilseq?
 *
 * Queries the descriptor with the ICONV_GET_DISCARD_ILSEQ request and
 * returns true when the reported flag is non-zero, false otherwise.
 */
static VALUE
iconv_get_discard_ilseq(VALUE self)
{
    int discarding = 0;

    iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, discarding);
    return discarding ? Qtrue : Qfalse;
}
#else
#define iconv_get_discard_ilseq rb_f_notimplement
#endif
01047
#ifdef ICONV_SET_DISCARD_ILSEQ
/*
 * Iconv#discard_ilseq = flag
 *
 * Sends the truthiness of flag to the descriptor with the
 * ICONV_SET_DISCARD_ILSEQ request.  Returns self.
 */
static VALUE
iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq)
{
    int enable = RTEST(discard_ilseq);

    iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, enable);
    return self;
}
#else
#define iconv_set_discard_ilseq rb_f_notimplement
#endif
01065
01066
01067
01068
01069
01070
01071
01072 static VALUE
01073 iconv_s_ctlmethods(VALUE klass)
01074 {
01075 VALUE ary = rb_ary_new();
01076 #ifdef ICONV_TRIVIALP
01077 rb_ary_push(ary, ID2SYM(rb_intern("trivial?")));
01078 #endif
01079 #ifdef ICONV_GET_TRANSLITERATE
01080 rb_ary_push(ary, ID2SYM(rb_intern("transliterate?")));
01081 #endif
01082 #ifdef ICONV_SET_TRANSLITERATE
01083 rb_ary_push(ary, ID2SYM(rb_intern("transliterate=")));
01084 #endif
01085 #ifdef ICONV_GET_DISCARD_ILSEQ
01086 rb_ary_push(ary, ID2SYM(rb_intern("discard_ilseq?")));
01087 #endif
01088 #ifdef ICONV_SET_DISCARD_ILSEQ
01089 rb_ary_push(ary, ID2SYM(rb_intern("discard_ilseq=")));
01090 #endif
01091 return ary;
01092 }
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109 static VALUE
01110 iconv_failure_success(VALUE self)
01111 {
01112 return rb_attr_get(self, rb_success);
01113 }
01114
01115
01116
01117
01118
01119
01120
01121
01122 static VALUE
01123 iconv_failure_failed(VALUE self)
01124 {
01125 return rb_attr_get(self, rb_failed);
01126 }
01127
01128
01129
01130
01131
01132
01133
01134 static VALUE
01135 iconv_failure_inspect(VALUE self)
01136 {
01137 const char *cname = rb_class2name(CLASS_OF(self));
01138 VALUE success = rb_attr_get(self, rb_success);
01139 VALUE failed = rb_attr_get(self, rb_failed);
01140 VALUE str = rb_str_buf_cat2(rb_str_new2("#<"), cname);
01141 str = rb_str_buf_cat(str, ": ", 2);
01142 str = rb_str_buf_append(str, rb_inspect(success));
01143 str = rb_str_buf_cat(str, ", ", 2);
01144 str = rb_str_buf_append(str, rb_inspect(failed));
01145 return rb_str_buf_cat(str, ">", 1);
01146 }
01147
01148
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182 static void
01183 warn_deprecated(void)
01184 {
01185 static const char message[] =
01186 ": iconv will be deprecated in the future, use String#encode instead.\n";
01187 VALUE msg = Qnil, caller = rb_make_backtrace();
01188 long i;
01189
01190 for (i = 1; i < RARRAY_LEN(caller); ++i) {
01191 VALUE s = RARRAY_PTR(caller)[i];
01192 if (strncmp(RSTRING_PTR(s), "<internal:", 10) != 0) {
01193 msg = s;
01194 break;
01195 }
01196 }
01197 if (NIL_P(msg)) {
01198 msg = rb_str_new_cstr(message + 2);
01199 }
01200 else {
01201 rb_str_cat(msg, message, sizeof(message) - 1);
01202 }
01203 rb_io_puts(1, &msg, rb_stderr);
01204 }
01205
01206 void
01207 Init_iconv(void)
01208 {
01209 VALUE rb_cIconv = rb_define_class("Iconv", rb_cData);
01210
01211 if (!NIL_P(ruby_verbose)) {
01212 warn_deprecated();
01213 }
01214 rb_define_alloc_func(rb_cIconv, iconv_s_allocate);
01215 rb_define_singleton_method(rb_cIconv, "open", iconv_s_open, -1);
01216 rb_define_singleton_method(rb_cIconv, "iconv", iconv_s_iconv, -1);
01217 rb_define_singleton_method(rb_cIconv, "conv", iconv_s_conv, 3);
01218 rb_define_singleton_method(rb_cIconv, "list", iconv_s_list, 0);
01219 rb_define_singleton_method(rb_cIconv, "ctlmethods", iconv_s_ctlmethods, 0);
01220 rb_define_method(rb_cIconv, "initialize", iconv_initialize, -1);
01221 rb_define_method(rb_cIconv, "close", iconv_finish, 0);
01222 rb_define_method(rb_cIconv, "iconv", iconv_iconv, -1);
01223 rb_define_method(rb_cIconv, "conv", iconv_conv, -1);
01224 rb_define_method(rb_cIconv, "trivial?", iconv_trivialp, 0);
01225 rb_define_method(rb_cIconv, "transliterate?", iconv_get_transliterate, 0);
01226 rb_define_method(rb_cIconv, "transliterate=", iconv_set_transliterate, 1);
01227 rb_define_method(rb_cIconv, "discard_ilseq?", iconv_get_discard_ilseq, 0);
01228 rb_define_method(rb_cIconv, "discard_ilseq=", iconv_set_discard_ilseq, 1);
01229
01230 rb_eIconvFailure = rb_define_module_under(rb_cIconv, "Failure");
01231 rb_define_method(rb_eIconvFailure, "initialize", iconv_failure_initialize, 3);
01232 rb_define_method(rb_eIconvFailure, "success", iconv_failure_success, 0);
01233 rb_define_method(rb_eIconvFailure, "failed", iconv_failure_failed, 0);
01234 rb_define_method(rb_eIconvFailure, "inspect", iconv_failure_inspect, 0);
01235
01236 rb_eIconvInvalidEncoding = rb_define_class_under(rb_cIconv, "InvalidEncoding", rb_eArgError);
01237 rb_eIconvIllegalSeq = rb_define_class_under(rb_cIconv, "IllegalSequence", rb_eArgError);
01238 rb_eIconvInvalidChar = rb_define_class_under(rb_cIconv, "InvalidCharacter", rb_eArgError);
01239 rb_eIconvOutOfRange = rb_define_class_under(rb_cIconv, "OutOfRange", rb_eRuntimeError);
01240 rb_eIconvBrokenLibrary = rb_define_class_under(rb_cIconv, "BrokenLibrary", rb_eRuntimeError);
01241 rb_include_module(rb_eIconvInvalidEncoding, rb_eIconvFailure);
01242 rb_include_module(rb_eIconvIllegalSeq, rb_eIconvFailure);
01243 rb_include_module(rb_eIconvInvalidChar, rb_eIconvFailure);
01244 rb_include_module(rb_eIconvOutOfRange, rb_eIconvFailure);
01245 rb_include_module(rb_eIconvBrokenLibrary, rb_eIconvFailure);
01246
01247 rb_success = rb_intern("success");
01248 rb_failed = rb_intern("failed");
01249 id_transliterate = rb_intern("transliterate");
01250 id_discard_ilseq = rb_intern("discard_ilseq");
01251
01252 rb_gc_register_address(&charset_map);
01253 charset_map = rb_hash_new();
01254 rb_define_singleton_method(rb_cIconv, "charset_map", charset_map_get, 0);
01255 }
01256
01257