00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "ruby/ruby.h"
00013 #include "ruby/io.h"
00014 #include "ruby/st.h"
00015 #include "ruby/util.h"
00016 #include "ruby/encoding.h"
00017 #include "internal.h"
00018
00019 #include <math.h>
00020 #ifdef HAVE_FLOAT_H
00021 #include <float.h>
00022 #endif
00023 #ifdef HAVE_IEEEFP_H
00024 #include <ieeefp.h>
00025 #endif
00026
/* Helpers for slicing a value into "shorts" of 2*CHAR_BIT bits each;
 * used below when bignum digits are written with w_short(). */
#define BITSPERSHORT (2*CHAR_BIT)            /* width of one short, in bits */
#define SHORTMASK ((1<<BITSPERSHORT)-1)      /* mask selecting the lowest short */
#define SHORTDN(x) RSHIFT((x),BITSPERSHORT)  /* drop the lowest short */
00030
#if SIZEOF_SHORT == SIZEOF_BDIGITS
/* A BDIGIT is exactly one short: the digit count is the short count. */
#define SHORTLEN(x) (x)
#else
/*
 * Number of shorts needed to hold a bignum of `len` BDIGITs stored in `ds`.
 * Every digit except the most significant one counts in full; for the most
 * significant digit only the shorts up to its highest non-zero one count.
 */
static long
shortlen(long len, BDIGIT *ds)
{
    BDIGIT top = ds[len-1];
    int used;

    /* count how many shorts the top digit actually occupies */
    for (used = 0; top; used++) {
        top = SHORTDN(top);
    }
    return (len - 1)*sizeof(BDIGIT)/2 + used;
}
/* NOTE: relies on a variable named `d` (the digit array) at the call site. */
#define SHORTLEN(x) shortlen((x),d)
#endif
00049
/* Format version; marshal_dump() writes these as the first two bytes. */
#define MARSHAL_MAJOR 4
#define MARSHAL_MINOR 8

/* One-byte type tags written in front of each dumped value. */
#define TYPE_NIL '0'
#define TYPE_TRUE 'T'
#define TYPE_FALSE 'F'
#define TYPE_FIXNUM 'i'

#define TYPE_EXTENDED 'e'      /* module included via a singleton (see w_extended) */
#define TYPE_UCLASS 'C'        /* subclass of a built-in class (see w_uclass) */
#define TYPE_OBJECT 'o'
#define TYPE_DATA 'd'          /* T_DATA dumped through _dump_data */
#define TYPE_USERDEF 'u'       /* object dumped through _dump */
#define TYPE_USRMARSHAL 'U'    /* object dumped through marshal_dump */
#define TYPE_FLOAT 'f'
#define TYPE_BIGNUM 'l'
#define TYPE_STRING '"'
#define TYPE_REGEXP '/'
#define TYPE_ARRAY '['
#define TYPE_HASH '{'
#define TYPE_HASH_DEF '}'      /* hash followed by its default value */
#define TYPE_STRUCT 'S'
#define TYPE_MODULE_OLD 'M'    /* NOTE(review): never written by the dump code visible here; presumably load-only */
#define TYPE_CLASS 'c'
#define TYPE_MODULE 'm'

#define TYPE_SYMBOL ':'
#define TYPE_SYMLINK ';'       /* back-reference to an already dumped symbol */

#define TYPE_IVAR 'I'          /* value followed by instance variables / encoding */
#define TYPE_LINK '@'          /* back-reference to an already dumped object */
00081
/* Method IDs used when calling back into Ruby objects.
 * NOTE(review): their initialization is not visible in this part of the
 * file; presumably they are interned once at start-up. */
static ID s_dump, s_load, s_mdump, s_mload;
static ID s_dump_data, s_load_data, s_alloc, s_call;
static ID s_getbyte, s_read, s_write, s_binmode;
00085
/* One compatibility registration created by rb_marshal_define_compat(). */
typedef struct {
    VALUE newclass;                 /* class whose allocator keys the table entry */
    VALUE oldclass;                 /* class allocated in its place while loading */
    VALUE (*dumper)(VALUE);         /* called on dump; its result is what gets written */
    VALUE (*loader)(VALUE, VALUE);  /* called on load as loader(real_obj, loaded_obj) */
} marshal_compat_t;
00092
/* Maps an allocator function pointer to its marshal_compat_t entry. */
static st_table *compat_allocator_tbl;
/* NOTE(review): not used in the visible part of the file; presumably a GC
 * wrapper object that keeps the table's classes marked. */
static VALUE compat_allocator_tbl_wrapper;
00095
00096 static int
00097 mark_marshal_compat_i(st_data_t key, st_data_t value)
00098 {
00099 marshal_compat_t *p = (marshal_compat_t *)value;
00100 rb_gc_mark(p->newclass);
00101 rb_gc_mark(p->oldclass);
00102 return ST_CONTINUE;
00103 }
00104
00105 static void
00106 mark_marshal_compat_t(void *tbl)
00107 {
00108 if (!tbl) return;
00109 st_foreach(tbl, mark_marshal_compat_i, 0);
00110 }
00111
00112 void
00113 rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
00114 {
00115 marshal_compat_t *compat;
00116 rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
00117
00118 if (!allocator) {
00119 rb_raise(rb_eTypeError, "no allocator");
00120 }
00121
00122 compat = ALLOC(marshal_compat_t);
00123 compat->newclass = Qnil;
00124 compat->oldclass = Qnil;
00125 compat->newclass = newclass;
00126 compat->oldclass = oldclass;
00127 compat->dumper = dumper;
00128 compat->loader = loader;
00129
00130 st_insert(compat_allocator_tbl, (st_data_t)allocator, (st_data_t)compat);
00131 }
00132
/* Object flags that are copied from dumped/loaded objects onto the result. */
#define MARSHAL_INFECTION (FL_TAINT|FL_UNTRUSTED)
/* Compile-time check: the array size becomes -1 (a compile error) unless the
 * flag mask survives a round-trip through int, the type of the `infection` fields. */
typedef char ruby_check_marshal_viral_flags[MARSHAL_INFECTION == (int)MARSHAL_INFECTION ? 1 : -1];
00135
/* State of one Marshal.dump call. */
struct dump_arg {
    VALUE str, dest;        /* output buffer; destination port (0 when dumping to a string) */
    st_table *symbols;      /* ID -> index of already written symbols; NULL once cleared */
    st_table *data;         /* object -> index of already written objects */
    st_table *compat_tbl;   /* dumper result -> original object (see w_object/w_class) */
    st_table *encodings;    /* encoding name -> name String, created lazily by w_encoding */
    int infection;          /* accumulated MARSHAL_INFECTION flags, applied to the buffer */
};
00144
/* Argument bundle handed to iteration callbacks (hash_each, w_obj_each). */
struct dump_call_arg {
    VALUE obj;              /* NOTE(review): never assigned in the visible code */
    struct dump_arg *arg;   /* shared dump state */
    int limit;              /* remaining depth passed on to w_object() */
};
00150
00151 static void
00152 check_dump_arg(struct dump_arg *arg, ID sym)
00153 {
00154 if (!arg->symbols) {
00155 rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
00156 rb_id2name(sym));
00157 }
00158 }
00159
/* Defined further down; needed here by free_dump_arg(). */
static void clear_dump_arg(struct dump_arg *arg);
00161
00162 static void
00163 mark_dump_arg(void *ptr)
00164 {
00165 struct dump_arg *p = ptr;
00166 if (!p->symbols)
00167 return;
00168 rb_mark_set(p->data);
00169 rb_mark_hash(p->compat_tbl);
00170 rb_gc_mark(p->str);
00171 }
00172
/* GC free function: releases the state's tables, then the struct itself. */
static void
free_dump_arg(void *ptr)
{
    struct dump_arg *dump = ptr;

    clear_dump_arg(dump);
    xfree(dump);
}
00179
00180 static size_t
00181 memsize_dump_arg(const void *ptr)
00182 {
00183 return ptr ? sizeof(struct dump_arg) : 0;
00184 }
00185
/* Typed-data descriptor for the object wrapping a struct dump_arg
 * (mark, free and memsize callbacks, in that order). */
static const rb_data_type_t dump_arg_data = {
    "dump_arg",
    {mark_dump_arg, free_dump_arg, memsize_dump_arg,},
};
00190
00191 static const char *
00192 must_not_be_anonymous(const char *type, VALUE path)
00193 {
00194 char *n = RSTRING_PTR(path);
00195
00196 if (!rb_enc_asciicompat(rb_enc_get(path))) {
00197
00198 rb_raise(rb_eTypeError, "can't dump non-ascii %s name", type);
00199 }
00200 if (n[0] == '#') {
00201 rb_raise(rb_eTypeError, "can't dump anonymous %s %.*s", type,
00202 (int)RSTRING_LEN(path), n);
00203 }
00204 return n;
00205 }
00206
00207 static VALUE
00208 class2path(VALUE klass)
00209 {
00210 VALUE path = rb_class_path(klass);
00211 const char *n;
00212
00213 n = must_not_be_anonymous((TYPE(klass) == T_CLASS ? "class" : "module"), path);
00214 if (rb_path_to_class(path) != rb_class_real(klass)) {
00215 rb_raise(rb_eTypeError, "%s can't be referred to", n);
00216 }
00217 return path;
00218 }
00219
/* Forward declarations: both writers are used before their definitions. */
static void w_long(long, struct dump_arg*);
static void w_encoding(VALUE obj, long num, struct dump_call_arg *arg);
00222
00223 static void
00224 w_nbyte(const char *s, long n, struct dump_arg *arg)
00225 {
00226 VALUE buf = arg->str;
00227 rb_str_buf_cat(buf, s, n);
00228 RBASIC(buf)->flags |= arg->infection;
00229 if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
00230 rb_io_write(arg->dest, buf);
00231 rb_str_resize(buf, 0);
00232 }
00233 }
00234
/* Writes a single byte. */
static void
w_byte(char c, struct dump_arg *arg)
{
    const char one = c;

    w_nbyte(&one, 1, arg);
}
00240
/* Writes a length-prefixed byte sequence: the length via w_long(), then the bytes. */
static void
w_bytes(const char *s, long n, struct dump_arg *arg)
{
    w_long(n, arg);      /* length prefix */
    w_nbyte(s, n, arg);  /* payload */
}
00247
/* Writes a NUL-terminated C string as length-prefixed bytes (evaluates `s` twice). */
#define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
00249
/* Writes the low 16 bits of `x`, least significant byte first. */
static void
w_short(int x, struct dump_arg *arg)
{
    int shift;

    for (shift = 0; shift <= 8; shift += 8) {
        w_byte((char)((x >> shift) & 0xff), arg);
    }
}
00256
/*
 * Writes `x` in the variable-length integer encoding:
 *   - 0 is the single byte 0;
 *   - 1..122 are the single byte x+5, -123..-1 the single byte (x-5)&0xff;
 *   - anything else is a count byte (+n or -n, by sign) followed by the
 *     n low-order bytes of x, least significant first.
 * Where long is wider than 4 bytes, values that do not fit in 32 signed bits
 * raise TypeError.
 */
static void
w_long(long x, struct dump_arg *arg)
{
    char buf[sizeof(long)+1];
    char *p;
    int nbytes;

#if SIZEOF_LONG > 4
    if (RSHIFT(x, 31) != 0 && RSHIFT(x, 31) != -1) {
        /* big long does not fit in 4 bytes */
        rb_raise(rb_eTypeError, "long too big to dump");
    }
#endif

    if (x == 0) {
        w_byte(0, arg);
        return;
    }
    else if (0 < x && x < 123) {
        w_byte((char)(x + 5), arg);
        return;
    }
    else if (-124 < x && x < 0) {
        w_byte((char)((x - 5)&0xff), arg);
        return;
    }

    /* Peel off bytes until only the sign extension (0 or -1) is left. */
    for (nbytes = 1; nbytes < (int)sizeof(long)+1; nbytes++) {
        buf[nbytes] = (char)(x & 0xff);
        x = RSHIFT(x,8);
        if (x == 0 || x == -1) {
            buf[0] = (x == 0) ? nbytes : -nbytes;
            break;
        }
    }

    /* count byte followed by the payload bytes, one w_byte() call each */
    for (p = buf; p <= buf + nbytes; p++) {
        w_byte(*p, arg);
    }
}
00299
#ifdef DBL_MANT_DIG
#define DECIMAL_MANT (53-16)	/* from IEEE754 double precision */

/* Number of mantissa bits consumed per loop iteration in load_mantissa(). */
#if DBL_MANT_DIG > 32
#define MANT_BITS 32
#elif DBL_MANT_DIG > 24
#define MANT_BITS 24
#elif DBL_MANT_DIG > 16
#define MANT_BITS 16
#else
#define MANT_BITS 8
#endif

/*
 * Refines `d` with extra mantissa bytes found in `buf` (`len` bytes).
 *
 * The bytes are only used when there are at least two of them and the first
 * one is 0 (the marker); otherwise `d` is returned unchanged.  The bytes
 * after the marker are read big-endian in groups of up to MANT_BITS/8 and
 * added below the top DECIMAL_MANT bits of |d|; the sign of `d` is restored
 * at the end.
 */
static double
load_mantissa(double d, const char *buf, long len)
{
    int exponent, negative, dig = 0;

    if (len == 0) return d;
    len--;                        /* the marker byte itself */
    if (len <= 0 || *buf != 0) return d;
    buf++;

    negative = d < 0;
    /* keep only the top DECIMAL_MANT bits of |d|, scaled to an integer */
    modf(ldexp(frexp(fabs(d), &exponent), DECIMAL_MANT), &d);
    do {
        unsigned long chunk = 0;
        long take = len < MANT_BITS / 8 ? len : MANT_BITS / 8;
        long k;

        for (k = 0; k < take; k++) {
            chunk = (chunk << 8) | (*buf++ & 0xff);
        }
        dig -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
        d += ldexp((double)chunk, dig);
    } while ((len -= MANT_BITS / 8) > 0);
    d = ldexp(d, exponent - DECIMAL_MANT);

    return negative ? -d : d;
}
#else
#define load_mantissa(d, buf, len) (d)
#endif
00347
/* Digit budget used to size the formatting buffer in w_float():
 * DBL_DIG+2 when <float.h> provides DBL_DIG, otherwise 17. */
#ifdef DBL_DIG
#define FLOAT_DIG (DBL_DIG+2)
#else
#define FLOAT_DIG 17
#endif
00353
/*
 * Writes a double as a length-prefixed decimal string.
 *
 * Special values are written as "inf", "-inf", "nan", "0" and "-0".
 * Every other value is converted with ruby_dtoa() and laid out by hand:
 *   - exponent form "d.ddde<n>" when decpt < -3 or decpt > number of digits,
 *   - "ddd.ddd" when the decimal point falls inside/after the digits,
 *   - "0.000ddd" for small values with up to three leading zeros.
 */
static void
w_float(double d, struct dump_arg *arg)
{
    char *ruby_dtoa(double d_, int mode, int ndigits, int *decpt, int *sign, char **rve);
    char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];

    if (isinf(d)) {
        if (d < 0) w_cstr("-inf", arg);
        else w_cstr("inf", arg);
    }
    else if (isnan(d)) {
        w_cstr("nan", arg);
    }
    else if (d == 0.0) {
        /* 1.0/d is -inf for negative zero, which is how the sign is detected */
        if (1.0/d < 0) w_cstr("-0", arg);
        else w_cstr("0", arg);
    }
    else {
        int decpt, sign, digs, len = 0;
        /* p..e is the digit string; decpt is the decimal point position */
        char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e);
        if (sign) buf[len++] = '-';
        digs = (int)(e - p);
        if (decpt < -3 || decpt > digs) {
            /* exponent form: first digit, optional fraction, then "e<exp>" */
            buf[len++] = p[0];
            if (--digs > 0) buf[len++] = '.';
            memcpy(buf + len, p + 1, digs);
            len += digs;
            len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1);
        }
        else if (decpt > 0) {
            /* integer part, then the remaining digits (if any) as fraction */
            memcpy(buf + len, p, decpt);
            len += decpt;
            if ((digs -= decpt) > 0) {
                buf[len++] = '.';
                memcpy(buf + len, p + decpt, digs);
                len += digs;
            }
        }
        else {
            /* decpt is in -3..0 here: "0." followed by -decpt zeros and the digits */
            buf[len++] = '0';
            buf[len++] = '.';
            if (decpt) {
                memset(buf + len, '0', -decpt);
                len -= decpt;
            }
            memcpy(buf + len, p, digs);
            len += digs;
        }
        xfree(p);
        w_bytes(buf, len, arg);
    }
}
00406
00407 static void
00408 w_symbol(ID id, struct dump_arg *arg)
00409 {
00410 VALUE sym;
00411 st_data_t num;
00412 int encidx = -1;
00413
00414 if (st_lookup(arg->symbols, id, &num)) {
00415 w_byte(TYPE_SYMLINK, arg);
00416 w_long((long)num, arg);
00417 }
00418 else {
00419 sym = rb_id2str(id);
00420 if (!sym) {
00421 rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, id);
00422 }
00423 encidx = rb_enc_get_index(sym);
00424 if (encidx == rb_usascii_encindex() ||
00425 rb_enc_str_coderange(sym) == ENC_CODERANGE_7BIT) {
00426 encidx = -1;
00427 }
00428 else {
00429 w_byte(TYPE_IVAR, arg);
00430 }
00431 w_byte(TYPE_SYMBOL, arg);
00432 w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
00433 st_add_direct(arg->symbols, id, arg->symbols->num_entries);
00434 if (encidx != -1) {
00435 struct dump_call_arg c_arg;
00436 c_arg.limit = 1;
00437 c_arg.arg = arg;
00438 w_encoding(sym, 0, &c_arg);
00439 }
00440 }
00441 }
00442
00443 static void
00444 w_unique(VALUE s, struct dump_arg *arg)
00445 {
00446 must_not_be_anonymous("class", s);
00447 w_symbol(rb_intern_str(s), arg);
00448 }
00449
/* Forward declaration: w_object() and the helpers below call each other. */
static void w_object(VALUE,struct dump_arg*,int);
00451
00452 static int
00453 hash_each(VALUE key, VALUE value, struct dump_call_arg *arg)
00454 {
00455 w_object(key, arg->arg, arg->limit);
00456 w_object(value, arg->arg, arg->limit);
00457 return ST_CONTINUE;
00458 }
00459
00460 static void
00461 w_extended(VALUE klass, struct dump_arg *arg, int check)
00462 {
00463 if (check && FL_TEST(klass, FL_SINGLETON)) {
00464 if (RCLASS_M_TBL(klass)->num_entries ||
00465 (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1)) {
00466 rb_raise(rb_eTypeError, "singleton can't be dumped");
00467 }
00468 klass = RCLASS_SUPER(klass);
00469 }
00470 while (BUILTIN_TYPE(klass) == T_ICLASS) {
00471 VALUE path = rb_class_name(RBASIC(klass)->klass);
00472 w_byte(TYPE_EXTENDED, arg);
00473 w_unique(path, arg);
00474 klass = RCLASS_SUPER(klass);
00475 }
00476 }
00477
00478 static void
00479 w_class(char type, VALUE obj, struct dump_arg *arg, int check)
00480 {
00481 VALUE path;
00482 st_data_t real_obj;
00483 VALUE klass;
00484
00485 if (st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
00486 obj = (VALUE)real_obj;
00487 }
00488 klass = CLASS_OF(obj);
00489 w_extended(klass, arg, check);
00490 w_byte(type, arg);
00491 path = class2path(rb_class_real(klass));
00492 w_unique(path, arg);
00493 }
00494
00495 static void
00496 w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
00497 {
00498 VALUE klass = CLASS_OF(obj);
00499
00500 w_extended(klass, arg, TRUE);
00501 klass = rb_class_real(klass);
00502 if (klass != super) {
00503 w_byte(TYPE_UCLASS, arg);
00504 w_unique(class2path(klass), arg);
00505 }
00506 }
00507
00508 static int
00509 w_obj_each(ID id, VALUE value, struct dump_call_arg *arg)
00510 {
00511 if (id == rb_id_encoding()) return ST_CONTINUE;
00512 if (id == rb_intern("E")) return ST_CONTINUE;
00513 w_symbol(id, arg->arg);
00514 w_object(value, arg->arg, arg->limit);
00515 return ST_CONTINUE;
00516 }
00517
00518 static void
00519 w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
00520 {
00521 int encidx = rb_enc_get_index(obj);
00522 rb_encoding *enc = 0;
00523 st_data_t name;
00524
00525 if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
00526 w_long(num, arg->arg);
00527 return;
00528 }
00529 w_long(num + 1, arg->arg);
00530
00531
00532 if (encidx == rb_usascii_encindex()) {
00533 w_symbol(rb_intern("E"), arg->arg);
00534 w_object(Qfalse, arg->arg, arg->limit + 1);
00535 return;
00536 }
00537 else if (encidx == rb_utf8_encindex()) {
00538 w_symbol(rb_intern("E"), arg->arg);
00539 w_object(Qtrue, arg->arg, arg->limit + 1);
00540 return;
00541 }
00542
00543 w_symbol(rb_id_encoding(), arg->arg);
00544 do {
00545 if (!arg->arg->encodings)
00546 arg->arg->encodings = st_init_strcasetable();
00547 else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name))
00548 break;
00549 name = (st_data_t)rb_str_new2(rb_enc_name(enc));
00550 st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
00551 } while (0);
00552 w_object(name, arg->arg, arg->limit + 1);
00553 }
00554
00555 static void
00556 w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg)
00557 {
00558 long num = tbl ? tbl->num_entries : 0;
00559
00560 w_encoding(obj, num, arg);
00561 if (tbl) {
00562 st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
00563 }
00564 }
00565
00566 static void
00567 w_objivar(VALUE obj, struct dump_call_arg *arg)
00568 {
00569 VALUE *ptr;
00570 long i, len, num;
00571
00572 len = ROBJECT_NUMIV(obj);
00573 ptr = ROBJECT_IVPTR(obj);
00574 num = 0;
00575 for (i = 0; i < len; i++)
00576 if (ptr[i] != Qundef)
00577 num += 1;
00578
00579 w_encoding(obj, num, arg);
00580 if (num != 0) {
00581 rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg);
00582 }
00583 }
00584
/*
 * Dumps one object, recursing into its contents.
 *
 *   obj   - value to write
 *   arg   - shared dump state
 *   limit - remaining nesting depth; ArgumentError is raised when it is 0 on
 *           entry.  It is decremented once per level, so a negative starting
 *           value (marshal_dump() uses -1 by default) does not hit 0 in
 *           normal use.
 *
 * Order of handling:
 *   1. objects already recorded in arg->data are written as TYPE_LINK;
 *   2. nil/true/false/Fixnum/Symbol are written inline;
 *   3. objects responding to s_mdump or s_dump are written through them;
 *   4. everything else is dispatched on its built-in type, after an optional
 *      conversion through a registered compat dumper.
 * Instance variables (and encoding) follow the value when present, announced
 * by a leading TYPE_IVAR byte.
 */
static void
w_object(VALUE obj, struct dump_arg *arg, int limit)
{
    struct dump_call_arg c_arg;
    st_table *ivtbl = 0;
    st_data_t num;
    int hasiv = 0;
/* True when obj has a generic ivar table (stored into ivtbl as a side effect)
 * or is a non-special object whose encoding is not ASCII-8BIT. */
#define has_ivars(obj, ivtbl) (((ivtbl) = rb_generic_ivar_table(obj)) != 0 || \
    (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))

    if (limit == 0) {
        rb_raise(rb_eArgError, "exceed depth limit");
    }

    limit--;
    c_arg.limit = limit;
    c_arg.arg = arg;

    /* already dumped: write a back-reference instead of the object */
    if (st_lookup(arg->data, obj, &num)) {
        w_byte(TYPE_LINK, arg);
        w_long((long)num, arg);
        return;
    }

    if (obj == Qnil) {
        w_byte(TYPE_NIL, arg);
    }
    else if (obj == Qtrue) {
        w_byte(TYPE_TRUE, arg);
    }
    else if (obj == Qfalse) {
        w_byte(TYPE_FALSE, arg);
    }
    else if (FIXNUM_P(obj)) {
#if SIZEOF_LONG <= 4
        w_byte(TYPE_FIXNUM, arg);
        w_long(FIX2INT(obj), arg);
#else
        /* only values passing this range test are written as TYPE_FIXNUM;
         * the rest are converted with rb_int2big() and dumped as bignums */
        if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
            w_byte(TYPE_FIXNUM, arg);
            w_long(FIX2LONG(obj), arg);
        }
        else {
            w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
        }
#endif
    }
    else if (SYMBOL_P(obj)) {
        w_symbol(SYM2ID(obj), arg);
    }
    else {
        /* remember taint/untrust flags so they end up on the output buffer */
        arg->infection |= (int)FL_TEST(obj, MARSHAL_INFECTION);

        if (rb_respond_to(obj, s_mdump)) {
            volatile VALUE v;

            /* registered before the callback runs */
            st_add_direct(arg->data, obj, arg->data->num_entries);

            v = rb_funcall(obj, s_mdump, 0, 0);
            check_dump_arg(arg, s_mdump);
            hasiv = has_ivars(obj, ivtbl);
            if (hasiv) w_byte(TYPE_IVAR, arg);
            w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
            w_object(v, arg, limit);
            if (hasiv) w_ivar(obj, ivtbl, &c_arg);
            return;
        }
        if (rb_respond_to(obj, s_dump)) {
            VALUE v;
            st_table *ivtbl2 = 0;
            int hasiv2;

            v = rb_funcall(obj, s_dump, 1, INT2NUM(limit));
            check_dump_arg(arg, s_dump);
            if (TYPE(v) != T_STRING) {
                rb_raise(rb_eTypeError, "_dump() must return string");
            }
            hasiv = has_ivars(obj, ivtbl);
            if (hasiv) w_byte(TYPE_IVAR, arg);
            /* a single TYPE_IVAR byte is written even if both obj and v qualify */
            if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) {
                w_byte(TYPE_IVAR, arg);
            }
            w_class(TYPE_USERDEF, obj, arg, FALSE);
            w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
            /* the returned string's ivars take precedence over obj's */
            if (hasiv2) {
                w_ivar(v, ivtbl2, &c_arg);
            }
            else if (hasiv) {
                w_ivar(obj, ivtbl, &c_arg);
            }
            /* unlike the s_mdump path, obj is registered only after writing */
            st_add_direct(arg->data, obj, arg->data->num_entries);
            return;
        }

        st_add_direct(arg->data, obj, arg->data->num_entries);

        hasiv = has_ivars(obj, ivtbl);
        {
            /* if the class's allocator has a compat entry, dump the object
             * returned by its dumper and remember the original in compat_tbl */
            st_data_t compat_data;
            rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
            if (st_lookup(compat_allocator_tbl,
                          (st_data_t)allocator,
                          &compat_data)) {
                marshal_compat_t *compat = (marshal_compat_t*)compat_data;
                VALUE real_obj = obj;
                obj = compat->dumper(real_obj);
                st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
                if (obj != real_obj && !ivtbl) hasiv = 0;
            }
        }
        if (hasiv) w_byte(TYPE_IVAR, arg);

        switch (BUILTIN_TYPE(obj)) {
          case T_CLASS:
            if (FL_TEST(obj, FL_SINGLETON)) {
                rb_raise(rb_eTypeError, "singleton class can't be dumped");
            }
            w_byte(TYPE_CLASS, arg);
            {
                volatile VALUE path = class2path(obj);
                w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
            }
            break;

          case T_MODULE:
            w_byte(TYPE_MODULE, arg);
            {
                VALUE path = class2path(obj);
                w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
            }
            break;

          case T_FLOAT:
            w_byte(TYPE_FLOAT, arg);
            w_float(RFLOAT_VALUE(obj), arg);
            break;

          case T_BIGNUM:
            /* sign byte, length in shorts, then the shorts via w_short() */
            w_byte(TYPE_BIGNUM, arg);
            {
                char sign = RBIGNUM_SIGN(obj) ? '+' : '-';
                long len = RBIGNUM_LEN(obj);
                BDIGIT *d = RBIGNUM_DIGITS(obj);

                w_byte(sign, arg);
                w_long(SHORTLEN(len), arg); /* w_short(word) */
                while (len--) {
#if SIZEOF_BDIGITS > SIZEOF_SHORT
                    BDIGIT num = *d;
                    int i;

                    for (i=0; i<SIZEOF_BDIGITS; i+=SIZEOF_SHORT) {
                        w_short(num & SHORTMASK, arg);
                        num = SHORTDN(num);
                        /* stop early on the last digit once nothing is left */
                        if (len == 0 && num == 0) break;
                    }
#else
                    w_short(*d, arg);
#endif
                    d++;
                }
            }
            break;

          case T_STRING:
            w_uclass(obj, rb_cString, arg);
            w_byte(TYPE_STRING, arg);
            w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
            break;

          case T_REGEXP:
            /* source bytes followed by one options byte */
            w_uclass(obj, rb_cRegexp, arg);
            w_byte(TYPE_REGEXP, arg);
            {
                int opts = rb_reg_options(obj);
                w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
                w_byte((char)opts, arg);
            }
            break;

          case T_ARRAY:
            w_uclass(obj, rb_cArray, arg);
            w_byte(TYPE_ARRAY, arg);
            {
                long i, len = RARRAY_LEN(obj);

                w_long(len, arg);
                for (i=0; i<RARRAY_LEN(obj); i++) {
                    w_object(RARRAY_PTR(obj)[i], arg, limit);
                    /* dumping an element may run Ruby code; the length was
                     * already written, so a resize cannot be tolerated */
                    if (len != RARRAY_LEN(obj)) {
                        rb_raise(rb_eRuntimeError, "array modified during dump");
                    }
                }
            }
            break;

          case T_HASH:
            w_uclass(obj, rb_cHash, arg);
            if (NIL_P(RHASH_IFNONE(obj))) {
                w_byte(TYPE_HASH, arg);
            }
            else if (FL_TEST(obj, FL_USER2)) {
                /* FL_USER2 means HASH_PROC_DEFAULT (see hash.c) */
                rb_raise(rb_eTypeError, "can't dump hash with default proc");
            }
            else {
                w_byte(TYPE_HASH_DEF, arg);
            }
            w_long(RHASH_SIZE(obj), arg);
            rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
            /* TYPE_HASH_DEF: the default value follows the entries */
            if (!NIL_P(RHASH_IFNONE(obj))) {
                w_object(RHASH_IFNONE(obj), arg, limit);
            }
            break;

          case T_STRUCT:
            /* member count, then (member name, value) pairs */
            w_class(TYPE_STRUCT, obj, arg, TRUE);
            {
                long len = RSTRUCT_LEN(obj);
                VALUE mem;
                long i;

                w_long(len, arg);
                mem = rb_struct_members(obj);
                for (i=0; i<len; i++) {
                    w_symbol(SYM2ID(RARRAY_PTR(mem)[i]), arg);
                    w_object(RSTRUCT_PTR(obj)[i], arg, limit);
                }
            }
            break;

          case T_OBJECT:
            w_class(TYPE_OBJECT, obj, arg, TRUE);
            w_objivar(obj, &c_arg);
            break;

          case T_DATA:
            {
                VALUE v;

                if (!rb_respond_to(obj, s_dump_data)) {
                    rb_raise(rb_eTypeError,
                             "no _dump_data is defined for class %s",
                             rb_obj_classname(obj));
                }
                v = rb_funcall(obj, s_dump_data, 0);
                check_dump_arg(arg, s_dump_data);
                w_class(TYPE_DATA, obj, arg, TRUE);
                w_object(v, arg, limit);
            }
            break;

          default:
            rb_raise(rb_eTypeError, "can't dump %s",
                     rb_obj_classname(obj));
            break;
        }
    }
    /* trailing instance variables announced by the TYPE_IVAR byte above */
    if (hasiv) {
        w_ivar(obj, ivtbl, &c_arg);
    }
}
00847
00848 static void
00849 clear_dump_arg(struct dump_arg *arg)
00850 {
00851 if (!arg->symbols) return;
00852 st_free_table(arg->symbols);
00853 arg->symbols = 0;
00854 st_free_table(arg->data);
00855 arg->data = 0;
00856 st_free_table(arg->compat_tbl);
00857 arg->compat_tbl = 0;
00858 if (arg->encodings) {
00859 st_free_table(arg->encodings);
00860 arg->encodings = 0;
00861 }
00862 }
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
/*
 * Implements the dump entry point taking (obj [, port_or_limit [, limit]]).
 *
 *   - With three arguments the second is the port (must not be nil) and the
 *     third, unless nil, the depth limit.
 *   - With two arguments a Fixnum is taken as the limit, anything else
 *     (except nil, which is an error) as the port.
 *
 * A port must respond to s_write, otherwise TypeError is raised; if it
 * responds to s_binmode that method is called first.  The default limit is -1.
 *
 * Returns the port when one was given, otherwise the String holding the dump.
 */
static VALUE
marshal_dump(int argc, VALUE *argv)
{
    VALUE obj, port, a1, a2;
    int limit = -1;
    struct dump_arg *arg;
    volatile VALUE wrapper;

    port = Qnil;
    rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
    if (argc == 3) {
        if (!NIL_P(a2)) limit = NUM2INT(a2);
        if (NIL_P(a1)) goto type_error;
        port = a1;
    }
    else if (argc == 2) {
        if (FIXNUM_P(a1)) limit = FIX2INT(a1);
        else if (NIL_P(a1)) goto type_error;
        else port = a1;
    }
    /* the state lives in a typed-data object so its tables are freed by
     * free_dump_arg() if an exception unwinds past this function */
    wrapper = TypedData_Make_Struct(rb_cData, struct dump_arg, &dump_arg_data, arg);
    arg->dest = 0;
    arg->symbols = st_init_numtable();
    arg->data    = st_init_numtable();
    arg->infection = 0;
    arg->compat_tbl = st_init_numtable();
    arg->encodings = 0;
    arg->str = rb_str_buf_new(0);
    if (!NIL_P(port)) {
        if (!rb_respond_to(port, s_write)) {
          /* also the target of the nil-port gotos above */
          type_error:
            rb_raise(rb_eTypeError, "instance of IO needed");
        }
        arg->dest = port;
        if (rb_respond_to(port, s_binmode)) {
            rb_funcall2(port, s_binmode, 0, 0);
            check_dump_arg(arg, s_binmode);
        }
    }
    else {
        port = arg->str;
    }

    /* format version header */
    w_byte(MARSHAL_MAJOR, arg);
    w_byte(MARSHAL_MINOR, arg);

    w_object(obj, arg, limit);
    /* flush whatever is still buffered below the BUFSIZ threshold */
    if (arg->dest) {
        rb_io_write(arg->dest, arg->str);
        rb_str_resize(arg->str, 0);
    }
    clear_dump_arg(arg);
    RB_GC_GUARD(wrapper);

    return port;
}
00954
/* State of one Marshal.load call. */
struct load_arg {
    VALUE src;              /* input: a String, or an object read via s_getbyte/s_read */
    long offset;            /* read position when src is a String */
    st_table *symbols;      /* index -> ID of symbols read so far; NULL once cleared */
    st_table *data;         /* index -> object read so far */
    VALUE proc;             /* if set, called (s_call) with each loaded object */
    st_table *compat_tbl;   /* placeholder object -> real object (see obj_alloc_by_path) */
    int infection;          /* MARSHAL_INFECTION flags picked up from the input */
};
00964
00965 static void
00966 check_load_arg(struct load_arg *arg, ID sym)
00967 {
00968 if (!arg->symbols) {
00969 rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
00970 rb_id2name(sym));
00971 }
00972 }
00973
/* Defined later in the file; needed here by free_load_arg(). */
static void clear_load_arg(struct load_arg *arg);
00975
00976 static void
00977 mark_load_arg(void *ptr)
00978 {
00979 struct load_arg *p = ptr;
00980 if (!p->symbols)
00981 return;
00982 rb_mark_tbl(p->data);
00983 rb_mark_hash(p->compat_tbl);
00984 }
00985
/* GC free function: clears the load state, then frees the struct. */
static void
free_load_arg(void *ptr)
{
    struct load_arg *load = ptr;

    clear_load_arg(load);
    xfree(load);
}
00992
00993 static size_t
00994 memsize_load_arg(const void *ptr)
00995 {
00996 return ptr ? sizeof(struct load_arg) : 0;
00997 }
00998
/* Typed-data descriptor for the object wrapping a struct load_arg
 * (mark, free and memsize callbacks, in that order). */
static const rb_data_type_t load_arg_data = {
    "load_arg",
    {mark_load_arg, free_load_arg, memsize_load_arg,},
};
01003
/* Registers `v` under the next free index of the object table. */
#define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
/* Forward declarations for the mutually recursive readers. */
static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg);
static VALUE r_object(struct load_arg *arg);
static ID r_symbol(struct load_arg *arg);
static VALUE path2class(VALUE path);
01009
01010 static st_index_t
01011 r_prepare(struct load_arg *arg)
01012 {
01013 st_index_t idx = arg->data->num_entries;
01014
01015 st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
01016 return idx;
01017 }
01018
01019 static int
01020 r_byte(struct load_arg *arg)
01021 {
01022 int c;
01023
01024 if (TYPE(arg->src) == T_STRING) {
01025 if (RSTRING_LEN(arg->src) > arg->offset) {
01026 c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
01027 }
01028 else {
01029 rb_raise(rb_eArgError, "marshal data too short");
01030 }
01031 }
01032 else {
01033 VALUE src = arg->src;
01034 VALUE v = rb_funcall2(src, s_getbyte, 0, 0);
01035 check_load_arg(arg, s_getbyte);
01036 if (NIL_P(v)) rb_eof_error();
01037 c = (unsigned char)NUM2CHR(v);
01038 }
01039 return c;
01040 }
01041
01042 static void
01043 long_toobig(int size)
01044 {
01045 rb_raise(rb_eTypeError, "long too big for this architecture (size "
01046 STRINGIZE(SIZEOF_LONG)", given %d)", size);
01047 }
01048
/* Interprets the low 8 bits of `c` as a signed value (-128..127). */
#undef SIGN_EXTEND_CHAR
#if __STDC__
# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
#else
/* Without a signed char type: flip the sign bit, then subtract it again. */
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif
01056
/*
 * Reads an integer written by w_long().
 *
 * The first byte `c` (signed) selects the form:
 *   c == 0          -> 0
 *   5 <= c <= 127   -> c - 5
 *   -128 <= c <= -5 -> c + 5
 *   1 <= c <= 4 (or up to sizeof(long))   -> c bytes follow, little endian,
 *                                            zero extended
 *   -4 <= c <= -1 (or down to -sizeof(long)) -> -c bytes follow, little
 *                                            endian, sign extended with ones
 * A byte count larger than sizeof(long) raises TypeError via long_toobig().
 *
 * The value is assembled in an unsigned long: shifting a byte >= 0x80 (or the
 * 0xff mask) into the top byte of a signed long is undefined behaviour, while
 * the unsigned shifts are well defined and give the same bit pattern.
 */
static long
r_long(struct load_arg *arg)
{
    unsigned long bits;
    int c = SIGN_EXTEND_CHAR(r_byte(arg));
    int i;

    if (c == 0) return 0;
    if (c > 0) {
        if (4 < c && c < 128) {
            return c - 5;
        }
        if (c > (int)sizeof(long)) long_toobig(c);
        bits = 0;
        for (i = 0; i < c; i++) {
            bits |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    else {
        if (-129 < c && c < -4) {
            return c + 5;
        }
        c = -c;
        if (c > (int)sizeof(long)) long_toobig(c);
        bits = ~0UL;    /* all ones: the bytes not supplied stay 0xff */
        for (i = 0; i < c; i++) {
            bits &= ~(0xffUL << (8*i));
            bits |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    return (long)bits;
}
01089
/* Reads a length-prefixed byte sequence: length via r_long(), then the bytes. */
#define r_bytes(arg) r_bytes0(r_long(arg), (arg))
01091
01092 static VALUE
01093 r_bytes0(long len, struct load_arg *arg)
01094 {
01095 VALUE str;
01096
01097 if (len == 0) return rb_str_new(0, 0);
01098 if (TYPE(arg->src) == T_STRING) {
01099 if (RSTRING_LEN(arg->src) - arg->offset >= len) {
01100 str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
01101 arg->offset += len;
01102 }
01103 else {
01104 too_short:
01105 rb_raise(rb_eArgError, "marshal data too short");
01106 }
01107 }
01108 else {
01109 VALUE src = arg->src;
01110 VALUE n = LONG2NUM(len);
01111 str = rb_funcall2(src, s_read, 1, &n);
01112 check_load_arg(arg, s_read);
01113 if (NIL_P(str)) goto too_short;
01114 StringValue(str);
01115 if (RSTRING_LEN(str) != len) goto too_short;
01116 arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
01117 }
01118 return str;
01119 }
01120
01121 static int
01122 id2encidx(ID id, VALUE val)
01123 {
01124 if (id == rb_id_encoding()) {
01125 int idx = rb_enc_find_index(StringValueCStr(val));
01126 return idx;
01127 }
01128 else if (id == rb_intern("E")) {
01129 if (val == Qfalse) return rb_usascii_encindex();
01130 else if (val == Qtrue) return rb_utf8_encindex();
01131
01132 }
01133 return -1;
01134 }
01135
01136 static ID
01137 r_symlink(struct load_arg *arg)
01138 {
01139 st_data_t id;
01140 long num = r_long(arg);
01141
01142 if (st_lookup(arg->symbols, num, &id)) {
01143 return (ID)id;
01144 }
01145 rb_raise(rb_eArgError, "bad symbol");
01146 }
01147
01148 static ID
01149 r_symreal(struct load_arg *arg, int ivar)
01150 {
01151 volatile VALUE s = r_bytes(arg);
01152 ID id;
01153 int idx = -1;
01154 st_index_t n = arg->symbols->num_entries;
01155
01156 st_insert(arg->symbols, (st_data_t)n, (st_data_t)0);
01157 if (ivar) {
01158 long num = r_long(arg);
01159 while (num-- > 0) {
01160 id = r_symbol(arg);
01161 idx = id2encidx(id, r_object(arg));
01162 }
01163 }
01164 if (idx < 0) idx = rb_usascii_encindex();
01165 rb_enc_associate_index(s, idx);
01166 id = rb_intern_str(s);
01167 st_insert(arg->symbols, (st_data_t)n, (st_data_t)id);
01168
01169 return id;
01170 }
01171
01172 static ID
01173 r_symbol(struct load_arg *arg)
01174 {
01175 int type, ivar = 0;
01176
01177 again:
01178 switch ((type = r_byte(arg))) {
01179 case TYPE_IVAR:
01180 ivar = 1;
01181 goto again;
01182 case TYPE_SYMBOL:
01183 return r_symreal(arg, ivar);
01184 case TYPE_SYMLINK:
01185 if (ivar) {
01186 rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
01187 }
01188 return r_symlink(arg);
01189 default:
01190 rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
01191 break;
01192 }
01193 }
01194
01195 static VALUE
01196 r_unique(struct load_arg *arg)
01197 {
01198 return rb_id2str(r_symbol(arg));
01199 }
01200
01201 static VALUE
01202 r_string(struct load_arg *arg)
01203 {
01204 return r_bytes(arg);
01205 }
01206
01207 static VALUE
01208 r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
01209 {
01210 st_data_t real_obj = (VALUE)Qundef;
01211 if (st_lookup(arg->compat_tbl, v, &real_obj)) {
01212 st_insert(arg->data, num, (st_data_t)real_obj);
01213 }
01214 else {
01215 st_insert(arg->data, num, (st_data_t)v);
01216 }
01217 if (arg->infection &&
01218 TYPE(v) != T_CLASS && TYPE(v) != T_MODULE) {
01219 FL_SET(v, arg->infection);
01220 if ((VALUE)real_obj != Qundef)
01221 FL_SET((VALUE)real_obj, arg->infection);
01222 }
01223 return v;
01224 }
01225
01226 static VALUE
01227 r_leave(VALUE v, struct load_arg *arg)
01228 {
01229 st_data_t data;
01230 if (st_lookup(arg->compat_tbl, v, &data)) {
01231 VALUE real_obj = (VALUE)data;
01232 rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
01233 st_data_t key = v;
01234 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
01235 marshal_compat_t *compat = (marshal_compat_t*)data;
01236 compat->loader(real_obj, v);
01237 }
01238 st_delete(arg->compat_tbl, &key, 0);
01239 v = real_obj;
01240 }
01241 if (arg->proc) {
01242 v = rb_funcall(arg->proc, s_call, 1, v);
01243 check_load_arg(arg, s_call);
01244 }
01245 return v;
01246 }
01247
01248 static void
01249 r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
01250 {
01251 long len;
01252
01253 len = r_long(arg);
01254 if (len > 0) {
01255 do {
01256 ID id = r_symbol(arg);
01257 VALUE val = r_object(arg);
01258 int idx = id2encidx(id, val);
01259 if (idx >= 0) {
01260 rb_enc_associate_index(obj, idx);
01261 if (has_encoding) *has_encoding = TRUE;
01262 }
01263 else {
01264 rb_ivar_set(obj, id, val);
01265 }
01266 } while (--len > 0);
01267 }
01268 }
01269
01270 static VALUE
01271 path2class(VALUE path)
01272 {
01273 VALUE v = rb_path_to_class(path);
01274
01275 if (TYPE(v) != T_CLASS) {
01276 rb_raise(rb_eArgError, "%.*s does not refer to class",
01277 (int)RSTRING_LEN(path), RSTRING_PTR(path));
01278 }
01279 return v;
01280 }
01281
01282 static VALUE
01283 path2module(VALUE path)
01284 {
01285 VALUE v = rb_path_to_class(path);
01286
01287 if (TYPE(v) != T_MODULE) {
01288 rb_raise(rb_eArgError, "%.*s does not refer to module",
01289 (int)RSTRING_LEN(path), RSTRING_PTR(path));
01290 }
01291 return v;
01292 }
01293
01294 static VALUE
01295 obj_alloc_by_path(VALUE path, struct load_arg *arg)
01296 {
01297 VALUE klass;
01298 st_data_t data;
01299 rb_alloc_func_t allocator;
01300
01301 klass = path2class(path);
01302
01303 allocator = rb_get_alloc_func(klass);
01304 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
01305 marshal_compat_t *compat = (marshal_compat_t*)data;
01306 VALUE real_obj = rb_obj_alloc(klass);
01307 VALUE obj = rb_obj_alloc(compat->oldclass);
01308 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
01309 return obj;
01310 }
01311
01312 return rb_obj_alloc(klass);
01313 }
01314
01315 static VALUE
01316 r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
01317 {
01318 VALUE v = Qnil;
01319 int type = r_byte(arg);
01320 long id;
01321 st_data_t link;
01322
01323 switch (type) {
01324 case TYPE_LINK:
01325 id = r_long(arg);
01326 if (!st_lookup(arg->data, (st_data_t)id, &link)) {
01327 rb_raise(rb_eArgError, "dump format error (unlinked)");
01328 }
01329 v = (VALUE)link;
01330 if (arg->proc) {
01331 v = rb_funcall(arg->proc, s_call, 1, v);
01332 check_load_arg(arg, s_call);
01333 }
01334 break;
01335
01336 case TYPE_IVAR:
01337 {
01338 int ivar = TRUE;
01339
01340 v = r_object0(arg, &ivar, extmod);
01341 if (ivar) r_ivar(v, NULL, arg);
01342 }
01343 break;
01344
01345 case TYPE_EXTENDED:
01346 {
01347 VALUE m = path2module(r_unique(arg));
01348
01349 if (NIL_P(extmod)) extmod = rb_ary_new2(0);
01350 rb_ary_push(extmod, m);
01351
01352 v = r_object0(arg, 0, extmod);
01353 while (RARRAY_LEN(extmod) > 0) {
01354 m = rb_ary_pop(extmod);
01355 rb_extend_object(v, m);
01356 }
01357 }
01358 break;
01359
01360 case TYPE_UCLASS:
01361 {
01362 VALUE c = path2class(r_unique(arg));
01363
01364 if (FL_TEST(c, FL_SINGLETON)) {
01365 rb_raise(rb_eTypeError, "singleton can't be loaded");
01366 }
01367 v = r_object0(arg, 0, extmod);
01368 if (rb_special_const_p(v) || TYPE(v) == T_OBJECT || TYPE(v) == T_CLASS) {
01369 format_error:
01370 rb_raise(rb_eArgError, "dump format error (user class)");
01371 }
01372 if (TYPE(v) == T_MODULE || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
01373 VALUE tmp = rb_obj_alloc(c);
01374
01375 if (TYPE(v) != TYPE(tmp)) goto format_error;
01376 }
01377 RBASIC(v)->klass = c;
01378 }
01379 break;
01380
01381 case TYPE_NIL:
01382 v = Qnil;
01383 v = r_leave(v, arg);
01384 break;
01385
01386 case TYPE_TRUE:
01387 v = Qtrue;
01388 v = r_leave(v, arg);
01389 break;
01390
01391 case TYPE_FALSE:
01392 v = Qfalse;
01393 v = r_leave(v, arg);
01394 break;
01395
01396 case TYPE_FIXNUM:
01397 {
01398 long i = r_long(arg);
01399 v = LONG2FIX(i);
01400 }
01401 v = r_leave(v, arg);
01402 break;
01403
01404 case TYPE_FLOAT:
01405 {
01406 double d;
01407 VALUE str = r_bytes(arg);
01408 const char *ptr = RSTRING_PTR(str);
01409
01410 if (strcmp(ptr, "nan") == 0) {
01411 d = NAN;
01412 }
01413 else if (strcmp(ptr, "inf") == 0) {
01414 d = INFINITY;
01415 }
01416 else if (strcmp(ptr, "-inf") == 0) {
01417 d = -INFINITY;
01418 }
01419 else {
01420 char *e;
01421 d = strtod(ptr, &e);
01422 d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
01423 }
01424 v = DBL2NUM(d);
01425 v = r_entry(v, arg);
01426 v = r_leave(v, arg);
01427 }
01428 break;
01429
01430 case TYPE_BIGNUM:
01431 {
01432 long len;
01433 BDIGIT *digits;
01434 volatile VALUE data;
01435
01436 NEWOBJ(big, struct RBignum);
01437 OBJSETUP(big, rb_cBignum, T_BIGNUM);
01438 RBIGNUM_SET_SIGN(big, (r_byte(arg) == '+'));
01439 len = r_long(arg);
01440 data = r_bytes0(len * 2, arg);
01441 #if SIZEOF_BDIGITS == SIZEOF_SHORT
01442 rb_big_resize((VALUE)big, len);
01443 #else
01444 rb_big_resize((VALUE)big, (len + 1) * 2 / sizeof(BDIGIT));
01445 #endif
01446 digits = RBIGNUM_DIGITS(big);
01447 MEMCPY(digits, RSTRING_PTR(data), char, len * 2);
01448 #if SIZEOF_BDIGITS > SIZEOF_SHORT
01449 MEMZERO((char *)digits + len * 2, char,
01450 RBIGNUM_LEN(big) * sizeof(BDIGIT) - len * 2);
01451 #endif
01452 len = RBIGNUM_LEN(big);
01453 while (len > 0) {
01454 unsigned char *p = (unsigned char *)digits;
01455 BDIGIT num = 0;
01456 #if SIZEOF_BDIGITS > SIZEOF_SHORT
01457 int shift = 0;
01458 int i;
01459
01460 for (i=0; i<SIZEOF_BDIGITS; i++) {
01461 num |= (int)p[i] << shift;
01462 shift += 8;
01463 }
01464 #else
01465 num = p[0] | (p[1] << 8);
01466 #endif
01467 *digits++ = num;
01468 len--;
01469 }
01470 v = rb_big_norm((VALUE)big);
01471 v = r_entry(v, arg);
01472 v = r_leave(v, arg);
01473 }
01474 break;
01475
01476 case TYPE_STRING:
01477 v = r_entry(r_string(arg), arg);
01478 v = r_leave(v, arg);
01479 break;
01480
01481 case TYPE_REGEXP:
01482 {
01483 volatile VALUE str = r_bytes(arg);
01484 int options = r_byte(arg);
01485 int has_encoding = FALSE;
01486 st_index_t idx = r_prepare(arg);
01487
01488 if (ivp) {
01489 r_ivar(str, &has_encoding, arg);
01490 *ivp = FALSE;
01491 }
01492 if (!has_encoding) {
01493
01494 char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
01495 long len = RSTRING_LEN(str);
01496 long bs = 0;
01497 for (; len-- > 0; *dst++ = *src++) {
01498 switch (*src) {
01499 case '\\': bs++; break;
01500 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
01501 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
01502 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
01503 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
01504 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
01505 if (bs & 1) --dst;
01506 default: bs = 0; break;
01507 }
01508 }
01509 rb_str_set_len(str, dst - ptr);
01510 }
01511 v = r_entry0(rb_reg_new_str(str, options), idx, arg);
01512 v = r_leave(v, arg);
01513 }
01514 break;
01515
01516 case TYPE_ARRAY:
01517 {
01518 volatile long len = r_long(arg);
01519
01520 v = rb_ary_new2(len);
01521 v = r_entry(v, arg);
01522 while (len--) {
01523 rb_ary_push(v, r_object(arg));
01524 }
01525 v = r_leave(v, arg);
01526 }
01527 break;
01528
01529 case TYPE_HASH:
01530 case TYPE_HASH_DEF:
01531 {
01532 long len = r_long(arg);
01533
01534 v = rb_hash_new();
01535 v = r_entry(v, arg);
01536 while (len--) {
01537 VALUE key = r_object(arg);
01538 VALUE value = r_object(arg);
01539 rb_hash_aset(v, key, value);
01540 }
01541 if (type == TYPE_HASH_DEF) {
01542 RHASH_IFNONE(v) = r_object(arg);
01543 }
01544 v = r_leave(v, arg);
01545 }
01546 break;
01547
01548 case TYPE_STRUCT:
01549 {
01550 VALUE mem, values;
01551 volatile long i;
01552 ID slot;
01553 st_index_t idx = r_prepare(arg);
01554 VALUE klass = path2class(r_unique(arg));
01555 long len = r_long(arg);
01556
01557 v = rb_obj_alloc(klass);
01558 if (TYPE(v) != T_STRUCT) {
01559 rb_raise(rb_eTypeError, "class %s not a struct", rb_class2name(klass));
01560 }
01561 mem = rb_struct_s_members(klass);
01562 if (RARRAY_LEN(mem) != len) {
01563 rb_raise(rb_eTypeError, "struct %s not compatible (struct size differs)",
01564 rb_class2name(klass));
01565 }
01566
01567 v = r_entry0(v, idx, arg);
01568 values = rb_ary_new2(len);
01569 for (i=0; i<len; i++) {
01570 slot = r_symbol(arg);
01571
01572 if (RARRAY_PTR(mem)[i] != ID2SYM(slot)) {
01573 rb_raise(rb_eTypeError, "struct %s not compatible (:%s for :%s)",
01574 rb_class2name(klass),
01575 rb_id2name(slot),
01576 rb_id2name(SYM2ID(RARRAY_PTR(mem)[i])));
01577 }
01578 rb_ary_push(values, r_object(arg));
01579 }
01580 rb_struct_initialize(v, values);
01581 v = r_leave(v, arg);
01582 }
01583 break;
01584
01585 case TYPE_USERDEF:
01586 {
01587 VALUE klass = path2class(r_unique(arg));
01588 VALUE data;
01589
01590 if (!rb_respond_to(klass, s_load)) {
01591 rb_raise(rb_eTypeError, "class %s needs to have method `_load'",
01592 rb_class2name(klass));
01593 }
01594 data = r_string(arg);
01595 if (ivp) {
01596 r_ivar(data, NULL, arg);
01597 *ivp = FALSE;
01598 }
01599 v = rb_funcall(klass, s_load, 1, data);
01600 check_load_arg(arg, s_load);
01601 v = r_entry(v, arg);
01602 v = r_leave(v, arg);
01603 }
01604 break;
01605
01606 case TYPE_USRMARSHAL:
01607 {
01608 VALUE klass = path2class(r_unique(arg));
01609 VALUE data;
01610
01611 v = rb_obj_alloc(klass);
01612 if (!NIL_P(extmod)) {
01613 while (RARRAY_LEN(extmod) > 0) {
01614 VALUE m = rb_ary_pop(extmod);
01615 rb_extend_object(v, m);
01616 }
01617 }
01618 if (!rb_respond_to(v, s_mload)) {
01619 rb_raise(rb_eTypeError, "instance of %s needs to have method `marshal_load'",
01620 rb_class2name(klass));
01621 }
01622 v = r_entry(v, arg);
01623 data = r_object(arg);
01624 rb_funcall(v, s_mload, 1, data);
01625 check_load_arg(arg, s_mload);
01626 v = r_leave(v, arg);
01627 }
01628 break;
01629
01630 case TYPE_OBJECT:
01631 {
01632 st_index_t idx = r_prepare(arg);
01633 v = obj_alloc_by_path(r_unique(arg), arg);
01634 if (TYPE(v) != T_OBJECT) {
01635 rb_raise(rb_eArgError, "dump format error");
01636 }
01637 v = r_entry0(v, idx, arg);
01638 r_ivar(v, NULL, arg);
01639 v = r_leave(v, arg);
01640 }
01641 break;
01642
01643 case TYPE_DATA:
01644 {
01645 VALUE klass = path2class(r_unique(arg));
01646 if (rb_respond_to(klass, s_alloc)) {
01647 static int warn = TRUE;
01648 if (warn) {
01649 rb_warn("define `allocate' instead of `_alloc'");
01650 warn = FALSE;
01651 }
01652 v = rb_funcall(klass, s_alloc, 0);
01653 check_load_arg(arg, s_alloc);
01654 }
01655 else {
01656 v = rb_obj_alloc(klass);
01657 }
01658 if (TYPE(v) != T_DATA) {
01659 rb_raise(rb_eArgError, "dump format error");
01660 }
01661 v = r_entry(v, arg);
01662 if (!rb_respond_to(v, s_load_data)) {
01663 rb_raise(rb_eTypeError,
01664 "class %s needs to have instance method `_load_data'",
01665 rb_class2name(klass));
01666 }
01667 rb_funcall(v, s_load_data, 1, r_object0(arg, 0, extmod));
01668 check_load_arg(arg, s_load_data);
01669 v = r_leave(v, arg);
01670 }
01671 break;
01672
01673 case TYPE_MODULE_OLD:
01674 {
01675 volatile VALUE str = r_bytes(arg);
01676
01677 v = rb_path_to_class(str);
01678 v = r_entry(v, arg);
01679 v = r_leave(v, arg);
01680 }
01681 break;
01682
01683 case TYPE_CLASS:
01684 {
01685 volatile VALUE str = r_bytes(arg);
01686
01687 v = path2class(str);
01688 v = r_entry(v, arg);
01689 v = r_leave(v, arg);
01690 }
01691 break;
01692
01693 case TYPE_MODULE:
01694 {
01695 volatile VALUE str = r_bytes(arg);
01696
01697 v = path2module(str);
01698 v = r_entry(v, arg);
01699 v = r_leave(v, arg);
01700 }
01701 break;
01702
01703 case TYPE_SYMBOL:
01704 if (ivp) {
01705 v = ID2SYM(r_symreal(arg, *ivp));
01706 *ivp = FALSE;
01707 }
01708 else {
01709 v = ID2SYM(r_symreal(arg, 0));
01710 }
01711 v = r_leave(v, arg);
01712 break;
01713
01714 case TYPE_SYMLINK:
01715 v = ID2SYM(r_symlink(arg));
01716 break;
01717
01718 default:
01719 rb_raise(rb_eArgError, "dump format error(0x%x)", type);
01720 break;
01721 }
01722 return v;
01723 }
01724
01725 static VALUE
01726 r_object(struct load_arg *arg)
01727 {
01728 return r_object0(arg, 0, Qnil);
01729 }
01730
01731 static void
01732 clear_load_arg(struct load_arg *arg)
01733 {
01734 if (!arg->symbols) return;
01735 st_free_table(arg->symbols);
01736 arg->symbols = 0;
01737 st_free_table(arg->data);
01738 arg->data = 0;
01739 st_free_table(arg->compat_tbl);
01740 arg->compat_tbl = 0;
01741 }
01742
01743
01744
01745
01746
01747
01748
01749
01750
01751
01752
01753
01754 static VALUE
01755 marshal_load(int argc, VALUE *argv)
01756 {
01757 VALUE port, proc;
01758 int major, minor, infection = 0;
01759 VALUE v;
01760 volatile VALUE wrapper;
01761 struct load_arg *arg;
01762
01763 rb_scan_args(argc, argv, "11", &port, &proc);
01764 v = rb_check_string_type(port);
01765 if (!NIL_P(v)) {
01766 infection = (int)FL_TEST(port, MARSHAL_INFECTION);
01767 port = v;
01768 }
01769 else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
01770 if (rb_respond_to(port, s_binmode)) {
01771 rb_funcall2(port, s_binmode, 0, 0);
01772 }
01773 infection = (int)(FL_TAINT | FL_TEST(port, FL_UNTRUSTED));
01774 }
01775 else {
01776 rb_raise(rb_eTypeError, "instance of IO needed");
01777 }
01778 wrapper = TypedData_Make_Struct(rb_cData, struct load_arg, &load_arg_data, arg);
01779 arg->infection = infection;
01780 arg->src = port;
01781 arg->offset = 0;
01782 arg->symbols = st_init_numtable();
01783 arg->data = st_init_numtable();
01784 arg->compat_tbl = st_init_numtable();
01785 arg->proc = 0;
01786
01787 major = r_byte(arg);
01788 minor = r_byte(arg);
01789 if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
01790 clear_load_arg(arg);
01791 rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
01792 \tformat version %d.%d required; %d.%d given",
01793 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01794 }
01795 if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
01796 rb_warn("incompatible marshal file format (can be read)\n\
01797 \tformat version %d.%d required; %d.%d given",
01798 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
01799 }
01800
01801 if (!NIL_P(proc)) arg->proc = proc;
01802 v = r_object(arg);
01803 clear_load_arg(arg);
01804 RB_GC_GUARD(wrapper);
01805
01806 return v;
01807 }
01808
01809
01810
01811
01812
01813
01814
01815
01816
01817
01818
01819
01820
01821
01822
01823
01824
01825
01826
01827
01828
01829
01830
01831
01832
01833
01834
01835
01836
01837
01838
01839
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850
01851
01852
01853
01854
01855
01856
01857
01858
01859
01860
01861
01862
01863
01864
01865
01866
01867
01868
01869
01870
01871
01872
01873
01874
01875
01876
01877
01878
01879
01880
01881
01882
01883
01884
01885
01886
01887
01888
01889
01890
01891
01892
01893
01894
01895
01896
01897
01898
01899
01900
01901
01902
01903
01904
01905 void
01906 Init_marshal(void)
01907 {
01908 #undef rb_intern
01909 #define rb_intern(str) rb_intern_const(str)
01910
01911 VALUE rb_mMarshal = rb_define_module("Marshal");
01912
01913 s_dump = rb_intern("_dump");
01914 s_load = rb_intern("_load");
01915 s_mdump = rb_intern("marshal_dump");
01916 s_mload = rb_intern("marshal_load");
01917 s_dump_data = rb_intern("_dump_data");
01918 s_load_data = rb_intern("_load_data");
01919 s_alloc = rb_intern("_alloc");
01920 s_call = rb_intern("call");
01921 s_getbyte = rb_intern("getbyte");
01922 s_read = rb_intern("read");
01923 s_write = rb_intern("write");
01924 s_binmode = rb_intern("binmode");
01925
01926 rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
01927 rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);
01928 rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1);
01929
01930 rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
01931 rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
01932
01933 compat_allocator_tbl = st_init_numtable();
01934 compat_allocator_tbl_wrapper =
01935 Data_Wrap_Struct(rb_cData, mark_marshal_compat_t, 0, compat_allocator_tbl);
01936 rb_gc_register_mark_object(compat_allocator_tbl_wrapper);
01937 }
01938
01939 VALUE
01940 rb_marshal_dump(VALUE obj, VALUE port)
01941 {
01942 int argc = 1;
01943 VALUE argv[2];
01944
01945 argv[0] = obj;
01946 argv[1] = port;
01947 if (!NIL_P(port)) argc = 2;
01948 return marshal_dump(argc, argv);
01949 }
01950
01951 VALUE
01952 rb_marshal_load(VALUE port)
01953 {
01954 return marshal_load(1, &port);
01955 }
01956