00001 #include <psych.h>
00002
00003 VALUE cPsychParser;
00004 VALUE ePsychSyntaxError;
00005
00006 static ID id_read;
00007 static ID id_path;
00008 static ID id_empty;
00009 static ID id_start_stream;
00010 static ID id_end_stream;
00011 static ID id_start_document;
00012 static ID id_end_document;
00013 static ID id_alias;
00014 static ID id_scalar;
00015 static ID id_start_sequence;
00016 static ID id_end_sequence;
00017 static ID id_start_mapping;
00018 static ID id_end_mapping;
00019
/*
 * Tag _str with the encoding index _yaml_enc and then, when _internal_enc is
 * not NULL, replace _str with the result of exporting it to _internal_enc.
 * _str is assigned to, so it has to be a modifiable VALUE variable.
 */
#define PSYCH_TRANSCODE(_str, _yaml_enc, _internal_enc) \
    do { \
        rb_enc_associate_index((_str), (_yaml_enc)); \
        if ((_internal_enc) != NULL) \
            (_str) = rb_str_export_to_enc((_str), (_internal_enc)); \
    } while (0)
00026
00027 static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read)
00028 {
00029 VALUE io = (VALUE)data;
00030 VALUE string = rb_funcall(io, id_read, 1, INT2NUM(size));
00031
00032 *read = 0;
00033
00034 if(! NIL_P(string)) {
00035 void * str = (void *)StringValuePtr(string);
00036 *read = (size_t)RSTRING_LEN(string);
00037 memcpy(buf, str, *read);
00038 }
00039
00040 return 1;
00041 }
00042
00043 static void dealloc(void * ptr)
00044 {
00045 yaml_parser_t * parser;
00046
00047 parser = (yaml_parser_t *)ptr;
00048 yaml_parser_delete(parser);
00049 xfree(parser);
00050 }
00051
00052 static VALUE allocate(VALUE klass)
00053 {
00054 yaml_parser_t * parser;
00055
00056 parser = xmalloc(sizeof(yaml_parser_t));
00057 yaml_parser_initialize(parser);
00058
00059 return Data_Wrap_Struct(klass, 0, dealloc, parser);
00060 }
00061
00062 static VALUE make_exception(yaml_parser_t * parser, VALUE path)
00063 {
00064 size_t line, column;
00065
00066 line = parser->context_mark.line + 1;
00067 column = parser->context_mark.column + 1;
00068
00069 return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6,
00070 path,
00071 INT2NUM(line),
00072 INT2NUM(column),
00073 INT2NUM(parser->problem_offset),
00074 parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil,
00075 parser->context ? rb_usascii_str_new2(parser->context) : Qnil);
00076 }
00077
00078 #ifdef HAVE_RUBY_ENCODING_H
00079 static VALUE transcode_string(VALUE src, int * parser_encoding)
00080 {
00081 int utf8 = rb_utf8_encindex();
00082 int utf16le = rb_enc_find_index("UTF-16LE");
00083 int utf16be = rb_enc_find_index("UTF-16BE");
00084 int source_encoding = rb_enc_get_index(src);
00085
00086 if (source_encoding == utf8) {
00087 *parser_encoding = YAML_UTF8_ENCODING;
00088 return src;
00089 }
00090
00091 if (source_encoding == utf16le) {
00092 *parser_encoding = YAML_UTF16LE_ENCODING;
00093 return src;
00094 }
00095
00096 if (source_encoding == utf16be) {
00097 *parser_encoding = YAML_UTF16BE_ENCODING;
00098 return src;
00099 }
00100
00101 src = rb_str_export_to_enc(src, rb_utf8_encoding());
00102 RB_GC_GUARD(src);
00103
00104 *parser_encoding = YAML_UTF8_ENCODING;
00105 return src;
00106 }
00107
00108 static VALUE transcode_io(VALUE src, int * parser_encoding)
00109 {
00110 VALUE io_external_encoding;
00111 int io_external_enc_index;
00112
00113 io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0);
00114
00115
00116 if (NIL_P(io_external_encoding)) {
00117 io_external_enc_index = rb_ascii8bit_encindex();
00118 } else {
00119 io_external_enc_index = rb_to_encoding_index(io_external_encoding);
00120 }
00121
00122
00123 if (io_external_enc_index == rb_usascii_encindex()) {
00124 *parser_encoding = YAML_UTF8_ENCODING;
00125 return src;
00126 }
00127
00128 if (io_external_enc_index == rb_utf8_encindex()) {
00129 *parser_encoding = YAML_UTF8_ENCODING;
00130 return src;
00131 }
00132
00133 if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) {
00134 *parser_encoding = YAML_UTF16LE_ENCODING;
00135 return src;
00136 }
00137
00138 if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) {
00139 *parser_encoding = YAML_UTF16BE_ENCODING;
00140 return src;
00141 }
00142
00143
00144 if (io_external_enc_index == rb_ascii8bit_encindex()) {
00145 *parser_encoding = YAML_ANY_ENCODING;
00146 return src;
00147 }
00148
00149
00150
00151 *parser_encoding = YAML_ANY_ENCODING;
00152
00153 return src;
00154 }
00155
00156 #endif
00157
00158 static VALUE protected_start_stream(VALUE pointer)
00159 {
00160 VALUE *args = (VALUE *)pointer;
00161 return rb_funcall(args[0], id_start_stream, 1, args[1]);
00162 }
00163
00164 static VALUE protected_start_document(VALUE pointer)
00165 {
00166 VALUE *args = (VALUE *)pointer;
00167 return rb_funcall3(args[0], id_start_document, 3, args + 1);
00168 }
00169
00170 static VALUE protected_end_document(VALUE pointer)
00171 {
00172 VALUE *args = (VALUE *)pointer;
00173 return rb_funcall(args[0], id_end_document, 1, args[1]);
00174 }
00175
00176 static VALUE protected_alias(VALUE pointer)
00177 {
00178 VALUE *args = (VALUE *)pointer;
00179 return rb_funcall(args[0], id_alias, 1, args[1]);
00180 }
00181
00182 static VALUE protected_scalar(VALUE pointer)
00183 {
00184 VALUE *args = (VALUE *)pointer;
00185 return rb_funcall3(args[0], id_scalar, 6, args + 1);
00186 }
00187
00188 static VALUE protected_start_sequence(VALUE pointer)
00189 {
00190 VALUE *args = (VALUE *)pointer;
00191 return rb_funcall3(args[0], id_start_sequence, 4, args + 1);
00192 }
00193
00194 static VALUE protected_end_sequence(VALUE handler)
00195 {
00196 return rb_funcall(handler, id_end_sequence, 0);
00197 }
00198
00199 static VALUE protected_start_mapping(VALUE pointer)
00200 {
00201 VALUE *args = (VALUE *)pointer;
00202 return rb_funcall3(args[0], id_start_mapping, 4, args + 1);
00203 }
00204
00205 static VALUE protected_end_mapping(VALUE handler)
00206 {
00207 return rb_funcall(handler, id_end_mapping, 0);
00208 }
00209
00210 static VALUE protected_empty(VALUE handler)
00211 {
00212 return rb_funcall(handler, id_empty, 0);
00213 }
00214
00215 static VALUE protected_end_stream(VALUE handler)
00216 {
00217 return rb_funcall(handler, id_end_stream, 0);
00218 }
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229 static VALUE parse(int argc, VALUE *argv, VALUE self)
00230 {
00231 VALUE yaml, path;
00232 yaml_parser_t * parser;
00233 yaml_event_t event;
00234 int done = 0;
00235 int tainted = 0;
00236 int state = 0;
00237 int parser_encoding = YAML_ANY_ENCODING;
00238 #ifdef HAVE_RUBY_ENCODING_H
00239 int encoding = rb_utf8_encindex();
00240 rb_encoding * internal_enc = rb_default_internal_encoding();
00241 #endif
00242 VALUE handler = rb_iv_get(self, "@handler");
00243
00244 if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) {
00245 if(rb_respond_to(yaml, id_path))
00246 path = rb_funcall(yaml, id_path, 0);
00247 else
00248 path = rb_str_new2("<unknown>");
00249 }
00250
00251 Data_Get_Struct(self, yaml_parser_t, parser);
00252
00253 yaml_parser_delete(parser);
00254 yaml_parser_initialize(parser);
00255
00256 if (OBJ_TAINTED(yaml)) tainted = 1;
00257
00258 if (rb_respond_to(yaml, id_read)) {
00259 #ifdef HAVE_RUBY_ENCODING_H
00260 yaml = transcode_io(yaml, &parser_encoding);
00261 yaml_parser_set_encoding(parser, parser_encoding);
00262 #endif
00263 yaml_parser_set_input(parser, io_reader, (void *)yaml);
00264 if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1;
00265 } else {
00266 StringValue(yaml);
00267 #ifdef HAVE_RUBY_ENCODING_H
00268 yaml = transcode_string(yaml, &parser_encoding);
00269 yaml_parser_set_encoding(parser, parser_encoding);
00270 #endif
00271 yaml_parser_set_input_string(
00272 parser,
00273 (const unsigned char *)RSTRING_PTR(yaml),
00274 (size_t)RSTRING_LEN(yaml)
00275 );
00276 }
00277
00278 while(!done) {
00279 if(!yaml_parser_parse(parser, &event)) {
00280 VALUE exception;
00281
00282 exception = make_exception(parser, path);
00283 yaml_parser_delete(parser);
00284 yaml_parser_initialize(parser);
00285
00286 rb_exc_raise(exception);
00287 }
00288
00289 switch(event.type) {
00290 case YAML_STREAM_START_EVENT:
00291 {
00292 VALUE args[2];
00293
00294 args[0] = handler;
00295 args[1] = INT2NUM((long)event.data.stream_start.encoding);
00296 rb_protect(protected_start_stream, (VALUE)args, &state);
00297 }
00298 break;
00299 case YAML_DOCUMENT_START_EVENT:
00300 {
00301 VALUE args[4];
00302
00303 VALUE tag_directives = rb_ary_new();
00304
00305 VALUE version = event.data.document_start.version_directive ?
00306 rb_ary_new3(
00307 (long)2,
00308 INT2NUM((long)event.data.document_start.version_directive->major),
00309 INT2NUM((long)event.data.document_start.version_directive->minor)
00310 ) : rb_ary_new();
00311
00312 if(event.data.document_start.tag_directives.start) {
00313 yaml_tag_directive_t *start =
00314 event.data.document_start.tag_directives.start;
00315 yaml_tag_directive_t *end =
00316 event.data.document_start.tag_directives.end;
00317 for(; start != end; start++) {
00318 VALUE handle = Qnil;
00319 VALUE prefix = Qnil;
00320 if(start->handle) {
00321 handle = rb_str_new2((const char *)start->handle);
00322 if (tainted) OBJ_TAINT(handle);
00323 #ifdef HAVE_RUBY_ENCODING_H
00324 PSYCH_TRANSCODE(handle, encoding, internal_enc);
00325 #endif
00326 }
00327
00328 if(start->prefix) {
00329 prefix = rb_str_new2((const char *)start->prefix);
00330 if (tainted) OBJ_TAINT(prefix);
00331 #ifdef HAVE_RUBY_ENCODING_H
00332 PSYCH_TRANSCODE(prefix, encoding, internal_enc);
00333 #endif
00334 }
00335
00336 rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
00337 }
00338 }
00339 args[0] = handler;
00340 args[1] = version;
00341 args[2] = tag_directives;
00342 args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse;
00343 rb_protect(protected_start_document, (VALUE)args, &state);
00344 }
00345 break;
00346 case YAML_DOCUMENT_END_EVENT:
00347 {
00348 VALUE args[2];
00349
00350 args[0] = handler;
00351 args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse;
00352 rb_protect(protected_end_document, (VALUE)args, &state);
00353 }
00354 break;
00355 case YAML_ALIAS_EVENT:
00356 {
00357 VALUE args[2];
00358 VALUE alias = Qnil;
00359 if(event.data.alias.anchor) {
00360 alias = rb_str_new2((const char *)event.data.alias.anchor);
00361 if (tainted) OBJ_TAINT(alias);
00362 #ifdef HAVE_RUBY_ENCODING_H
00363 PSYCH_TRANSCODE(alias, encoding, internal_enc);
00364 #endif
00365 }
00366
00367 args[0] = handler;
00368 args[1] = alias;
00369 rb_protect(protected_alias, (VALUE)args, &state);
00370 }
00371 break;
00372 case YAML_SCALAR_EVENT:
00373 {
00374 VALUE args[7];
00375 VALUE anchor = Qnil;
00376 VALUE tag = Qnil;
00377 VALUE plain_implicit, quoted_implicit, style;
00378 VALUE val = rb_str_new(
00379 (const char *)event.data.scalar.value,
00380 (long)event.data.scalar.length
00381 );
00382 if (tainted) OBJ_TAINT(val);
00383
00384 #ifdef HAVE_RUBY_ENCODING_H
00385 PSYCH_TRANSCODE(val, encoding, internal_enc);
00386 #endif
00387
00388 if(event.data.scalar.anchor) {
00389 anchor = rb_str_new2((const char *)event.data.scalar.anchor);
00390 if (tainted) OBJ_TAINT(anchor);
00391 #ifdef HAVE_RUBY_ENCODING_H
00392 PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00393 #endif
00394 }
00395
00396 if(event.data.scalar.tag) {
00397 tag = rb_str_new2((const char *)event.data.scalar.tag);
00398 if (tainted) OBJ_TAINT(tag);
00399 #ifdef HAVE_RUBY_ENCODING_H
00400 PSYCH_TRANSCODE(tag, encoding, internal_enc);
00401 #endif
00402 }
00403
00404 plain_implicit =
00405 event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;
00406
00407 quoted_implicit =
00408 event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;
00409
00410 style = INT2NUM((long)event.data.scalar.style);
00411
00412 args[0] = handler;
00413 args[1] = val;
00414 args[2] = anchor;
00415 args[3] = tag;
00416 args[4] = plain_implicit;
00417 args[5] = quoted_implicit;
00418 args[6] = style;
00419 rb_protect(protected_scalar, (VALUE)args, &state);
00420 }
00421 break;
00422 case YAML_SEQUENCE_START_EVENT:
00423 {
00424 VALUE args[5];
00425 VALUE anchor = Qnil;
00426 VALUE tag = Qnil;
00427 VALUE implicit, style;
00428 if(event.data.sequence_start.anchor) {
00429 anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
00430 if (tainted) OBJ_TAINT(anchor);
00431 #ifdef HAVE_RUBY_ENCODING_H
00432 PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00433 #endif
00434 }
00435
00436 tag = Qnil;
00437 if(event.data.sequence_start.tag) {
00438 tag = rb_str_new2((const char *)event.data.sequence_start.tag);
00439 if (tainted) OBJ_TAINT(tag);
00440 #ifdef HAVE_RUBY_ENCODING_H
00441 PSYCH_TRANSCODE(tag, encoding, internal_enc);
00442 #endif
00443 }
00444
00445 implicit =
00446 event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;
00447
00448 style = INT2NUM((long)event.data.sequence_start.style);
00449
00450 args[0] = handler;
00451 args[1] = anchor;
00452 args[2] = tag;
00453 args[3] = implicit;
00454 args[4] = style;
00455
00456 rb_protect(protected_start_sequence, (VALUE)args, &state);
00457 }
00458 break;
00459 case YAML_SEQUENCE_END_EVENT:
00460 rb_protect(protected_end_sequence, handler, &state);
00461 break;
00462 case YAML_MAPPING_START_EVENT:
00463 {
00464 VALUE args[5];
00465 VALUE anchor = Qnil;
00466 VALUE tag = Qnil;
00467 VALUE implicit, style;
00468 if(event.data.mapping_start.anchor) {
00469 anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
00470 if (tainted) OBJ_TAINT(anchor);
00471 #ifdef HAVE_RUBY_ENCODING_H
00472 PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00473 #endif
00474 }
00475
00476 if(event.data.mapping_start.tag) {
00477 tag = rb_str_new2((const char *)event.data.mapping_start.tag);
00478 if (tainted) OBJ_TAINT(tag);
00479 #ifdef HAVE_RUBY_ENCODING_H
00480 PSYCH_TRANSCODE(tag, encoding, internal_enc);
00481 #endif
00482 }
00483
00484 implicit =
00485 event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;
00486
00487 style = INT2NUM((long)event.data.mapping_start.style);
00488
00489 args[0] = handler;
00490 args[1] = anchor;
00491 args[2] = tag;
00492 args[3] = implicit;
00493 args[4] = style;
00494
00495 rb_protect(protected_start_mapping, (VALUE)args, &state);
00496 }
00497 break;
00498 case YAML_MAPPING_END_EVENT:
00499 rb_protect(protected_end_mapping, handler, &state);
00500 break;
00501 case YAML_NO_EVENT:
00502 rb_protect(protected_empty, handler, &state);
00503 break;
00504 case YAML_STREAM_END_EVENT:
00505 rb_protect(protected_end_stream, handler, &state);
00506 done = 1;
00507 break;
00508 }
00509 yaml_event_delete(&event);
00510 if (state) rb_jump_tag(state);
00511 }
00512
00513 return self;
00514 }
00515
00516
00517
00518
00519
00520
00521
00522
00523 static VALUE mark(VALUE self)
00524 {
00525 VALUE mark_klass;
00526 VALUE args[3];
00527 yaml_parser_t * parser;
00528
00529 Data_Get_Struct(self, yaml_parser_t, parser);
00530 mark_klass = rb_const_get_at(cPsychParser, rb_intern("Mark"));
00531 args[0] = INT2NUM(parser->mark.index);
00532 args[1] = INT2NUM(parser->mark.line);
00533 args[2] = INT2NUM(parser->mark.column);
00534
00535 return rb_class_new_instance(3, args, mark_klass);
00536 }
00537
00538 void Init_psych_parser()
00539 {
00540 #if 0
00541 mPsych = rb_define_module("Psych");
00542 #endif
00543
00544 cPsychParser = rb_define_class_under(mPsych, "Parser", rb_cObject);
00545 rb_define_alloc_func(cPsychParser, allocate);
00546
00547
00548 rb_define_const(cPsychParser, "ANY", INT2NUM(YAML_ANY_ENCODING));
00549
00550
00551 rb_define_const(cPsychParser, "UTF8", INT2NUM(YAML_UTF8_ENCODING));
00552
00553
00554 rb_define_const(cPsychParser, "UTF16LE", INT2NUM(YAML_UTF16LE_ENCODING));
00555
00556
00557 rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING));
00558
00559 rb_require("psych/syntax_error");
00560 ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError);
00561
00562 rb_define_method(cPsychParser, "parse", parse, -1);
00563 rb_define_method(cPsychParser, "mark", mark, 0);
00564
00565 id_read = rb_intern("read");
00566 id_path = rb_intern("path");
00567 id_empty = rb_intern("empty");
00568 id_start_stream = rb_intern("start_stream");
00569 id_end_stream = rb_intern("end_stream");
00570 id_start_document = rb_intern("start_document");
00571 id_end_document = rb_intern("end_document");
00572 id_alias = rb_intern("alias");
00573 id_scalar = rb_intern("scalar");
00574 id_start_sequence = rb_intern("start_sequence");
00575 id_end_sequence = rb_intern("end_sequence");
00576 id_start_mapping = rb_intern("start_mapping");
00577 id_end_mapping = rb_intern("end_mapping");
00578 }
00579
00580