00001
00002
00003
00004
00005 #include "transcode_data.h"
00006
00007
00008
/*
 * Byte-valued half of the lookup tables shared by every transcoder in
 * this file.  Each `*_offsets` macro interleaved with the data records
 * the index in this array at which one sub-table starts (for example
 * from_UTF_8_offsets is 1041).  As laid out here, every sub-table is a
 * pair of leading bytes followed by (second - first + 1) one-byte
 * entries: "220, 223" is followed by 4 entries, "0, 255" by 256.
 * The declared size, 1288, is the start of the last sub-table (1041)
 * plus its 2 + 245 bytes.
 *
 * NOTE(review): the leading pair is presumably the lowest and highest
 * input byte the node accepts, and each entry presumably selects a word
 * from the node's `*_infos` run in utf_16_32_word_array -- confirm
 * against transcode_data.h.
 */
00009 static const unsigned char
00010 utf_16_32_byte_array[1288] = {
00011 #define from_UTF_16LE_00toFF_D8toDB_00toFF_offsets 0
00012 220, 223,
00013 1, 1, 1, 1,
00014
00015 #define from_UTF_16LE_00toFF_D8toDB_offsets 6
00016 0, 255,
00017 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00018 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00019 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00020 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00021 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00022 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00023 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00024 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00025 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00026 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00027 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00028 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00029 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00030 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00031 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00032 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00033
00034 #define from_UTF_16LE_00toFF_offsets 264
00035 0, 255,
00036 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00037 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00038 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00039 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00040 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00041 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00042 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00043 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00044 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00045 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00046 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00047 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00048 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00049 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
00050 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00051 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00052
00053 #define from_UTF_32LE_00toFF_00toD7_00_offsets 522
00054 0, 0,
00055 0,
00056
00057 #define from_UTF_32LE_00toFF_00toD7_offsets 525
00058 0, 16,
00059 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00060 0,
00061
00062 #define from_UTF_32LE_00toFF_D8toDF_offsets 544
00063 1, 16,
00064 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00065
00066 #define from_UTF_32LE_00toFF_offsets 562
00067 0, 255,
00068 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00069 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00070 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00071 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00072 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00073 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00074 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00075 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00076 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00077 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00078 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00079 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00080 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00081 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
00082 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00083 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00084
00085 #define from_UTF_32BE_00_offsets 820
00086 0, 16,
00087 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00088 1,
00089
00090 #define from_UTF_8_C2toDF_offsets 839
00091 128, 191,
00092 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00093 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00094 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00095 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00096
00097 #define from_UTF_8_E0_offsets 905
00098 160, 191,
00099 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00101
00102 #define from_UTF_8_ED_offsets 939
00103 128, 159,
00104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00106
00107 #define from_UTF_8_F0_offsets 973
00108 144, 191,
00109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00112
00113 #define from_UTF_8_F4_offsets 1023
00114 128, 143,
00115 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00116
00117 #define from_UTF_8_offsets 1041
00118 0, 244,
00119 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00120 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00121 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00122 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00123 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00124 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00126 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00131 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00132 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00133 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4,
00134 6, 7, 7, 7, 8,
00135
00136 };
/*
 * Word-valued half of the shared lookup tables.  Two kinds of macro are
 * interleaved with the data; each wraps, in WORDINDEX2INFO(), the word
 * index of the entry that follows it:
 *   - `*_infos` macros name a run of result words: INVALID, FUNso, FUNsi,
 *     or a reference to another node macro;
 *   - the remaining macros (e.g. from_UTF_8) name a two-word node made
 *     of a byte-table position (`*_offsets`) and an `*_infos` reference.
 * Nodes share byte tables and info runs where the contents coincide
 * (e.g. from_UTF_16BE_00toD7 reuses from_UTF_16LE_00toFF_D8toDB_offsets).
 *
 * TRANSCODE_TABLE_INFO, defined right after the array, bundles both
 * arrays, their lengths (1288 and 106) and sizeof(unsigned int) so the
 * rb_transcoder initializers below can pass them as one unit.
 */
00137 static const unsigned int
00138 utf_16_32_word_array[106] = {
00139 #define from_UTF_16LE_00toFF_D8toDB_00toFF_infos WORDINDEX2INFO(0)
00140 INVALID, FUNso,
00141
00142 #define from_UTF_16LE_00toFF_D8toDB_00toFF WORDINDEX2INFO(2)
00143 from_UTF_16LE_00toFF_D8toDB_00toFF_offsets,
00144 from_UTF_16LE_00toFF_D8toDB_00toFF_infos,
00145
00146 #define from_UTF_16LE_00toFF_D8toDB_infos WORDINDEX2INFO(4)
00147 from_UTF_16LE_00toFF_D8toDB_00toFF,
00148
00149 #define from_UTF_16LE_00toFF_D8toDB WORDINDEX2INFO(5)
00150 from_UTF_16LE_00toFF_D8toDB_offsets,
00151 from_UTF_16LE_00toFF_D8toDB_infos,
00152
00153 #define from_UTF_16LE_00toFF_infos WORDINDEX2INFO(7)
00154 FUNso, from_UTF_16LE_00toFF_D8toDB,
00155 INVALID,
00156
00157 #define from_UTF_16LE_00toFF WORDINDEX2INFO(10)
00158 from_UTF_16LE_00toFF_offsets,
00159 from_UTF_16LE_00toFF_infos,
00160
00161 #define from_UTF_16LE_infos WORDINDEX2INFO(12)
00162 from_UTF_16LE_00toFF,
00163
00164 #define from_UTF_16LE WORDINDEX2INFO(13)
00165 from_UTF_16LE_00toFF_D8toDB_offsets,
00166 from_UTF_16LE_infos,
00167
00168 #define from_UTF_32LE_00toFF_00toD7_00_infos WORDINDEX2INFO(15)
00169 FUNso, INVALID,
00170
00171 #define from_UTF_32LE_00toFF_00toD7_00 WORDINDEX2INFO(17)
00172 from_UTF_32LE_00toFF_00toD7_00_offsets,
00173 from_UTF_32LE_00toFF_00toD7_00_infos,
00174
00175 #define from_UTF_32LE_00toFF_00toD7_infos WORDINDEX2INFO(19)
00176 from_UTF_32LE_00toFF_00toD7_00, INVALID,
00177
00178 #define from_UTF_32LE_00toFF_00toD7 WORDINDEX2INFO(21)
00179 from_UTF_32LE_00toFF_00toD7_offsets,
00180 from_UTF_32LE_00toFF_00toD7_infos,
00181
00182 #define from_UTF_32LE_00toFF_D8toDF_infos WORDINDEX2INFO(23)
00183 INVALID, from_UTF_32LE_00toFF_00toD7_00,
00184
00185 #define from_UTF_32LE_00toFF_D8toDF WORDINDEX2INFO(25)
00186 from_UTF_32LE_00toFF_D8toDF_offsets,
00187 from_UTF_32LE_00toFF_D8toDF_infos,
00188
00189 #define from_UTF_32LE_00toFF_infos WORDINDEX2INFO(27)
00190 from_UTF_32LE_00toFF_00toD7, from_UTF_32LE_00toFF_D8toDF,
00191
00192 #define from_UTF_32LE_00toFF WORDINDEX2INFO(29)
00193 from_UTF_32LE_00toFF_offsets,
00194 from_UTF_32LE_00toFF_infos,
00195
00196 #define from_UTF_32LE_infos WORDINDEX2INFO(31)
00197 from_UTF_32LE_00toFF,
00198
00199 #define from_UTF_32LE WORDINDEX2INFO(32)
00200 from_UTF_16LE_00toFF_D8toDB_offsets,
00201 from_UTF_32LE_infos,
00202
00203 #define from_UTF_16BE_00toD7_infos WORDINDEX2INFO(34)
00204 FUNso,
00205
00206 #define from_UTF_16BE_00toD7 WORDINDEX2INFO(35)
00207 from_UTF_16LE_00toFF_D8toDB_offsets,
00208 from_UTF_16BE_00toD7_infos,
00209
00210 #define from_UTF_16BE_D8toDB_00toFF_infos WORDINDEX2INFO(37)
00211 INVALID, from_UTF_16BE_00toD7,
00212
00213 #define from_UTF_16BE_D8toDB_00toFF WORDINDEX2INFO(39)
00214 from_UTF_16LE_00toFF_D8toDB_00toFF_offsets,
00215 from_UTF_16BE_D8toDB_00toFF_infos,
00216
00217 #define from_UTF_16BE_D8toDB_infos WORDINDEX2INFO(41)
00218 from_UTF_16BE_D8toDB_00toFF,
00219
00220 #define from_UTF_16BE_D8toDB WORDINDEX2INFO(42)
00221 from_UTF_16LE_00toFF_D8toDB_offsets,
00222 from_UTF_16BE_D8toDB_infos,
00223
00224 #define from_UTF_16BE_infos WORDINDEX2INFO(44)
00225 from_UTF_16BE_00toD7, from_UTF_16BE_D8toDB,
00226 INVALID,
00227
00228 #define from_UTF_16BE WORDINDEX2INFO(47)
00229 from_UTF_16LE_00toFF_offsets,
00230 from_UTF_16BE_infos,
00231
00232 #define from_UTF_32BE_00_00_infos WORDINDEX2INFO(49)
00233 from_UTF_16BE_00toD7, INVALID,
00234
00235 #define from_UTF_32BE_00_00 WORDINDEX2INFO(51)
00236 from_UTF_32LE_00toFF_offsets,
00237 from_UTF_32BE_00_00_infos,
00238
00239 #define from_UTF_32BE_00_01to10_infos WORDINDEX2INFO(53)
00240 from_UTF_16BE_00toD7,
00241
00242 #define from_UTF_32BE_00_01to10 WORDINDEX2INFO(54)
00243 from_UTF_16LE_00toFF_D8toDB_offsets,
00244 from_UTF_32BE_00_01to10_infos,
00245
00246 #define from_UTF_32BE_00_infos WORDINDEX2INFO(56)
00247 from_UTF_32BE_00_00, from_UTF_32BE_00_01to10,
00248 INVALID,
00249
00250 #define from_UTF_32BE_00 WORDINDEX2INFO(59)
00251 from_UTF_32BE_00_offsets,
00252 from_UTF_32BE_00_infos,
00253
00254 #define from_UTF_32BE_infos WORDINDEX2INFO(61)
00255 from_UTF_32BE_00, INVALID,
00256
00257 #define from_UTF_32BE WORDINDEX2INFO(63)
00258 from_UTF_32LE_00toFF_00toD7_00_offsets,
00259 from_UTF_32BE_infos,
00260
00261 #define from_UTF_16_00toFF_infos WORDINDEX2INFO(65)
00262 FUNsi,
00263
00264 #define from_UTF_16_00toFF WORDINDEX2INFO(66)
00265 from_UTF_16LE_00toFF_D8toDB_offsets,
00266 from_UTF_16_00toFF_infos,
00267
00268 #define from_UTF_16_infos WORDINDEX2INFO(68)
00269 from_UTF_16_00toFF,
00270
00271 #define from_UTF_16 WORDINDEX2INFO(69)
00272 from_UTF_16LE_00toFF_D8toDB_offsets,
00273 from_UTF_16_infos,
00274
00275 #define from_UTF_32_00toFF_infos WORDINDEX2INFO(71)
00276 from_UTF_16,
00277
00278 #define from_UTF_32_00toFF WORDINDEX2INFO(72)
00279 from_UTF_16LE_00toFF_D8toDB_offsets,
00280 from_UTF_32_00toFF_infos,
00281
00282 #define from_UTF_32_infos WORDINDEX2INFO(74)
00283 from_UTF_32_00toFF,
00284
00285 #define from_UTF_32 WORDINDEX2INFO(75)
00286 from_UTF_16LE_00toFF_D8toDB_offsets,
00287 from_UTF_32_infos,
00288
00289 #define from_UTF_8_C2toDF WORDINDEX2INFO(77)
00290 from_UTF_8_C2toDF_offsets,
00291 from_UTF_16LE_00toFF_D8toDB_00toFF_infos,
00292
00293 #define from_UTF_8_E0_infos WORDINDEX2INFO(79)
00294 INVALID, from_UTF_8_C2toDF,
00295
00296 #define from_UTF_8_E0 WORDINDEX2INFO(81)
00297 from_UTF_8_E0_offsets,
00298 from_UTF_8_E0_infos,
00299
00300 #define from_UTF_8_E1toEC WORDINDEX2INFO(83)
00301 from_UTF_8_C2toDF_offsets,
00302 from_UTF_8_E0_infos,
00303
00304 #define from_UTF_8_ED WORDINDEX2INFO(85)
00305 from_UTF_8_ED_offsets,
00306 from_UTF_8_E0_infos,
00307
00308 #define from_UTF_8_F0_infos WORDINDEX2INFO(87)
00309 INVALID, from_UTF_8_E1toEC,
00310
00311 #define from_UTF_8_F0 WORDINDEX2INFO(89)
00312 from_UTF_8_F0_offsets,
00313 from_UTF_8_F0_infos,
00314
00315 #define from_UTF_8_F1toF3 WORDINDEX2INFO(91)
00316 from_UTF_8_C2toDF_offsets,
00317 from_UTF_8_F0_infos,
00318
00319 #define from_UTF_8_F4 WORDINDEX2INFO(93)
00320 from_UTF_8_F4_offsets,
00321 from_UTF_8_F0_infos,
00322
00323 #define from_UTF_8_infos WORDINDEX2INFO(95)
00324 FUNso, INVALID,
00325 from_UTF_8_C2toDF, from_UTF_8_E0,
00326 from_UTF_8_E1toEC, from_UTF_8_ED,
00327 from_UTF_8_F0, from_UTF_8_F1toF3,
00328 from_UTF_8_F4,
00329
00330 #define from_UTF_8 WORDINDEX2INFO(104)
00331 from_UTF_8_offsets,
00332 from_UTF_8_infos,
00333
00334 };
00335 #define TRANSCODE_TABLE_INFO utf_16_32_byte_array, 1288, utf_16_32_word_array, 106, ((int)sizeof(unsigned int))
00336
00337
/*
 * Encode one UTF-16BE character as UTF-8.
 *
 * s points at one 2-byte code unit or, for a surrogate pair, at 4 bytes
 * (bytes 2 and 3 are read only on the surrogate path).  The UTF-8 bytes
 * are stored in o and their count (1..4) is returned.  statep, l and
 * osize are not used.
 */
static ssize_t
fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char hi = s[0];
    const unsigned char lo = s[1];

    /* U+0000..U+007F: passes through as a single byte. */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* Value fits in 11 bits: two-byte sequence. */
    if (hi < 0x08) {
        o[0] = (unsigned char)(0xC0 | (hi << 2) | (lo >> 6));
        o[1] = (unsigned char)(0x80 | (lo & 0x3F));
        return 2;
    }

    /* High byte outside 0xD8..0xDF: three-byte sequence. */
    if ((hi & 0xF8) != 0xD8) {
        o[0] = (unsigned char)(0xE0 | (hi >> 4));
        o[1] = (unsigned char)(0x80 | ((hi & 0x0F) << 2) | (lo >> 6));
        o[2] = (unsigned char)(0x80 | (lo & 0x3F));
        return 3;
    }

    /* Surrogate pair: four-byte sequence. */
    {
        const unsigned char hi2 = s[2];
        const unsigned char lo2 = s[3];
        /* The lead surrogate stores (plane - 1) in 4 bits; add it back. */
        const unsigned int plane = (((hi & 0x03) << 2) | (lo >> 6)) + 1;

        o[0] = (unsigned char)(0xF0 | (plane >> 2));
        o[1] = (unsigned char)(0x80 | ((plane & 0x03) << 4) | ((lo >> 2) & 0x0F));
        o[2] = (unsigned char)(0x80 | ((lo & 0x03) << 4) | ((hi2 & 0x03) << 2) | (lo2 >> 6));
        o[3] = (unsigned char)(0x80 | (lo2 & 0x3F));
        return 4;
    }
}
00365
/*
 * Encode one UTF-8 character (1..4 bytes at s) as UTF-16BE.
 *
 * The sequence length is taken from the lead byte.  One code unit
 * (2 bytes) is written for 1- to 3-byte input, a surrogate pair
 * (4 bytes) otherwise; the number of bytes written is returned.
 * statep, l and osize are not used.
 */
static ssize_t
fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];

    /* Single byte. */
    if ((lead & 0x80) == 0) {
        o[0] = 0x00;
        o[1] = lead;
        return 2;
    }

    /* Two-byte sequence (110xxxxx). */
    if ((lead & 0xE0) == 0xC0) {
        const unsigned char t1 = s[1];

        o[0] = (unsigned char)((lead >> 2) & 0x07);
        o[1] = (unsigned char)(((lead & 0x03) << 6) | (t1 & 0x3F));
        return 2;
    }

    /* Three-byte sequence (1110xxxx); the XORs clear the 10xxxxxx marker. */
    if ((lead & 0xF0) == 0xE0) {
        const unsigned char t1 = s[1];
        const unsigned char t2 = s[2];

        o[0] = (unsigned char)((lead << 4) | ((t1 >> 2) ^ 0x20));
        o[1] = (unsigned char)((t1 << 6) | (t2 ^ 0x80));
        return 2;
    }

    /* Anything else is treated as a four-byte sequence -> surrogate pair. */
    {
        const unsigned char t1 = s[1];
        const unsigned char t2 = s[2];
        const unsigned char t3 = s[3];
        /* Plane number minus one, as stored in the lead surrogate. */
        const int w = (((lead & 0x07) << 2) | ((t1 >> 4) & 0x03)) - 1;

        o[0] = (unsigned char)(0xD8 | (w >> 2));
        o[1] = (unsigned char)((w << 6) | ((t1 & 0x0F) << 2) | ((t2 >> 4) - 8));
        o[2] = (unsigned char)(0xDC | ((t2 >> 2) & 0x03));
        o[3] = (unsigned char)((t2 << 6) | (t3 & ~0x80));
        return 4;
    }
}
00393
/*
 * Encode one UTF-16LE character as UTF-8.
 *
 * Same conversion as the big-endian variant with the two bytes of each
 * code unit swapped: s[0] is the low byte, s[1] the high byte, and for a
 * surrogate pair s[2]/s[3] are the low/high byte of the second unit.
 * Returns the number of UTF-8 bytes stored in o (1..4).  statep, l and
 * osize are not used.
 */
static ssize_t
fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lo = s[0];
    const unsigned char hi = s[1];

    /* U+0000..U+007F: passes through as a single byte. */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* Value fits in 11 bits: two-byte sequence. */
    if (hi < 0x08) {
        o[0] = (unsigned char)(0xC0 | (hi << 2) | (lo >> 6));
        o[1] = (unsigned char)(0x80 | (lo & 0x3F));
        return 2;
    }

    /* High byte outside 0xD8..0xDF: three-byte sequence. */
    if ((hi & 0xF8) != 0xD8) {
        o[0] = (unsigned char)(0xE0 | (hi >> 4));
        o[1] = (unsigned char)(0x80 | ((hi & 0x0F) << 2) | (lo >> 6));
        o[2] = (unsigned char)(0x80 | (lo & 0x3F));
        return 3;
    }

    /* Surrogate pair: four-byte sequence. */
    {
        const unsigned char lo2 = s[2];
        const unsigned char hi2 = s[3];
        /* The lead surrogate stores (plane - 1) in 4 bits; add it back. */
        const unsigned int plane = (((hi & 0x03) << 2) | (lo >> 6)) + 1;

        o[0] = (unsigned char)(0xF0 | (plane >> 2));
        o[1] = (unsigned char)(0x80 | ((plane & 0x03) << 4) | ((lo >> 2) & 0x0F));
        o[2] = (unsigned char)(0x80 | ((lo & 0x03) << 4) | ((hi2 & 0x03) << 2) | (lo2 >> 6));
        o[3] = (unsigned char)(0x80 | (lo2 & 0x3F));
        return 4;
    }
}
00421
/*
 * Encode one UTF-8 character (1..4 bytes at s) as UTF-16LE.
 *
 * Computes the same code-unit bytes as the big-endian encoder but stores
 * each unit low byte first.  Returns 2 for a single code unit and 4 for
 * a surrogate pair.  statep, l and osize are not used.
 */
static ssize_t
fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];

    /* Single byte. */
    if ((lead & 0x80) == 0) {
        o[1] = 0x00;
        o[0] = lead;
        return 2;
    }

    /* Two-byte sequence (110xxxxx). */
    if ((lead & 0xE0) == 0xC0) {
        const unsigned char t1 = s[1];

        o[1] = (unsigned char)((lead >> 2) & 0x07);
        o[0] = (unsigned char)(((lead & 0x03) << 6) | (t1 & 0x3F));
        return 2;
    }

    /* Three-byte sequence (1110xxxx); the XORs clear the 10xxxxxx marker. */
    if ((lead & 0xF0) == 0xE0) {
        const unsigned char t1 = s[1];
        const unsigned char t2 = s[2];

        o[1] = (unsigned char)((lead << 4) | ((t1 >> 2) ^ 0x20));
        o[0] = (unsigned char)((t1 << 6) | (t2 ^ 0x80));
        return 2;
    }

    /* Anything else is treated as a four-byte sequence -> surrogate pair. */
    {
        const unsigned char t1 = s[1];
        const unsigned char t2 = s[2];
        const unsigned char t3 = s[3];
        /* Plane number minus one, as stored in the lead surrogate. */
        const int w = (((lead & 0x07) << 2) | ((t1 >> 4) & 0x03)) - 1;

        o[1] = (unsigned char)(0xD8 | (w >> 2));
        o[0] = (unsigned char)((w << 6) | ((t1 & 0x0F) << 2) | ((t2 >> 4) - 8));
        o[3] = (unsigned char)(0xDC | ((t2 >> 2) & 0x03));
        o[2] = (unsigned char)((t2 << 6) | (t3 & ~0x80));
        return 4;
    }
}
00449
/*
 * Encode one UTF-32BE character (4 bytes at s) as UTF-8.
 *
 * s[0] is never inspected; s[1] holds the bits above the low 16, and
 * s[2]/s[3] the low 16 bits.  Returns the number of UTF-8 bytes stored
 * in o (1..4).  statep, l and osize are not used.
 */
static ssize_t
fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char plane = s[1];
    const unsigned char hi = s[2];
    const unsigned char lo = s[3];

    /* Non-zero plane byte: four-byte sequence. */
    if (plane != 0) {
        o[0] = (unsigned char)(0xF0 | (plane >> 2));
        o[1] = (unsigned char)(0x80 | ((plane & 0x03) << 4) | (hi >> 4));
        o[2] = (unsigned char)(0x80 | ((hi & 0x0F) << 2) | (lo >> 6));
        o[3] = (unsigned char)(0x80 | (lo & 0x3F));
        return 4;
    }

    /* U+0000..U+007F: passes through as a single byte. */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* Value fits in 11 bits: two-byte sequence. */
    if (hi < 0x08) {
        o[0] = (unsigned char)(0xC0 | (hi << 2) | (lo >> 6));
        o[1] = (unsigned char)(0x80 | (lo & 0x3F));
        return 2;
    }

    /* Remaining 16-bit values: three-byte sequence. */
    o[0] = (unsigned char)(0xE0 | (hi >> 4));
    o[1] = (unsigned char)(0x80 | ((hi & 0x0F) << 2) | (lo >> 6));
    o[2] = (unsigned char)(0x80 | (lo & 0x3F));
    return 3;
}
00478
/*
 * Encode one UTF-8 character (1..4 bytes at s) as UTF-32BE.
 *
 * Always writes exactly 4 bytes to o and returns 4; the top byte is
 * always zero.  statep, l and osize are not used.
 */
static ssize_t
fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];
    unsigned char plane = 0x00; /* bits above the low 16 */
    unsigned char hi = 0x00;    /* bits 8..15 */
    unsigned char lo;           /* bits 0..7 */

    if ((lead & 0x80) == 0) {
        /* Single byte. */
        lo = lead;
    }
    else if ((lead & 0xE0) == 0xC0) {
        /* Two-byte sequence. */
        hi = (unsigned char)((lead >> 2) & 0x07);
        lo = (unsigned char)(((lead & 0x03) << 6) | (s[1] & 0x3F));
    }
    else if ((lead & 0xF0) == 0xE0) {
        /* Three-byte sequence; the XORs clear the 10xxxxxx marker. */
        hi = (unsigned char)((lead << 4) | ((s[1] >> 2) ^ 0x20));
        lo = (unsigned char)((s[1] << 6) | (s[2] ^ 0x80));
    }
    else {
        /* Anything else is treated as a four-byte sequence. */
        plane = (unsigned char)(((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03));
        hi = (unsigned char)(((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F));
        lo = (unsigned char)(((s[2] & 0x03) << 6) | (s[3] & 0x3F));
    }

    o[0] = 0;
    o[1] = plane;
    o[2] = hi;
    o[3] = lo;
    return 4;
}
00504
/*
 * Encode one UTF-32LE character (4 bytes at s) as UTF-8.
 *
 * Byte-swapped twin of the big-endian decoder: s[0]/s[1] are the low 16
 * bits (low byte first), s[2] holds the bits above them, and s[3] is
 * never inspected.  Returns the number of UTF-8 bytes stored in o
 * (1..4).  statep, l and osize are not used.
 */
static ssize_t
fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lo = s[0];
    const unsigned char hi = s[1];
    const unsigned char plane = s[2];

    /* Non-zero plane byte: four-byte sequence. */
    if (plane != 0) {
        o[0] = (unsigned char)(0xF0 | (plane >> 2));
        o[1] = (unsigned char)(0x80 | ((plane & 0x03) << 4) | (hi >> 4));
        o[2] = (unsigned char)(0x80 | ((hi & 0x0F) << 2) | (lo >> 6));
        o[3] = (unsigned char)(0x80 | (lo & 0x3F));
        return 4;
    }

    /* U+0000..U+007F: passes through as a single byte. */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* Value fits in 11 bits: two-byte sequence. */
    if (hi < 0x08) {
        o[0] = (unsigned char)(0xC0 | (hi << 2) | (lo >> 6));
        o[1] = (unsigned char)(0x80 | (lo & 0x3F));
        return 2;
    }

    /* Remaining 16-bit values: three-byte sequence. */
    o[0] = (unsigned char)(0xE0 | (hi >> 4));
    o[1] = (unsigned char)(0x80 | ((hi & 0x0F) << 2) | (lo >> 6));
    o[2] = (unsigned char)(0x80 | (lo & 0x3F));
    return 3;
}
00533
/*
 * Encode one UTF-8 character (1..4 bytes at s) as UTF-32LE.
 *
 * Always writes exactly 4 bytes to o, least significant byte first, and
 * returns 4; the top byte (o[3]) is always zero.  statep, l and osize
 * are not used.
 */
static ssize_t
fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];
    unsigned char plane = 0x00; /* bits above the low 16 */
    unsigned char hi = 0x00;    /* bits 8..15 */
    unsigned char lo;           /* bits 0..7 */

    if ((lead & 0x80) == 0) {
        /* Single byte. */
        lo = lead;
    }
    else if ((lead & 0xE0) == 0xC0) {
        /* Two-byte sequence. */
        hi = (unsigned char)((lead >> 2) & 0x07);
        lo = (unsigned char)(((lead & 0x03) << 6) | (s[1] & 0x3F));
    }
    else if ((lead & 0xF0) == 0xE0) {
        /* Three-byte sequence; the XORs clear the 10xxxxxx marker. */
        hi = (unsigned char)((lead << 4) | ((s[1] >> 2) ^ 0x20));
        lo = (unsigned char)((s[1] << 6) | (s[2] ^ 0x80));
    }
    else {
        /* Anything else is treated as a four-byte sequence. */
        plane = (unsigned char)(((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03));
        hi = (unsigned char)(((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F));
        lo = (unsigned char)(((s[2] & 0x03) << 6) | (s[3] & 0x3F));
    }

    o[3] = 0;
    o[2] = plane;
    o[1] = hi;
    o[0] = lo;
    return 4;
}
00559
/*
 * Reset the one-byte converter state that statep points to and return 0.
 * The BOM-aware callbacks in this file treat a state byte of 0 as
 * "nothing seen or written yet".
 */
static int
state_init(void *statep)
{
    *(unsigned char *)statep = 0;
    return 0;
}
00567
00568 static VALUE
00569 fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
00570 {
00571 #define BE 1
00572 #define LE 2
00573 unsigned char *sp = statep;
00574 switch (*sp) {
00575 case 0:
00576 if (s[0] == 0xFE && s[1] == 0xFF) {
00577 *sp = BE;
00578 return ZERObt;
00579 }
00580 else if (s[0] == 0xFF && s[1] == 0xFE) {
00581 *sp = LE;
00582 return ZERObt;
00583 }
00584 break;
00585 case BE:
00586 if (s[0] < 0xD8 || 0xDF < s[0]) {
00587 return (VALUE)FUNso;
00588 }
00589 else if (s[0] <= 0xDB) {
00590 return (VALUE)from_UTF_16BE_D8toDB_00toFF;
00591 }
00592 break;
00593 case LE:
00594 if (s[1] < 0xD8 || 0xDF < s[1]) {
00595 return (VALUE)FUNso;
00596 }
00597 else if (s[1] <= 0xDB) {
00598 return (VALUE)from_UTF_16LE_00toFF_D8toDB;
00599 }
00600 break;
00601 }
00602 return (VALUE)INVALID;
00603 }
00604
00605 static ssize_t
00606 fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
00607 {
00608 unsigned char *sp = statep;
00609 switch (*sp) {
00610 case BE:
00611 return fun_so_from_utf_16be(statep, s, l, o, osize);
00612 case LE:
00613 return fun_so_from_utf_16le(statep, s, l, o, osize);
00614 }
00615 return 0;
00616 }
00617
00618 static VALUE
00619 fun_si_from_utf_32(void *statep, const unsigned char *s, size_t l)
00620 {
00621 unsigned char *sp = statep;
00622 switch (*sp) {
00623 case 0:
00624 if (s[0] == 0 && s[1] == 0 && s[2] == 0xFE && s[3] == 0xFF) {
00625 *sp = BE;
00626 return ZERObt;
00627 }
00628 else if (s[0] == 0xFF && s[1] == 0xFE && s[2] == 0 && s[3] == 0) {
00629 *sp = LE;
00630 return ZERObt;
00631 }
00632 break;
00633 case BE:
00634 if (s[0] == 0 && ((0 < s[1] && s[1] <= 0x10) ||
00635 (s[1] == 0 && (s[2] < 0xD8 || 0xDF < s[2]))))
00636 return (VALUE)FUNso;
00637 break;
00638 case LE:
00639 if (s[3] == 0 && ((0 < s[2] && s[2] <= 0x10) ||
00640 (s[2] == 0 && (s[1] < 0xD8 || 0xDF < s[1]))))
00641 return (VALUE)FUNso;
00642 break;
00643 }
00644 return (VALUE)INVALID;
00645 }
00646
00647 static ssize_t
00648 fun_so_from_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
00649 {
00650 unsigned char *sp = statep;
00651 switch (*sp) {
00652 case BE:
00653 return fun_so_from_utf_32be(statep, s, l, o, osize);
00654 case LE:
00655 return fun_so_from_utf_32le(statep, s, l, o, osize);
00656 }
00657 return 0;
00658 }
00659
/*
 * UTF-8 -> UTF-16 with a byte-order mark.
 *
 * While the state byte at statep is 0, the BOM FE FF is written first
 * and the state is set to 1; the character itself is always encoded by
 * fun_so_to_utf_16be.  Returns the total number of bytes written, which
 * can therefore be up to 2 + 4 = 6 on the first call.  osize is passed
 * through unchanged.
 */
static ssize_t
fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    unsigned char *bom_written = statep;
    ssize_t prefix_len = 0;

    if (*bom_written == 0) {
        o[0] = 0xFE;
        o[1] = 0xFF;
        o += 2;
        prefix_len = 2;
        *bom_written = 1;
    }
    return prefix_len + fun_so_to_utf_16be(statep, s, l, o, osize);
}
00672
/*
 * UTF-8 -> UTF-32 with a byte-order mark.
 *
 * While the state byte at statep is 0, the BOM 00 00 FE FF is written
 * first and the state is set to 1; the character itself is always
 * encoded by fun_so_to_utf_32be.  Returns the total number of bytes
 * written, which is 4 + 4 = 8 on the first call and 4 afterwards.
 * osize is passed through unchanged.
 */
static ssize_t
fun_so_to_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    unsigned char *bom_written = statep;
    ssize_t prefix_len = 0;

    if (*bom_written == 0) {
        o[0] = 0x00;
        o[1] = 0x00;
        o[2] = 0xFE;
        o[3] = 0xFF;
        o += 4;
        prefix_len = 4;
        *bom_written = 1;
    }
    return prefix_len + fun_so_to_utf_32be(statep, s, l, o, osize);
}
00687
/*
 * Transcoder descriptor "UTF-16BE" -> "UTF-8": lookups start at table
 * node from_UTF_16BE and fun_so_from_utf_16be fills the last callback
 * slot.
 * NOTE(review): the field names in the comments below are inferred from
 * position only -- confirm against rb_transcoder in transcode_data.h.
 */
00688 static const rb_transcoder
00689 rb_from_UTF_16BE = {
00690 "UTF-16BE", "UTF-8", from_UTF_16BE, /* encoding names and start node */
00691 TRANSCODE_TABLE_INFO, /* shared byte/word tables with their sizes */
00692 2, /* presumably input_unit_length -- TODO confirm */
00693 4, /* presumably max_input -- TODO confirm */
00694 4, /* presumably max_output -- TODO confirm */
00695 asciicompat_decoder,
00696 0, NULL, NULL, /* presumably state size / init / fini: none used here */
00697 NULL, NULL, NULL, fun_so_from_utf_16be /* only the last callback slot is set */
00698 };
00699
/*
 * Transcoder descriptor "UTF-8" -> "UTF-16BE": lookups start at table
 * node from_UTF_8 and fun_so_to_utf_16be fills the last callback slot.
 * NOTE(review): the field names in the comments below are inferred from
 * position only -- confirm against rb_transcoder in transcode_data.h.
 */
00700 static const rb_transcoder
00701 rb_to_UTF_16BE = {
00702 "UTF-8", "UTF-16BE", from_UTF_8, /* encoding names and start node */
00703 TRANSCODE_TABLE_INFO, /* shared byte/word tables with their sizes */
00704 1, /* presumably input_unit_length -- TODO confirm */
00705 4, /* presumably max_input -- TODO confirm */
00706 4, /* presumably max_output -- TODO confirm */
00707 asciicompat_encoder,
00708 0, NULL, NULL, /* presumably state size / init / fini: none used here */
00709 NULL, NULL, NULL, fun_so_to_utf_16be /* only the last callback slot is set */
00710 };
00711
/*
 * Transcoder descriptor "UTF-16LE" -> "UTF-8": lookups start at table
 * node from_UTF_16LE and fun_so_from_utf_16le fills the last callback
 * slot.
 * NOTE(review): the field names in the comments below are inferred from
 * position only -- confirm against rb_transcoder in transcode_data.h.
 */
00712 static const rb_transcoder
00713 rb_from_UTF_16LE = {
00714 "UTF-16LE", "UTF-8", from_UTF_16LE, /* encoding names and start node */
00715 TRANSCODE_TABLE_INFO, /* shared byte/word tables with their sizes */
00716 2, /* presumably input_unit_length -- TODO confirm */
00717 4, /* presumably max_input -- TODO confirm */
00718 4, /* presumably max_output -- TODO confirm */
00719 asciicompat_decoder,
00720 0, NULL, NULL, /* presumably state size / init / fini: none used here */
00721 NULL, NULL, NULL, fun_so_from_utf_16le /* only the last callback slot is set */
00722 };
00723
/*
 * Transcoder descriptor "UTF-8" -> "UTF-16LE": lookups start at table
 * node from_UTF_8 and fun_so_to_utf_16le fills the last callback slot.
 * NOTE(review): the field names in the comments below are inferred from
 * position only -- confirm against rb_transcoder in transcode_data.h.
 */
00724 static const rb_transcoder
00725 rb_to_UTF_16LE = {
00726 "UTF-8", "UTF-16LE", from_UTF_8, /* encoding names and start node */
00727 TRANSCODE_TABLE_INFO, /* shared byte/word tables with their sizes */
00728 1, /* presumably input_unit_length -- TODO confirm */
00729 4, /* presumably max_input -- TODO confirm */
00730 4, /* presumably max_output -- TODO confirm */
00731 asciicompat_encoder,
00732 0, NULL, NULL, /* presumably state size / init / fini: none used here */
00733 NULL, NULL, NULL, fun_so_to_utf_16le /* only the last callback slot is set */
00734 };
00735
/*
 * Transcoder descriptor "UTF-32BE" -> "UTF-8": lookups start at table
 * node from_UTF_32BE and fun_so_from_utf_32be fills the last callback
 * slot.
 * NOTE(review): the field names in the comments below are inferred from
 * position only -- confirm against rb_transcoder in transcode_data.h.
 */
00736 static const rb_transcoder
00737 rb_from_UTF_32BE = {
00738 "UTF-32BE", "UTF-8", from_UTF_32BE, /* encoding names and start node */
00739 TRANSCODE_TABLE_INFO, /* shared byte/word tables with their sizes */
00740 4, /* presumably input_unit_length -- TODO confirm */
00741 4, /* presumably max_input -- TODO confirm */
00742 4, /* presumably max_output -- TODO confirm */
00743 asciicompat_decoder,
00744 0, NULL, NULL, /* presumably state size / init / fini: none used here */
00745 NULL, NULL, NULL, fun_so_from_utf_32be /* only the last callback slot is set */
00746 };
00747
/*
 * Transcoder descriptor "UTF-8" -> "UTF-32BE": lookups start at table
 * node from_UTF_8 and fun_so_to_utf_32be fills the last callback slot.
 * NOTE(review): the field names in the comments below are inferred from
 * position only -- confirm against rb_transcoder in transcode_data.h.
 */
00748 static const rb_transcoder
00749 rb_to_UTF_32BE = {
00750 "UTF-8", "UTF-32BE", from_UTF_8, /* encoding names and start node */
00751 TRANSCODE_TABLE_INFO, /* shared byte/word tables with their sizes */
00752 1, /* presumably input_unit_length -- TODO confirm */
00753 4, /* presumably max_input -- TODO confirm */
00754 4, /* presumably max_output -- TODO confirm */
00755 asciicompat_encoder,
00756 0, NULL, NULL, /* presumably state size / init / fini: none used here */
00757 NULL, NULL, NULL, fun_so_to_utf_32be /* only the last callback slot is set */
00758 };
00759
/*
 * Transcoder descriptor "UTF-32LE" -> "UTF-8": lookups start at table
 * node from_UTF_32LE and fun_so_from_utf_32le fills the last callback
 * slot.
 * NOTE(review): the field names in the comments below are inferred from
 * position only -- confirm against rb_transcoder in transcode_data.h.
 */
00760 static const rb_transcoder
00761 rb_from_UTF_32LE = {
00762 "UTF-32LE", "UTF-8", from_UTF_32LE, /* encoding names and start node */
00763 TRANSCODE_TABLE_INFO, /* shared byte/word tables with their sizes */
00764 4, /* presumably input_unit_length -- TODO confirm */
00765 4, /* presumably max_input -- TODO confirm */
00766 4, /* presumably max_output -- TODO confirm */
00767 asciicompat_decoder,
00768 0, NULL, NULL, /* presumably state size / init / fini: none used here */
00769 NULL, NULL, NULL, fun_so_from_utf_32le /* only the last callback slot is set */
00770 };
00771
/*
 * Transcoder descriptor "UTF-8" -> "UTF-32LE": lookups start at table
 * node from_UTF_8 and fun_so_to_utf_32le fills the last callback slot.
 * NOTE(review): the field names in the comments below are inferred from
 * position only -- confirm against rb_transcoder in transcode_data.h.
 */
00772 static const rb_transcoder
00773 rb_to_UTF_32LE = {
00774 "UTF-8", "UTF-32LE", from_UTF_8, /* encoding names and start node */
00775 TRANSCODE_TABLE_INFO, /* shared byte/word tables with their sizes */
00776 1, /* presumably input_unit_length -- TODO confirm */
00777 4, /* presumably max_input -- TODO confirm */
00778 4, /* presumably max_output -- TODO confirm */
00779 asciicompat_encoder,
00780 0, NULL, NULL, /* presumably state size / init / fini: none used here */
00781 NULL, NULL, NULL, fun_so_to_utf_32le /* only the last callback slot is set */
00782 };
00783
/*
 * Transcoder descriptor "UTF-16" (byte order taken from a BOM) ->
 * "UTF-8": lookups start at table node from_UTF_16.  Unlike the fixed
 * byte-order descriptors it declares one byte of state, reset by
 * state_init, and installs both fun_si_from_utf_16 and
 * fun_so_from_utf_16.
 * NOTE(review): the field names in the comments below are inferred from
 * position only -- confirm against rb_transcoder in transcode_data.h.
 */
00784 static const rb_transcoder
00785 rb_from_UTF_16 = {
00786 "UTF-16", "UTF-8", from_UTF_16, /* encoding names and start node */
00787 TRANSCODE_TABLE_INFO, /* shared byte/word tables with their sizes */
00788 2, /* presumably input_unit_length -- TODO confirm */
00789 4, /* presumably max_input -- TODO confirm */
00790 4, /* presumably max_output -- TODO confirm */
00791 asciicompat_decoder,
00792 1, state_init, NULL, /* presumably state size / init / fini -- TODO confirm */
00793 NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16 /* second and last callback slots are set */
00794 };
00795
/*
 * Transcoder descriptor "UTF-32" (byte order taken from a BOM) ->
 * "UTF-8": lookups start at table node from_UTF_32.  It declares one
 * byte of state, reset by state_init, and installs both
 * fun_si_from_utf_32 and fun_so_from_utf_32.
 * NOTE(review): the field names in the comments below are inferred from
 * position only -- confirm against rb_transcoder in transcode_data.h.
 */
00796 static const rb_transcoder
00797 rb_from_UTF_32 = {
00798 "UTF-32", "UTF-8", from_UTF_32, /* encoding names and start node */
00799 TRANSCODE_TABLE_INFO, /* shared byte/word tables with their sizes */
00800 4, /* presumably input_unit_length -- TODO confirm */
00801 4, /* presumably max_input -- TODO confirm */
00802 4, /* presumably max_output -- TODO confirm */
00803 asciicompat_decoder,
00804 1, state_init, NULL, /* presumably state size / init / fini -- TODO confirm */
00805 NULL, fun_si_from_utf_32, NULL, fun_so_from_utf_32 /* second and last callback slots are set */
00806 };
00807
00808 static const rb_transcoder
00809 rb_to_UTF_16 = {
00810 "UTF-8", "UTF-16", from_UTF_8,
00811 TRANSCODE_TABLE_INFO,
00812 1,
00813 4,
00814 4,
00815 asciicompat_encoder,
00816 1, state_init, NULL,
00817 NULL, NULL, NULL, fun_so_to_utf_16
00818 };
00819
00820 static const rb_transcoder
00821 rb_to_UTF_32 = {
00822 "UTF-8", "UTF-32", from_UTF_8,
00823 TRANSCODE_TABLE_INFO,
00824 1,
00825 4,
00826 4,
00827 asciicompat_encoder,
00828 1, state_init, NULL,
00829 NULL, NULL, NULL, fun_so_to_utf_32
00830 };
00831
00832 void
00833 Init_utf_16_32(void)
00834 {
00835 rb_register_transcoder(&rb_from_UTF_16BE);
00836 rb_register_transcoder(&rb_to_UTF_16BE);
00837 rb_register_transcoder(&rb_from_UTF_16LE);
00838 rb_register_transcoder(&rb_to_UTF_16LE);
00839 rb_register_transcoder(&rb_from_UTF_32BE);
00840 rb_register_transcoder(&rb_to_UTF_32BE);
00841 rb_register_transcoder(&rb_from_UTF_32LE);
00842 rb_register_transcoder(&rb_to_UTF_32LE);
00843 rb_register_transcoder(&rb_from_UTF_16);
00844 rb_register_transcoder(&rb_to_UTF_16);
00845 rb_register_transcoder(&rb_from_UTF_32);
00846 rb_register_transcoder(&rb_to_UTF_32);
00847 }
00848
00849