00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "regint.h"
00031
/* Current default character encoding.  Starts out as
 * ONIG_ENCODING_INIT_DEFAULT; returned by onigenc_get_default_encoding()
 * and overwritten by onigenc_set_default_encoding(). */
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
00033
/*
 * Initialisation hook of the encoding layer.
 *
 * There is nothing to set up here, so the call always reports
 * success by returning 0.
 */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
00039
00040 extern OnigEncoding
00041 onigenc_get_default_encoding(void)
00042 {
00043 return OnigEncDefaultCharEncoding;
00044 }
00045
00046 extern int
00047 onigenc_set_default_encoding(OnigEncoding enc)
00048 {
00049 OnigEncDefaultCharEncoding = enc;
00050 return 0;
00051 }
00052
00053 extern int
00054 onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
00055 {
00056 int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
00057 if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
00058 return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
00059 else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
00060 return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
00061 return 1;
00062 }
00063
00064 extern UChar*
00065 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00066 {
00067 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00068 if (p < s) {
00069 p += enclen(enc, p, end);
00070 }
00071 return p;
00072 }
00073
00074 extern UChar*
00075 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
00076 const UChar* start, const UChar* s, const UChar* end, const UChar** prev)
00077 {
00078 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00079
00080 if (p < s) {
00081 if (prev) *prev = (const UChar* )p;
00082 p += enclen(enc, p, end);
00083 }
00084 else {
00085 if (prev) *prev = (const UChar* )NULL;
00086 }
00087 return p;
00088 }
00089
00090 extern UChar*
00091 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00092 {
00093 if (s <= start)
00094 return (UChar* )NULL;
00095
00096 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
00097 }
00098
00099 extern UChar*
00100 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n)
00101 {
00102 while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
00103 if (s <= start)
00104 return (UChar* )NULL;
00105
00106 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
00107 }
00108 return (UChar* )s;
00109 }
00110
00111 extern UChar*
00112 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
00113 {
00114 UChar* q = (UChar* )p;
00115 while (n-- > 0) {
00116 q += ONIGENC_MBC_ENC_LEN(enc, q, end);
00117 }
00118 return (q <= end ? q : NULL);
00119 }
00120
00121 extern int
00122 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
00123 {
00124 int n = 0;
00125 UChar* q = (UChar* )p;
00126
00127 while (q < end) {
00128 q += ONIGENC_MBC_ENC_LEN(enc, q, end);
00129 n++;
00130 }
00131 return n;
00132 }
00133
00134 extern int
00135 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
00136 {
00137 int n = 0;
00138 UChar* p = (UChar* )s;
00139 UChar* e;
00140
00141 while (1) {
00142 if (*p == '\0') {
00143 UChar* q;
00144 int len = ONIGENC_MBC_MINLEN(enc);
00145
00146 if (len == 1) return n;
00147 q = p + 1;
00148 while (len > 1) {
00149 if (*q != '\0') break;
00150 q++;
00151 len--;
00152 }
00153 if (len == 1) return n;
00154 }
00155 e = p + ONIGENC_MBC_MAXLEN(enc);
00156 p += ONIGENC_MBC_ENC_LEN(enc, p, e);
00157 n++;
00158 }
00159 }
00160
00161 extern int
00162 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
00163 {
00164 UChar* start = (UChar* )s;
00165 UChar* p = (UChar* )s;
00166 UChar* e;
00167
00168 while (1) {
00169 if (*p == '\0') {
00170 UChar* q;
00171 int len = ONIGENC_MBC_MINLEN(enc);
00172
00173 if (len == 1) return (int )(p - start);
00174 q = p + 1;
00175 while (len > 1) {
00176 if (*q != '\0') break;
00177 q++;
00178 len--;
00179 }
00180 if (len == 1) return (int )(p - start);
00181 }
00182 e = p + ONIGENC_MBC_MAXLEN(enc);
00183 p += ONIGENC_MBC_ENC_LEN(enc, p, e);
00184 }
00185 }
00186
00187 const UChar OnigEncAsciiToLowerCaseTable[] = {
00188 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00189 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00190 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00191 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00192 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00193 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00194 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00195 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00196 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00197 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00198 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00199 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
00200 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00201 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00202 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00203 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
00204 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00205 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00206 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00207 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00208 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00209 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00210 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00211 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00212 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
00213 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
00214 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
00215 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
00216 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00217 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00218 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
00219 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
00220 };
00221
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> ASCII upper-case mapping; only compiled when
 * USE_UPPER_CASE_TABLE is defined.
 *
 * Entries 0x61..0x7A ('a'..'z') hold 0x41..0x5A ('A'..'Z'); every other
 * entry maps to itself.
 */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif
00258
/*
 * Per-byte character-class bit masks.
 *
 * Entries 0x00..0x7F carry a non-zero mask; entries 0x80..0xFF are all
 * zero, i.e. no class bit is set for non-ASCII bytes.
 * NOTE(review): the meaning of the individual bits is not defined in this
 * file - presumably they correspond to the ONIGENC_CTYPE_* classes tested
 * via ONIGENC_IS_ASCII_CODE_CTYPE; confirm against the header.
 */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
00293
/*
 * Byte -> lower-case mapping for ISO-8859-1.
 *
 * Besides the ASCII range (0x41..0x5A -> 0x61..0x7A), entries 0xC0..0xD6
 * hold 0xE0..0xF6 and entries 0xD8..0xDE hold 0xF8..0xFE.  Entries 0xD7
 * and 0xDF, like all remaining entries, map to themselves.
 */
const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
00328
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> upper-case mapping for ISO-8859-1; only compiled when
 * USE_UPPER_CASE_TABLE is defined.
 *
 * Besides the ASCII range (0x61..0x7A -> 0x41..0x5A), entries 0xE0..0xF6
 * hold 0xC0..0xD6 and entries 0xF8..0xFE hold 0xD8..0xDE.  Entries 0xF7
 * and 0xFF, like all remaining entries, map to themselves.
 */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
};
#endif
00365
/*
 * No-op: the `table` argument is marked unused and the body is empty, so
 * calling this function has no effect.
 */
extern void
onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
{
  /* intentionally empty */
}
00372
00373 extern UChar*
00374 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00375 {
00376 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00377 }
00378
/*
 * Case-fold pairs for the 26 ASCII letters: each entry pairs an upper-case
 * code (0x41..0x5A) with the code 0x20 above it (0x61..0x7A).  The
 * initialisers are read through the `from` and `to` members by
 * onigenc_ascii_apply_all_case_fold().
 */
const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
  { 0x41, 0x61 },
  { 0x42, 0x62 },
  { 0x43, 0x63 },
  { 0x44, 0x64 },
  { 0x45, 0x65 },
  { 0x46, 0x66 },
  { 0x47, 0x67 },
  { 0x48, 0x68 },
  { 0x49, 0x69 },
  { 0x4a, 0x6a },
  { 0x4b, 0x6b },
  { 0x4c, 0x6c },
  { 0x4d, 0x6d },
  { 0x4e, 0x6e },
  { 0x4f, 0x6f },
  { 0x50, 0x70 },
  { 0x51, 0x71 },
  { 0x52, 0x72 },
  { 0x53, 0x73 },
  { 0x54, 0x74 },
  { 0x55, 0x75 },
  { 0x56, 0x76 },
  { 0x57, 0x77 },
  { 0x58, 0x78 },
  { 0x59, 0x79 },
  { 0x5a, 0x7a }
};
00407
00408 extern int
00409 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
00410 OnigApplyAllCaseFoldFunc f, void* arg,
00411 OnigEncoding enc ARG_UNUSED)
00412 {
00413 OnigCodePoint code;
00414 int i, r;
00415
00416 for (i = 0;
00417 i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
00418 i++) {
00419 code = OnigAsciiLowerMap[i].to;
00420 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
00421 if (r != 0) return r;
00422
00423 code = OnigAsciiLowerMap[i].from;
00424 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
00425 if (r != 0) return r;
00426 }
00427
00428 return 0;
00429 }
00430
00431 extern int
00432 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
00433 const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[],
00434 OnigEncoding enc ARG_UNUSED)
00435 {
00436 if (0x41 <= *p && *p <= 0x5a) {
00437 items[0].byte_len = 1;
00438 items[0].code_len = 1;
00439 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00440 return 1;
00441 }
00442 else if (0x61 <= *p && *p <= 0x7a) {
00443 items[0].byte_len = 1;
00444 items[0].code_len = 1;
00445 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00446 return 1;
00447 }
00448 else
00449 return 0;
00450 }
00451
00452 static int
00453 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
00454 OnigApplyAllCaseFoldFunc f, void* arg)
00455 {
00456 OnigCodePoint ss[] = { 0x73, 0x73 };
00457
00458 return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
00459 }
00460
00461 extern int
00462 onigenc_apply_all_case_fold_with_map(int map_size,
00463 const OnigPairCaseFoldCodes map[],
00464 int ess_tsett_flag, OnigCaseFoldType flag,
00465 OnigApplyAllCaseFoldFunc f, void* arg)
00466 {
00467 OnigCodePoint code;
00468 int i, r;
00469
00470 r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);
00471 if (r != 0) return r;
00472
00473 for (i = 0; i < map_size; i++) {
00474 code = map[i].to;
00475 r = (*f)(map[i].from, &code, 1, arg);
00476 if (r != 0) return r;
00477
00478 code = map[i].from;
00479 r = (*f)(map[i].to, &code, 1, arg);
00480 if (r != 0) return r;
00481 }
00482
00483 if (ess_tsett_flag != 0)
00484 return ss_apply_all_case_fold(flag, f, arg);
00485
00486 return 0;
00487 }
00488
00489 extern int
00490 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
00491 const OnigPairCaseFoldCodes map[],
00492 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
00493 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
00494 {
00495 if (0x41 <= *p && *p <= 0x5a) {
00496 items[0].byte_len = 1;
00497 items[0].code_len = 1;
00498 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00499 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
00500 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
00501
00502 items[1].byte_len = 2;
00503 items[1].code_len = 1;
00504 items[1].code[0] = (OnigCodePoint )0xdf;
00505 return 2;
00506 }
00507 else
00508 return 1;
00509 }
00510 else if (0x61 <= *p && *p <= 0x7a) {
00511 items[0].byte_len = 1;
00512 items[0].code_len = 1;
00513 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00514 if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
00515 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
00516
00517 items[1].byte_len = 2;
00518 items[1].code_len = 1;
00519 items[1].code[0] = (OnigCodePoint )0xdf;
00520 return 2;
00521 }
00522 else
00523 return 1;
00524 }
00525 else if (*p == 0xdf && ess_tsett_flag != 0) {
00526 items[0].byte_len = 1;
00527 items[0].code_len = 2;
00528 items[0].code[0] = (OnigCodePoint )'s';
00529 items[0].code[1] = (OnigCodePoint )'s';
00530
00531 items[1].byte_len = 1;
00532 items[1].code_len = 2;
00533 items[1].code[0] = (OnigCodePoint )'S';
00534 items[1].code[1] = (OnigCodePoint )'S';
00535
00536 items[2].byte_len = 1;
00537 items[2].code_len = 2;
00538 items[2].code[0] = (OnigCodePoint )'s';
00539 items[2].code[1] = (OnigCodePoint )'S';
00540
00541 items[3].byte_len = 1;
00542 items[3].code_len = 2;
00543 items[3].code[0] = (OnigCodePoint )'S';
00544 items[3].code[1] = (OnigCodePoint )'s';
00545
00546 return 4;
00547 }
00548 else {
00549 int i;
00550
00551 for (i = 0; i < map_size; i++) {
00552 if (*p == map[i].from) {
00553 items[0].byte_len = 1;
00554 items[0].code_len = 1;
00555 items[0].code[0] = map[i].to;
00556 return 1;
00557 }
00558 else if (*p == map[i].to) {
00559 items[0].byte_len = 1;
00560 items[0].code_len = 1;
00561 items[0].code[0] = map[i].from;
00562 return 1;
00563 }
00564 }
00565 }
00566
00567 return 0;
00568 }
00569
00570
00571 extern int
00572 onigenc_not_support_get_ctype_code_range(OnigCtype ctype,
00573 OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
00574 OnigEncoding enc)
00575 {
00576 return ONIG_NO_SUPPORT_CONFIG;
00577 }
00578
00579 extern int
00580 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
00581 {
00582 if (p < end) {
00583 if (*p == 0x0a) return 1;
00584 }
00585 return 0;
00586 }
00587
00588
00589 extern int
00590 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
00591 const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED)
00592 {
00593 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
00594
00595 (*p)++;
00596 return 1;
00597 }
00598
#if 0
/*
 * Disabled by the enclosing "#if 0"; not part of the build.
 *
 * As written it advances *pp by one byte and returns
 * ONIGENC_IS_ASCII_CODE_CASE_AMBIG for the byte *pp pointed at on entry.
 */
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED,
                               const UChar** pp, const UChar* end ARG_UNUSED)
{
  const UChar* p = *pp;

  (*pp)++;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
#endif
00610
00611 extern int
00612 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED,
00613 OnigEncoding enc ARG_UNUSED)
00614 {
00615 return 1;
00616 }
00617
00618 extern OnigCodePoint
00619 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
00620 OnigEncoding enc ARG_UNUSED)
00621 {
00622 return (OnigCodePoint )(*p);
00623 }
00624
00625 extern int
00626 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
00627 {
00628 return 1;
00629 }
00630
00631 extern int
00632 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
00633 {
00634 if (code > 0xff)
00635 rb_raise(rb_eRangeError, "%u out of char range", code);
00636 *buf = (UChar )(code & 0xff);
00637 return 1;
00638 }
00639
00640 extern UChar*
00641 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
00642 const UChar* end,
00643 OnigEncoding enc ARG_UNUSED)
00644 {
00645 return (UChar* )s;
00646 }
00647
00648 extern int
00649 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
00650 OnigEncoding enc ARG_UNUSED)
00651 {
00652 return TRUE;
00653 }
00654
00655 extern int
00656 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
00657 OnigEncoding enc ARG_UNUSED)
00658 {
00659 return FALSE;
00660 }
00661
00662 extern int
00663 onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype,
00664 OnigEncoding enc ARG_UNUSED)
00665 {
00666 if (code < 128)
00667 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00668 else
00669 return FALSE;
00670 }
00671
00672 extern OnigCodePoint
00673 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
00674 {
00675 int c, i, len;
00676 OnigCodePoint n;
00677
00678 len = enclen(enc, p, end);
00679 n = (OnigCodePoint )(*p++);
00680 if (len == 1) return n;
00681
00682 for (i = 1; i < len; i++) {
00683 if (p >= end) break;
00684 c = *p++;
00685 n <<= 8; n += c;
00686 }
00687 return n;
00688 }
00689
00690 extern int
00691 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
00692 const UChar** pp, const UChar* end ARG_UNUSED,
00693 UChar* lower)
00694 {
00695 int len;
00696 const UChar *p = *pp;
00697
00698 if (ONIGENC_IS_MBC_ASCII(p)) {
00699 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
00700 (*pp)++;
00701 return 1;
00702 }
00703 else {
00704 int i;
00705
00706 len = enclen(enc, p, end);
00707 for (i = 0; i < len; i++) {
00708 *lower++ = *p++;
00709 }
00710 (*pp) += len;
00711 return len;
00712 }
00713 }
00714
#if 0
/*
 * Disabled by the enclosing "#if 0"; not part of the build.
 *
 * NOTE(review): the enclen() call below passes two arguments, whereas
 * every live call in this file passes three (enc, p, end); this code
 * would need updating before it could be re-enabled.
 */
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
                             const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED)
{
  const UChar* p = *pp;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    (*pp)++;
    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
  }

  (*pp) += enclen(enc, p);
  return FALSE;
}
#endif
00731
00732 extern int
00733 onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
00734 {
00735 if (code <= 0xff) return 1;
00736 if (code <= 0xffff) return 2;
00737 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
00738 }
00739
00740 extern int
00741 onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
00742 {
00743 if ((code & 0xff000000) != 0) return 4;
00744 else if ((code & 0xff0000) != 0) return 3;
00745 else if ((code & 0xff00) != 0) return 2;
00746 else return 1;
00747 }
00748
00749 extern int
00750 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
00751 {
00752 UChar *p = buf;
00753
00754 if ((code & 0xff00) != 0) {
00755 *p++ = (UChar )((code >> 8) & 0xff);
00756 }
00757 *p++ = (UChar )(code & 0xff);
00758
00759 #if 1
00760 if (enclen(enc, buf, p) != (p - buf))
00761 return ONIGERR_INVALID_CODE_POINT_VALUE;
00762 #endif
00763 return (int)(p - buf);
00764 }
00765
00766 extern int
00767 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
00768 {
00769 UChar *p = buf;
00770
00771 if ((code & 0xff000000) != 0) {
00772 *p++ = (UChar )((code >> 24) & 0xff);
00773 }
00774 if ((code & 0xff0000) != 0 || p != buf) {
00775 *p++ = (UChar )((code >> 16) & 0xff);
00776 }
00777 if ((code & 0xff00) != 0 || p != buf) {
00778 *p++ = (UChar )((code >> 8) & 0xff);
00779 }
00780 *p++ = (UChar )(code & 0xff);
00781
00782 #if 1
00783 if (enclen(enc, buf, p) != (p - buf))
00784 return ONIGERR_INVALID_CODE_POINT_VALUE;
00785 #endif
00786 return (int)(p - buf);
00787 }
00788
00789 extern int
00790 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
00791 {
00792 static const PosixBracketEntryType PBS[] = {
00793 PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM),
00794 PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA),
00795 PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK),
00796 PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL),
00797 PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT),
00798 PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH),
00799 PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER),
00800 PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT),
00801 PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT),
00802 PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE),
00803 PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER),
00804 PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT),
00805 PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII),
00806 PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD),
00807 };
00808
00809 const PosixBracketEntryType *pb, *pbe;
00810 int len;
00811
00812 len = onigenc_strlen(enc, p, end);
00813 for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) {
00814 if (len == pb->len &&
00815 STRNCASECMP((char *)p, (char *)pb->name, len) == 0)
00816 return pb->ctype;
00817 }
00818
00819 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
00820 }
00821
00822 extern int
00823 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
00824 unsigned int ctype)
00825 {
00826 if (code < 128)
00827 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00828 else {
00829 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
00830 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
00831 }
00832 }
00833
00834 return FALSE;
00835 }
00836
00837 extern int
00838 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
00839 unsigned int ctype)
00840 {
00841 if (code < 128)
00842 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00843 else {
00844 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
00845 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
00846 }
00847 }
00848
00849 return FALSE;
00850 }
00851
00852 extern int
00853 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
00854 const UChar* sascii , int n)
00855 {
00856 int x, c;
00857
00858 while (n-- > 0) {
00859 if (p >= end) return (int )(*sascii);
00860
00861 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
00862 x = *sascii - c;
00863 if (x) return x;
00864
00865 sascii++;
00866 p += enclen(enc, p, end);
00867 }
00868 return 0;
00869 }
00870
00871
00872 static int
00873 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
00874 {
00875 size_t size;
00876 const OnigCodePoint **list = *plist;
00877
00878 size = sizeof(OnigCodePoint*) * new_size;
00879 if (IS_NULL(list)) {
00880 list = (const OnigCodePoint** )xmalloc(size);
00881 }
00882 else {
00883 list = (const OnigCodePoint** )xrealloc((void* )list, size);
00884 }
00885
00886 if (IS_NULL(list)) return ONIGERR_MEMORY;
00887
00888 *plist = list;
00889 *psize = new_size;
00890
00891 return 0;
00892 }
00893
00894 extern int
00895 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
00896 hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
00897 int *psize)
00898 {
00899 #define PROP_INIT_SIZE 16
00900
00901 int r;
00902
00903 if (*psize <= *pnum) {
00904 int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
00905 r = resize_property_list(new_size, plist, psize);
00906 if (r != 0) return r;
00907 }
00908
00909 (*plist)[*pnum] = prop;
00910
00911 if (ONIG_IS_NULL(*table)) {
00912 *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
00913 if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
00914 }
00915
00916 *pnum = *pnum + 1;
00917 onig_st_insert_strend(*table, name, name + strlen((char* )name),
00918 (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
00919 return 0;
00920 }
00921
00922 extern int
00923 onigenc_property_list_init(int (*f)(void))
00924 {
00925 int r;
00926
00927 THREAD_ATOMIC_START;
00928
00929 r = f();
00930
00931 THREAD_ATOMIC_END;
00932 return r;
00933 }
00934