00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "regenc.h"
00031
/*
 * Element count of a true array, as an int.
 * Only valid on arrays, not on pointers or array-typed function parameters.
 * The whole expansion is parenthesized so the cast cannot be split from its
 * operand by a neighbouring operator (e.g. `sizeof numberof(a)`).
 */
#define numberof(array) ((int)(sizeof(array) / sizeof((array)[0])))
00033
/*
 * Evaluates to 1 when the ctype-table entry for byte value `code` has the
 * bit selected by CTYPE_TO_BIT(ctype) set, and to 0 otherwise.
 * `code` indexes a 256-entry table, so callers must keep it below 256.
 */
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
  (0 != (CTYPE_TO_BIT(ctype) & EncISO_8859_1_CtypeTable[code]))
00036
/*
 * Character-type bitmask for each of the 256 byte values, indexed by the
 * byte value itself. ENC_IS_ISO_8859_1_CTYPE tests an entry against
 * CTYPE_TO_BIT(ctype); the meaning of each bit is defined by that macro,
 * which is not declared in this file.
 * Every entry for 0x00-0x7f has bit 0x4000 set and no entry for 0x80-0xff
 * does -- presumably an "ASCII" class bit; confirm against CTYPE_TO_BIT.
 * Each row below holds eight consecutive byte values.
 */
00037 static const unsigned short EncISO_8859_1_CtypeTable[256] = {
00038 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, /* 0x00-0x07 */
00039 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, /* 0x08-0x0f */
00040 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, /* 0x10-0x17 */
00041 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, /* 0x18-0x1f */
00042 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, /* 0x20-0x27 */
00043 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, /* 0x28-0x2f */
00044 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, /* 0x30-0x37 */
00045 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, /* 0x38-0x3f */
00046 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, /* 0x40-0x47 */
00047 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, /* 0x48-0x4f */
00048 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, /* 0x50-0x57 */
00049 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, /* 0x58-0x5f */
00050 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, /* 0x60-0x67 */
00051 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, /* 0x68-0x6f */
00052 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, /* 0x70-0x77 */
00053 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, /* 0x78-0x7f */
00054 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x80-0x87 */
00055 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x88-0x8f */
00056 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x90-0x97 */
00057 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x98-0x9f */
00058 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, /* 0xa0-0xa7 */
00059 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, /* 0xa8-0xaf */
00060 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, /* 0xb0-0xb7 */
00061 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, /* 0xb8-0xbf */
00062 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, /* 0xc0-0xc7 */
00063 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, /* 0xc8-0xcf */
00064 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, /* 0xd0-0xd7 */
00065 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, /* 0xd8-0xdf */
00066 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, /* 0xe0-0xe7 */
00067 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, /* 0xe8-0xef */
00068 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, /* 0xf0-0xf7 */
00069 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 /* 0xf8-0xff */
00070 };
00071
/*
 * Pairs of byte values that fold to each other; in every pair the second
 * value is the first plus 0x20. The table covers 0xc0-0xde except 0xd7
 * (the multiplication sign in ISO-8859-1), which has no entry.
 * Passed whole to onigenc_apply_all_case_fold_with_map() by
 * apply_all_case_fold().
 */
00072 static const OnigPairCaseFoldCodes CaseFoldMap[] = {
00073 { 0xc0, 0xe0 },
00074 { 0xc1, 0xe1 },
00075 { 0xc2, 0xe2 },
00076 { 0xc3, 0xe3 },
00077 { 0xc4, 0xe4 },
00078 { 0xc5, 0xe5 },
00079 { 0xc6, 0xe6 },
00080 { 0xc7, 0xe7 },
00081 { 0xc8, 0xe8 },
00082 { 0xc9, 0xe9 },
00083 { 0xca, 0xea },
00084 { 0xcb, 0xeb },
00085 { 0xcc, 0xec },
00086 { 0xcd, 0xed },
00087 { 0xce, 0xee },
00088 { 0xcf, 0xef },
00089
00090 { 0xd0, 0xf0 },
00091 { 0xd1, 0xf1 },
00092 { 0xd2, 0xf2 },
00093 { 0xd3, 0xf3 },
00094 { 0xd4, 0xf4 },
00095 { 0xd5, 0xf5 },
00096 { 0xd6, 0xf6 },
/* 0xd7 is deliberately skipped: there is no { 0xd7, 0xf7 } pair. */
00097 { 0xd8, 0xf8 },
00098 { 0xd9, 0xf9 },
00099 { 0xda, 0xfa },
00100 { 0xdb, 0xfb },
00101 { 0xdc, 0xfc },
00102 { 0xdd, 0xfd },
00103 { 0xde, 0xfe }
00104 };
00105
00106 static int
00107 apply_all_case_fold(OnigCaseFoldType flag,
00108 OnigApplyAllCaseFoldFunc f, void* arg,
00109 OnigEncoding enc ARG_UNUSED)
00110 {
00111 return onigenc_apply_all_case_fold_with_map(
00112 numberof(CaseFoldMap), CaseFoldMap, 1,
00113 flag, f, arg);
00114 }
00115
00116 static int
00117 get_case_fold_codes_by_str(OnigCaseFoldType flag,
00118 const OnigUChar* p, const OnigUChar* end,
00119 OnigCaseFoldCodeItem items[],
00120 OnigEncoding enc ARG_UNUSED)
00121 {
00122 if (0x41 <= *p && *p <= 0x5a) {
00123 items[0].byte_len = 1;
00124 items[0].code_len = 1;
00125 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00126 if (*p == 0x53 && end > p + 1
00127 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
00128 items[1].byte_len = 2;
00129 items[1].code_len = 1;
00130 items[1].code[0] = (OnigCodePoint )0xdf;
00131 return 2;
00132 }
00133 else
00134 return 1;
00135 }
00136 else if (0x61 <= *p && *p <= 0x7a) {
00137 items[0].byte_len = 1;
00138 items[0].code_len = 1;
00139 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00140 if (*p == 0x73 && end > p + 1
00141 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
00142 items[1].byte_len = 2;
00143 items[1].code_len = 1;
00144 items[1].code[0] = (OnigCodePoint )0xdf;
00145 return 2;
00146 }
00147 else
00148 return 1;
00149 }
00150 else if (0xc0 <= *p && *p <= 0xcf) {
00151 items[0].byte_len = 1;
00152 items[0].code_len = 1;
00153 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00154 return 1;
00155 }
00156 else if (0xd0 <= *p && *p <= 0xdf) {
00157 if (*p == 0xdf) {
00158 items[0].byte_len = 1;
00159 items[0].code_len = 2;
00160 items[0].code[0] = (OnigCodePoint )'s';
00161 items[0].code[1] = (OnigCodePoint )'s';
00162
00163 items[1].byte_len = 1;
00164 items[1].code_len = 2;
00165 items[1].code[0] = (OnigCodePoint )'S';
00166 items[1].code[1] = (OnigCodePoint )'S';
00167
00168 items[2].byte_len = 1;
00169 items[2].code_len = 2;
00170 items[2].code[0] = (OnigCodePoint )'s';
00171 items[2].code[1] = (OnigCodePoint )'S';
00172
00173 items[3].byte_len = 1;
00174 items[3].code_len = 2;
00175 items[3].code[0] = (OnigCodePoint )'S';
00176 items[3].code[1] = (OnigCodePoint )'s';
00177
00178 return 4;
00179 }
00180 else if (*p != 0xd7) {
00181 items[0].byte_len = 1;
00182 items[0].code_len = 1;
00183 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00184 return 1;
00185 }
00186 }
00187 else if (0xe0 <= *p && *p <= 0xef) {
00188 items[0].byte_len = 1;
00189 items[0].code_len = 1;
00190 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00191 return 1;
00192 }
00193 else if (0xf0 <= *p && *p <= 0xfe) {
00194 if (*p != 0xf7) {
00195 items[0].byte_len = 1;
00196 items[0].code_len = 1;
00197 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00198 return 1;
00199 }
00200 }
00201
00202 return 0;
00203 }
00204
00205 static int
00206 mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end ARG_UNUSED,
00207 UChar* lower, OnigEncoding enc ARG_UNUSED)
00208 {
00209 const UChar* p = *pp;
00210
00211 if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
00212 *lower++ = 's';
00213 *lower = 's';
00214 (*pp)++;
00215 return 2;
00216 }
00217
00218 *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
00219 (*pp)++;
00220 return 1;
00221 }
00222
#if 0
/*
 * Disabled code, kept for reference only (compiled out by `#if 0`).
 *
 * Reports whether the byte at *pp has a case-fold counterpart and advances
 * *pp by one byte. Returns TRUE or FALSE. `end` is not used.
 *
 * NOTE(review): the signature has no encoding parameter, unlike the live
 * callbacks in this file; it would need updating before being re-enabled.
 * NOTE(review): 0xdf (when the multi-char flag is clear) and 0xff are still
 * reported TRUE if the table marks them as lower case, although no
 * single-byte counterpart for them appears in CaseFoldMap -- confirm the
 * intended answer before enabling.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
                 const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_1_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));

  /* Must be a bitwise AND: OR-ing the LOWER bit in would make this test
     true for every byte and leave the final return unreachable. */
  if ((v & BIT_CTYPE_LOWER) != 0) {
    /* Lower-case bytes in 0xaa..0xba have no upper-case counterpart. */
    if (*p >= 0xaa && *p <= 0xba)
      return FALSE;
    else
      return TRUE;
  }

  /* Not lower case: ambiguous only when it is upper case. */
  return (v != 0 ? TRUE : FALSE);
}
#endif
00249
00250 static int
00251 is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
00252 {
00253 if (code < 256)
00254 return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
00255 else
00256 return FALSE;
00257 }
00258
/*
 * Encoding descriptor for ISO-8859-1.
 * The initializer is positional, so the order of entries must match the
 * structure that OnigEncodingDefine declares (not visible in this file).
 * The per-entry notes below are inferred from the callback names and should
 * be confirmed against that declaration.
 */
00259 OnigEncodingDefine(iso_8859_1, ISO_8859_1) = {
00260 onigenc_single_byte_mbc_enc_len,
00261 "ISO-8859-1", /* encoding name string */
00262 1, /* presumably the maximum encoded length in bytes -- confirm */
00263 1, /* presumably the minimum encoded length in bytes -- confirm */
00264 onigenc_is_mbc_newline_0x0a,
00265 onigenc_single_byte_mbc_to_code,
00266 onigenc_single_byte_code_to_mbclen,
00267 onigenc_single_byte_code_to_mbc,
00268 mbc_case_fold, /* defined in this file */
00269 apply_all_case_fold, /* defined in this file */
00270 get_case_fold_codes_by_str, /* defined in this file */
00271 onigenc_minimum_property_name_to_ctype,
00272 is_code_ctype, /* defined in this file */
00273 onigenc_not_support_get_ctype_code_range,
00274 onigenc_single_byte_left_adjust_char_head,
00275 onigenc_always_true_is_allowed_reverse_match
00276 };
/* NOTE(review): presumably registers "ISO8859-1" as an alternative name for
   "ISO-8859-1"; ENC_ALIAS is defined elsewhere -- confirm. */
00277 ENC_ALIAS("ISO8859-1", "ISO-8859-1")
00278
00279
00280
00281
00282
00283
00284
00285
/* NOTE(review): presumably declares "Windows-1252" as an encoding that
   reuses the ISO-8859-1 implementation above, with "CP1252" as an
   alternative name for it. ENC_REPLICATE and ENC_ALIAS are defined
   elsewhere -- confirm their exact semantics there. */
00286 ENC_REPLICATE("Windows-1252", "ISO-8859-1")
00287 ENC_ALIAS("CP1252", "Windows-1252")
00288