00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include "ruby/missing.h"
00036 #include <stdlib.h>
00037 #include <string.h>
00038 #if defined _WIN32 || defined __CYGWIN__
00039 #include <windows.h>
00040 #if defined _WIN32 && !defined strncasecmp
00041 #define strncasecmp strnicmp
00042 #endif
00043 #endif
00044 #ifdef HAVE_LANGINFO_H
00045 #include "langinfo.h"
00046 #endif
00047
/* Codeset reported for the standardized "C" and "POSIX" locales. */
#define C_CODESET "US-ASCII"

/* Codeset assumed for Japanese locales that do not name an encoding. */
#if defined _WIN32 || defined __CYGWIN__
#define JA_CODESET "Windows-31J"
#else
#define JA_CODESET "EUC-JP"
#endif

/* True when x is an ASCII decimal digit.  Evaluates x twice. */
#define digit(x) ((x) >= '0' && (x) <= '9')
/* True when string s begins with n, compared case-insensitively. */
#define strstart(s, n) (strncasecmp((s), (n), strlen(n)) == 0)

/*
 * Backing store for the "ISO-8859-N" names assembled at run time
 * ("ISO-8859-" plus up to two digits plus the terminator fits easily).
 */
static char buf[16];

/*
 * Guess the character encoding name from the locale environment.
 *
 * The first non-empty value among the LC_ALL, LC_CTYPE and LANG
 * environment variables (checked in that order) is used as the locale
 * name.
 *
 * Returns:
 *   - C_CODESET when the locale name is exactly "C" or "POSIX";
 *   - an encoding name derived from the text after the first '.' (or
 *     from the whole name when it contains no '.');
 *   - otherwise a default encoding chosen from the language prefix;
 *   - NULL when none of the variables is set to a non-empty value or
 *     when nothing matches.
 *
 * For ISO-8859-N results the returned pointer refers to a static
 * buffer that is overwritten by the next such call; every other result
 * is a string literal.  The caller must not free or modify the result.
 */
const char *
nl_langinfo_codeset(void)
{
    const char *l, *p, *dot;
    int n;

    /* Pick the first locale variable that is set and non-empty. */
    l = getenv("LC_ALL");
    if (!l || !*l) l = getenv("LC_CTYPE");
    if (!l || !*l) l = getenv("LANG");
    if (!l || !*l) return NULL;

    /* Standardized locales. */
    if (strcmp(l, "C") == 0 || strcmp(l, "POSIX") == 0)
        return C_CODESET;

    /*
     * Encoding fragments are matched at the start of the text following
     * the first '.', or at the start of the locale name itself when it
     * has no '.'.  The NULL result of strchr() is tested before any
     * arithmetic: incrementing a null pointer is undefined behaviour.
     */
    dot = strchr(l, '.');
    p = dot ? dot + 1 : l;

    if (strstart(p, "UTF"))
        return "UTF-8";

    /*
     * "8859-N" or "ISO-8859-N": rebuild the canonical name from one or
     * two digits.  NOTE(review): a hyphen-less spelling such as
     * "ISO8859-1" matches neither prefix and is not recognized here --
     * confirm whether that is intended.
     */
    if (strstart(p, "8859-"))
        n = 5;
    else if (strstart(p, "ISO-8859-"))
        n = 9;
    else
        n = 0;
    if (n && digit(p[n])) {
        /* The literal carries two extra NULs so that the digit slots
         * and the terminator are reset on every call. */
        memcpy(buf, "ISO-8859-\0\0", 12);
        buf[9] = p[n];
        if (digit(p[n + 1])) buf[10] = p[n + 1];
        return buf;
    }

    /* Remaining encoding name fragments. */
    if (strstart(p, "KOI8-R")) return "KOI8-R";
    if (strstart(p, "KOI8-U")) return "KOI8-U";
    if (strstart(p, "620")) return "TIS-620";
    if (strstart(p, "2312")) return "GB2312";
    if (strstart(p, "HKSCS")) return "Big5HKSCS";
    if (strstart(p, "BIG5")) return "Big5";
    if (strstart(p, "GBK")) return "GBK";
    if (strstart(p, "18030")) return "GB18030";
    if (strstart(p, "Shift_JIS") || strstart(p, "SJIS")) return "Windows-31J";

    if (strstart(p, "euro")) return "ISO-8859-15";

    /* No encoding fragment matched: fall back on the language prefix. */
    if (strstart(l, "zh_TW")) return "Big5";
    if (strstart(l, "zh_HK")) return "Big5HKSCS";
    if (strstart(l, "zh")) return "GB2312";
    if (strstart(l, "ja")) return JA_CODESET;
    if (strstart(l, "ko")) return "EUC-KR";
    if (strstart(l, "ru")) return "KOI8-R";
    if (strstart(l, "uk")) return "KOI8-U";
    if (strstart(l, "pl") || strstart(l, "hr") ||
        strstart(l, "hu") || strstart(l, "cs") ||
        strstart(l, "sk") || strstart(l, "sl")) return "ISO-8859-2";
    if (strstart(l, "eo") || strstart(l, "mt")) return "ISO-8859-3";
    if (strstart(l, "el")) return "ISO-8859-7";
    if (strstart(l, "he")) return "ISO-8859-8";
    if (strstart(l, "tr")) return "ISO-8859-9";
    if (strstart(l, "th")) return "TIS-620";
    if (strstart(l, "lt")) return "ISO-8859-13";
    if (strstart(l, "cy")) return "ISO-8859-14";
    if (strstart(l, "ro")) return "ISO-8859-2";
    if (strstart(l, "am") || strstart(l, "vi")) return "UTF-8";

    return NULL;
}
00126
00127 #ifdef HAVE_LANGINFO_H
00128 char *nl_langinfo(nl_item item)
00129 {
00130 const char *codeset;
00131 if (item != CODESET)
00132 return NULL;
00133 codeset = nl_langinfo_codeset();
00134 if (!codeset) codeset = C_CODESET;
00135 return (char *)codeset;
00136 }
00137 #endif
00138
00139
00140
00141 #ifdef TEST
00142 #include <stdio.h>
/*
 * Test driver (compiled only when TEST is defined): print the codeset
 * name reported by nl_langinfo(CODESET), followed by a newline.
 */
int
main(void)
{
    const char *codeset = nl_langinfo(CODESET);

    printf("%s\n", codeset);
    return 0;
}
00148 #endif
00149