locale.py 73 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680
  1. """Locale support module.
  2. The module provides low-level access to the C lib's locale APIs and adds high
  3. level number formatting APIs as well as a locale aliasing engine to complement
  4. these.
  5. The aliasing engine includes support for many commonly used locale names and
  6. maps them to values suitable for passing to the C lib's setlocale() function. It
  7. also includes default encodings for all supported locale names.
  8. """
  9. import sys
  10. import encodings
  11. import encodings.aliases
  12. import re
  13. import collections
  14. from builtins import str as _builtin_str
  15. import functools
  16. # Try importing the _locale module.
  17. #
  18. # If this fails, fall back on a basic 'C' locale emulation.
  19. # Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before
  20. # trying the import. So __all__ is also fiddled at the end of the file.
  21. __all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
  22. "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
  23. "str", "atof", "atoi", "format", "format_string", "currency",
  24. "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
  25. "LC_NUMERIC", "LC_ALL", "CHAR_MAX"]
  26. def _strcoll(a,b):
  27. """ strcoll(string,string) -> int.
  28. Compares two strings according to the locale.
  29. """
  30. return (a > b) - (a < b)
  31. def _strxfrm(s):
  32. """ strxfrm(string) -> string.
  33. Returns a string that behaves for cmp locale-aware.
  34. """
  35. return s
  36. try:
  37. from _locale import *
  38. except ImportError:
  39. # Locale emulation
  40. CHAR_MAX = 127
  41. LC_ALL = 6
  42. LC_COLLATE = 3
  43. LC_CTYPE = 0
  44. LC_MESSAGES = 5
  45. LC_MONETARY = 4
  46. LC_NUMERIC = 1
  47. LC_TIME = 2
  48. Error = ValueError
  49. def localeconv():
  50. """ localeconv() -> dict.
  51. Returns numeric and monetary locale-specific parameters.
  52. """
  53. # 'C' locale default values
  54. return {'grouping': [127],
  55. 'currency_symbol': '',
  56. 'n_sign_posn': 127,
  57. 'p_cs_precedes': 127,
  58. 'n_cs_precedes': 127,
  59. 'mon_grouping': [],
  60. 'n_sep_by_space': 127,
  61. 'decimal_point': '.',
  62. 'negative_sign': '',
  63. 'positive_sign': '',
  64. 'p_sep_by_space': 127,
  65. 'int_curr_symbol': '',
  66. 'p_sign_posn': 127,
  67. 'thousands_sep': '',
  68. 'mon_thousands_sep': '',
  69. 'frac_digits': 127,
  70. 'mon_decimal_point': '',
  71. 'int_frac_digits': 127}
  72. def setlocale(category, value=None):
  73. """ setlocale(integer,string=None) -> string.
  74. Activates/queries locale processing.
  75. """
  76. if value not in (None, '', 'C'):
  77. raise Error('_locale emulation only supports "C" locale')
  78. return 'C'
  79. # These may or may not exist in _locale, so be sure to set them.
  80. if 'strxfrm' not in globals():
  81. strxfrm = _strxfrm
  82. if 'strcoll' not in globals():
  83. strcoll = _strcoll
  84. _localeconv = localeconv
  85. # With this dict, you can override some items of localeconv's return value.
  86. # This is useful for testing purposes.
  87. _override_localeconv = {}
  88. @functools.wraps(_localeconv)
  89. def localeconv():
  90. d = _localeconv()
  91. if _override_localeconv:
  92. d.update(_override_localeconv)
  93. return d
  94. ### Number formatting APIs
  95. # Author: Martin von Loewis
  96. # improved by Georg Brandl
  97. # Iterate over grouping intervals
  98. def _grouping_intervals(grouping):
  99. last_interval = None
  100. for interval in grouping:
  101. # if grouping is -1, we are done
  102. if interval == CHAR_MAX:
  103. return
  104. # 0: re-use last group ad infinitum
  105. if interval == 0:
  106. if last_interval is None:
  107. raise ValueError("invalid grouping")
  108. while True:
  109. yield last_interval
  110. yield interval
  111. last_interval = interval
  112. #perform the grouping from right to left
  113. def _group(s, monetary=False):
  114. conv = localeconv()
  115. thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
  116. grouping = conv[monetary and 'mon_grouping' or 'grouping']
  117. if not grouping:
  118. return (s, 0)
  119. if s[-1] == ' ':
  120. stripped = s.rstrip()
  121. right_spaces = s[len(stripped):]
  122. s = stripped
  123. else:
  124. right_spaces = ''
  125. left_spaces = ''
  126. groups = []
  127. for interval in _grouping_intervals(grouping):
  128. if not s or s[-1] not in "0123456789":
  129. # only non-digit characters remain (sign, spaces)
  130. left_spaces = s
  131. s = ''
  132. break
  133. groups.append(s[-interval:])
  134. s = s[:-interval]
  135. if s:
  136. groups.append(s)
  137. groups.reverse()
  138. return (
  139. left_spaces + thousands_sep.join(groups) + right_spaces,
  140. len(thousands_sep) * (len(groups) - 1)
  141. )
  142. # Strip a given amount of excess padding from the given string
  143. def _strip_padding(s, amount):
  144. lpos = 0
  145. while amount and s[lpos] == ' ':
  146. lpos += 1
  147. amount -= 1
  148. rpos = len(s) - 1
  149. while amount and s[rpos] == ' ':
  150. rpos -= 1
  151. amount -= 1
  152. return s[lpos:rpos+1]
  153. _percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
  154. r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
  155. def format(percent, value, grouping=False, monetary=False, *additional):
  156. """Returns the locale-aware substitution of a %? specifier
  157. (percent).
  158. additional is for format strings which contain one or more
  159. '*' modifiers."""
  160. # this is only for one-percent-specifier strings and this should be checked
  161. match = _percent_re.match(percent)
  162. if not match or len(match.group())!= len(percent):
  163. raise ValueError(("format() must be given exactly one %%char "
  164. "format specifier, %s not valid") % repr(percent))
  165. return _format(percent, value, grouping, monetary, *additional)
  166. def _format(percent, value, grouping=False, monetary=False, *additional):
  167. if additional:
  168. formatted = percent % ((value,) + additional)
  169. else:
  170. formatted = percent % value
  171. # floats and decimal ints need special action!
  172. if percent[-1] in 'eEfFgG':
  173. seps = 0
  174. parts = formatted.split('.')
  175. if grouping:
  176. parts[0], seps = _group(parts[0], monetary=monetary)
  177. decimal_point = localeconv()[monetary and 'mon_decimal_point'
  178. or 'decimal_point']
  179. formatted = decimal_point.join(parts)
  180. if seps:
  181. formatted = _strip_padding(formatted, seps)
  182. elif percent[-1] in 'diu':
  183. seps = 0
  184. if grouping:
  185. formatted, seps = _group(formatted, monetary=monetary)
  186. if seps:
  187. formatted = _strip_padding(formatted, seps)
  188. return formatted
  189. def format_string(f, val, grouping=False):
  190. """Formats a string in the same way that the % formatting would use,
  191. but takes the current locale into account.
  192. Grouping is applied if the third parameter is true."""
  193. percents = list(_percent_re.finditer(f))
  194. new_f = _percent_re.sub('%s', f)
  195. if isinstance(val, collections.Mapping):
  196. new_val = []
  197. for perc in percents:
  198. if perc.group()[-1]=='%':
  199. new_val.append('%')
  200. else:
  201. new_val.append(format(perc.group(), val, grouping))
  202. else:
  203. if not isinstance(val, tuple):
  204. val = (val,)
  205. new_val = []
  206. i = 0
  207. for perc in percents:
  208. if perc.group()[-1]=='%':
  209. new_val.append('%')
  210. else:
  211. starcount = perc.group('modifiers').count('*')
  212. new_val.append(_format(perc.group(),
  213. val[i],
  214. grouping,
  215. False,
  216. *val[i+1:i+1+starcount]))
  217. i += (1 + starcount)
  218. val = tuple(new_val)
  219. return new_f % val
  220. def currency(val, symbol=True, grouping=False, international=False):
  221. """Formats val according to the currency settings
  222. in the current locale."""
  223. conv = localeconv()
  224. # check for illegal values
  225. digits = conv[international and 'int_frac_digits' or 'frac_digits']
  226. if digits == 127:
  227. raise ValueError("Currency formatting is not possible using "
  228. "the 'C' locale.")
  229. s = format('%%.%if' % digits, abs(val), grouping, monetary=True)
  230. # '<' and '>' are markers if the sign must be inserted between symbol and value
  231. s = '<' + s + '>'
  232. if symbol:
  233. smb = conv[international and 'int_curr_symbol' or 'currency_symbol']
  234. precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes']
  235. separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space']
  236. if precedes:
  237. s = smb + (separated and ' ' or '') + s
  238. else:
  239. s = s + (separated and ' ' or '') + smb
  240. sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn']
  241. sign = conv[val<0 and 'negative_sign' or 'positive_sign']
  242. if sign_pos == 0:
  243. s = '(' + s + ')'
  244. elif sign_pos == 1:
  245. s = sign + s
  246. elif sign_pos == 2:
  247. s = s + sign
  248. elif sign_pos == 3:
  249. s = s.replace('<', sign)
  250. elif sign_pos == 4:
  251. s = s.replace('>', sign)
  252. else:
  253. # the default if nothing specified;
  254. # this should be the most fitting sign position
  255. s = sign + s
  256. return s.replace('<', '').replace('>', '')
  257. def str(val):
  258. """Convert float to string, taking the locale into account."""
  259. return format("%.12g", val)
  260. def delocalize(string):
  261. "Parses a string as a normalized number according to the locale settings."
  262. #First, get rid of the grouping
  263. ts = localeconv()['thousands_sep']
  264. if ts:
  265. string = string.replace(ts, '')
  266. #next, replace the decimal point with a dot
  267. dd = localeconv()['decimal_point']
  268. if dd:
  269. string = string.replace(dd, '.')
  270. return string
  271. def atof(string, func=float):
  272. "Parses a string as a float according to the locale settings."
  273. return func(delocalize(string))
  274. def atoi(string):
  275. "Converts a string to an integer according to the locale settings."
  276. return int(delocalize(string))
  277. def _test():
  278. setlocale(LC_ALL, "")
  279. #do grouping
  280. s1 = format("%d", 123456789,1)
  281. print(s1, "is", atoi(s1))
  282. #standard formatting
  283. s1 = str(3.14)
  284. print(s1, "is", atof(s1))
  285. ### Locale name aliasing engine
  286. # Author: Marc-Andre Lemburg, mal@lemburg.com
  287. # Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
  288. # store away the low-level version of setlocale (it's
  289. # overridden below)
  290. _setlocale = setlocale
  291. def _replace_encoding(code, encoding):
  292. if '.' in code:
  293. langname = code[:code.index('.')]
  294. else:
  295. langname = code
  296. # Convert the encoding to a C lib compatible encoding string
  297. norm_encoding = encodings.normalize_encoding(encoding)
  298. #print('norm encoding: %r' % norm_encoding)
  299. norm_encoding = encodings.aliases.aliases.get(norm_encoding.lower(),
  300. norm_encoding)
  301. #print('aliased encoding: %r' % norm_encoding)
  302. encoding = norm_encoding
  303. norm_encoding = norm_encoding.lower()
  304. if norm_encoding in locale_encoding_alias:
  305. encoding = locale_encoding_alias[norm_encoding]
  306. else:
  307. norm_encoding = norm_encoding.replace('_', '')
  308. norm_encoding = norm_encoding.replace('-', '')
  309. if norm_encoding in locale_encoding_alias:
  310. encoding = locale_encoding_alias[norm_encoding]
  311. #print('found encoding %r' % encoding)
  312. return langname + '.' + encoding
  313. def _append_modifier(code, modifier):
  314. if modifier == 'euro':
  315. if '.' not in code:
  316. return code + '.ISO8859-15'
  317. _, _, encoding = code.partition('.')
  318. if encoding in ('ISO8859-15', 'UTF-8'):
  319. return code
  320. if encoding == 'ISO8859-1':
  321. return _replace_encoding(code, 'ISO8859-15')
  322. return code + '@' + modifier
  323. def normalize(localename):
  324. """ Returns a normalized locale code for the given locale
  325. name.
  326. The returned locale code is formatted for use with
  327. setlocale().
  328. If normalization fails, the original name is returned
  329. unchanged.
  330. If the given encoding is not known, the function defaults to
  331. the default encoding for the locale code just like setlocale()
  332. does.
  333. """
  334. # Normalize the locale name and extract the encoding and modifier
  335. code = localename.lower()
  336. if ':' in code:
  337. # ':' is sometimes used as encoding delimiter.
  338. code = code.replace(':', '.')
  339. if '@' in code:
  340. code, modifier = code.split('@', 1)
  341. else:
  342. modifier = ''
  343. if '.' in code:
  344. langname, encoding = code.split('.')[:2]
  345. else:
  346. langname = code
  347. encoding = ''
  348. # First lookup: fullname (possibly with encoding and modifier)
  349. lang_enc = langname
  350. if encoding:
  351. norm_encoding = encoding.replace('-', '')
  352. norm_encoding = norm_encoding.replace('_', '')
  353. lang_enc += '.' + norm_encoding
  354. lookup_name = lang_enc
  355. if modifier:
  356. lookup_name += '@' + modifier
  357. code = locale_alias.get(lookup_name, None)
  358. if code is not None:
  359. return code
  360. #print('first lookup failed')
  361. if modifier:
  362. # Second try: fullname without modifier (possibly with encoding)
  363. code = locale_alias.get(lang_enc, None)
  364. if code is not None:
  365. #print('lookup without modifier succeeded')
  366. if '@' not in code:
  367. return _append_modifier(code, modifier)
  368. if code.split('@', 1)[1].lower() == modifier:
  369. return code
  370. #print('second lookup failed')
  371. if encoding:
  372. # Third try: langname (without encoding, possibly with modifier)
  373. lookup_name = langname
  374. if modifier:
  375. lookup_name += '@' + modifier
  376. code = locale_alias.get(lookup_name, None)
  377. if code is not None:
  378. #print('lookup without encoding succeeded')
  379. if '@' not in code:
  380. return _replace_encoding(code, encoding)
  381. code, modifier = code.split('@', 1)
  382. return _replace_encoding(code, encoding) + '@' + modifier
  383. if modifier:
  384. # Fourth try: langname (without encoding and modifier)
  385. code = locale_alias.get(langname, None)
  386. if code is not None:
  387. #print('lookup without modifier and encoding succeeded')
  388. if '@' not in code:
  389. code = _replace_encoding(code, encoding)
  390. return _append_modifier(code, modifier)
  391. code, defmod = code.split('@', 1)
  392. if defmod.lower() == modifier:
  393. return _replace_encoding(code, encoding) + '@' + defmod
  394. return localename
  395. def _parse_localename(localename):
  396. """ Parses the locale code for localename and returns the
  397. result as tuple (language code, encoding).
  398. The localename is normalized and passed through the locale
  399. alias engine. A ValueError is raised in case the locale name
  400. cannot be parsed.
  401. The language code corresponds to RFC 1766. code and encoding
  402. can be None in case the values cannot be determined or are
  403. unknown to this implementation.
  404. """
  405. code = normalize(localename)
  406. if '@' in code:
  407. # Deal with locale modifiers
  408. code, modifier = code.split('@', 1)
  409. if modifier == 'euro' and '.' not in code:
  410. # Assume Latin-9 for @euro locales. This is bogus,
  411. # since some systems may use other encodings for these
  412. # locales. Also, we ignore other modifiers.
  413. return code, 'iso-8859-15'
  414. if '.' in code:
  415. return tuple(code.split('.')[:2])
  416. elif code == 'C':
  417. return None, None
  418. raise ValueError('unknown locale: %s' % localename)
  419. def _build_localename(localetuple):
  420. """ Builds a locale code from the given tuple (language code,
  421. encoding).
  422. No aliasing or normalizing takes place.
  423. """
  424. try:
  425. language, encoding = localetuple
  426. if language is None:
  427. language = 'C'
  428. if encoding is None:
  429. return language
  430. else:
  431. return language + '.' + encoding
  432. except (TypeError, ValueError):
  433. raise TypeError('Locale must be None, a string, or an iterable of two strings -- language code, encoding.')
  434. def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
  435. """ Tries to determine the default locale settings and returns
  436. them as tuple (language code, encoding).
  437. According to POSIX, a program which has not called
  438. setlocale(LC_ALL, "") runs using the portable 'C' locale.
  439. Calling setlocale(LC_ALL, "") lets it use the default locale as
  440. defined by the LANG variable. Since we don't want to interfere
  441. with the current locale setting we thus emulate the behavior
  442. in the way described above.
  443. To maintain compatibility with other platforms, not only the
  444. LANG variable is tested, but a list of variables given as
  445. envvars parameter. The first found to be defined will be
  446. used. envvars defaults to the search path used in GNU gettext;
  447. it must always contain the variable name 'LANG'.
  448. Except for the code 'C', the language code corresponds to RFC
  449. 1766. code and encoding can be None in case the values cannot
  450. be determined.
  451. """
  452. try:
  453. # check if it's supported by the _locale module
  454. import _locale
  455. code, encoding = _locale._getdefaultlocale()
  456. except (ImportError, AttributeError):
  457. pass
  458. else:
  459. # make sure the code/encoding values are valid
  460. if sys.platform == "win32" and code and code[:2] == "0x":
  461. # map windows language identifier to language name
  462. code = windows_locale.get(int(code, 0))
  463. # ...add other platform-specific processing here, if
  464. # necessary...
  465. return code, encoding
  466. # fall back on POSIX behaviour
  467. import os
  468. lookup = os.environ.get
  469. for variable in envvars:
  470. localename = lookup(variable,None)
  471. if localename:
  472. if variable == 'LANGUAGE':
  473. localename = localename.split(':')[0]
  474. break
  475. else:
  476. localename = 'C'
  477. return _parse_localename(localename)
  478. def getlocale(category=LC_CTYPE):
  479. """ Returns the current setting for the given locale category as
  480. tuple (language code, encoding).
  481. category may be one of the LC_* value except LC_ALL. It
  482. defaults to LC_CTYPE.
  483. Except for the code 'C', the language code corresponds to RFC
  484. 1766. code and encoding can be None in case the values cannot
  485. be determined.
  486. """
  487. localename = _setlocale(category)
  488. if category == LC_ALL and ';' in localename:
  489. raise TypeError('category LC_ALL is not supported')
  490. return _parse_localename(localename)
  491. def setlocale(category, locale=None):
  492. """ Set the locale for the given category. The locale can be
  493. a string, an iterable of two strings (language code and encoding),
  494. or None.
  495. Iterables are converted to strings using the locale aliasing
  496. engine. Locale strings are passed directly to the C lib.
  497. category may be given as one of the LC_* values.
  498. """
  499. if locale and not isinstance(locale, _builtin_str):
  500. # convert to string
  501. locale = normalize(_build_localename(locale))
  502. return _setlocale(category, locale)
  503. def resetlocale(category=LC_ALL):
  504. """ Sets the locale for category to the default setting.
  505. The default setting is determined by calling
  506. getdefaultlocale(). category defaults to LC_ALL.
  507. """
  508. _setlocale(category, _build_localename(getdefaultlocale()))
  509. if sys.platform.startswith("win"):
  510. # On Win32, this will return the ANSI code page
  511. def getpreferredencoding(do_setlocale = True):
  512. """Return the charset that the user is likely using."""
  513. import _bootlocale
  514. return _bootlocale.getpreferredencoding(False)
  515. else:
  516. # On Unix, if CODESET is available, use that.
  517. try:
  518. CODESET
  519. except NameError:
  520. # Fall back to parsing environment variables :-(
  521. def getpreferredencoding(do_setlocale = True):
  522. """Return the charset that the user is likely using,
  523. by looking at environment variables."""
  524. res = getdefaultlocale()[1]
  525. if res is None:
  526. # LANG not set, default conservatively to ASCII
  527. res = 'ascii'
  528. return res
  529. else:
  530. def getpreferredencoding(do_setlocale = True):
  531. """Return the charset that the user is likely using,
  532. according to the system configuration."""
  533. import _bootlocale
  534. if do_setlocale:
  535. oldloc = setlocale(LC_CTYPE)
  536. try:
  537. setlocale(LC_CTYPE, "")
  538. except Error:
  539. pass
  540. result = _bootlocale.getpreferredencoding(False)
  541. if do_setlocale:
  542. setlocale(LC_CTYPE, oldloc)
  543. return result
  544. ### Database
  545. #
  546. # The following data was extracted from the locale.alias file which
  547. # comes with X11 and then hand edited removing the explicit encoding
  548. # definitions and adding some more aliases. The file is usually
  549. # available as /usr/lib/X11/locale/locale.alias.
  550. #
  551. #
# The locale_encoding_alias table maps lowercase encoding alias names
  553. # to C locale encoding names (case-sensitive). Note that normalize()
  554. # first looks up the encoding in the encodings.aliases dictionary and
  555. # then applies this mapping to find the correct C lib name for the
  556. # encoding.
  557. #
  558. locale_encoding_alias = {
  559. # Mappings for non-standard encoding names used in locale names
  560. '437': 'C',
  561. 'c': 'C',
  562. 'en': 'ISO8859-1',
  563. 'jis': 'JIS7',
  564. 'jis7': 'JIS7',
  565. 'ajec': 'eucJP',
  566. 'koi8c': 'KOI8-C',
  567. 'microsoftcp1251': 'CP1251',
  568. 'microsoftcp1255': 'CP1255',
  569. 'microsoftcp1256': 'CP1256',
  570. '88591': 'ISO8859-1',
  571. '88592': 'ISO8859-2',
  572. '88595': 'ISO8859-5',
  573. '885915': 'ISO8859-15',
  574. # Mappings from Python codec names to C lib encoding names
  575. 'ascii': 'ISO8859-1',
  576. 'latin_1': 'ISO8859-1',
  577. 'iso8859_1': 'ISO8859-1',
  578. 'iso8859_10': 'ISO8859-10',
  579. 'iso8859_11': 'ISO8859-11',
  580. 'iso8859_13': 'ISO8859-13',
  581. 'iso8859_14': 'ISO8859-14',
  582. 'iso8859_15': 'ISO8859-15',
  583. 'iso8859_16': 'ISO8859-16',
  584. 'iso8859_2': 'ISO8859-2',
  585. 'iso8859_3': 'ISO8859-3',
  586. 'iso8859_4': 'ISO8859-4',
  587. 'iso8859_5': 'ISO8859-5',
  588. 'iso8859_6': 'ISO8859-6',
  589. 'iso8859_7': 'ISO8859-7',
  590. 'iso8859_8': 'ISO8859-8',
  591. 'iso8859_9': 'ISO8859-9',
  592. 'iso2022_jp': 'JIS7',
  593. 'shift_jis': 'SJIS',
  594. 'tactis': 'TACTIS',
  595. 'euc_jp': 'eucJP',
  596. 'euc_kr': 'eucKR',
  597. 'utf_8': 'UTF-8',
  598. 'koi8_r': 'KOI8-R',
  599. 'koi8_t': 'KOI8-T',
  600. 'koi8_u': 'KOI8-U',
  601. 'kz1048': 'RK1048',
  602. 'cp1251': 'CP1251',
  603. 'cp1255': 'CP1255',
  604. 'cp1256': 'CP1256',
  605. # XXX This list is still incomplete. If you know more
  606. # mappings, please file a bug report. Thanks.
  607. }
  608. for k, v in sorted(locale_encoding_alias.items()):
  609. k = k.replace('_', '')
  610. locale_encoding_alias.setdefault(k, v)
  611. #
  612. # The locale_alias table maps lowercase alias names to C locale names
  613. # (case-sensitive). Encodings are always separated from the locale
  614. # name using a dot ('.'); they should only be given in case the
  615. # language name is needed to interpret the given encoding alias
  616. # correctly (CJK codes often have this need).
  617. #
# Note that the normalize() function, which uses this table,
  619. # removes '_' and '-' characters from the encoding part of the
  620. # locale name before doing the lookup. This saves a lot of
  621. # space in the table.
  622. #
  623. # MAL 2004-12-10:
  624. # Updated alias mapping to most recent locale.alias file
  625. # from X.org distribution using makelocalealias.py.
  626. #
  627. # These are the differences compared to the old mapping (Python 2.4
  628. # and older):
  629. #
  630. # updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
  631. # updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
  632. # updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
  633. # updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
  634. # updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
  635. # updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
  636. # updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
  637. # updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
  638. # updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
  639. # updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
  640. # updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
  641. # updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  642. # updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  643. # updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
  644. # updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
  645. # updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
  646. # updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
  647. # updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
  648. # updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
  649. # updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
  650. # updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
  651. # updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
  652. #
  653. # MAL 2008-05-30:
  654. # Updated alias mapping to most recent locale.alias file
  655. # from X.org distribution using makelocalealias.py.
  656. #
  657. # These are the differences compared to the old mapping (Python 2.5
  658. # and older):
  659. #
  660. # updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
  661. # updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  662. # updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  663. # updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
  664. # updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  665. # updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  666. # updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  667. # updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  668. # updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  669. # updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  670. # updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
  671. # updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  672. # updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
  673. # updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  674. # updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  675. # updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  676. # updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
  677. # updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
  678. # updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  679. #
  680. # AP 2010-04-12:
  681. # Updated alias mapping to most recent locale.alias file
  682. # from X.org distribution using makelocalealias.py.
  683. #
  684. # These are the differences compared to the old mapping (Python 2.6.5
  685. # and older):
  686. #
  687. # updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
  688. # updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
  689. # updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  690. # updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  691. # updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  692. # updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
  693. # updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
  694. # updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  695. # updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
  696. # updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  697. # updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
  698. # updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
  699. # updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
  700. #
  701. # SS 2013-12-20:
  702. # Updated alias mapping to most recent locale.alias file
  703. # from X.org distribution using makelocalealias.py.
  704. #
  705. # These are the differences compared to the old mapping (Python 3.3.3
  706. # and older):
  707. #
  708. # updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
  709. # updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
  710. # updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
  711. # updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
  712. # updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  713. # updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  714. # updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
  715. # updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
  716. # updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
  717. # updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
  718. # updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
  719. #
  720. # SS 2014-10-01:
  721. # Updated alias mapping with glibc 2.19 supported locales.
  722. locale_alias = {
  723. 'a3': 'az_AZ.KOI8-C',
  724. 'a3_az': 'az_AZ.KOI8-C',
  725. 'a3_az.koic': 'az_AZ.KOI8-C',
  726. 'aa_dj': 'aa_DJ.ISO8859-1',
  727. 'aa_er': 'aa_ER.UTF-8',
  728. 'aa_et': 'aa_ET.UTF-8',
  729. 'af': 'af_ZA.ISO8859-1',
  730. 'af_za': 'af_ZA.ISO8859-1',
  731. 'am': 'am_ET.UTF-8',
  732. 'am_et': 'am_ET.UTF-8',
  733. 'american': 'en_US.ISO8859-1',
  734. 'an_es': 'an_ES.ISO8859-15',
  735. 'ar': 'ar_AA.ISO8859-6',
  736. 'ar_aa': 'ar_AA.ISO8859-6',
  737. 'ar_ae': 'ar_AE.ISO8859-6',
  738. 'ar_bh': 'ar_BH.ISO8859-6',
  739. 'ar_dz': 'ar_DZ.ISO8859-6',
  740. 'ar_eg': 'ar_EG.ISO8859-6',
  741. 'ar_in': 'ar_IN.UTF-8',
  742. 'ar_iq': 'ar_IQ.ISO8859-6',
  743. 'ar_jo': 'ar_JO.ISO8859-6',
  744. 'ar_kw': 'ar_KW.ISO8859-6',
  745. 'ar_lb': 'ar_LB.ISO8859-6',
  746. 'ar_ly': 'ar_LY.ISO8859-6',
  747. 'ar_ma': 'ar_MA.ISO8859-6',
  748. 'ar_om': 'ar_OM.ISO8859-6',
  749. 'ar_qa': 'ar_QA.ISO8859-6',
  750. 'ar_sa': 'ar_SA.ISO8859-6',
  751. 'ar_sd': 'ar_SD.ISO8859-6',
  752. 'ar_sy': 'ar_SY.ISO8859-6',
  753. 'ar_tn': 'ar_TN.ISO8859-6',
  754. 'ar_ye': 'ar_YE.ISO8859-6',
  755. 'arabic': 'ar_AA.ISO8859-6',
  756. 'as': 'as_IN.UTF-8',
  757. 'as_in': 'as_IN.UTF-8',
  758. 'ast_es': 'ast_ES.ISO8859-15',
  759. 'ayc_pe': 'ayc_PE.UTF-8',
  760. 'az': 'az_AZ.ISO8859-9E',
  761. 'az_az': 'az_AZ.ISO8859-9E',
  762. 'az_az.iso88599e': 'az_AZ.ISO8859-9E',
  763. 'be': 'be_BY.CP1251',
  764. 'be@latin': 'be_BY.UTF-8@latin',
  765. 'be_bg.utf8': 'bg_BG.UTF-8',
  766. 'be_by': 'be_BY.CP1251',
  767. 'be_by@latin': 'be_BY.UTF-8@latin',
  768. 'bem_zm': 'bem_ZM.UTF-8',
  769. 'ber_dz': 'ber_DZ.UTF-8',
  770. 'ber_ma': 'ber_MA.UTF-8',
  771. 'bg': 'bg_BG.CP1251',
  772. 'bg_bg': 'bg_BG.CP1251',
  773. 'bho_in': 'bho_IN.UTF-8',
  774. 'bn_bd': 'bn_BD.UTF-8',
  775. 'bn_in': 'bn_IN.UTF-8',
  776. 'bo_cn': 'bo_CN.UTF-8',
  777. 'bo_in': 'bo_IN.UTF-8',
  778. 'bokmal': 'nb_NO.ISO8859-1',
  779. 'bokm\xe5l': 'nb_NO.ISO8859-1',
  780. 'br': 'br_FR.ISO8859-1',
  781. 'br_fr': 'br_FR.ISO8859-1',
  782. 'brx_in': 'brx_IN.UTF-8',
  783. 'bs': 'bs_BA.ISO8859-2',
  784. 'bs_ba': 'bs_BA.ISO8859-2',
  785. 'bulgarian': 'bg_BG.CP1251',
  786. 'byn_er': 'byn_ER.UTF-8',
  787. 'c': 'C',
  788. 'c-french': 'fr_CA.ISO8859-1',
  789. 'c.ascii': 'C',
  790. 'c.en': 'C',
  791. 'c.iso88591': 'en_US.ISO8859-1',
  792. 'c.utf8': 'en_US.UTF-8',
  793. 'c_c': 'C',
  794. 'c_c.c': 'C',
  795. 'ca': 'ca_ES.ISO8859-1',
  796. 'ca_ad': 'ca_AD.ISO8859-1',
  797. 'ca_es': 'ca_ES.ISO8859-1',
  798. 'ca_es@valencia': 'ca_ES.ISO8859-15@valencia',
  799. 'ca_fr': 'ca_FR.ISO8859-1',
  800. 'ca_it': 'ca_IT.ISO8859-1',
  801. 'catalan': 'ca_ES.ISO8859-1',
  802. 'cextend': 'en_US.ISO8859-1',
  803. 'chinese-s': 'zh_CN.eucCN',
  804. 'chinese-t': 'zh_TW.eucTW',
  805. 'crh_ua': 'crh_UA.UTF-8',
  806. 'croatian': 'hr_HR.ISO8859-2',
  807. 'cs': 'cs_CZ.ISO8859-2',
  808. 'cs_cs': 'cs_CZ.ISO8859-2',
  809. 'cs_cz': 'cs_CZ.ISO8859-2',
  810. 'csb_pl': 'csb_PL.UTF-8',
  811. 'cv_ru': 'cv_RU.UTF-8',
  812. 'cy': 'cy_GB.ISO8859-1',
  813. 'cy_gb': 'cy_GB.ISO8859-1',
  814. 'cz': 'cs_CZ.ISO8859-2',
  815. 'cz_cz': 'cs_CZ.ISO8859-2',
  816. 'czech': 'cs_CZ.ISO8859-2',
  817. 'da': 'da_DK.ISO8859-1',
  818. 'da_dk': 'da_DK.ISO8859-1',
  819. 'danish': 'da_DK.ISO8859-1',
  820. 'dansk': 'da_DK.ISO8859-1',
  821. 'de': 'de_DE.ISO8859-1',
  822. 'de_at': 'de_AT.ISO8859-1',
  823. 'de_be': 'de_BE.ISO8859-1',
  824. 'de_ch': 'de_CH.ISO8859-1',
  825. 'de_de': 'de_DE.ISO8859-1',
  826. 'de_li.utf8': 'de_LI.UTF-8',
  827. 'de_lu': 'de_LU.ISO8859-1',
  828. 'deutsch': 'de_DE.ISO8859-1',
  829. 'doi_in': 'doi_IN.UTF-8',
  830. 'dutch': 'nl_NL.ISO8859-1',
  831. 'dutch.iso88591': 'nl_BE.ISO8859-1',
  832. 'dv_mv': 'dv_MV.UTF-8',
  833. 'dz_bt': 'dz_BT.UTF-8',
  834. 'ee': 'ee_EE.ISO8859-4',
  835. 'ee_ee': 'ee_EE.ISO8859-4',
  836. 'eesti': 'et_EE.ISO8859-1',
  837. 'el': 'el_GR.ISO8859-7',
  838. 'el_cy': 'el_CY.ISO8859-7',
  839. 'el_gr': 'el_GR.ISO8859-7',
  840. 'el_gr@euro': 'el_GR.ISO8859-15',
  841. 'en': 'en_US.ISO8859-1',
  842. 'en_ag': 'en_AG.UTF-8',
  843. 'en_au': 'en_AU.ISO8859-1',
  844. 'en_be': 'en_BE.ISO8859-1',
  845. 'en_bw': 'en_BW.ISO8859-1',
  846. 'en_ca': 'en_CA.ISO8859-1',
  847. 'en_dk': 'en_DK.ISO8859-1',
  848. 'en_dl.utf8': 'en_DL.UTF-8',
  849. 'en_gb': 'en_GB.ISO8859-1',
  850. 'en_hk': 'en_HK.ISO8859-1',
  851. 'en_ie': 'en_IE.ISO8859-1',
  852. 'en_in': 'en_IN.ISO8859-1',
  853. 'en_ng': 'en_NG.UTF-8',
  854. 'en_nz': 'en_NZ.ISO8859-1',
  855. 'en_ph': 'en_PH.ISO8859-1',
  856. 'en_sg': 'en_SG.ISO8859-1',
  857. 'en_uk': 'en_GB.ISO8859-1',
  858. 'en_us': 'en_US.ISO8859-1',
  859. 'en_us@euro@euro': 'en_US.ISO8859-15',
  860. 'en_za': 'en_ZA.ISO8859-1',
  861. 'en_zm': 'en_ZM.UTF-8',
  862. 'en_zw': 'en_ZW.ISO8859-1',
  863. 'en_zw.utf8': 'en_ZS.UTF-8',
  864. 'eng_gb': 'en_GB.ISO8859-1',
  865. 'english': 'en_EN.ISO8859-1',
  866. 'english_uk': 'en_GB.ISO8859-1',
  867. 'english_united-states': 'en_US.ISO8859-1',
  868. 'english_united-states.437': 'C',
  869. 'english_us': 'en_US.ISO8859-1',
  870. 'eo': 'eo_XX.ISO8859-3',
  871. 'eo.utf8': 'eo.UTF-8',
  872. 'eo_eo': 'eo_EO.ISO8859-3',
  873. 'eo_us.utf8': 'eo_US.UTF-8',
  874. 'eo_xx': 'eo_XX.ISO8859-3',
  875. 'es': 'es_ES.ISO8859-1',
  876. 'es_ar': 'es_AR.ISO8859-1',
  877. 'es_bo': 'es_BO.ISO8859-1',
  878. 'es_cl': 'es_CL.ISO8859-1',
  879. 'es_co': 'es_CO.ISO8859-1',
  880. 'es_cr': 'es_CR.ISO8859-1',
  881. 'es_cu': 'es_CU.UTF-8',
  882. 'es_do': 'es_DO.ISO8859-1',
  883. 'es_ec': 'es_EC.ISO8859-1',
  884. 'es_es': 'es_ES.ISO8859-1',
  885. 'es_gt': 'es_GT.ISO8859-1',
  886. 'es_hn': 'es_HN.ISO8859-1',
  887. 'es_mx': 'es_MX.ISO8859-1',
  888. 'es_ni': 'es_NI.ISO8859-1',
  889. 'es_pa': 'es_PA.ISO8859-1',
  890. 'es_pe': 'es_PE.ISO8859-1',
  891. 'es_pr': 'es_PR.ISO8859-1',
  892. 'es_py': 'es_PY.ISO8859-1',
  893. 'es_sv': 'es_SV.ISO8859-1',
  894. 'es_us': 'es_US.ISO8859-1',
  895. 'es_uy': 'es_UY.ISO8859-1',
  896. 'es_ve': 'es_VE.ISO8859-1',
  897. 'estonian': 'et_EE.ISO8859-1',
  898. 'et': 'et_EE.ISO8859-15',
  899. 'et_ee': 'et_EE.ISO8859-15',
  900. 'eu': 'eu_ES.ISO8859-1',
  901. 'eu_es': 'eu_ES.ISO8859-1',
  902. 'eu_fr': 'eu_FR.ISO8859-1',
  903. 'fa': 'fa_IR.UTF-8',
  904. 'fa_ir': 'fa_IR.UTF-8',
  905. 'fa_ir.isiri3342': 'fa_IR.ISIRI-3342',
  906. 'ff_sn': 'ff_SN.UTF-8',
  907. 'fi': 'fi_FI.ISO8859-15',
  908. 'fi_fi': 'fi_FI.ISO8859-15',
  909. 'fil_ph': 'fil_PH.UTF-8',
  910. 'finnish': 'fi_FI.ISO8859-1',
  911. 'fo': 'fo_FO.ISO8859-1',
  912. 'fo_fo': 'fo_FO.ISO8859-1',
  913. 'fr': 'fr_FR.ISO8859-1',
  914. 'fr_be': 'fr_BE.ISO8859-1',
  915. 'fr_ca': 'fr_CA.ISO8859-1',
  916. 'fr_ch': 'fr_CH.ISO8859-1',
  917. 'fr_fr': 'fr_FR.ISO8859-1',
  918. 'fr_lu': 'fr_LU.ISO8859-1',
  919. 'fran\xe7ais': 'fr_FR.ISO8859-1',
  920. 'fre_fr': 'fr_FR.ISO8859-1',
  921. 'french': 'fr_FR.ISO8859-1',
  922. 'french.iso88591': 'fr_CH.ISO8859-1',
  923. 'french_france': 'fr_FR.ISO8859-1',
  924. 'fur_it': 'fur_IT.UTF-8',
  925. 'fy_de': 'fy_DE.UTF-8',
  926. 'fy_nl': 'fy_NL.UTF-8',
  927. 'ga': 'ga_IE.ISO8859-1',
  928. 'ga_ie': 'ga_IE.ISO8859-1',
  929. 'galego': 'gl_ES.ISO8859-1',
  930. 'galician': 'gl_ES.ISO8859-1',
  931. 'gd': 'gd_GB.ISO8859-1',
  932. 'gd_gb': 'gd_GB.ISO8859-1',
  933. 'ger_de': 'de_DE.ISO8859-1',
  934. 'german': 'de_DE.ISO8859-1',
  935. 'german.iso88591': 'de_CH.ISO8859-1',
  936. 'german_germany': 'de_DE.ISO8859-1',
  937. 'gez_er': 'gez_ER.UTF-8',
  938. 'gez_et': 'gez_ET.UTF-8',
  939. 'gl': 'gl_ES.ISO8859-1',
  940. 'gl_es': 'gl_ES.ISO8859-1',
  941. 'greek': 'el_GR.ISO8859-7',
  942. 'gu_in': 'gu_IN.UTF-8',
  943. 'gv': 'gv_GB.ISO8859-1',
  944. 'gv_gb': 'gv_GB.ISO8859-1',
  945. 'ha_ng': 'ha_NG.UTF-8',
  946. 'he': 'he_IL.ISO8859-8',
  947. 'he_il': 'he_IL.ISO8859-8',
  948. 'hebrew': 'he_IL.ISO8859-8',
  949. 'hi': 'hi_IN.ISCII-DEV',
  950. 'hi_in': 'hi_IN.ISCII-DEV',
  951. 'hi_in.isciidev': 'hi_IN.ISCII-DEV',
  952. 'hne': 'hne_IN.UTF-8',
  953. 'hne_in': 'hne_IN.UTF-8',
  954. 'hr': 'hr_HR.ISO8859-2',
  955. 'hr_hr': 'hr_HR.ISO8859-2',
  956. 'hrvatski': 'hr_HR.ISO8859-2',
  957. 'hsb_de': 'hsb_DE.ISO8859-2',
  958. 'ht_ht': 'ht_HT.UTF-8',
  959. 'hu': 'hu_HU.ISO8859-2',
  960. 'hu_hu': 'hu_HU.ISO8859-2',
  961. 'hungarian': 'hu_HU.ISO8859-2',
  962. 'hy_am': 'hy_AM.UTF-8',
  963. 'hy_am.armscii8': 'hy_AM.ARMSCII_8',
  964. 'ia': 'ia.UTF-8',
  965. 'ia_fr': 'ia_FR.UTF-8',
  966. 'icelandic': 'is_IS.ISO8859-1',
  967. 'id': 'id_ID.ISO8859-1',
  968. 'id_id': 'id_ID.ISO8859-1',
  969. 'ig_ng': 'ig_NG.UTF-8',
  970. 'ik_ca': 'ik_CA.UTF-8',
  971. 'in': 'id_ID.ISO8859-1',
  972. 'in_id': 'id_ID.ISO8859-1',
  973. 'is': 'is_IS.ISO8859-1',
  974. 'is_is': 'is_IS.ISO8859-1',
  975. 'iso-8859-1': 'en_US.ISO8859-1',
  976. 'iso-8859-15': 'en_US.ISO8859-15',
  977. 'iso8859-1': 'en_US.ISO8859-1',
  978. 'iso8859-15': 'en_US.ISO8859-15',
  979. 'iso_8859_1': 'en_US.ISO8859-1',
  980. 'iso_8859_15': 'en_US.ISO8859-15',
  981. 'it': 'it_IT.ISO8859-1',
  982. 'it_ch': 'it_CH.ISO8859-1',
  983. 'it_it': 'it_IT.ISO8859-1',
  984. 'italian': 'it_IT.ISO8859-1',
  985. 'iu': 'iu_CA.NUNACOM-8',
  986. 'iu_ca': 'iu_CA.NUNACOM-8',
  987. 'iu_ca.nunacom8': 'iu_CA.NUNACOM-8',
  988. 'iw': 'he_IL.ISO8859-8',
  989. 'iw_il': 'he_IL.ISO8859-8',
  990. 'iw_il.utf8': 'iw_IL.UTF-8',
  991. 'ja': 'ja_JP.eucJP',
  992. 'ja_jp': 'ja_JP.eucJP',
  993. 'ja_jp.euc': 'ja_JP.eucJP',
  994. 'ja_jp.mscode': 'ja_JP.SJIS',
  995. 'ja_jp.pck': 'ja_JP.SJIS',
  996. 'japan': 'ja_JP.eucJP',
  997. 'japanese': 'ja_JP.eucJP',
  998. 'japanese-euc': 'ja_JP.eucJP',
  999. 'japanese.euc': 'ja_JP.eucJP',
  1000. 'jp_jp': 'ja_JP.eucJP',
  1001. 'ka': 'ka_GE.GEORGIAN-ACADEMY',
  1002. 'ka_ge': 'ka_GE.GEORGIAN-ACADEMY',
  1003. 'ka_ge.georgianacademy': 'ka_GE.GEORGIAN-ACADEMY',
  1004. 'ka_ge.georgianps': 'ka_GE.GEORGIAN-PS',
  1005. 'ka_ge.georgianrs': 'ka_GE.GEORGIAN-ACADEMY',
  1006. 'kk_kz': 'kk_KZ.RK1048',
  1007. 'kl': 'kl_GL.ISO8859-1',
  1008. 'kl_gl': 'kl_GL.ISO8859-1',
  1009. 'km_kh': 'km_KH.UTF-8',
  1010. 'kn': 'kn_IN.UTF-8',
  1011. 'kn_in': 'kn_IN.UTF-8',
  1012. 'ko': 'ko_KR.eucKR',
  1013. 'ko_kr': 'ko_KR.eucKR',
  1014. 'ko_kr.euc': 'ko_KR.eucKR',
  1015. 'kok_in': 'kok_IN.UTF-8',
  1016. 'korean': 'ko_KR.eucKR',
  1017. 'korean.euc': 'ko_KR.eucKR',
  1018. 'ks': 'ks_IN.UTF-8',
  1019. 'ks_in': 'ks_IN.UTF-8',
  1020. 'ks_in@devanagari.utf8': 'ks_IN.UTF-8@devanagari',
  1021. 'ku_tr': 'ku_TR.ISO8859-9',
  1022. 'kw': 'kw_GB.ISO8859-1',
  1023. 'kw_gb': 'kw_GB.ISO8859-1',
  1024. 'ky': 'ky_KG.UTF-8',
  1025. 'ky_kg': 'ky_KG.UTF-8',
  1026. 'lb_lu': 'lb_LU.UTF-8',
  1027. 'lg_ug': 'lg_UG.ISO8859-10',
  1028. 'li_be': 'li_BE.UTF-8',
  1029. 'li_nl': 'li_NL.UTF-8',
  1030. 'lij_it': 'lij_IT.UTF-8',
  1031. 'lithuanian': 'lt_LT.ISO8859-13',
  1032. 'lo': 'lo_LA.MULELAO-1',
  1033. 'lo_la': 'lo_LA.MULELAO-1',
  1034. 'lo_la.cp1133': 'lo_LA.IBM-CP1133',
  1035. 'lo_la.ibmcp1133': 'lo_LA.IBM-CP1133',
  1036. 'lo_la.mulelao1': 'lo_LA.MULELAO-1',
  1037. 'lt': 'lt_LT.ISO8859-13',
  1038. 'lt_lt': 'lt_LT.ISO8859-13',
  1039. 'lv': 'lv_LV.ISO8859-13',
  1040. 'lv_lv': 'lv_LV.ISO8859-13',
  1041. 'mag_in': 'mag_IN.UTF-8',
  1042. 'mai': 'mai_IN.UTF-8',
  1043. 'mai_in': 'mai_IN.UTF-8',
  1044. 'mg_mg': 'mg_MG.ISO8859-15',
  1045. 'mhr_ru': 'mhr_RU.UTF-8',
  1046. 'mi': 'mi_NZ.ISO8859-1',
  1047. 'mi_nz': 'mi_NZ.ISO8859-1',
  1048. 'mk': 'mk_MK.ISO8859-5',
  1049. 'mk_mk': 'mk_MK.ISO8859-5',
  1050. 'ml': 'ml_IN.UTF-8',
  1051. 'ml_in': 'ml_IN.UTF-8',
  1052. 'mn_mn': 'mn_MN.UTF-8',
  1053. 'mni_in': 'mni_IN.UTF-8',
  1054. 'mr': 'mr_IN.UTF-8',
  1055. 'mr_in': 'mr_IN.UTF-8',
  1056. 'ms': 'ms_MY.ISO8859-1',
  1057. 'ms_my': 'ms_MY.ISO8859-1',
  1058. 'mt': 'mt_MT.ISO8859-3',
  1059. 'mt_mt': 'mt_MT.ISO8859-3',
  1060. 'my_mm': 'my_MM.UTF-8',
  1061. 'nan_tw@latin': 'nan_TW.UTF-8@latin',
  1062. 'nb': 'nb_NO.ISO8859-1',
  1063. 'nb_no': 'nb_NO.ISO8859-1',
  1064. 'nds_de': 'nds_DE.UTF-8',
  1065. 'nds_nl': 'nds_NL.UTF-8',
  1066. 'ne_np': 'ne_NP.UTF-8',
  1067. 'nhn_mx': 'nhn_MX.UTF-8',
  1068. 'niu_nu': 'niu_NU.UTF-8',
  1069. 'niu_nz': 'niu_NZ.UTF-8',
  1070. 'nl': 'nl_NL.ISO8859-1',
  1071. 'nl_aw': 'nl_AW.UTF-8',
  1072. 'nl_be': 'nl_BE.ISO8859-1',
  1073. 'nl_nl': 'nl_NL.ISO8859-1',
  1074. 'nn': 'nn_NO.ISO8859-1',
  1075. 'nn_no': 'nn_NO.ISO8859-1',
  1076. 'no': 'no_NO.ISO8859-1',
  1077. 'no@nynorsk': 'ny_NO.ISO8859-1',
  1078. 'no_no': 'no_NO.ISO8859-1',
  1079. 'no_no.iso88591@bokmal': 'no_NO.ISO8859-1',
  1080. 'no_no.iso88591@nynorsk': 'no_NO.ISO8859-1',
  1081. 'norwegian': 'no_NO.ISO8859-1',
  1082. 'nr': 'nr_ZA.ISO8859-1',
  1083. 'nr_za': 'nr_ZA.ISO8859-1',
  1084. 'nso': 'nso_ZA.ISO8859-15',
  1085. 'nso_za': 'nso_ZA.ISO8859-15',
  1086. 'ny': 'ny_NO.ISO8859-1',
  1087. 'ny_no': 'ny_NO.ISO8859-1',
  1088. 'nynorsk': 'nn_NO.ISO8859-1',
  1089. 'oc': 'oc_FR.ISO8859-1',
  1090. 'oc_fr': 'oc_FR.ISO8859-1',
  1091. 'om_et': 'om_ET.UTF-8',
  1092. 'om_ke': 'om_KE.ISO8859-1',
  1093. 'or': 'or_IN.UTF-8',
  1094. 'or_in': 'or_IN.UTF-8',
  1095. 'os_ru': 'os_RU.UTF-8',
  1096. 'pa': 'pa_IN.UTF-8',
  1097. 'pa_in': 'pa_IN.UTF-8',
  1098. 'pa_pk': 'pa_PK.UTF-8',
  1099. 'pap_an': 'pap_AN.UTF-8',
  1100. 'pd': 'pd_US.ISO8859-1',
  1101. 'pd_de': 'pd_DE.ISO8859-1',
  1102. 'pd_us': 'pd_US.ISO8859-1',
  1103. 'ph': 'ph_PH.ISO8859-1',
  1104. 'ph_ph': 'ph_PH.ISO8859-1',
  1105. 'pl': 'pl_PL.ISO8859-2',
  1106. 'pl_pl': 'pl_PL.ISO8859-2',
  1107. 'polish': 'pl_PL.ISO8859-2',
  1108. 'portuguese': 'pt_PT.ISO8859-1',
  1109. 'portuguese_brazil': 'pt_BR.ISO8859-1',
  1110. 'posix': 'C',
  1111. 'posix-utf2': 'C',
  1112. 'pp': 'pp_AN.ISO8859-1',
  1113. 'pp_an': 'pp_AN.ISO8859-1',
  1114. 'ps_af': 'ps_AF.UTF-8',
  1115. 'pt': 'pt_PT.ISO8859-1',
  1116. 'pt_br': 'pt_BR.ISO8859-1',
  1117. 'pt_pt': 'pt_PT.ISO8859-1',
  1118. 'ro': 'ro_RO.ISO8859-2',
  1119. 'ro_ro': 'ro_RO.ISO8859-2',
  1120. 'romanian': 'ro_RO.ISO8859-2',
  1121. 'ru': 'ru_RU.UTF-8',
  1122. 'ru_ru': 'ru_RU.UTF-8',
  1123. 'ru_ua': 'ru_UA.KOI8-U',
  1124. 'rumanian': 'ro_RO.ISO8859-2',
  1125. 'russian': 'ru_RU.ISO8859-5',
  1126. 'rw': 'rw_RW.ISO8859-1',
  1127. 'rw_rw': 'rw_RW.ISO8859-1',
  1128. 'sa_in': 'sa_IN.UTF-8',
  1129. 'sat_in': 'sat_IN.UTF-8',
  1130. 'sc_it': 'sc_IT.UTF-8',
  1131. 'sd': 'sd_IN.UTF-8',
  1132. 'sd_in': 'sd_IN.UTF-8',
  1133. 'sd_in@devanagari.utf8': 'sd_IN.UTF-8@devanagari',
  1134. 'sd_pk': 'sd_PK.UTF-8',
  1135. 'se_no': 'se_NO.UTF-8',
  1136. 'serbocroatian': 'sr_RS.UTF-8@latin',
  1137. 'sh': 'sr_RS.UTF-8@latin',
  1138. 'sh_ba.iso88592@bosnia': 'sr_CS.ISO8859-2',
  1139. 'sh_hr': 'sh_HR.ISO8859-2',
  1140. 'sh_hr.iso88592': 'hr_HR.ISO8859-2',
  1141. 'sh_sp': 'sr_CS.ISO8859-2',
  1142. 'sh_yu': 'sr_RS.UTF-8@latin',
  1143. 'shs_ca': 'shs_CA.UTF-8',
  1144. 'si': 'si_LK.UTF-8',
  1145. 'si_lk': 'si_LK.UTF-8',
  1146. 'sid_et': 'sid_ET.UTF-8',
  1147. 'sinhala': 'si_LK.UTF-8',
  1148. 'sk': 'sk_SK.ISO8859-2',
  1149. 'sk_sk': 'sk_SK.ISO8859-2',
  1150. 'sl': 'sl_SI.ISO8859-2',
  1151. 'sl_cs': 'sl_CS.ISO8859-2',
  1152. 'sl_si': 'sl_SI.ISO8859-2',
  1153. 'slovak': 'sk_SK.ISO8859-2',
  1154. 'slovene': 'sl_SI.ISO8859-2',
  1155. 'slovenian': 'sl_SI.ISO8859-2',
  1156. 'so_dj': 'so_DJ.ISO8859-1',
  1157. 'so_et': 'so_ET.UTF-8',
  1158. 'so_ke': 'so_KE.ISO8859-1',
  1159. 'so_so': 'so_SO.ISO8859-1',
  1160. 'sp': 'sr_CS.ISO8859-5',
  1161. 'sp_yu': 'sr_CS.ISO8859-5',
  1162. 'spanish': 'es_ES.ISO8859-1',
  1163. 'spanish_spain': 'es_ES.ISO8859-1',
  1164. 'sq': 'sq_AL.ISO8859-2',
  1165. 'sq_al': 'sq_AL.ISO8859-2',
  1166. 'sq_mk': 'sq_MK.UTF-8',
  1167. 'sr': 'sr_RS.UTF-8',
  1168. 'sr@cyrillic': 'sr_RS.UTF-8',
  1169. 'sr@latn': 'sr_CS.UTF-8@latin',
  1170. 'sr_cs': 'sr_CS.UTF-8',
  1171. 'sr_cs.iso88592@latn': 'sr_CS.ISO8859-2',
  1172. 'sr_cs@latn': 'sr_CS.UTF-8@latin',
  1173. 'sr_me': 'sr_ME.UTF-8',
  1174. 'sr_rs': 'sr_RS.UTF-8',
  1175. 'sr_rs@latn': 'sr_RS.UTF-8@latin',
  1176. 'sr_sp': 'sr_CS.ISO8859-2',
  1177. 'sr_yu': 'sr_RS.UTF-8@latin',
  1178. 'sr_yu.cp1251@cyrillic': 'sr_CS.CP1251',
  1179. 'sr_yu.iso88592': 'sr_CS.ISO8859-2',
  1180. 'sr_yu.iso88595': 'sr_CS.ISO8859-5',
  1181. 'sr_yu.iso88595@cyrillic': 'sr_CS.ISO8859-5',
  1182. 'sr_yu.microsoftcp1251@cyrillic': 'sr_CS.CP1251',
  1183. 'sr_yu.utf8': 'sr_RS.UTF-8',
  1184. 'sr_yu.utf8@cyrillic': 'sr_RS.UTF-8',
  1185. 'sr_yu@cyrillic': 'sr_RS.UTF-8',
  1186. 'ss': 'ss_ZA.ISO8859-1',
  1187. 'ss_za': 'ss_ZA.ISO8859-1',
  1188. 'st': 'st_ZA.ISO8859-1',
  1189. 'st_za': 'st_ZA.ISO8859-1',
  1190. 'sv': 'sv_SE.ISO8859-1',
  1191. 'sv_fi': 'sv_FI.ISO8859-1',
  1192. 'sv_se': 'sv_SE.ISO8859-1',
  1193. 'sw_ke': 'sw_KE.UTF-8',
  1194. 'sw_tz': 'sw_TZ.UTF-8',
  1195. 'swedish': 'sv_SE.ISO8859-1',
  1196. 'szl_pl': 'szl_PL.UTF-8',
  1197. 'ta': 'ta_IN.TSCII-0',
  1198. 'ta_in': 'ta_IN.TSCII-0',
  1199. 'ta_in.tscii': 'ta_IN.TSCII-0',
  1200. 'ta_in.tscii0': 'ta_IN.TSCII-0',
  1201. 'ta_lk': 'ta_LK.UTF-8',
  1202. 'te': 'te_IN.UTF-8',
  1203. 'te_in': 'te_IN.UTF-8',
  1204. 'tg': 'tg_TJ.KOI8-C',
  1205. 'tg_tj': 'tg_TJ.KOI8-C',
  1206. 'th': 'th_TH.ISO8859-11',
  1207. 'th_th': 'th_TH.ISO8859-11',
  1208. 'th_th.tactis': 'th_TH.TIS620',
  1209. 'th_th.tis620': 'th_TH.TIS620',
  1210. 'thai': 'th_TH.ISO8859-11',
  1211. 'ti_er': 'ti_ER.UTF-8',
  1212. 'ti_et': 'ti_ET.UTF-8',
  1213. 'tig_er': 'tig_ER.UTF-8',
  1214. 'tk_tm': 'tk_TM.UTF-8',
  1215. 'tl': 'tl_PH.ISO8859-1',
  1216. 'tl_ph': 'tl_PH.ISO8859-1',
  1217. 'tn': 'tn_ZA.ISO8859-15',
  1218. 'tn_za': 'tn_ZA.ISO8859-15',
  1219. 'tr': 'tr_TR.ISO8859-9',
  1220. 'tr_cy': 'tr_CY.ISO8859-9',
  1221. 'tr_tr': 'tr_TR.ISO8859-9',
  1222. 'ts': 'ts_ZA.ISO8859-1',
  1223. 'ts_za': 'ts_ZA.ISO8859-1',
  1224. 'tt': 'tt_RU.TATAR-CYR',
  1225. 'tt_ru': 'tt_RU.TATAR-CYR',
  1226. 'tt_ru.tatarcyr': 'tt_RU.TATAR-CYR',
  1227. 'tt_ru@iqtelif': 'tt_RU.UTF-8@iqtelif',
  1228. 'turkish': 'tr_TR.ISO8859-9',
  1229. 'ug_cn': 'ug_CN.UTF-8',
  1230. 'uk': 'uk_UA.KOI8-U',
  1231. 'uk_ua': 'uk_UA.KOI8-U',
  1232. 'univ': 'en_US.utf',
  1233. 'universal': 'en_US.utf',
  1234. 'universal.utf8@ucs4': 'en_US.UTF-8',
  1235. 'unm_us': 'unm_US.UTF-8',
  1236. 'ur': 'ur_PK.CP1256',
  1237. 'ur_in': 'ur_IN.UTF-8',
  1238. 'ur_pk': 'ur_PK.CP1256',
  1239. 'uz': 'uz_UZ.UTF-8',
  1240. 'uz_uz': 'uz_UZ.UTF-8',
  1241. 'uz_uz@cyrillic': 'uz_UZ.UTF-8',
  1242. 've': 've_ZA.UTF-8',
  1243. 've_za': 've_ZA.UTF-8',
  1244. 'vi': 'vi_VN.TCVN',
  1245. 'vi_vn': 'vi_VN.TCVN',
  1246. 'vi_vn.tcvn': 'vi_VN.TCVN',
  1247. 'vi_vn.tcvn5712': 'vi_VN.TCVN',
  1248. 'vi_vn.viscii': 'vi_VN.VISCII',
  1249. 'vi_vn.viscii111': 'vi_VN.VISCII',
  1250. 'wa': 'wa_BE.ISO8859-1',
  1251. 'wa_be': 'wa_BE.ISO8859-1',
  1252. 'wae_ch': 'wae_CH.UTF-8',
  1253. 'wal_et': 'wal_ET.UTF-8',
  1254. 'wo_sn': 'wo_SN.UTF-8',
  1255. 'xh': 'xh_ZA.ISO8859-1',
  1256. 'xh_za': 'xh_ZA.ISO8859-1',
  1257. 'yi': 'yi_US.CP1255',
  1258. 'yi_us': 'yi_US.CP1255',
  1259. 'yo_ng': 'yo_NG.UTF-8',
  1260. 'yue_hk': 'yue_HK.UTF-8',
  1261. 'zh': 'zh_CN.eucCN',
  1262. 'zh_cn': 'zh_CN.gb2312',
  1263. 'zh_cn.big5': 'zh_TW.big5',
  1264. 'zh_cn.euc': 'zh_CN.eucCN',
  1265. 'zh_hk': 'zh_HK.big5hkscs',
  1266. 'zh_hk.big5hk': 'zh_HK.big5hkscs',
  1267. 'zh_sg': 'zh_SG.GB2312',
  1268. 'zh_sg.gbk': 'zh_SG.GBK',
  1269. 'zh_tw': 'zh_TW.big5',
  1270. 'zh_tw.euc': 'zh_TW.eucTW',
  1271. 'zh_tw.euctw': 'zh_TW.eucTW',
  1272. 'zu': 'zu_ZA.ISO8859-1',
  1273. 'zu_za': 'zu_ZA.ISO8859-1',
  1274. }
  1275. #
  1276. # This maps Windows language identifiers to locale strings.
  1277. #
  1278. # This list has been updated from
  1279. # http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
  1280. # to include every locale up to Windows Vista.
  1281. #
  1282. # NOTE: this mapping is incomplete. If your language is missing, please
  1283. # submit a bug report to the Python bug tracker at http://bugs.python.org/
  1284. # Make sure you include the missing language identifier and the suggested
  1285. # locale code.
  1286. #
  1287. windows_locale = {
  1288. 0x0436: "af_ZA", # Afrikaans
  1289. 0x041c: "sq_AL", # Albanian
  1290. 0x0484: "gsw_FR",# Alsatian - France
  1291. 0x045e: "am_ET", # Amharic - Ethiopia
  1292. 0x0401: "ar_SA", # Arabic - Saudi Arabia
  1293. 0x0801: "ar_IQ", # Arabic - Iraq
  1294. 0x0c01: "ar_EG", # Arabic - Egypt
  1295. 0x1001: "ar_LY", # Arabic - Libya
  1296. 0x1401: "ar_DZ", # Arabic - Algeria
  1297. 0x1801: "ar_MA", # Arabic - Morocco
  1298. 0x1c01: "ar_TN", # Arabic - Tunisia
  1299. 0x2001: "ar_OM", # Arabic - Oman
  1300. 0x2401: "ar_YE", # Arabic - Yemen
  1301. 0x2801: "ar_SY", # Arabic - Syria
  1302. 0x2c01: "ar_JO", # Arabic - Jordan
  1303. 0x3001: "ar_LB", # Arabic - Lebanon
  1304. 0x3401: "ar_KW", # Arabic - Kuwait
  1305. 0x3801: "ar_AE", # Arabic - United Arab Emirates
  1306. 0x3c01: "ar_BH", # Arabic - Bahrain
  1307. 0x4001: "ar_QA", # Arabic - Qatar
  1308. 0x042b: "hy_AM", # Armenian
  1309. 0x044d: "as_IN", # Assamese - India
  1310. 0x042c: "az_AZ", # Azeri - Latin
  1311. 0x082c: "az_AZ", # Azeri - Cyrillic
  1312. 0x046d: "ba_RU", # Bashkir
  1313. 0x042d: "eu_ES", # Basque - Russia
  1314. 0x0423: "be_BY", # Belarusian
  1315. 0x0445: "bn_IN", # Begali
  1316. 0x201a: "bs_BA", # Bosnian - Cyrillic
  1317. 0x141a: "bs_BA", # Bosnian - Latin
  1318. 0x047e: "br_FR", # Breton - France
  1319. 0x0402: "bg_BG", # Bulgarian
  1320. # 0x0455: "my_MM", # Burmese - Not supported
  1321. 0x0403: "ca_ES", # Catalan
  1322. 0x0004: "zh_CHS",# Chinese - Simplified
  1323. 0x0404: "zh_TW", # Chinese - Taiwan
  1324. 0x0804: "zh_CN", # Chinese - PRC
  1325. 0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
  1326. 0x1004: "zh_SG", # Chinese - Singapore
  1327. 0x1404: "zh_MO", # Chinese - Macao S.A.R.
  1328. 0x7c04: "zh_CHT",# Chinese - Traditional
  1329. 0x0483: "co_FR", # Corsican - France
  1330. 0x041a: "hr_HR", # Croatian
  1331. 0x101a: "hr_BA", # Croatian - Bosnia
  1332. 0x0405: "cs_CZ", # Czech
  1333. 0x0406: "da_DK", # Danish
  1334. 0x048c: "gbz_AF",# Dari - Afghanistan
  1335. 0x0465: "div_MV",# Divehi - Maldives
  1336. 0x0413: "nl_NL", # Dutch - The Netherlands
  1337. 0x0813: "nl_BE", # Dutch - Belgium
  1338. 0x0409: "en_US", # English - United States
  1339. 0x0809: "en_GB", # English - United Kingdom
  1340. 0x0c09: "en_AU", # English - Australia
  1341. 0x1009: "en_CA", # English - Canada
  1342. 0x1409: "en_NZ", # English - New Zealand
  1343. 0x1809: "en_IE", # English - Ireland
  1344. 0x1c09: "en_ZA", # English - South Africa
  1345. 0x2009: "en_JA", # English - Jamaica
  1346. 0x2409: "en_CB", # English - Caribbean
  1347. 0x2809: "en_BZ", # English - Belize
  1348. 0x2c09: "en_TT", # English - Trinidad
  1349. 0x3009: "en_ZW", # English - Zimbabwe
  1350. 0x3409: "en_PH", # English - Philippines
  1351. 0x4009: "en_IN", # English - India
  1352. 0x4409: "en_MY", # English - Malaysia
  1353. 0x4809: "en_IN", # English - Singapore
  1354. 0x0425: "et_EE", # Estonian
  1355. 0x0438: "fo_FO", # Faroese
  1356. 0x0464: "fil_PH",# Filipino
  1357. 0x040b: "fi_FI", # Finnish
  1358. 0x040c: "fr_FR", # French - France
  1359. 0x080c: "fr_BE", # French - Belgium
  1360. 0x0c0c: "fr_CA", # French - Canada
  1361. 0x100c: "fr_CH", # French - Switzerland
  1362. 0x140c: "fr_LU", # French - Luxembourg
  1363. 0x180c: "fr_MC", # French - Monaco
  1364. 0x0462: "fy_NL", # Frisian - Netherlands
  1365. 0x0456: "gl_ES", # Galician
  1366. 0x0437: "ka_GE", # Georgian
  1367. 0x0407: "de_DE", # German - Germany
  1368. 0x0807: "de_CH", # German - Switzerland
  1369. 0x0c07: "de_AT", # German - Austria
  1370. 0x1007: "de_LU", # German - Luxembourg
  1371. 0x1407: "de_LI", # German - Liechtenstein
  1372. 0x0408: "el_GR", # Greek
  1373. 0x046f: "kl_GL", # Greenlandic - Greenland
  1374. 0x0447: "gu_IN", # Gujarati
  1375. 0x0468: "ha_NG", # Hausa - Latin
  1376. 0x040d: "he_IL", # Hebrew
  1377. 0x0439: "hi_IN", # Hindi
  1378. 0x040e: "hu_HU", # Hungarian
  1379. 0x040f: "is_IS", # Icelandic
  1380. 0x0421: "id_ID", # Indonesian
  1381. 0x045d: "iu_CA", # Inuktitut - Syllabics
  1382. 0x085d: "iu_CA", # Inuktitut - Latin
  1383. 0x083c: "ga_IE", # Irish - Ireland
  1384. 0x0410: "it_IT", # Italian - Italy
  1385. 0x0810: "it_CH", # Italian - Switzerland
  1386. 0x0411: "ja_JP", # Japanese
  1387. 0x044b: "kn_IN", # Kannada - India
  1388. 0x043f: "kk_KZ", # Kazakh
  1389. 0x0453: "kh_KH", # Khmer - Cambodia
  1390. 0x0486: "qut_GT",# K'iche - Guatemala
  1391. 0x0487: "rw_RW", # Kinyarwanda - Rwanda
  1392. 0x0457: "kok_IN",# Konkani
  1393. 0x0412: "ko_KR", # Korean
  1394. 0x0440: "ky_KG", # Kyrgyz
  1395. 0x0454: "lo_LA", # Lao - Lao PDR
  1396. 0x0426: "lv_LV", # Latvian
  1397. 0x0427: "lt_LT", # Lithuanian
  1398. 0x082e: "dsb_DE",# Lower Sorbian - Germany
  1399. 0x046e: "lb_LU", # Luxembourgish
  1400. 0x042f: "mk_MK", # FYROM Macedonian
  1401. 0x043e: "ms_MY", # Malay - Malaysia
  1402. 0x083e: "ms_BN", # Malay - Brunei Darussalam
  1403. 0x044c: "ml_IN", # Malayalam - India
  1404. 0x043a: "mt_MT", # Maltese
  1405. 0x0481: "mi_NZ", # Maori
  1406. 0x047a: "arn_CL",# Mapudungun
  1407. 0x044e: "mr_IN", # Marathi
  1408. 0x047c: "moh_CA",# Mohawk - Canada
  1409. 0x0450: "mn_MN", # Mongolian - Cyrillic
  1410. 0x0850: "mn_CN", # Mongolian - PRC
  1411. 0x0461: "ne_NP", # Nepali
  1412. 0x0414: "nb_NO", # Norwegian - Bokmal
  1413. 0x0814: "nn_NO", # Norwegian - Nynorsk
  1414. 0x0482: "oc_FR", # Occitan - France
  1415. 0x0448: "or_IN", # Oriya - India
  1416. 0x0463: "ps_AF", # Pashto - Afghanistan
  1417. 0x0429: "fa_IR", # Persian
  1418. 0x0415: "pl_PL", # Polish
  1419. 0x0416: "pt_BR", # Portuguese - Brazil
  1420. 0x0816: "pt_PT", # Portuguese - Portugal
  1421. 0x0446: "pa_IN", # Punjabi
  1422. 0x046b: "quz_BO",# Quechua (Bolivia)
  1423. 0x086b: "quz_EC",# Quechua (Ecuador)
  1424. 0x0c6b: "quz_PE",# Quechua (Peru)
  1425. 0x0418: "ro_RO", # Romanian - Romania
  1426. 0x0417: "rm_CH", # Romansh
  1427. 0x0419: "ru_RU", # Russian
  1428. 0x243b: "smn_FI",# Sami Finland
  1429. 0x103b: "smj_NO",# Sami Norway
  1430. 0x143b: "smj_SE",# Sami Sweden
  1431. 0x043b: "se_NO", # Sami Northern Norway
  1432. 0x083b: "se_SE", # Sami Northern Sweden
  1433. 0x0c3b: "se_FI", # Sami Northern Finland
  1434. 0x203b: "sms_FI",# Sami Skolt
  1435. 0x183b: "sma_NO",# Sami Southern Norway
  1436. 0x1c3b: "sma_SE",# Sami Southern Sweden
  1437. 0x044f: "sa_IN", # Sanskrit
  1438. 0x0c1a: "sr_SP", # Serbian - Cyrillic
  1439. 0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
  1440. 0x081a: "sr_SP", # Serbian - Latin
  1441. 0x181a: "sr_BA", # Serbian - Bosnia Latin
  1442. 0x045b: "si_LK", # Sinhala - Sri Lanka
  1443. 0x046c: "ns_ZA", # Northern Sotho
  1444. 0x0432: "tn_ZA", # Setswana - Southern Africa
  1445. 0x041b: "sk_SK", # Slovak
  1446. 0x0424: "sl_SI", # Slovenian
  1447. 0x040a: "es_ES", # Spanish - Spain
  1448. 0x080a: "es_MX", # Spanish - Mexico
  1449. 0x0c0a: "es_ES", # Spanish - Spain (Modern)
  1450. 0x100a: "es_GT", # Spanish - Guatemala
  1451. 0x140a: "es_CR", # Spanish - Costa Rica
  1452. 0x180a: "es_PA", # Spanish - Panama
  1453. 0x1c0a: "es_DO", # Spanish - Dominican Republic
  1454. 0x200a: "es_VE", # Spanish - Venezuela
  1455. 0x240a: "es_CO", # Spanish - Colombia
  1456. 0x280a: "es_PE", # Spanish - Peru
  1457. 0x2c0a: "es_AR", # Spanish - Argentina
  1458. 0x300a: "es_EC", # Spanish - Ecuador
  1459. 0x340a: "es_CL", # Spanish - Chile
  1460. 0x380a: "es_UR", # Spanish - Uruguay
  1461. 0x3c0a: "es_PY", # Spanish - Paraguay
  1462. 0x400a: "es_BO", # Spanish - Bolivia
  1463. 0x440a: "es_SV", # Spanish - El Salvador
  1464. 0x480a: "es_HN", # Spanish - Honduras
  1465. 0x4c0a: "es_NI", # Spanish - Nicaragua
  1466. 0x500a: "es_PR", # Spanish - Puerto Rico
  1467. 0x540a: "es_US", # Spanish - United States
  1468. # 0x0430: "", # Sutu - Not supported
  1469. 0x0441: "sw_KE", # Swahili
  1470. 0x041d: "sv_SE", # Swedish - Sweden
  1471. 0x081d: "sv_FI", # Swedish - Finland
  1472. 0x045a: "syr_SY",# Syriac
  1473. 0x0428: "tg_TJ", # Tajik - Cyrillic
  1474. 0x085f: "tmz_DZ",# Tamazight - Latin
  1475. 0x0449: "ta_IN", # Tamil
  1476. 0x0444: "tt_RU", # Tatar
  1477. 0x044a: "te_IN", # Telugu
  1478. 0x041e: "th_TH", # Thai
  1479. 0x0851: "bo_BT", # Tibetan - Bhutan
  1480. 0x0451: "bo_CN", # Tibetan - PRC
  1481. 0x041f: "tr_TR", # Turkish
  1482. 0x0442: "tk_TM", # Turkmen - Cyrillic
  1483. 0x0480: "ug_CN", # Uighur - Arabic
  1484. 0x0422: "uk_UA", # Ukrainian
  1485. 0x042e: "wen_DE",# Upper Sorbian - Germany
  1486. 0x0420: "ur_PK", # Urdu
  1487. 0x0820: "ur_IN", # Urdu - India
  1488. 0x0443: "uz_UZ", # Uzbek - Latin
  1489. 0x0843: "uz_UZ", # Uzbek - Cyrillic
  1490. 0x042a: "vi_VN", # Vietnamese
  1491. 0x0452: "cy_GB", # Welsh
  1492. 0x0488: "wo_SN", # Wolof - Senegal
  1493. 0x0434: "xh_ZA", # Xhosa - South Africa
  1494. 0x0485: "sah_RU",# Yakut - Cyrillic
  1495. 0x0478: "ii_CN", # Yi - PRC
  1496. 0x046a: "yo_NG", # Yoruba - Nigeria
  1497. 0x0435: "zu_ZA", # Zulu
  1498. }
  def _print_locale():
      """Print a diagnostic report of the current locale settings.

      Test function.  Writes the following to standard output, in order:

      1. the language and encoding returned by getdefaultlocale();
      2. the language and encoding of every LC_* category as found on entry;
      3. the same per-category listing after calling resetlocale();
      4. the same listing after calling setlocale(LC_ALL, ""), or a short
         note when that call fails.

      Takes no arguments and returns None.  Because it calls resetlocale()
      and setlocale(LC_ALL, ""), the locale in effect afterwards may differ
      from the one in effect on entry.
      """
      # Collect every module-level LC_* constant by name.  LC_ALL is left
      # out: the report queries each individual category separately.
      categories = {
          name: value
          for name, value in globals().items()
          if name.startswith('LC_') and name != 'LC_ALL'
      }
      rule = '-'*72

      def _report_categories():
          # Print the language/encoding currently set for each category.
          for name, category in categories.items():
              print(name, '...')
              lang, enc = getlocale(category)
              print(' Language: ', lang or '(undefined)')
              print(' Encoding: ', enc or '(undefined)')
              print()

      print('Locale defaults as determined by getdefaultlocale():')
      print(rule)
      lang, enc = getdefaultlocale()
      print('Language: ', lang or '(undefined)')
      print('Encoding: ', enc or '(undefined)')
      print()

      print('Locale settings on startup:')
      print(rule)
      _report_categories()

      print()
      print('Locale settings after calling resetlocale():')
      print(rule)
      resetlocale()
      _report_categories()

      try:
          setlocale(LC_ALL, "")
      except Exception:
          # Catch Exception rather than everything, so KeyboardInterrupt
          # and SystemExit still propagate to the caller.
          print('NOTE:')
          print('setlocale(LC_ALL, "") does not support the default locale')
          print('given in the OS environment variables.')
      else:
          print()
          print('Locale settings after calling setlocale(LC_ALL, ""):')
          print(rule)
          _report_categories()
  # LC_MESSAGES may be undefined at this point; list it in __all__ only
  # when the name actually exists in this module.
  if "LC_MESSAGES" in globals():
      __all__.append("LC_MESSAGES")
  if __name__ == '__main__':
      # Script entry point: run the two self-test reports, each under its
      # own heading followed by a blank line.
      print('Locale aliasing:', end='\n\n')
      _print_locale()

      print()
      print('Number formatting:', end='\n\n')
      _test()