aliases.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549
  1. """ Encoding Aliases Support
  2. This module is used by the encodings package search function to
  3. map encodings names to module names.
  4. Note that the search function normalizes the encoding names before
  5. doing the lookup, so the mapping will have to map normalized
  6. encoding names to module names.
  7. Contents:
  8. The following aliases dictionary contains mappings of all IANA
  9. character set names for which the Python core library provides
  10. codecs. In addition to these, a few Python specific codec
  11. aliases have also been added.
  12. """
  13. aliases = {
  14. # Please keep this list sorted alphabetically by value !
  15. # ascii codec
  16. '646' : 'ascii',
  17. 'ansi_x3.4_1968' : 'ascii',
  18. 'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name
  19. 'ansi_x3.4_1986' : 'ascii',
  20. 'cp367' : 'ascii',
  21. 'csascii' : 'ascii',
  22. 'ibm367' : 'ascii',
  23. 'iso646_us' : 'ascii',
  24. 'iso_646.irv_1991' : 'ascii',
  25. 'iso_ir_6' : 'ascii',
  26. 'us' : 'ascii',
  27. 'us_ascii' : 'ascii',
  28. # base64_codec codec
  29. 'base64' : 'base64_codec',
  30. 'base_64' : 'base64_codec',
  31. # big5 codec
  32. 'big5_tw' : 'big5',
  33. 'csbig5' : 'big5',
  34. # big5hkscs codec
  35. 'big5_hkscs' : 'big5hkscs',
  36. 'hkscs' : 'big5hkscs',
  37. # bz2_codec codec
  38. 'bz2' : 'bz2_codec',
  39. # cp037 codec
  40. '037' : 'cp037',
  41. 'csibm037' : 'cp037',
  42. 'ebcdic_cp_ca' : 'cp037',
  43. 'ebcdic_cp_nl' : 'cp037',
  44. 'ebcdic_cp_us' : 'cp037',
  45. 'ebcdic_cp_wt' : 'cp037',
  46. 'ibm037' : 'cp037',
  47. 'ibm039' : 'cp037',
  48. # cp1026 codec
  49. '1026' : 'cp1026',
  50. 'csibm1026' : 'cp1026',
  51. 'ibm1026' : 'cp1026',
  52. # cp1125 codec
  53. '1125' : 'cp1125',
  54. 'ibm1125' : 'cp1125',
  55. 'cp866u' : 'cp1125',
  56. 'ruscii' : 'cp1125',
  57. # cp1140 codec
  58. '1140' : 'cp1140',
  59. 'ibm1140' : 'cp1140',
  60. # cp1250 codec
  61. '1250' : 'cp1250',
  62. 'windows_1250' : 'cp1250',
  63. # cp1251 codec
  64. '1251' : 'cp1251',
  65. 'windows_1251' : 'cp1251',
  66. # cp1252 codec
  67. '1252' : 'cp1252',
  68. 'windows_1252' : 'cp1252',
  69. # cp1253 codec
  70. '1253' : 'cp1253',
  71. 'windows_1253' : 'cp1253',
  72. # cp1254 codec
  73. '1254' : 'cp1254',
  74. 'windows_1254' : 'cp1254',
  75. # cp1255 codec
  76. '1255' : 'cp1255',
  77. 'windows_1255' : 'cp1255',
  78. # cp1256 codec
  79. '1256' : 'cp1256',
  80. 'windows_1256' : 'cp1256',
  81. # cp1257 codec
  82. '1257' : 'cp1257',
  83. 'windows_1257' : 'cp1257',
  84. # cp1258 codec
  85. '1258' : 'cp1258',
  86. 'windows_1258' : 'cp1258',
  87. # cp273 codec
  88. '273' : 'cp273',
  89. 'ibm273' : 'cp273',
  90. 'csibm273' : 'cp273',
  91. # cp424 codec
  92. '424' : 'cp424',
  93. 'csibm424' : 'cp424',
  94. 'ebcdic_cp_he' : 'cp424',
  95. 'ibm424' : 'cp424',
  96. # cp437 codec
  97. '437' : 'cp437',
  98. 'cspc8codepage437' : 'cp437',
  99. 'ibm437' : 'cp437',
  100. # cp500 codec
  101. '500' : 'cp500',
  102. 'csibm500' : 'cp500',
  103. 'ebcdic_cp_be' : 'cp500',
  104. 'ebcdic_cp_ch' : 'cp500',
  105. 'ibm500' : 'cp500',
  106. # cp775 codec
  107. '775' : 'cp775',
  108. 'cspc775baltic' : 'cp775',
  109. 'ibm775' : 'cp775',
  110. # cp850 codec
  111. '850' : 'cp850',
  112. 'cspc850multilingual' : 'cp850',
  113. 'ibm850' : 'cp850',
  114. # cp852 codec
  115. '852' : 'cp852',
  116. 'cspcp852' : 'cp852',
  117. 'ibm852' : 'cp852',
  118. # cp855 codec
  119. '855' : 'cp855',
  120. 'csibm855' : 'cp855',
  121. 'ibm855' : 'cp855',
  122. # cp857 codec
  123. '857' : 'cp857',
  124. 'csibm857' : 'cp857',
  125. 'ibm857' : 'cp857',
  126. # cp858 codec
  127. '858' : 'cp858',
  128. 'csibm858' : 'cp858',
  129. 'ibm858' : 'cp858',
  130. # cp860 codec
  131. '860' : 'cp860',
  132. 'csibm860' : 'cp860',
  133. 'ibm860' : 'cp860',
  134. # cp861 codec
  135. '861' : 'cp861',
  136. 'cp_is' : 'cp861',
  137. 'csibm861' : 'cp861',
  138. 'ibm861' : 'cp861',
  139. # cp862 codec
  140. '862' : 'cp862',
  141. 'cspc862latinhebrew' : 'cp862',
  142. 'ibm862' : 'cp862',
  143. # cp863 codec
  144. '863' : 'cp863',
  145. 'csibm863' : 'cp863',
  146. 'ibm863' : 'cp863',
  147. # cp864 codec
  148. '864' : 'cp864',
  149. 'csibm864' : 'cp864',
  150. 'ibm864' : 'cp864',
  151. # cp865 codec
  152. '865' : 'cp865',
  153. 'csibm865' : 'cp865',
  154. 'ibm865' : 'cp865',
  155. # cp866 codec
  156. '866' : 'cp866',
  157. 'csibm866' : 'cp866',
  158. 'ibm866' : 'cp866',
  159. # cp869 codec
  160. '869' : 'cp869',
  161. 'cp_gr' : 'cp869',
  162. 'csibm869' : 'cp869',
  163. 'ibm869' : 'cp869',
  164. # cp932 codec
  165. '932' : 'cp932',
  166. 'ms932' : 'cp932',
  167. 'mskanji' : 'cp932',
  168. 'ms_kanji' : 'cp932',
  169. # cp949 codec
  170. '949' : 'cp949',
  171. 'ms949' : 'cp949',
  172. 'uhc' : 'cp949',
  173. # cp950 codec
  174. '950' : 'cp950',
  175. 'ms950' : 'cp950',
  176. # euc_jis_2004 codec
  177. 'jisx0213' : 'euc_jis_2004',
  178. 'eucjis2004' : 'euc_jis_2004',
  179. 'euc_jis2004' : 'euc_jis_2004',
  180. # euc_jisx0213 codec
  181. 'eucjisx0213' : 'euc_jisx0213',
  182. # euc_jp codec
  183. 'eucjp' : 'euc_jp',
  184. 'ujis' : 'euc_jp',
  185. 'u_jis' : 'euc_jp',
  186. # euc_kr codec
  187. 'euckr' : 'euc_kr',
  188. 'korean' : 'euc_kr',
  189. 'ksc5601' : 'euc_kr',
  190. 'ks_c_5601' : 'euc_kr',
  191. 'ks_c_5601_1987' : 'euc_kr',
  192. 'ksx1001' : 'euc_kr',
  193. 'ks_x_1001' : 'euc_kr',
  194. # gb18030 codec
  195. 'gb18030_2000' : 'gb18030',
  196. # gb2312 codec
  197. 'chinese' : 'gb2312',
  198. 'csiso58gb231280' : 'gb2312',
  199. 'euc_cn' : 'gb2312',
  200. 'euccn' : 'gb2312',
  201. 'eucgb2312_cn' : 'gb2312',
  202. 'gb2312_1980' : 'gb2312',
  203. 'gb2312_80' : 'gb2312',
  204. 'iso_ir_58' : 'gb2312',
  205. # gbk codec
  206. '936' : 'gbk',
  207. 'cp936' : 'gbk',
  208. 'ms936' : 'gbk',
  209. # hex_codec codec
  210. 'hex' : 'hex_codec',
  211. # hp_roman8 codec
  212. 'roman8' : 'hp_roman8',
  213. 'r8' : 'hp_roman8',
  214. 'csHPRoman8' : 'hp_roman8',
  215. # hz codec
  216. 'hzgb' : 'hz',
  217. 'hz_gb' : 'hz',
  218. 'hz_gb_2312' : 'hz',
  219. # iso2022_jp codec
  220. 'csiso2022jp' : 'iso2022_jp',
  221. 'iso2022jp' : 'iso2022_jp',
  222. 'iso_2022_jp' : 'iso2022_jp',
  223. # iso2022_jp_1 codec
  224. 'iso2022jp_1' : 'iso2022_jp_1',
  225. 'iso_2022_jp_1' : 'iso2022_jp_1',
  226. # iso2022_jp_2 codec
  227. 'iso2022jp_2' : 'iso2022_jp_2',
  228. 'iso_2022_jp_2' : 'iso2022_jp_2',
  229. # iso2022_jp_2004 codec
  230. 'iso_2022_jp_2004' : 'iso2022_jp_2004',
  231. 'iso2022jp_2004' : 'iso2022_jp_2004',
  232. # iso2022_jp_3 codec
  233. 'iso2022jp_3' : 'iso2022_jp_3',
  234. 'iso_2022_jp_3' : 'iso2022_jp_3',
  235. # iso2022_jp_ext codec
  236. 'iso2022jp_ext' : 'iso2022_jp_ext',
  237. 'iso_2022_jp_ext' : 'iso2022_jp_ext',
  238. # iso2022_kr codec
  239. 'csiso2022kr' : 'iso2022_kr',
  240. 'iso2022kr' : 'iso2022_kr',
  241. 'iso_2022_kr' : 'iso2022_kr',
  242. # iso8859_10 codec
  243. 'csisolatin6' : 'iso8859_10',
  244. 'iso_8859_10' : 'iso8859_10',
  245. 'iso_8859_10_1992' : 'iso8859_10',
  246. 'iso_ir_157' : 'iso8859_10',
  247. 'l6' : 'iso8859_10',
  248. 'latin6' : 'iso8859_10',
  249. # iso8859_11 codec
  250. 'thai' : 'iso8859_11',
  251. 'iso_8859_11' : 'iso8859_11',
  252. 'iso_8859_11_2001' : 'iso8859_11',
  253. # iso8859_13 codec
  254. 'iso_8859_13' : 'iso8859_13',
  255. 'l7' : 'iso8859_13',
  256. 'latin7' : 'iso8859_13',
  257. # iso8859_14 codec
  258. 'iso_8859_14' : 'iso8859_14',
  259. 'iso_8859_14_1998' : 'iso8859_14',
  260. 'iso_celtic' : 'iso8859_14',
  261. 'iso_ir_199' : 'iso8859_14',
  262. 'l8' : 'iso8859_14',
  263. 'latin8' : 'iso8859_14',
  264. # iso8859_15 codec
  265. 'iso_8859_15' : 'iso8859_15',
  266. 'l9' : 'iso8859_15',
  267. 'latin9' : 'iso8859_15',
  268. # iso8859_16 codec
  269. 'iso_8859_16' : 'iso8859_16',
  270. 'iso_8859_16_2001' : 'iso8859_16',
  271. 'iso_ir_226' : 'iso8859_16',
  272. 'l10' : 'iso8859_16',
  273. 'latin10' : 'iso8859_16',
  274. # iso8859_2 codec
  275. 'csisolatin2' : 'iso8859_2',
  276. 'iso_8859_2' : 'iso8859_2',
  277. 'iso_8859_2_1987' : 'iso8859_2',
  278. 'iso_ir_101' : 'iso8859_2',
  279. 'l2' : 'iso8859_2',
  280. 'latin2' : 'iso8859_2',
  281. # iso8859_3 codec
  282. 'csisolatin3' : 'iso8859_3',
  283. 'iso_8859_3' : 'iso8859_3',
  284. 'iso_8859_3_1988' : 'iso8859_3',
  285. 'iso_ir_109' : 'iso8859_3',
  286. 'l3' : 'iso8859_3',
  287. 'latin3' : 'iso8859_3',
  288. # iso8859_4 codec
  289. 'csisolatin4' : 'iso8859_4',
  290. 'iso_8859_4' : 'iso8859_4',
  291. 'iso_8859_4_1988' : 'iso8859_4',
  292. 'iso_ir_110' : 'iso8859_4',
  293. 'l4' : 'iso8859_4',
  294. 'latin4' : 'iso8859_4',
  295. # iso8859_5 codec
  296. 'csisolatincyrillic' : 'iso8859_5',
  297. 'cyrillic' : 'iso8859_5',
  298. 'iso_8859_5' : 'iso8859_5',
  299. 'iso_8859_5_1988' : 'iso8859_5',
  300. 'iso_ir_144' : 'iso8859_5',
  301. # iso8859_6 codec
  302. 'arabic' : 'iso8859_6',
  303. 'asmo_708' : 'iso8859_6',
  304. 'csisolatinarabic' : 'iso8859_6',
  305. 'ecma_114' : 'iso8859_6',
  306. 'iso_8859_6' : 'iso8859_6',
  307. 'iso_8859_6_1987' : 'iso8859_6',
  308. 'iso_ir_127' : 'iso8859_6',
  309. # iso8859_7 codec
  310. 'csisolatingreek' : 'iso8859_7',
  311. 'ecma_118' : 'iso8859_7',
  312. 'elot_928' : 'iso8859_7',
  313. 'greek' : 'iso8859_7',
  314. 'greek8' : 'iso8859_7',
  315. 'iso_8859_7' : 'iso8859_7',
  316. 'iso_8859_7_1987' : 'iso8859_7',
  317. 'iso_ir_126' : 'iso8859_7',
  318. # iso8859_8 codec
  319. 'csisolatinhebrew' : 'iso8859_8',
  320. 'hebrew' : 'iso8859_8',
  321. 'iso_8859_8' : 'iso8859_8',
  322. 'iso_8859_8_1988' : 'iso8859_8',
  323. 'iso_ir_138' : 'iso8859_8',
  324. # iso8859_9 codec
  325. 'csisolatin5' : 'iso8859_9',
  326. 'iso_8859_9' : 'iso8859_9',
  327. 'iso_8859_9_1989' : 'iso8859_9',
  328. 'iso_ir_148' : 'iso8859_9',
  329. 'l5' : 'iso8859_9',
  330. 'latin5' : 'iso8859_9',
  331. # johab codec
  332. 'cp1361' : 'johab',
  333. 'ms1361' : 'johab',
  334. # koi8_r codec
  335. 'cskoi8r' : 'koi8_r',
  336. # kz1048 codec
  337. 'kz_1048' : 'kz1048',
  338. 'rk1048' : 'kz1048',
  339. 'strk1048_2002' : 'kz1048',
  340. # latin_1 codec
  341. #
  342. # Note that the latin_1 codec is implemented internally in C and a
  343. # lot faster than the charmap codec iso8859_1 which uses the same
  344. # encoding. This is why we discourage the use of the iso8859_1
  345. # codec and alias it to latin_1 instead.
  346. #
  347. '8859' : 'latin_1',
  348. 'cp819' : 'latin_1',
  349. 'csisolatin1' : 'latin_1',
  350. 'ibm819' : 'latin_1',
  351. 'iso8859' : 'latin_1',
  352. 'iso8859_1' : 'latin_1',
  353. 'iso_8859_1' : 'latin_1',
  354. 'iso_8859_1_1987' : 'latin_1',
  355. 'iso_ir_100' : 'latin_1',
  356. 'l1' : 'latin_1',
  357. 'latin' : 'latin_1',
  358. 'latin1' : 'latin_1',
  359. # mac_cyrillic codec
  360. 'maccyrillic' : 'mac_cyrillic',
  361. # mac_greek codec
  362. 'macgreek' : 'mac_greek',
  363. # mac_iceland codec
  364. 'maciceland' : 'mac_iceland',
  365. # mac_latin2 codec
  366. 'maccentraleurope' : 'mac_latin2',
  367. 'maclatin2' : 'mac_latin2',
  368. # mac_roman codec
  369. 'macintosh' : 'mac_roman',
  370. 'macroman' : 'mac_roman',
  371. # mac_turkish codec
  372. 'macturkish' : 'mac_turkish',
  373. # mbcs codec
  374. 'dbcs' : 'mbcs',
  375. # ptcp154 codec
  376. 'csptcp154' : 'ptcp154',
  377. 'pt154' : 'ptcp154',
  378. 'cp154' : 'ptcp154',
  379. 'cyrillic_asian' : 'ptcp154',
  380. # quopri_codec codec
  381. 'quopri' : 'quopri_codec',
  382. 'quoted_printable' : 'quopri_codec',
  383. 'quotedprintable' : 'quopri_codec',
  384. # rot_13 codec
  385. 'rot13' : 'rot_13',
  386. # shift_jis codec
  387. 'csshiftjis' : 'shift_jis',
  388. 'shiftjis' : 'shift_jis',
  389. 'sjis' : 'shift_jis',
  390. 's_jis' : 'shift_jis',
  391. # shift_jis_2004 codec
  392. 'shiftjis2004' : 'shift_jis_2004',
  393. 'sjis_2004' : 'shift_jis_2004',
  394. 's_jis_2004' : 'shift_jis_2004',
  395. # shift_jisx0213 codec
  396. 'shiftjisx0213' : 'shift_jisx0213',
  397. 'sjisx0213' : 'shift_jisx0213',
  398. 's_jisx0213' : 'shift_jisx0213',
  399. # tactis codec
  400. 'tis260' : 'tactis',
  401. # tis_620 codec
  402. 'tis620' : 'tis_620',
  403. 'tis_620_0' : 'tis_620',
  404. 'tis_620_2529_0' : 'tis_620',
  405. 'tis_620_2529_1' : 'tis_620',
  406. 'iso_ir_166' : 'tis_620',
  407. # utf_16 codec
  408. 'u16' : 'utf_16',
  409. 'utf16' : 'utf_16',
  410. # utf_16_be codec
  411. 'unicodebigunmarked' : 'utf_16_be',
  412. 'utf_16be' : 'utf_16_be',
  413. # utf_16_le codec
  414. 'unicodelittleunmarked' : 'utf_16_le',
  415. 'utf_16le' : 'utf_16_le',
  416. # utf_32 codec
  417. 'u32' : 'utf_32',
  418. 'utf32' : 'utf_32',
  419. # utf_32_be codec
  420. 'utf_32be' : 'utf_32_be',
  421. # utf_32_le codec
  422. 'utf_32le' : 'utf_32_le',
  423. # utf_7 codec
  424. 'u7' : 'utf_7',
  425. 'utf7' : 'utf_7',
  426. 'unicode_1_1_utf_7' : 'utf_7',
  427. # utf_8 codec
  428. 'u8' : 'utf_8',
  429. 'utf' : 'utf_8',
  430. 'utf8' : 'utf_8',
  431. 'utf8_ucs2' : 'utf_8',
  432. 'utf8_ucs4' : 'utf_8',
  433. # uu_codec codec
  434. 'uu' : 'uu_codec',
  435. # zlib_codec codec
  436. 'zip' : 'zlib_codec',
  437. 'zlib' : 'zlib_codec',
  438. # temporary mac CJK aliases, will be replaced by proper codecs in 3.1
  439. 'x_mac_japanese' : 'shift_jis',
  440. 'x_mac_korean' : 'euc_kr',
  441. 'x_mac_simp_chinese' : 'gb2312',
  442. 'x_mac_trad_chinese' : 'big5',
  443. }