test_locale.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. from test.test_support import run_unittest, verbose
  2. import unittest
  3. import locale
  4. import sys
  5. import codecs
  # Name of the US-English locale found by get_enUS_locale(); None until then.
  enUS_locale = None


  def get_enUS_locale():
      """Find a usable US-English locale and record it in ``enUS_locale``.

      A platform-specific list of candidate names is tried against
      LC_NUMERIC; the first name ``locale.setlocale`` accepts is stored in
      the module-level ``enUS_locale``.  LC_NUMERIC is put back to the value
      it had on entry before the function returns or raises.

      Raises:
          unittest.SkipTest: on darwin when the major number of the release
              string reported by ``os.uname()`` is below 10, or when none of
              the candidate names is accepted.
      """
      global enUS_locale
      if sys.platform == 'darwin':
          import os
          tlocs = ("en_US.UTF-8", "en_US.ISO8859-1", "en_US")
          if int(os.uname()[2].split('.')[0]) < 10:
              # The locale test work fine on OSX 10.6, I (ronaldoussoren)
              # haven't had time yet to verify if tests work on OSX 10.5
              # (10.4 is known to be bad)
              raise unittest.SkipTest("Locale support on MacOSX is minimal")
      # ``elif`` rather than a second ``if``: otherwise the ``else`` branch
      # below would replace the darwin candidates chosen above.
      elif sys.platform.startswith("win"):
          tlocs = ("En", "English")
      else:
          tlocs = ("en_US.UTF-8", "en_US.US-ASCII", "en_US")

      oldlocale = locale.setlocale(locale.LC_NUMERIC)
      try:
          for tloc in tlocs:
              try:
                  locale.setlocale(locale.LC_NUMERIC, tloc)
              except locale.Error:
                  continue
              break
          else:
              # No candidate was accepted by setlocale().
              raise unittest.SkipTest(
                  "Test locale not supported (tried %s)" % (', '.join(tlocs)))
          enUS_locale = tloc
      finally:
          # Probing must not leave LC_NUMERIC changed for the caller.
          locale.setlocale(locale.LC_NUMERIC, oldlocale)
  33. class BaseLocalizedTest(unittest.TestCase):
  34. #
  35. # Base class for tests using a real locale
  36. #
  37. def setUp(self):
  38. self.oldlocale = locale.setlocale(self.locale_type)
  39. locale.setlocale(self.locale_type, enUS_locale)
  40. if verbose:
  41. print "testing with \"%s\"..." % enUS_locale,
  42. def tearDown(self):
  43. locale.setlocale(self.locale_type, self.oldlocale)
  class BaseCookedTest(unittest.TestCase):
      #
      # Shared fixture for tests that run against canned localeconv() data.
      # Subclasses provide the data as the ``cooked_values`` dictionary.
      #

      def setUp(self):
          # Install the subclass's canned values as the locale module's
          # localeconv override.
          cooked = self.cooked_values
          locale._override_localeconv = cooked

      def tearDown(self):
          # Clear the override again by installing an empty dictionary.
          locale._override_localeconv = {}
  class CCookedTest(BaseCookedTest):
      # Canned localeconv() data for the "C" locale: '.' as decimal point,
      # every other string empty, no grouping, and 127 for every numeric
      # field.
      cooked_values = dict(
          # Plain number formatting
          decimal_point='.',
          thousands_sep='',
          grouping=[],
          # Monetary formatting
          currency_symbol='',
          int_curr_symbol='',
          mon_decimal_point='',
          mon_thousands_sep='',
          mon_grouping=[],
          frac_digits=127,
          int_frac_digits=127,
          # Sign handling
          positive_sign='',
          negative_sign='',
          p_cs_precedes=127,
          p_sep_by_space=127,
          p_sign_posn=127,
          n_cs_precedes=127,
          n_sep_by_space=127,
          n_sign_posn=127,
      )
  class EnUSCookedTest(BaseCookedTest):
      # Canned localeconv() data for an "en_US" locale: '.' as decimal
      # point, ',' as thousands separator, and '$' / 'USD ' as currency
      # symbols.
      cooked_values = dict(
          # Plain number formatting
          decimal_point='.',
          thousands_sep=',',
          grouping=[3, 3, 0],
          # Monetary formatting
          currency_symbol='$',
          int_curr_symbol='USD ',
          mon_decimal_point='.',
          mon_thousands_sep=',',
          mon_grouping=[3, 3, 0],
          frac_digits=2,
          int_frac_digits=2,
          # Sign handling
          positive_sign='',
          negative_sign='-',
          p_cs_precedes=1,
          p_sep_by_space=0,
          p_sign_posn=1,
          n_cs_precedes=1,
          n_sep_by_space=0,
          n_sign_posn=1,
      )
  class FrFRCookedTest(BaseCookedTest):
      # Canned localeconv() data for an "fr_FR" locale: ',' as decimal
      # point, a space character as thousands separator, and a non-ASCII
      # currency symbol (the euro sign as UTF-8 encoded bytes).
      cooked_values = dict(
          # Plain number formatting
          decimal_point=',',
          thousands_sep=' ',
          grouping=[3, 3, 0],
          # Monetary formatting
          currency_symbol='\xe2\x82\xac',
          int_curr_symbol='EUR ',
          mon_decimal_point=',',
          mon_thousands_sep=' ',
          mon_grouping=[3, 3, 0],
          frac_digits=2,
          int_frac_digits=2,
          # Sign handling
          positive_sign='',
          negative_sign='-',
          p_cs_precedes=0,
          p_sep_by_space=1,
          p_sign_posn=1,
          n_cs_precedes=0,
          n_sep_by_space=1,
          n_sign_posn=1,
      )
  class BaseFormattingTest(object):
      #
      # Mixin with assertion helpers for the formatting tests.  It relies on
      # the ``assertEqual`` method of whatever class it is combined with.
      #

      def _test_formatfunc(self, format, value, out, func, **format_opts):
          # Call ``func`` with the format, the value and any extra options,
          # and require the result to equal ``out``.
          actual = func(format, value, **format_opts)
          self.assertEqual(actual, out)

      def _test_format(self, format, value, out, **format_opts):
          # Check ``locale.format``.
          self._test_formatfunc(
              format, value, out, locale.format, **format_opts)

      def _test_format_string(self, format, value, out, **format_opts):
          # Check ``locale.format_string``.
          self._test_formatfunc(
              format, value, out, locale.format_string, **format_opts)

      def _test_currency(self, value, out, **format_opts):
          # Check ``locale.currency``, which takes no format argument.
          actual = locale.currency(value, **format_opts)
          self.assertEqual(actual, out)
  class EnUSNumberFormatting(BaseFormattingTest):
      # Number-formatting checks whose expected output is built from the
      # active thousands separator (``self.sep``).  The class is a mixin:
      # the assertion methods come from the test case it is combined with.
      #
      # XXX there is a grouping + padding bug when the thousands separator
      # is empty but the grouping array contains values (e.g. Solaris 10)

      def setUp(self):
          # Read the separator once; every expected string is built from it.
          self.sep = locale.localeconv()['thousands_sep']

      def test_grouping(self):
          self._test_format("%f", 1024, grouping=1,
                            out='1%s024.000000' % self.sep)
          self._test_format("%f", 102, grouping=1, out='102.000000')
          self._test_format("%f", -42, grouping=1, out='-42.000000')
          self._test_format("%+f", -42, grouping=1, out='-42.000000')

      def test_grouping_and_padding(self):
          self._test_format("%20.f", -42, grouping=1, out='-42'.rjust(20))
          # Only checked when a separator exists (see the XXX note above).
          if self.sep:
              self._test_format("%+10.f", -4200, grouping=1,
                                out=('-4%s200' % self.sep).rjust(10))
              self._test_format("%-10.f", -4200, grouping=1,
                                out=('-4%s200' % self.sep).ljust(10))

      def test_integer_grouping(self):
          self._test_format("%d", 4200, grouping=True,
                            out='4%s200' % self.sep)
          self._test_format("%+d", 4200, grouping=True,
                            out='+4%s200' % self.sep)
          self._test_format("%+d", -4200, grouping=True,
                            out='-4%s200' % self.sep)

      def test_integer_grouping_and_padding(self):
          self._test_format("%10d", 4200, grouping=True,
                            out=('4%s200' % self.sep).rjust(10))
          self._test_format("%-10d", -4200, grouping=True,
                            out=('-4%s200' % self.sep).ljust(10))

      def test_simple(self):
          self._test_format("%f", 1024, grouping=0, out='1024.000000')
          self._test_format("%f", 102, grouping=0, out='102.000000')
          self._test_format("%f", -42, grouping=0, out='-42.000000')
          self._test_format("%+f", -42, grouping=0, out='-42.000000')

      def test_padding(self):
          self._test_format("%20.f", -42, grouping=0, out='-42'.rjust(20))
          self._test_format("%+10.f", -4200, grouping=0,
                            out='-4200'.rjust(10))
          self._test_format("%-10.f", 4200, grouping=0, out='4200'.ljust(10))

      def test_complex_formatting(self):
          # Spaces in formatting string
          # (this assertion used to appear twice verbatim; once is enough)
          self._test_format_string(
              "One million is %i", 1000000, grouping=1,
              out='One million is 1%s000%s000' % (self.sep, self.sep))
          # Dots in formatting string
          self._test_format_string(".%f.", 1000.0, out='.1000.000000.')
          # Padding
          if self.sep:
              self._test_format_string(
                  "--> %10.2f", 4200, grouping=1,
                  out='--> ' + ('4%s200.00' % self.sep).rjust(10))
          # Asterisk formats
          self._test_format_string("%10.*f", (2, 1000), grouping=0,
                                   out='1000.00'.rjust(10))
          if self.sep:
              self._test_format_string(
                  "%*.*f", (10, 2, 1000), grouping=1,
                  out=('1%s000.00' % self.sep).rjust(10))
          # Test more-in-one
          if self.sep:
              self._test_format_string(
                  "int %i float %.2f str %s",
                  (1000, 1000.0, 'str'), grouping=1,
                  out='int 1%s000 float 1%s000.00 str str' %
                  (self.sep, self.sep))
  class TestFormatPatternArg(unittest.TestCase):
      # Checks how locale.format() validates its pattern argument.

      def test_onlyOnePattern(self):
          # Issue 2522: accept exactly one % pattern, and no extra chars.
          rejected_patterns = (
              "%f\n",
              "%f\r",
              "%f\r\n",
              " %f",
              "%fg",
              "%^g",
              "%f%%",
          )
          for pattern in rejected_patterns:
              self.assertRaises(ValueError, locale.format, pattern, 'foo')
  class TestLocaleFormatString(unittest.TestCase):
      """General tests on locale.format_string"""

      def test_percent_escape(self):
          # A literal "%%" must come out exactly as the built-in % operator
          # renders it, for scalar, tuple and mapping arguments alike.
          cases = (
              ('%f%%', 1.0),
              ('%d %f%%d', (1, 1.0)),
              ('%(foo)s %%d', {'foo': 'bar'}),
          )
          for fmt, args in cases:
              self.assertEqual(locale.format_string(fmt, args), fmt % args)

      def test_mapping(self):
          # Mapping-style "%(name)s" patterns, with and without trailing
          # text, must match the built-in % operator.
          mapping = {'foo': 'bar'}
          for fmt in ('%(foo)s bing.', '%(foo)s'):
              self.assertEqual(locale.format_string(fmt, mapping),
                               fmt % mapping)
  class TestNumberFormatting(BaseLocalizedTest, EnUSNumberFormatting):
      # Runs the en_US number-formatting checks under a real English locale.
      locale_type = locale.LC_NUMERIC

      def setUp(self):
          # Order matters: the locale is switched first, so the separator
          # that EnUSNumberFormatting.setUp reads afterwards reflects it.
          for base in (BaseLocalizedTest, EnUSNumberFormatting):
              base.setUp(self)
  class TestEnUSNumberFormatting(EnUSCookedTest, EnUSNumberFormatting):
      # Runs the en_US number-formatting checks against the cooked "en_US"
      # localeconv() values.

      def setUp(self):
          # Order matters: the cooked values are installed first, then
          # EnUSNumberFormatting.setUp reads the separator.
          for base in (EnUSCookedTest, EnUSNumberFormatting):
              base.setUp(self)

      def test_currency(self):
          amount = 50000
          self._test_currency(amount, "$50000.00")
          self._test_currency(amount, "$50,000.00", grouping=True)
          self._test_currency(amount, "USD 50,000.00",
                              grouping=True, international=True)
  class TestCNumberFormatting(CCookedTest, BaseFormattingTest):
      # Number formatting against the cooked "C" locale values: requesting
      # grouping must leave the digits ungrouped.

      def test_grouping(self):
          value = 12345.67
          self._test_format("%.2f", value, grouping=True, out='12345.67')

      def test_grouping_and_padding(self):
          value = 12345.67
          self._test_format("%9.2f", value, grouping=True, out=' 12345.67')
  class TestFrFRNumberFormatting(FrFRCookedTest, BaseFormattingTest):
      # Number formatting against the cooked "fr_FR" locale values.

      def test_decimal_point(self):
          self._test_format("%.2f", 12345.67, out='12345,67')

      def test_grouping(self):
          cases = (
              (345.67, '345,67'),
              (12345.67, '12 345,67'),
          )
          for value, expected in cases:
              self._test_format("%.2f", value, grouping=True, out=expected)

      def test_grouping_and_padding(self):
          # (format, value, expected) - right-aligned first, then
          # left-aligned.
          cases = (
              ("%6.2f", 345.67, '345,67'),
              ("%7.2f", 345.67, ' 345,67'),
              ("%8.2f", 12345.67, '12 345,67'),
              ("%9.2f", 12345.67, '12 345,67'),
              ("%10.2f", 12345.67, ' 12 345,67'),
              ("%-6.2f", 345.67, '345,67'),
              ("%-7.2f", 345.67, '345,67 '),
              ("%-8.2f", 12345.67, '12 345,67'),
              ("%-9.2f", 12345.67, '12 345,67'),
              ("%-10.2f", 12345.67, '12 345,67 '),
          )
          for fmt, value, expected in cases:
              self._test_format(fmt, value, grouping=True, out=expected)

      def test_integer_grouping(self):
          for value, expected in ((200, '200'), (4200, '4 200')):
              self._test_format("%d", value, grouping=True, out=expected)

      def test_integer_grouping_and_padding(self):
          grouped = '4 200'
          cases = (
              ("%4d", grouped),
              ("%5d", grouped),
              ("%10d", grouped.rjust(10)),
              ("%-4d", grouped),
              ("%-5d", grouped),
              ("%-10d", grouped.ljust(10)),
          )
          for fmt, expected in cases:
              self._test_format(fmt, 4200, grouping=True, out=expected)

      def test_currency(self):
          # The expected text carries the euro sign as UTF-8 bytes.
          euro = u'\u20ac'.encode('utf-8')
          amount = 50000
          self._test_currency(amount, "50000,00 " + euro)
          self._test_currency(amount, "50 000,00 " + euro, grouping=True)
          # XXX is the trailing space a bug?
          self._test_currency(amount, "50 000,00 EUR ",
                              grouping=True, international=True)
  class TestStringMethods(BaseLocalizedTest):
      # Byte-string methods must keep treating non-ASCII bytes as opaque
      # while LC_CTYPE is switched to the test locale.
      locale_type = locale.LC_CTYPE

      # These tests are only defined on platforms other than sunos5 and
      # Windows.
      if not (sys.platform == 'sunos5' or sys.platform.startswith("win")):
          # Test BSD Rune locale's bug for isctype functions.

          def test_isspace(self):
              for char, expected in (('\x20', True),
                                     ('\xa0', False),
                                     ('\xa1', False)):
                  self.assertEqual(char.isspace(), expected)

          def test_isalpha(self):
              self.assertEqual('\xc0'.isalpha(), False)

          def test_isalnum(self):
              self.assertEqual('\xc0'.isalnum(), False)

          def test_isupper(self):
              self.assertEqual('\xc0'.isupper(), False)

          def test_islower(self):
              self.assertEqual('\xc0'.islower(), False)

          def test_lower(self):
              data = '\xcc\x85'
              self.assertEqual(data.lower(), '\xcc\x85')

          def test_upper(self):
              data = '\xed\x95\xa0'
              self.assertEqual(data.upper(), '\xed\x95\xa0')

          def test_strip(self):
              data = '\xed\x95\xa0'
              self.assertEqual(data.strip(), '\xed\x95\xa0')

          def test_split(self):
              data = '\xec\xa0\xbc'
              self.assertEqual(data.split(), ['\xec\xa0\xbc'])
  298. class NormalizeTest(unittest.TestCase):
  299. def check(self, localename, expected):
  300. self.assertEqual(locale.normalize(localename), expected, msg=localename)
  301. def test_locale_alias(self):
  302. for localename, alias in locale.locale_alias.items():
  303. self.check(localename, alias)
  304. def test_empty(self):
  305. self.check('', '')
  306. def test_c(self):
  307. self.check('c', 'C')
  308. self.check('posix', 'C')
  309. def test_english(self):
  310. self.check('en', 'en_US.ISO8859-1')
  311. self.check('EN', 'en_US.ISO8859-1')
  312. self.check('en_US', 'en_US.ISO8859-1')
  313. self.check('en_us', 'en_US.ISO8859-1')
  314. self.check('en_GB', 'en_GB.ISO8859-1')
  315. self.check('en_US.UTF-8', 'en_US.UTF-8')
  316. self.check('en_US.utf8', 'en_US.UTF-8')
  317. self.check('en_US:UTF-8', 'en_US.UTF-8')
  318. self.check('en_US.ISO8859-1', 'en_US.ISO8859-1')
  319. self.check('en_US.US-ASCII', 'en_US.ISO8859-1')
  320. self.check('english', 'en_EN.ISO8859-1')
  321. def test_hyphenated_encoding(self):
  322. self.check('az_AZ.iso88599e', 'az_AZ.ISO8859-9E')
  323. self.check('az_AZ.ISO8859-9E', 'az_AZ.ISO8859-9E')
  324. self.check('tt_RU.koi8c', 'tt_RU.KOI8-C')
  325. self.check('tt_RU.KOI8-C', 'tt_RU.KOI8-C')
  326. self.check('lo_LA.cp1133', 'lo_LA.IBM-CP1133')
  327. self.check('lo_LA.ibmcp1133', 'lo_LA.IBM-CP1133')
  328. self.check('lo_LA.IBM-CP1133', 'lo_LA.IBM-CP1133')
  329. self.check('uk_ua.microsoftcp1251', 'uk_UA.CP1251')
  330. self.check('uk_ua.microsoft-cp1251', 'uk_UA.CP1251')
  331. self.check('ka_ge.georgianacademy', 'ka_GE.GEORGIAN-ACADEMY')
  332. self.check('ka_GE.GEORGIAN-ACADEMY', 'ka_GE.GEORGIAN-ACADEMY')
  333. self.check('cs_CZ.iso88592', 'cs_CZ.ISO8859-2')
  334. self.check('cs_CZ.ISO8859-2', 'cs_CZ.ISO8859-2')
  335. def test_euro_modifier(self):
  336. self.check('de_DE@euro', 'de_DE.ISO8859-15')
  337. self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15')
  338. def test_latin_modifier(self):
  339. self.check('be_BY.UTF-8@latin', 'be_BY.UTF-8@latin')
  340. self.check('sr_RS.UTF-8@latin', 'sr_RS.UTF-8@latin')
  341. def test_valencia_modifier(self):
  342. self.check('ca_ES.UTF-8@valencia', 'ca_ES.UTF-8@valencia')
  343. self.check('ca_ES@valencia', 'ca_ES.ISO8859-15@valencia')
  344. self.check('ca@valencia', 'ca_ES.ISO8859-1@valencia')
  345. def test_devanagari_modifier(self):
  346. self.check('ks_IN.UTF-8@devanagari', 'ks_IN.UTF-8@devanagari')
  347. self.check('ks_IN@devanagari', 'ks_IN.UTF-8@devanagari')
  348. self.check('ks@devanagari', 'ks_IN.UTF-8@devanagari')
  349. self.check('ks_IN.UTF-8', 'ks_IN.UTF-8')
  350. self.check('ks_IN', 'ks_IN.UTF-8')
  351. self.check('ks', 'ks_IN.UTF-8')
  352. self.check('sd_IN.UTF-8@devanagari', 'sd_IN.UTF-8@devanagari')
  353. self.check('sd_IN@devanagari', 'sd_IN.UTF-8@devanagari')
  354. self.check('sd@devanagari', 'sd_IN.UTF-8@devanagari')
  355. self.check('sd_IN.UTF-8', 'sd_IN.UTF-8')
  356. self.check('sd_IN', 'sd_IN.UTF-8')
  357. self.check('sd', 'sd_IN.UTF-8')
  358. class TestMiscellaneous(unittest.TestCase):
  359. def test_getpreferredencoding(self):
  360. # Invoke getpreferredencoding to make sure it does not cause exceptions.
  361. enc = locale.getpreferredencoding()
  362. if enc:
  363. # If encoding non-empty, make sure it is valid
  364. codecs.lookup(enc)
  365. if hasattr(locale, "strcoll"):
  366. def test_strcoll_3303(self):
  367. # test crasher from bug #3303
  368. self.assertRaises(TypeError, locale.strcoll, u"a", None)
  369. def test_setlocale_category(self):
  370. locale.setlocale(locale.LC_ALL)
  371. locale.setlocale(locale.LC_TIME)
  372. locale.setlocale(locale.LC_CTYPE)
  373. locale.setlocale(locale.LC_COLLATE)
  374. locale.setlocale(locale.LC_MONETARY)
  375. locale.setlocale(locale.LC_NUMERIC)
  376. # crasher from bug #7419
  377. self.assertRaises(locale.Error, locale.setlocale, 12345)
  378. def test_getsetlocale_issue1813(self):
  379. # Issue #1813: setting and getting the locale under a Turkish locale
  380. oldlocale = locale.getlocale()
  381. self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
  382. for loc in ('tr_TR', 'tr_TR.UTF-8', 'tr_TR.ISO8859-9'):
  383. try:
  384. locale.setlocale(locale.LC_CTYPE, loc)
  385. break
  386. except locale.Error:
  387. continue
  388. else:
  389. # Unsupported locale on this system
  390. self.skipTest('test needs Turkish locale')
  391. loc = locale.getlocale()
  392. try:
  393. locale.setlocale(locale.LC_CTYPE, loc)
  394. except Exception as e:
  395. self.fail("Failed to set locale %r (default locale is %r): %r" %
  396. (loc, oldlocale, e))
  397. self.assertEqual(loc, locale.getlocale())
  398. def test_normalize_issue12752(self):
  399. # Issue #1813 caused a regression where locale.normalize() would no
  400. # longer accept unicode strings.
  401. self.assertEqual(locale.normalize(u'en_US'), 'en_US.ISO8859-1')
  402. def test_setlocale_unicode(self):
  403. oldlocale = locale.getlocale()
  404. self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
  405. user_locale = locale.setlocale(locale.LC_CTYPE, '')
  406. unicode_locale = user_locale.decode('utf-8')
  407. user_locale2 = locale.setlocale(locale.LC_CTYPE, unicode_locale)
  408. self.assertEqual(user_locale, user_locale2)
  409. def test_main():
  410. tests = [
  411. TestMiscellaneous,
  412. TestFormatPatternArg,
  413. TestLocaleFormatString,
  414. TestEnUSNumberFormatting,
  415. TestCNumberFormatting,
  416. TestFrFRNumberFormatting,
  417. ]
  418. # SkipTest can't be raised inside unittests, handle it manually instead
  419. try:
  420. get_enUS_locale()
  421. except unittest.SkipTest as e:
  422. if verbose:
  423. print "Some tests will be disabled: %s" % e
  424. else:
  425. tests += [TestNumberFormatting, TestStringMethods]
  426. run_unittest(*tests)
  427. if __name__ == '__main__':
  428. test_main()