uscript.h 25 KB


  1. /*
  2. **********************************************************************
  3. * Copyright (C) 1997-2015, International Business Machines
  4. * Corporation and others. All Rights Reserved.
  5. **********************************************************************
  6. *
  7. * File USCRIPT.H
  8. *
  9. * Modification History:
  10. *
  11. * Date Name Description
  12. * 07/06/2001 Ram Creation.
  13. ******************************************************************************
  14. */
  15. #ifndef USCRIPT_H
  16. #define USCRIPT_H
  17. #include "unicode/utypes.h"
  18. /**
  19. * \file
  20. * \brief C API: Unicode Script Information
  21. */
  22. /**
  23. * Constants for ISO 15924 script codes.
  24. *
  25. * The current set of script code constants supports at least all scripts
  26. * that are encoded in the version of Unicode which ICU currently supports.
  27. * The names of the constants are usually derived from the
  28. * Unicode script property value aliases.
  29. * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
  30. * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
  31. *
  32. * Starting with ICU 3.6, constants for most ISO 15924 script codes
  33. * are included, for use with language tags, CLDR data, and similar.
  34. * Some of those codes are not used in the Unicode Character Database (UCD).
  35. * For example, there are no characters that have a UCD script property value of
  36. * Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
  37. *
  38. * Private-use codes Qaaa..Qabx are not included.
  39. *
  40. * Starting with ICU 55, script codes are only added when their scripts
  41. * have been or will certainly be encoded in Unicode,
  42. * and have been assigned Unicode script property value aliases,
  43. * to ensure that their script names are stable and match the names of the constants.
  44. * Script codes like Latf and Aran that are not subject to separate encoding
  45. * may be added at any time.
  46. *
  47. * @stable ICU 2.2
  48. */
  49. typedef enum UScriptCode {
  50. /*
  51. * Note: UScriptCode constants and their ISO script code comments
  52. * are parsed by preparseucd.py.
  53. * It matches lines like
  54. * USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * /
  55. */
  56. /** @stable ICU 2.2 */
  57. USCRIPT_INVALID_CODE = -1,
  58. /** @stable ICU 2.2 */
  59. USCRIPT_COMMON = 0, /* Zyyy */
  60. /** @stable ICU 2.2 */
  61. USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
  62. /** @stable ICU 2.2 */
  63. USCRIPT_ARABIC = 2, /* Arab */
  64. /** @stable ICU 2.2 */
  65. USCRIPT_ARMENIAN = 3, /* Armn */
  66. /** @stable ICU 2.2 */
  67. USCRIPT_BENGALI = 4, /* Beng */
  68. /** @stable ICU 2.2 */
  69. USCRIPT_BOPOMOFO = 5, /* Bopo */
  70. /** @stable ICU 2.2 */
  71. USCRIPT_CHEROKEE = 6, /* Cher */
  72. /** @stable ICU 2.2 */
  73. USCRIPT_COPTIC = 7, /* Copt */
  74. /** @stable ICU 2.2 */
  75. USCRIPT_CYRILLIC = 8, /* Cyrl */
  76. /** @stable ICU 2.2 */
  77. USCRIPT_DESERET = 9, /* Dsrt */
  78. /** @stable ICU 2.2 */
  79. USCRIPT_DEVANAGARI = 10, /* Deva */
  80. /** @stable ICU 2.2 */
  81. USCRIPT_ETHIOPIC = 11, /* Ethi */
  82. /** @stable ICU 2.2 */
  83. USCRIPT_GEORGIAN = 12, /* Geor */
  84. /** @stable ICU 2.2 */
  85. USCRIPT_GOTHIC = 13, /* Goth */
  86. /** @stable ICU 2.2 */
  87. USCRIPT_GREEK = 14, /* Grek */
  88. /** @stable ICU 2.2 */
  89. USCRIPT_GUJARATI = 15, /* Gujr */
  90. /** @stable ICU 2.2 */
  91. USCRIPT_GURMUKHI = 16, /* Guru */
  92. /** @stable ICU 2.2 */
  93. USCRIPT_HAN = 17, /* Hani */
  94. /** @stable ICU 2.2 */
  95. USCRIPT_HANGUL = 18, /* Hang */
  96. /** @stable ICU 2.2 */
  97. USCRIPT_HEBREW = 19, /* Hebr */
  98. /** @stable ICU 2.2 */
  99. USCRIPT_HIRAGANA = 20, /* Hira */
  100. /** @stable ICU 2.2 */
  101. USCRIPT_KANNADA = 21, /* Knda */
  102. /** @stable ICU 2.2 */
  103. USCRIPT_KATAKANA = 22, /* Kana */
  104. /** @stable ICU 2.2 */
  105. USCRIPT_KHMER = 23, /* Khmr */
  106. /** @stable ICU 2.2 */
  107. USCRIPT_LAO = 24, /* Laoo */
  108. /** @stable ICU 2.2 */
  109. USCRIPT_LATIN = 25, /* Latn */
  110. /** @stable ICU 2.2 */
  111. USCRIPT_MALAYALAM = 26, /* Mlym */
  112. /** @stable ICU 2.2 */
  113. USCRIPT_MONGOLIAN = 27, /* Mong */
  114. /** @stable ICU 2.2 */
  115. USCRIPT_MYANMAR = 28, /* Mymr */
  116. /** @stable ICU 2.2 */
  117. USCRIPT_OGHAM = 29, /* Ogam */
  118. /** @stable ICU 2.2 */
  119. USCRIPT_OLD_ITALIC = 30, /* Ital */
  120. /** @stable ICU 2.2 */
  121. USCRIPT_ORIYA = 31, /* Orya */
  122. /** @stable ICU 2.2 */
  123. USCRIPT_RUNIC = 32, /* Runr */
  124. /** @stable ICU 2.2 */
  125. USCRIPT_SINHALA = 33, /* Sinh */
  126. /** @stable ICU 2.2 */
  127. USCRIPT_SYRIAC = 34, /* Syrc */
  128. /** @stable ICU 2.2 */
  129. USCRIPT_TAMIL = 35, /* Taml */
  130. /** @stable ICU 2.2 */
  131. USCRIPT_TELUGU = 36, /* Telu */
  132. /** @stable ICU 2.2 */
  133. USCRIPT_THAANA = 37, /* Thaa */
  134. /** @stable ICU 2.2 */
  135. USCRIPT_THAI = 38, /* Thai */
  136. /** @stable ICU 2.2 */
  137. USCRIPT_TIBETAN = 39, /* Tibt */
  138. /** Canadian_Aboriginal script. @stable ICU 2.6 */
  139. USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
  140. /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
  141. USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
  142. /** @stable ICU 2.2 */
  143. USCRIPT_YI = 41, /* Yiii */
  144. /* New scripts in Unicode 3.2 */
  145. /** @stable ICU 2.2 */
  146. USCRIPT_TAGALOG = 42, /* Tglg */
  147. /** @stable ICU 2.2 */
  148. USCRIPT_HANUNOO = 43, /* Hano */
  149. /** @stable ICU 2.2 */
  150. USCRIPT_BUHID = 44, /* Buhd */
  151. /** @stable ICU 2.2 */
  152. USCRIPT_TAGBANWA = 45, /* Tagb */
  153. /* New scripts in Unicode 4 */
  154. /** @stable ICU 2.6 */
  155. USCRIPT_BRAILLE = 46, /* Brai */
  156. /** @stable ICU 2.6 */
  157. USCRIPT_CYPRIOT = 47, /* Cprt */
  158. /** @stable ICU 2.6 */
  159. USCRIPT_LIMBU = 48, /* Limb */
  160. /** @stable ICU 2.6 */
  161. USCRIPT_LINEAR_B = 49, /* Linb */
  162. /** @stable ICU 2.6 */
  163. USCRIPT_OSMANYA = 50, /* Osma */
  164. /** @stable ICU 2.6 */
  165. USCRIPT_SHAVIAN = 51, /* Shaw */
  166. /** @stable ICU 2.6 */
  167. USCRIPT_TAI_LE = 52, /* Tale */
  168. /** @stable ICU 2.6 */
  169. USCRIPT_UGARITIC = 53, /* Ugar */
  170. /** New script code in Unicode 4.0.1 @stable ICU 3.0 */
  171. USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */
  172. /* New scripts in Unicode 4.1 */
  173. /** @stable ICU 3.4 */
  174. USCRIPT_BUGINESE = 55, /* Bugi */
  175. /** @stable ICU 3.4 */
  176. USCRIPT_GLAGOLITIC = 56, /* Glag */
  177. /** @stable ICU 3.4 */
  178. USCRIPT_KHAROSHTHI = 57, /* Khar */
  179. /** @stable ICU 3.4 */
  180. USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */
  181. /** @stable ICU 3.4 */
  182. USCRIPT_NEW_TAI_LUE = 59, /* Talu */
  183. /** @stable ICU 3.4 */
  184. USCRIPT_TIFINAGH = 60, /* Tfng */
  185. /** @stable ICU 3.4 */
  186. USCRIPT_OLD_PERSIAN = 61, /* Xpeo */
  187. /* New script codes from Unicode and ISO 15924 */
  188. /** @stable ICU 3.6 */
  189. USCRIPT_BALINESE = 62, /* Bali */
  190. /** @stable ICU 3.6 */
  191. USCRIPT_BATAK = 63, /* Batk */
  192. /** @stable ICU 3.6 */
  193. USCRIPT_BLISSYMBOLS = 64, /* Blis */
  194. /** @stable ICU 3.6 */
  195. USCRIPT_BRAHMI = 65, /* Brah */
  196. /** @stable ICU 3.6 */
  197. USCRIPT_CHAM = 66, /* Cham */
  198. /** @stable ICU 3.6 */
  199. USCRIPT_CIRTH = 67, /* Cirt */
  200. /** @stable ICU 3.6 */
  201. USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */
  202. /** @stable ICU 3.6 */
  203. USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */
  204. /** @stable ICU 3.6 */
  205. USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */
  206. /** @stable ICU 3.6 */
  207. USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */
  208. /** @stable ICU 3.6 */
  209. USCRIPT_KHUTSURI = 72, /* Geok */
  210. /** @stable ICU 3.6 */
  211. USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */
  212. /** @stable ICU 3.6 */
  213. USCRIPT_TRADITIONAL_HAN = 74, /* Hant */
  214. /** @stable ICU 3.6 */
  215. USCRIPT_PAHAWH_HMONG = 75, /* Hmng */
  216. /** @stable ICU 3.6 */
  217. USCRIPT_OLD_HUNGARIAN = 76, /* Hung */
  218. /** @stable ICU 3.6 */
  219. USCRIPT_HARAPPAN_INDUS = 77, /* Inds */
  220. /** @stable ICU 3.6 */
  221. USCRIPT_JAVANESE = 78, /* Java */
  222. /** @stable ICU 3.6 */
  223. USCRIPT_KAYAH_LI = 79, /* Kali */
  224. /** @stable ICU 3.6 */
  225. USCRIPT_LATIN_FRAKTUR = 80, /* Latf */
  226. /** @stable ICU 3.6 */
  227. USCRIPT_LATIN_GAELIC = 81, /* Latg */
  228. /** @stable ICU 3.6 */
  229. USCRIPT_LEPCHA = 82, /* Lepc */
  230. /** @stable ICU 3.6 */
  231. USCRIPT_LINEAR_A = 83, /* Lina */
  232. /** @stable ICU 4.6 */
  233. USCRIPT_MANDAIC = 84, /* Mand */
  234. /** Alias of USCRIPT_MANDAIC. @stable ICU 3.6 */
  235. USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
  236. /** @stable ICU 3.6 */
  237. USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
  238. /** @stable ICU 4.6 */
  239. USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */
  240. /** Alias of USCRIPT_MEROITIC_HIEROGLYPHS. @stable ICU 3.6 */
  241. USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
  242. /** @stable ICU 3.6 */
  243. USCRIPT_NKO = 87, /* Nkoo */
  244. /** @stable ICU 3.6 */
  245. USCRIPT_ORKHON = 88, /* Orkh */
  246. /** @stable ICU 3.6 */
  247. USCRIPT_OLD_PERMIC = 89, /* Perm */
  248. /** @stable ICU 3.6 */
  249. USCRIPT_PHAGS_PA = 90, /* Phag */
  250. /** @stable ICU 3.6 */
  251. USCRIPT_PHOENICIAN = 91, /* Phnx */
  252. /** @stable ICU 52 */
  253. USCRIPT_MIAO = 92, /* Plrd */
  254. /** Alias of USCRIPT_MIAO. @stable ICU 3.6 */
  255. USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO,
  256. /** @stable ICU 3.6 */
  257. USCRIPT_RONGORONGO = 93, /* Roro */
  258. /** @stable ICU 3.6 */
  259. USCRIPT_SARATI = 94, /* Sara */
  260. /** @stable ICU 3.6 */
  261. USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */
  262. /** @stable ICU 3.6 */
  263. USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */
  264. /** @stable ICU 3.6 */
  265. USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
  266. /** @stable ICU 3.6 */
  267. USCRIPT_TENGWAR = 98, /* Teng */
  268. /** @stable ICU 3.6 */
  269. USCRIPT_VAI = 99, /* Vaii */
  270. /** @stable ICU 3.6 */
  271. USCRIPT_VISIBLE_SPEECH = 100,/* Visp */
  272. /** @stable ICU 3.6 */
  273. USCRIPT_CUNEIFORM = 101,/* Xsux */
  274. /** @stable ICU 3.6 */
  275. USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */
  276. /** @stable ICU 3.6 */
  277. USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
  278. /** @stable ICU 3.8 */
  279. USCRIPT_CARIAN = 104,/* Cari */
  280. /** @stable ICU 3.8 */
  281. USCRIPT_JAPANESE = 105,/* Jpan */
  282. /** @stable ICU 3.8 */
  283. USCRIPT_LANNA = 106,/* Lana */
  284. /** @stable ICU 3.8 */
  285. USCRIPT_LYCIAN = 107,/* Lyci */
  286. /** @stable ICU 3.8 */
  287. USCRIPT_LYDIAN = 108,/* Lydi */
  288. /** @stable ICU 3.8 */
  289. USCRIPT_OL_CHIKI = 109,/* Olck */
  290. /** @stable ICU 3.8 */
  291. USCRIPT_REJANG = 110,/* Rjng */
  292. /** @stable ICU 3.8 */
  293. USCRIPT_SAURASHTRA = 111,/* Saur */
  294. /** Sutton SignWriting @stable ICU 3.8 */
  295. USCRIPT_SIGN_WRITING = 112,/* Sgnw */
  296. /** @stable ICU 3.8 */
  297. USCRIPT_SUNDANESE = 113,/* Sund */
  298. /** @stable ICU 3.8 */
  299. USCRIPT_MOON = 114,/* Moon */
  300. /** @stable ICU 3.8 */
  301. USCRIPT_MEITEI_MAYEK = 115,/* Mtei */
  302. /** @stable ICU 4.0 */
  303. USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */
  304. /** @stable ICU 4.0 */
  305. USCRIPT_AVESTAN = 117,/* Avst */
  306. /** @stable ICU 4.0 */
  307. USCRIPT_CHAKMA = 118,/* Cakm */
  308. /** @stable ICU 4.0 */
  309. USCRIPT_KOREAN = 119,/* Kore */
  310. /** @stable ICU 4.0 */
  311. USCRIPT_KAITHI = 120,/* Kthi */
  312. /** @stable ICU 4.0 */
  313. USCRIPT_MANICHAEAN = 121,/* Mani */
  314. /** @stable ICU 4.0 */
  315. USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */
  316. /** @stable ICU 4.0 */
  317. USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */
  318. /** @stable ICU 4.0 */
  319. USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */
  320. /** @stable ICU 4.0 */
  321. USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */
  322. /** @stable ICU 4.0 */
  323. USCRIPT_SAMARITAN = 126,/* Samr */
  324. /** @stable ICU 4.0 */
  325. USCRIPT_TAI_VIET = 127,/* Tavt */
  326. /** @stable ICU 4.0 */
  327. USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */
  328. /** @stable ICU 4.0 */
  329. USCRIPT_SYMBOLS = 129,/* Zsym */
  330. /** @stable ICU 4.4 */
  331. USCRIPT_BAMUM = 130,/* Bamu */
  332. /** @stable ICU 4.4 */
  333. USCRIPT_LISU = 131,/* Lisu */
  334. /** @stable ICU 4.4 */
  335. USCRIPT_NAKHI_GEBA = 132,/* Nkgb */
  336. /** @stable ICU 4.4 */
  337. USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */
  338. /** @stable ICU 4.6 */
  339. USCRIPT_BASSA_VAH = 134,/* Bass */
  340. /** @stable ICU 54 */
  341. USCRIPT_DUPLOYAN = 135,/* Dupl */
  342. #ifndef U_HIDE_DEPRECATED_API
  343. /** @deprecated ICU 54 Typo, use USCRIPT_DUPLOYAN */
  344. USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN,
  345. #endif /* U_HIDE_DEPRECATED_API */
  346. /** @stable ICU 4.6 */
  347. USCRIPT_ELBASAN = 136,/* Elba */
  348. /** @stable ICU 4.6 */
  349. USCRIPT_GRANTHA = 137,/* Gran */
  350. /** @stable ICU 4.6 */
  351. USCRIPT_KPELLE = 138,/* Kpel */
  352. /** @stable ICU 4.6 */
  353. USCRIPT_LOMA = 139,/* Loma */
  354. /** Mende Kikakui @stable ICU 4.6 */
  355. USCRIPT_MENDE = 140,/* Mend */
  356. /** @stable ICU 4.6 */
  357. USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */
  358. /** @stable ICU 4.6 */
  359. USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */
  360. /** @stable ICU 4.6 */
  361. USCRIPT_NABATAEAN = 143,/* Nbat */
  362. /** @stable ICU 4.6 */
  363. USCRIPT_PALMYRENE = 144,/* Palm */
  364. /** @stable ICU 54 */
  365. USCRIPT_KHUDAWADI = 145,/* Sind */
  366. /** Alias of USCRIPT_KHUDAWADI. @stable ICU 4.6 */
  367. USCRIPT_SINDHI = USCRIPT_KHUDAWADI,
  368. /** @stable ICU 4.6 */
  369. USCRIPT_WARANG_CITI = 146,/* Wara */
  370. /** @stable ICU 4.8 */
  371. USCRIPT_AFAKA = 147,/* Afak */
  372. /** @stable ICU 4.8 */
  373. USCRIPT_JURCHEN = 148,/* Jurc */
  374. /** @stable ICU 4.8 */
  375. USCRIPT_MRO = 149,/* Mroo */
  376. /** @stable ICU 4.8 */
  377. USCRIPT_NUSHU = 150,/* Nshu */
  378. /** @stable ICU 4.8 */
  379. USCRIPT_SHARADA = 151,/* Shrd */
  380. /** @stable ICU 4.8 */
  381. USCRIPT_SORA_SOMPENG = 152,/* Sora */
  382. /** @stable ICU 4.8 */
  383. USCRIPT_TAKRI = 153,/* Takr */
  384. /** @stable ICU 4.8 */
  385. USCRIPT_TANGUT = 154,/* Tang */
  386. /** @stable ICU 4.8 */
  387. USCRIPT_WOLEAI = 155,/* Wole */
  388. /** @stable ICU 49 */
  389. USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */
  390. /** @stable ICU 49 */
  391. USCRIPT_KHOJKI = 157,/* Khoj */
  392. /** @stable ICU 49 */
  393. USCRIPT_TIRHUTA = 158,/* Tirh */
  394. /** @stable ICU 52 */
  395. USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */
  396. /** @stable ICU 52 */
  397. USCRIPT_MAHAJANI = 160,/* Mahj */
  398. /** @stable ICU 54 */
  399. USCRIPT_AHOM = 161,/* Ahom */
  400. /** @stable ICU 54 */
  401. USCRIPT_HATRAN = 162,/* Hatr */
  402. /** @stable ICU 54 */
  403. USCRIPT_MODI = 163,/* Modi */
  404. /** @stable ICU 54 */
  405. USCRIPT_MULTANI = 164,/* Mult */
  406. /** @stable ICU 54 */
  407. USCRIPT_PAU_CIN_HAU = 165,/* Pauc */
  408. /** @stable ICU 54 */
  409. USCRIPT_SIDDHAM = 166,/* Sidd */
  410. /**
  411. * One higher than the last script code constant.
  412. * This value increases as constants for script codes are added.
  413. *
  414. * There are constants for Unicode 7 script property values.
  415. * There are constants for ISO 15924 script codes assigned on or before 2013-10-12.
  416. * There are no constants for private use codes from Qaaa - Qabx
  417. * except as used in the UCD.
  418. *
  419. * @stable ICU 2.2
  420. */
  421. USCRIPT_CODE_LIMIT = 167
  422. } UScriptCode;
  423. /**
  424. * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
  425. * Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
  426. * Fills in USCRIPT_LATIN given "en" OR "en_US"
  427. * If the required capacity is greater than the capacity of the destination buffer,
  428. * then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
  429. *
  430. * <p>Note: To search by short or long script alias only, use
  431. * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does
  432. * a fast lookup with no access of the locale data.
  433. *
  434. * @param nameOrAbbrOrLocale name of the script, as given in
  435. * PropertyValueAliases.txt, or ISO 15924 code or locale
  436. * @param fillIn the UScriptCode buffer to fill in the script code
  437. * @param capacity the capacity (size) of the UScriptCode buffer passed in.
  438. * @param err the error status code.
  439. * @return The number of script codes filled in the buffer passed in
  440. * @stable ICU 2.4
  441. */
  442. U_STABLE int32_t U_EXPORT2
  443. uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
  444. /**
  445. * Returns the long Unicode script name, if there is one.
  446. * Otherwise returns the 4-letter ISO 15924 script code.
  447. * Returns "Malayalam" given USCRIPT_MALAYALAM.
  448. *
  449. * @param scriptCode UScriptCode enum
  450. * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code,
  451. * or NULL if scriptCode is invalid
  452. * @stable ICU 2.4
  453. */
  454. U_STABLE const char* U_EXPORT2
  455. uscript_getName(UScriptCode scriptCode);
  456. /**
  457. * Returns the 4-letter ISO 15924 script code,
  458. * which is the same as the short Unicode script name if Unicode has names for the script.
  459. * Returns "Mlym" given USCRIPT_MALAYALAM.
  460. *
  461. * @param scriptCode UScriptCode enum
  462. * @return short script name (4-letter code), or NULL if scriptCode is invalid
  463. * @stable ICU 2.4
  464. */
  465. U_STABLE const char* U_EXPORT2
  466. uscript_getShortName(UScriptCode scriptCode);
  467. /**
  468. * Gets the script code associated with the given codepoint.
  469. * Returns USCRIPT_MALAYALAM given 0x0D02
  470. * @param codepoint UChar32 codepoint
  471. * @param err the error status code.
  472. * @return The UScriptCode, or 0 if codepoint is invalid
  473. * @stable ICU 2.4
  474. */
  475. U_STABLE UScriptCode U_EXPORT2
  476. uscript_getScript(UChar32 codepoint, UErrorCode *err);
  477. /**
  478. * Do the Script_Extensions of code point c contain script sc?
  479. * If c does not have explicit Script_Extensions, then this tests whether
  480. * c has the Script property value sc.
  481. *
  482. * Some characters are commonly used in multiple scripts.
  483. * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
  484. *
  485. * The Script_Extensions property is provisional. It may be modified or removed
  486. * in future versions of the Unicode Standard, and thus in ICU.
  487. * @param c code point
  488. * @param sc script code
  489. * @return TRUE if sc is in Script_Extensions(c)
  490. * @stable ICU 49
  491. */
  492. U_STABLE UBool U_EXPORT2
  493. uscript_hasScript(UChar32 c, UScriptCode sc);
  494. /**
  495. * Writes code point c's Script_Extensions as a list of UScriptCode values
  496. * to the output scripts array and returns the number of script codes.
  497. * - If c does have Script_Extensions, then the Script property value
  498. * (normally Common or Inherited) is not included.
  499. * - If c does not have Script_Extensions, then the one Script code is written to the output array.
  500. * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.
  501. * In other words, if the return value is 1,
  502. * then the output array contains exactly c's single Script code.
  503. * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
  504. *
  505. * Some characters are commonly used in multiple scripts.
  506. * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
  507. *
  508. * If there are more than capacity script codes to be written, then
  509. * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
  510. * (Usual ICU buffer handling behavior.)
  511. *
  512. * The Script_Extensions property is provisional. It may be modified or removed
  513. * in future versions of the Unicode Standard, and thus in ICU.
  514. * @param c code point
  515. * @param scripts output script code array
  516. * @param capacity capacity of the scripts array
  517. * @param errorCode Standard ICU error code. Its input value must
  518. * pass the U_SUCCESS() test, or else the function returns
  519. * immediately. Check for U_FAILURE() on output or use with
  520. * function chaining. (See User Guide for details.)
  521. * @return number of script codes in c's Script_Extensions, or 1 for the single Script value,
  522. * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
  523. * @stable ICU 49
  524. */
  525. U_STABLE int32_t U_EXPORT2
  526. uscript_getScriptExtensions(UChar32 c,
  527. UScriptCode *scripts, int32_t capacity,
  528. UErrorCode *errorCode);
  529. /**
  530. * Script usage constants, as returned by uscript_getUsage().
  531. * See UAX #31 Unicode Identifier and Pattern Syntax.
  532. * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
  533. *
  534. * @stable ICU 51
  535. */
  536. typedef enum UScriptUsage {
  537. /** Not encoded in Unicode. @stable ICU 51 */
  538. USCRIPT_USAGE_NOT_ENCODED,
  539. /** Unknown script usage. @stable ICU 51 */
  540. USCRIPT_USAGE_UNKNOWN,
  541. /** Candidate for Exclusion from Identifiers. @stable ICU 51 */
  542. USCRIPT_USAGE_EXCLUDED,
  543. /** Limited Use script. @stable ICU 51 */
  544. USCRIPT_USAGE_LIMITED_USE,
  545. /** Aspirational Use script. @stable ICU 51 */
  546. USCRIPT_USAGE_ASPIRATIONAL,
  547. /** Recommended script. @stable ICU 51 */
  548. USCRIPT_USAGE_RECOMMENDED
  549. } UScriptUsage;
  550. /**
  551. * Writes the script sample character string.
  552. * This string normally consists of one code point but might be longer.
  553. * The string is empty if the script is not encoded.
  554. *
  555. * @param script script code
  556. * @param dest output string array
  557. * @param capacity number of UChars in the dest array
  558. * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
  559. * @return the string length, even if U_BUFFER_OVERFLOW_ERROR
  560. * @stable ICU 51
  561. */
  562. U_STABLE int32_t U_EXPORT2
  563. uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
  564. #if U_SHOW_CPLUSPLUS_API
  565. U_NAMESPACE_BEGIN
  566. class UnicodeString;
  567. U_NAMESPACE_END
  568. /**
  569. * Returns the script sample character string.
  570. * This string normally consists of one code point but might be longer.
  571. * The string is empty if the script is not encoded.
  572. *
  573. * @param script script code
  574. * @return the sample character string
  575. * @stable ICU 51
  576. */
  577. U_COMMON_API icu::UnicodeString U_EXPORT2
  578. uscript_getSampleUnicodeString(UScriptCode script);
  579. #endif
  580. /**
  581. * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
  582. * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
  583. *
  584. * @param script script code
  585. * @return script usage
  586. * @see UScriptUsage
  587. * @stable ICU 51
  588. */
  589. U_STABLE UScriptUsage U_EXPORT2
  590. uscript_getUsage(UScriptCode script);
  591. /**
  592. * Returns TRUE if the script is written right-to-left.
  593. * For example, Arab and Hebr.
  594. *
  595. * @param script script code
  596. * @return TRUE if the script is right-to-left
  597. * @stable ICU 51
  598. */
  599. U_STABLE UBool U_EXPORT2
  600. uscript_isRightToLeft(UScriptCode script);
  601. /**
  602. * Returns TRUE if the script allows line breaks between letters (excluding hyphenation).
  603. * Such a script typically requires dictionary-based line breaking.
  604. * For example, Hani and Thai.
  605. *
  606. * @param script script code
  607. * @return TRUE if the script allows line breaks between letters
  608. * @stable ICU 51
  609. */
  610. U_STABLE UBool U_EXPORT2
  611. uscript_breaksBetweenLetters(UScriptCode script);
  612. /**
  613. * Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary.
  614. * For example, Latn and Cyrl.
  615. *
  616. * @param script script code
  617. * @return TRUE if the script is cased
  618. * @stable ICU 51
  619. */
  620. U_STABLE UBool U_EXPORT2
  621. uscript_isCased(UScriptCode script);
  622. #endif