hb-unicode.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. /*
  2. * Copyright © 2009 Red Hat, Inc.
  3. * Copyright © 2011 Codethink Limited
  4. * Copyright © 2011,2012 Google, Inc.
  5. *
  6. * This is part of HarfBuzz, a text shaping library.
  7. *
  8. * Permission is hereby granted, without written agreement and without
  9. * license or royalty fees, to use, copy, modify, and distribute this
  10. * software and its documentation for any purpose, provided that the
  11. * above copyright notice and the following two paragraphs appear in
  12. * all copies of this software.
  13. *
  14. * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  15. * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  16. * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  17. * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  18. * DAMAGE.
  19. *
  20. * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  21. * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  22. * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
  23. * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  24. * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  25. *
  26. * Red Hat Author(s): Behdad Esfahbod
  27. * Codethink Author(s): Ryan Lortie
  28. * Google Author(s): Behdad Esfahbod
  29. */
  30. #ifndef HB_H_IN
  31. #error "Include <hb.h> instead."
  32. #endif
  33. #ifndef HB_UNICODE_H
  34. #define HB_UNICODE_H
  35. #include "hb-common.h"
  36. HB_BEGIN_DECLS
  37. /* hb_unicode_general_category_t */
  38. /* Unicode Character Database property: General_Category (gc) */
  typedef enum
  {
    /* C* categories */
    HB_UNICODE_GENERAL_CATEGORY_CONTROL             = 0,   /* Cc */
    HB_UNICODE_GENERAL_CATEGORY_FORMAT              = 1,   /* Cf */
    HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED          = 2,   /* Cn */
    HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE         = 3,   /* Co */
    HB_UNICODE_GENERAL_CATEGORY_SURROGATE           = 4,   /* Cs */

    /* Letters (L*) */
    HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER    = 5,   /* Ll */
    HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER     = 6,   /* Lm */
    HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER        = 7,   /* Lo */
    HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER    = 8,   /* Lt */
    HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER    = 9,   /* Lu */

    /* Marks (M*) */
    HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK        = 10,  /* Mc */
    HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK      = 11,  /* Me */
    HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK    = 12,  /* Mn */

    /* Numbers (N*) */
    HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER      = 13,  /* Nd */
    HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER       = 14,  /* Nl */
    HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER        = 15,  /* No */

    /* Punctuation (P*) */
    HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION = 16,  /* Pc */
    HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION    = 17,  /* Pd */
    HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION   = 18,  /* Pe */
    HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION   = 19,  /* Pf */
    HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION = 20,  /* Pi */
    HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION   = 21,  /* Po */
    HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION    = 22,  /* Ps */

    /* Symbols (S*) */
    HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL     = 23,  /* Sc */
    HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL     = 24,  /* Sk */
    HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL         = 25,  /* Sm */
    HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL        = 26,  /* So */

    /* Separators (Z*) */
    HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR      = 27,  /* Zl */
    HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR = 28,  /* Zp */
    HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR     = 29   /* Zs */
  } hb_unicode_general_category_t;
  72. /* hb_unicode_combining_class_t */
  73. /* Note: newer versions of Unicode may add new values. Clients should be ready to handle
  74. * any value in the 0..254 range being returned from hb_unicode_combining_class().
  75. */
  76. /* Unicode Character Database property: Canonical_Combining_Class (ccc) */
  typedef enum
  {
    HB_UNICODE_COMBINING_CLASS_NOT_REORDERED        = 0,
    HB_UNICODE_COMBINING_CLASS_OVERLAY              = 1,
    HB_UNICODE_COMBINING_CLASS_NUKTA                = 7,
    HB_UNICODE_COMBINING_CLASS_KANA_VOICING         = 8,
    HB_UNICODE_COMBINING_CLASS_VIRAMA               = 9,

    /* Hebrew */
    HB_UNICODE_COMBINING_CLASS_CCC10                = 10,
    HB_UNICODE_COMBINING_CLASS_CCC11                = 11,
    HB_UNICODE_COMBINING_CLASS_CCC12                = 12,
    HB_UNICODE_COMBINING_CLASS_CCC13                = 13,
    HB_UNICODE_COMBINING_CLASS_CCC14                = 14,
    HB_UNICODE_COMBINING_CLASS_CCC15                = 15,
    HB_UNICODE_COMBINING_CLASS_CCC16                = 16,
    HB_UNICODE_COMBINING_CLASS_CCC17                = 17,
    HB_UNICODE_COMBINING_CLASS_CCC18                = 18,
    HB_UNICODE_COMBINING_CLASS_CCC19                = 19,
    HB_UNICODE_COMBINING_CLASS_CCC20                = 20,
    HB_UNICODE_COMBINING_CLASS_CCC21                = 21,
    HB_UNICODE_COMBINING_CLASS_CCC22                = 22,
    HB_UNICODE_COMBINING_CLASS_CCC23                = 23,
    HB_UNICODE_COMBINING_CLASS_CCC24                = 24,
    HB_UNICODE_COMBINING_CLASS_CCC25                = 25,
    HB_UNICODE_COMBINING_CLASS_CCC26                = 26,

    /* Arabic */
    HB_UNICODE_COMBINING_CLASS_CCC27                = 27,
    HB_UNICODE_COMBINING_CLASS_CCC28                = 28,
    HB_UNICODE_COMBINING_CLASS_CCC29                = 29,
    HB_UNICODE_COMBINING_CLASS_CCC30                = 30,
    HB_UNICODE_COMBINING_CLASS_CCC31                = 31,
    HB_UNICODE_COMBINING_CLASS_CCC32                = 32,
    HB_UNICODE_COMBINING_CLASS_CCC33                = 33,
    HB_UNICODE_COMBINING_CLASS_CCC34                = 34,
    HB_UNICODE_COMBINING_CLASS_CCC35                = 35,

    /* Syriac */
    HB_UNICODE_COMBINING_CLASS_CCC36                = 36,

    /* Telugu */
    HB_UNICODE_COMBINING_CLASS_CCC84                = 84,
    HB_UNICODE_COMBINING_CLASS_CCC91                = 91,

    /* Thai */
    HB_UNICODE_COMBINING_CLASS_CCC103               = 103,
    HB_UNICODE_COMBINING_CLASS_CCC107               = 107,

    /* Lao */
    HB_UNICODE_COMBINING_CLASS_CCC118               = 118,
    HB_UNICODE_COMBINING_CLASS_CCC122               = 122,

    /* Tibetan */
    HB_UNICODE_COMBINING_CLASS_CCC129               = 129,
    HB_UNICODE_COMBINING_CLASS_CCC130               = 130,
    HB_UNICODE_COMBINING_CLASS_CCC132               = 132,
    /* Legacy misspelling: the name says 133 but the value has always been 132.
     * Kept as an alias of CCC132 so existing callers keep compiling; new code
     * should use HB_UNICODE_COMBINING_CLASS_CCC132. */
    HB_UNICODE_COMBINING_CLASS_CCC133               = 132,

    HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT  = 200,
    HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW       = 202,
    HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE       = 214,
    HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT = 216,
    HB_UNICODE_COMBINING_CLASS_BELOW_LEFT           = 218,
    HB_UNICODE_COMBINING_CLASS_BELOW                = 220,
    HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT          = 222,
    HB_UNICODE_COMBINING_CLASS_LEFT                 = 224,
    HB_UNICODE_COMBINING_CLASS_RIGHT                = 226,
    HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT           = 228,
    HB_UNICODE_COMBINING_CLASS_ABOVE                = 230,
    HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT          = 232,
    HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW         = 233,
    HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE         = 234,

    HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT       = 240,

    /* Lies outside the 0..254 range of values documented for
     * hb_unicode_combining_class(). */
    HB_UNICODE_COMBINING_CLASS_INVALID              = 255
  } hb_unicode_combining_class_t;
  144. /*
  145. * hb_unicode_funcs_t
  146. */
  147. typedef struct hb_unicode_funcs_t hb_unicode_funcs_t;
  148. /*
  149. * just give me the best implementation you've got there.
  150. */
  151. HB_EXTERN hb_unicode_funcs_t *
  152. hb_unicode_funcs_get_default (void);
  153. HB_EXTERN hb_unicode_funcs_t *
  154. hb_unicode_funcs_create (hb_unicode_funcs_t *parent);
  155. HB_EXTERN hb_unicode_funcs_t *
  156. hb_unicode_funcs_get_empty (void);
  157. HB_EXTERN hb_unicode_funcs_t *
  158. hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs);
  159. HB_EXTERN void
  160. hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs);
  161. HB_EXTERN hb_bool_t
  162. hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
  163. hb_user_data_key_t *key,
  164. void * data,
  165. hb_destroy_func_t destroy,
  166. hb_bool_t replace);
  167. HB_EXTERN void *
  168. hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
  169. hb_user_data_key_t *key);
  170. HB_EXTERN void
  171. hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs);
  172. HB_EXTERN hb_bool_t
  173. hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs);
  174. HB_EXTERN hb_unicode_funcs_t *
  175. hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs);
  176. /*
  177. * funcs
  178. */
  179. /* typedefs */
  180. typedef hb_unicode_combining_class_t (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs,
  181. hb_codepoint_t unicode,
  182. void *user_data);
  183. typedef unsigned int (*hb_unicode_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs,
  184. hb_codepoint_t unicode,
  185. void *user_data);
  186. typedef hb_unicode_general_category_t (*hb_unicode_general_category_func_t) (hb_unicode_funcs_t *ufuncs,
  187. hb_codepoint_t unicode,
  188. void *user_data);
  189. typedef hb_codepoint_t (*hb_unicode_mirroring_func_t) (hb_unicode_funcs_t *ufuncs,
  190. hb_codepoint_t unicode,
  191. void *user_data);
  192. typedef hb_script_t (*hb_unicode_script_func_t) (hb_unicode_funcs_t *ufuncs,
  193. hb_codepoint_t unicode,
  194. void *user_data);
  195. typedef hb_bool_t (*hb_unicode_compose_func_t) (hb_unicode_funcs_t *ufuncs,
  196. hb_codepoint_t a,
  197. hb_codepoint_t b,
  198. hb_codepoint_t *ab,
  199. void *user_data);
  200. typedef hb_bool_t (*hb_unicode_decompose_func_t) (hb_unicode_funcs_t *ufuncs,
  201. hb_codepoint_t ab,
  202. hb_codepoint_t *a,
  203. hb_codepoint_t *b,
  204. void *user_data);
  205. /**
  206. * hb_unicode_decompose_compatibility_func_t:
  207. * @ufuncs: a Unicode function structure
  208. * @u: codepoint to decompose
  209. * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into
  210. * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func()
  211. *
  212. * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed.
  213. * The complete length of the decomposition will be returned.
  214. *
  215. * If @u has no compatibility decomposition, zero should be returned.
  216. *
  217. * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any
  218. * compatibility decomposition plus an terminating value of 0. Consequently, @decompose must be allocated by the caller to be at least this length. Implementations
  219. * of this function type must ensure that they do not write past the provided array.
  220. *
  221. * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available.
  222. */
  223. typedef unsigned int (*hb_unicode_decompose_compatibility_func_t) (hb_unicode_funcs_t *ufuncs,
  224. hb_codepoint_t u,
  225. hb_codepoint_t *decomposed,
  226. void *user_data);
  /* Buffer length, in codepoints, for compatibility decompositions: 18 for the
   * decomposition itself plus 1 for the terminating 0.  See Unicode 6.1 for
   * details on the maximum decomposition length. */
  #define HB_UNICODE_MAX_DECOMPOSITION_LEN (18 + 1)
  229. /* setters */
  230. /**
  231. * hb_unicode_funcs_set_combining_class_func:
  232. * @ufuncs: a Unicode function structure
  233. * @func: (closure user_data) (destroy destroy) (scope notified):
  234. * @user_data:
  235. * @destroy:
  236. *
  237. *
  238. *
  239. * Since: 0.9.2
  240. **/
  241. HB_EXTERN void
  242. hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
  243. hb_unicode_combining_class_func_t func,
  244. void *user_data, hb_destroy_func_t destroy);
  245. /**
  246. * hb_unicode_funcs_set_eastasian_width_func:
  247. * @ufuncs: a Unicode function structure
  248. * @func: (closure user_data) (destroy destroy) (scope notified):
  249. * @user_data:
  250. * @destroy:
  251. *
  252. *
  253. *
  254. * Since: 0.9.2
  255. **/
  256. HB_EXTERN void
  257. hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
  258. hb_unicode_eastasian_width_func_t func,
  259. void *user_data, hb_destroy_func_t destroy);
  260. /**
  261. * hb_unicode_funcs_set_general_category_func:
  262. * @ufuncs: a Unicode function structure
  263. * @func: (closure user_data) (destroy destroy) (scope notified):
  264. * @user_data:
  265. * @destroy:
  266. *
  267. *
  268. *
  269. * Since: 0.9.2
  270. **/
  271. HB_EXTERN void
  272. hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
  273. hb_unicode_general_category_func_t func,
  274. void *user_data, hb_destroy_func_t destroy);
  275. /**
  276. * hb_unicode_funcs_set_mirroring_func:
  277. * @ufuncs: a Unicode function structure
  278. * @func: (closure user_data) (destroy destroy) (scope notified):
  279. * @user_data:
  280. * @destroy:
  281. *
  282. *
  283. *
  284. * Since: 0.9.2
  285. **/
  286. HB_EXTERN void
  287. hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
  288. hb_unicode_mirroring_func_t func,
  289. void *user_data, hb_destroy_func_t destroy);
  290. /**
  291. * hb_unicode_funcs_set_script_func:
  292. * @ufuncs: a Unicode function structure
  293. * @func: (closure user_data) (destroy destroy) (scope notified):
  294. * @user_data:
  295. * @destroy:
  296. *
  297. *
  298. *
  299. * Since: 0.9.2
  300. **/
  301. HB_EXTERN void
  302. hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
  303. hb_unicode_script_func_t func,
  304. void *user_data, hb_destroy_func_t destroy);
  305. /**
  306. * hb_unicode_funcs_set_compose_func:
  307. * @ufuncs: a Unicode function structure
  308. * @func: (closure user_data) (destroy destroy) (scope notified):
  309. * @user_data:
  310. * @destroy:
  311. *
  312. *
  313. *
  314. * Since: 0.9.2
  315. **/
  316. HB_EXTERN void
  317. hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs,
  318. hb_unicode_compose_func_t func,
  319. void *user_data, hb_destroy_func_t destroy);
  320. /**
  321. * hb_unicode_funcs_set_decompose_func:
  322. * @ufuncs: a Unicode function structure
  323. * @func: (closure user_data) (destroy destroy) (scope notified):
  324. * @user_data:
  325. * @destroy:
  326. *
  327. *
  328. *
  329. * Since: 0.9.2
  330. **/
  331. HB_EXTERN void
  332. hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs,
  333. hb_unicode_decompose_func_t func,
  334. void *user_data, hb_destroy_func_t destroy);
  335. /**
  336. * hb_unicode_funcs_set_decompose_compatibility_func:
  337. * @ufuncs: a Unicode function structure
  338. * @func: (closure user_data) (destroy destroy) (scope notified):
  339. * @user_data:
  340. * @destroy:
  341. *
  342. *
  343. *
  344. * Since: 0.9.2
  345. **/
  346. HB_EXTERN void
  347. hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs,
  348. hb_unicode_decompose_compatibility_func_t func,
  349. void *user_data, hb_destroy_func_t destroy);
  350. /* accessors */
  351. /**
  352. * hb_unicode_combining_class:
  353. *
  354. * Since: 0.9.2
  355. **/
  356. HB_EXTERN hb_unicode_combining_class_t
  357. hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs,
  358. hb_codepoint_t unicode);
  359. /**
  360. * hb_unicode_eastasian_width:
  361. *
  362. * Since: 0.9.2
  363. **/
  364. HB_EXTERN unsigned int
  365. hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs,
  366. hb_codepoint_t unicode);
  367. /**
  368. * hb_unicode_general_category:
  369. *
  370. * Since: 0.9.2
  371. **/
  372. HB_EXTERN hb_unicode_general_category_t
  373. hb_unicode_general_category (hb_unicode_funcs_t *ufuncs,
  374. hb_codepoint_t unicode);
  375. /**
  376. * hb_unicode_mirroring:
  377. *
  378. * Since: 0.9.2
  379. **/
  380. HB_EXTERN hb_codepoint_t
  381. hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs,
  382. hb_codepoint_t unicode);
  383. /**
  384. * hb_unicode_script:
  385. *
  386. * Since: 0.9.2
  387. **/
  388. HB_EXTERN hb_script_t
  389. hb_unicode_script (hb_unicode_funcs_t *ufuncs,
  390. hb_codepoint_t unicode);
  391. HB_EXTERN hb_bool_t
  392. hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
  393. hb_codepoint_t a,
  394. hb_codepoint_t b,
  395. hb_codepoint_t *ab);
  396. HB_EXTERN hb_bool_t
  397. hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
  398. hb_codepoint_t ab,
  399. hb_codepoint_t *a,
  400. hb_codepoint_t *b);
  401. HB_EXTERN unsigned int
  402. hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
  403. hb_codepoint_t u,
  404. hb_codepoint_t *decomposed);
  405. HB_END_DECLS
  406. #endif /* HB_UNICODE_H */