pango-break.h 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. /* Pango
  2. * pango-break.h:
  3. *
  4. * Copyright (C) 1999 Red Hat Software
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Library General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Library General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Library General Public
  17. * License along with this library; if not, write to the
  18. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19. * Boston, MA 02111-1307, USA.
  20. */
  21. #ifndef __PANGO_BREAK_H__
  22. #define __PANGO_BREAK_H__
  23. #include <glib.h>
  24. G_BEGIN_DECLS
  25. #include <pango/pango-item.h>
  26. /* Logical attributes of a character.
  27. */
  28. /**
  29. * PangoLogAttr:
  30. * @is_line_break: if set, can break line in front of character
  31. * @is_mandatory_break: if set, must break line in front of character
  32. * @is_char_break: if set, can break here when doing character wrapping
  33. * @is_white: is whitespace character
  34. * @is_cursor_position: if set, cursor can appear in front of character,
  35. * i.e. this is a grapheme boundary, or the first character
  36. * in the text.
  37. * This flag implements Unicode's
  38. * <ulink url="http://www.unicode.org/reports/tr29/">Grapheme
  39. * Cluster Boundaries</ulink> semantics.
  40. * @is_word_start: is first character in a word
  41. * @is_word_end: is first non-word char after a word
  42. * Note that in degenerate cases, you could have both @is_word_start
  43. * and @is_word_end set for some character.
  44. * @is_sentence_boundary: is a sentence boundary.
  45. * There are two ways to divide sentences. The first assigns all
  46. * inter-sentence whitespace/control/format chars to some sentence,
  47. * so all chars are in some sentence; @is_sentence_boundary denotes
  48. * the boundaries there. The second way doesn't assign
  49. * between-sentence spaces, etc. to any sentence, so
  50. * @is_sentence_start/@is_sentence_end mark the boundaries of those sentences.
  51. * @is_sentence_start: is first character in a sentence
  52. * @is_sentence_end: is first char after a sentence.
  53. * Note that in degenerate cases, you could have both @is_sentence_start
  54. * and @is_sentence_end set for some character. (e.g. no space after a
  55. * period, so the next sentence starts right away)
  56. * @backspace_deletes_character: if set, backspace deletes one character
  57. * rather than the entire grapheme cluster. This
  58. * field is only meaningful on grapheme
  59. * boundaries (where @is_cursor_position is
  60. * set). In some languages, the full grapheme
  61. * (e.g. letter + diacritics) is considered a
  62. * unit, while in others, each decomposed
  63. * character in the grapheme is a unit. In the
  64. * default implementation of pango_break(), this
  65. * bit is set on all grapheme boundaries except
  66. * those following Latin, Cyrillic or Greek base characters.
  67. * @is_expandable_space: is a whitespace character that can possibly be
  68. * expanded for justification purposes. (Since: 1.18)
  69. * @is_word_boundary: is a word boundary.
  70. * More specifically, means that this is not a position in the middle
  71. * of a word. For example, both sides of a punctuation mark are
  72. * considered word boundaries. This flag is particularly useful when
  73. * selecting text word-by-word.
  74. * This flag implements Unicode's
  75. * <ulink url="http://www.unicode.org/reports/tr29/">Word
  76. * Boundaries</ulink> semantics. (Since: 1.22)
  77. *
  78. * The #PangoLogAttr structure stores information
  79. * about the attributes of a single character.
  80. */
  81. struct _PangoLogAttr
  82. {
  83. guint is_line_break : 1; /* Can break line in front of character */
  84. guint is_mandatory_break : 1; /* Must break line in front of character */
  85. guint is_char_break : 1; /* Can break here when doing char wrap */
  86. guint is_white : 1; /* Whitespace character */
  87. /* Cursor can appear in front of character (i.e. this is a grapheme
  88. * boundary, or the first character in the text).
  89. */
  90. guint is_cursor_position : 1;
  91. /* Note that in degenerate cases, you could have both start/end set on
  92. * some text, most likely for sentences (e.g. no space after a period, so
  93. * the next sentence starts right away).
  94. */
  95. guint is_word_start : 1; /* first character in a word */
  96. guint is_word_end : 1; /* is first non-word char after a word */
  97. /* There are two ways to divide sentences. The first assigns all
  98. * intersentence whitespace/control/format chars to some sentence,
  99. * so all chars are in some sentence; is_sentence_boundary denotes
  100. * the boundaries there. The second way doesn't assign
  101. * between-sentence spaces, etc. to any sentence, so
  102. * is_sentence_start/is_sentence_end mark the boundaries of those
  103. * sentences.
  104. */
  105. guint is_sentence_boundary : 1;
  106. guint is_sentence_start : 1; /* first character in a sentence */
  107. guint is_sentence_end : 1; /* first non-sentence char after a sentence */
  108. /* If set, backspace deletes one character rather than
  109. * the entire grapheme cluster.
  110. */
  111. guint backspace_deletes_character : 1;
  112. /* Only few space variants (U+0020 and U+00A0) have variable
  113. * width during justification.
  114. */
  115. guint is_expandable_space : 1;
  116. /* Word boundary as defined by UAX#29 */
  117. guint is_word_boundary : 1; /* is NOT in the middle of a word */
  118. };
  119. /* Determine information about cluster/word/line breaks in a string
  120. * of Unicode text.
  121. */
  122. void pango_break (const gchar *text,
  123. int length,
  124. PangoAnalysis *analysis,
  125. PangoLogAttr *attrs,
  126. int attrs_len);
  127. void pango_find_paragraph_boundary (const gchar *text,
  128. gint length,
  129. gint *paragraph_delimiter_index,
  130. gint *next_paragraph_start);
  131. void pango_get_log_attrs (const char *text,
  132. int length,
  133. int level,
  134. PangoLanguage *language,
  135. PangoLogAttr *log_attrs,
  136. int attrs_len);
  137. #ifdef PANGO_ENABLE_ENGINE
  138. /* This is the default break algorithm, used if no language
  139. * engine overrides it. Normally you should use pango_break()
  140. * instead; this function is mostly useful for chaining up
  141. * from a language engine override.
  142. */
  143. void pango_default_break (const gchar *text,
  144. int length,
  145. PangoAnalysis *analysis,
  146. PangoLogAttr *attrs,
  147. int attrs_len);
  148. #endif /* PANGO_ENABLE_ENGINE */
  149. G_END_DECLS
  150. #endif /* __PANGO_BREAK_H__ */