unimatch.h 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. /*
  2. * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
  3. **********************************************************************
  4. * Date Name Description
  5. * 07/18/01 aliu Creation.
  6. **********************************************************************
  7. */
  8. #ifndef UNIMATCH_H
  9. #define UNIMATCH_H
  10. #include "unicode/utypes.h"
  11. /**
  12. * \file
  13. * \brief C++ API: Unicode Matcher
  14. */
  15. U_NAMESPACE_BEGIN
  16. class Replaceable;
  17. class UnicodeString;
  18. class UnicodeSet;
  /**
   * Constants returned by <code>UnicodeMatcher::matches()</code>
   * indicating the degree of match.
   *
   * The enumerators carry no explicit initializers, so they take the
   * consecutive values 0, 1 and 2 in declaration order.
   * @stable ICU 2.4
   */
  enum UMatchDegree {
      /**
       * Constant returned by <code>matches()</code> indicating a
       * mismatch between the text and this matcher.  The text contains
       * a character which does not match, or the text does not contain
       * all desired characters for a non-incremental match.
       * @stable ICU 2.4
       */
      U_MISMATCH,

      /**
       * Constant returned by <code>matches()</code> indicating a
       * partial match between the text and this matcher.  This value is
       * only returned for incremental match operations.  All characters
       * of the text match, but more characters are required for a
       * complete match.  Alternatively, for variable-length matchers,
       * all characters of the text match, and if more characters were
       * supplied at limit, they might also match.
       * @stable ICU 2.4
       */
      U_PARTIAL_MATCH,

      /**
       * Constant returned by <code>matches()</code> indicating a
       * complete match between the text and this matcher.  For an
       * incremental variable-length match, this value is returned if
       * the given text matches, and it is known that additional
       * characters would not alter the extent of the match.
       * @stable ICU 2.4
       */
      U_MATCH
  };
  54. /**
  55. * <code>UnicodeMatcher</code> defines a protocol for objects that can
  56. * match a range of characters in a Replaceable string.
  57. * @stable ICU 2.4
  58. */
  59. class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
  60. public:
  61. /**
  62. * Destructor.
  63. * @stable ICU 2.4
  64. */
  65. virtual ~UnicodeMatcher();
  66. /**
  67. * Return a UMatchDegree value indicating the degree of match for
  68. * the given text at the given offset. Zero, one, or more
  69. * characters may be matched.
  70. *
  71. * Matching in the forward direction is indicated by limit >
  72. * offset. Characters from offset forwards to limit-1 will be
  73. * considered for matching.
  74. *
  75. * Matching in the reverse direction is indicated by limit <
  76. * offset. Characters from offset backwards to limit+1 will be
  77. * considered for matching.
  78. *
  79. * If limit == offset then the only match possible is a zero
  80. * character match (which subclasses may implement if desired).
  81. *
  82. * As a side effect, advance the offset parameter to the limit of
  83. * the matched substring. In the forward direction, this will be
  84. * the index of the last matched character plus one. In the
  85. * reverse direction, this will be the index of the last matched
  86. * character minus one.
  87. *
  88. * <p>Note: This method is not const because some classes may
  89. * modify their state as the result of a match.
  90. *
  91. * @param text the text to be matched
  92. * @param offset on input, the index into text at which to begin
  93. * matching. On output, the limit of the matched text. The
  94. * number of matched characters is the output value of offset
  95. * minus the input value. Offset should always point to the
  96. * HIGH SURROGATE (leading code unit) of a pair of surrogates,
  97. * both on entry and upon return.
  98. * @param limit the limit index of text to be matched. Greater
  99. * than offset for a forward direction match, less than offset for
  100. * a backward direction match. The last character to be
  101. * considered for matching will be text.charAt(limit-1) in the
  102. * forward direction or text.charAt(limit+1) in the backward
  103. * direction.
  104. * @param incremental if TRUE, then assume further characters may
  105. * be inserted at limit and check for partial matching. Otherwise
  106. * assume the text as given is complete.
  107. * @return a match degree value indicating a full match, a partial
  108. * match, or a mismatch. If incremental is FALSE then
  109. * U_PARTIAL_MATCH should never be returned.
  110. * @stable ICU 2.4
  111. */
  112. virtual UMatchDegree matches(const Replaceable& text,
  113. int32_t& offset,
  114. int32_t limit,
  115. UBool incremental) = 0;
  116. /**
  117. * Returns a string representation of this matcher. If the result of
  118. * calling this function is passed to the appropriate parser, it
  119. * will produce another matcher that is equal to this one.
  120. * @param result the string to receive the pattern. Previous
  121. * contents will be deleted.
  122. * @param escapeUnprintable if TRUE then convert unprintable
  123. * character to their hex escape representations, \\uxxxx or
  124. * \\Uxxxxxxxx. Unprintable characters are those other than
  125. * U+000A, U+0020..U+007E.
  126. * @stable ICU 2.4
  127. */
  128. virtual UnicodeString& toPattern(UnicodeString& result,
  129. UBool escapeUnprintable = FALSE) const = 0;
  130. /**
  131. * Returns TRUE if this matcher will match a character c, where c
  132. * & 0xFF == v, at offset, in the forward direction (with limit >
  133. * offset). This is used by <tt>RuleBasedTransliterator</tt> for
  134. * indexing.
  135. * @stable ICU 2.4
  136. */
  137. virtual UBool matchesIndexValue(uint8_t v) const = 0;
  138. /**
  139. * Union the set of all characters that may be matched by this object
  140. * into the given set.
  141. * @param toUnionTo the set into which to union the source characters
  142. * @stable ICU 2.4
  143. */
  144. virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
  145. };
  146. U_NAMESPACE_END
  147. #endif