unifilt.h 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. /*
  2. **********************************************************************
  3. * Copyright (C) 1999-2010, International Business Machines Corporation and others.
  4. * All Rights Reserved.
  5. **********************************************************************
  6. * Date Name Description
  7. * 11/17/99 aliu Creation.
  8. **********************************************************************
  9. */
  10. #ifndef UNIFILT_H
  11. #define UNIFILT_H
  12. #include "unicode/unifunct.h"
  13. #include "unicode/unimatch.h"
  14. /**
  15. * \file
  16. * \brief C++ API: Unicode Filter
  17. */
  18. U_NAMESPACE_BEGIN
/**
 * U_ETHER is the character value used to stand in for text positions
 * that lie outside a range. For example, the transliterator uses it to
 * represent characters outside the range contextStart..contextLimit-1,
 * which lets rules and UnicodeSets explicitly match text that falls
 * outside a defined range.
 *
 * The value is 0xFFFF converted to UChar.
 * @stable ICU 3.0
 */
#define U_ETHER ((UChar)0xFFFF)
  28. /**
  29. *
  30. * <code>UnicodeFilter</code> defines a protocol for selecting a
  31. * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
  32. * Currently, filters are used in conjunction with classes like {@link
  33. * Transliterator} to only process selected characters through a
  34. * transformation.
  35. *
  36. * <p>Note: UnicodeFilter currently stubs out two pure virtual methods
  37. * of its base class, UnicodeMatcher. These methods are toPattern()
  38. * and matchesIndexValue(). This is done so that filter classes that
  39. * are not actually used as matchers -- specifically, those in the
  40. * UnicodeFilterLogic component, and those in tests -- can continue to
  41. * work without defining these methods. As long as a filter is not
  42. * used in an RBT during real transliteration, these methods will not
  43. * be called. However, this breaks the UnicodeMatcher base class
  44. * protocol, and it is not a correct solution.
  45. *
  46. * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
  47. * hierarchy and either redesign it, or simply remove the stubs in
  48. * UnicodeFilter and force subclasses to implement the full
  49. * UnicodeMatcher protocol.
  50. *
  51. * @see UnicodeFilterLogic
  52. * @stable ICU 2.0
  53. */
  54. class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
  55. public:
  56. /**
  57. * Destructor
  58. * @stable ICU 2.0
  59. */
  60. virtual ~UnicodeFilter();
  61. /**
  62. * Returns <tt>true</tt> for characters that are in the selected
  63. * subset. In other words, if a character is <b>to be
  64. * filtered</b>, then <tt>contains()</tt> returns
  65. * <b><tt>false</tt></b>.
  66. * @stable ICU 2.0
  67. */
  68. virtual UBool contains(UChar32 c) const = 0;
  69. /**
  70. * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
  71. * and return the pointer.
  72. * @stable ICU 2.4
  73. */
  74. virtual UnicodeMatcher* toMatcher() const;
  75. /**
  76. * Implement UnicodeMatcher API.
  77. * @stable ICU 2.4
  78. */
  79. virtual UMatchDegree matches(const Replaceable& text,
  80. int32_t& offset,
  81. int32_t limit,
  82. UBool incremental);
  83. /**
  84. * UnicodeFunctor API. Nothing to do.
  85. * @stable ICU 2.4
  86. */
  87. virtual void setData(const TransliterationRuleData*);
  88. /**
  89. * ICU "poor man's RTTI", returns a UClassID for this class.
  90. *
  91. * @stable ICU 2.2
  92. */
  93. static UClassID U_EXPORT2 getStaticClassID();
  94. protected:
  95. /*
  96. * Since this class has pure virtual functions,
  97. * a constructor can't be used.
  98. * @stable ICU 2.0
  99. */
  100. /* UnicodeFilter();*/
  101. };
  102. /*inline UnicodeFilter::UnicodeFilter() {}*/
  103. U_NAMESPACE_END
  104. #endif