usetiter.h 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. /*
  2. **********************************************************************
  3. * Copyright (c) 2002-2014, International Business Machines
  4. * Corporation and others. All Rights Reserved.
  5. **********************************************************************
  6. */
  7. #ifndef USETITER_H
  8. #define USETITER_H
  9. #include "unicode/utypes.h"
  10. #include "unicode/uobject.h"
  11. #include "unicode/unistr.h"
  12. /**
  13. * \file
  14. * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
  15. */
  16. U_NAMESPACE_BEGIN
  17. class UnicodeSet;
  18. class UnicodeString;
  19. /**
  20. *
  21. * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
  22. * iterates over either code points or code point ranges. After all
  23. * code points or ranges have been returned, it returns the
  24. * multicharacter strings of the UnicodeSet, if any.
  25. *
  26. * This class is not intended to be subclassed. Consider any fields
  27. * or methods declared as "protected" to be private. The use of
  28. * protected in this class is an artifact of history.
  29. *
  30. * <p>To iterate over code points and strings, use a loop like this:
  31. * <pre>
  32. * UnicodeSetIterator it(set);
  33. * while (it.next()) {
  34. * processItem(it.getString());
  35. * }
  36. * </pre>
  37. * <p>Each item in the set is accessed as a string. Set elements
  38. * consisting of single code points are returned as strings containing
  39. * just the one code point.
  40. *
  41. * <p>To iterate over code point ranges, instead of individual code points,
  42. * use a loop like this:
  43. * <pre>
  44. * UnicodeSetIterator it(set);
  45. * while (it.nextRange()) {
  46. * if (it.isString()) {
  47. * processString(it.getString());
  48. * } else {
  49. * processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
  50. * }
  51. * }
  52. * </pre>
  53. * @author M. Davis
  54. * @stable ICU 2.4
  55. */
  56. class U_COMMON_API UnicodeSetIterator : public UObject {
  57. protected:
  58. /**
  59. * Value of <tt>codepoint</tt> if the iterator points to a string.
  60. * If <tt>codepoint == IS_STRING</tt>, then examine
  61. * <tt>string</tt> for the current iteration result.
  62. * @stable ICU 2.4
  63. */
  64. enum { IS_STRING = -1 };
  65. /**
  66. * Current code point, or the special value <tt>IS_STRING</tt>, if
  67. * the iterator points to a string.
  68. * @stable ICU 2.4
  69. */
  70. UChar32 codepoint;
  71. /**
  72. * When iterating over ranges using <tt>nextRange()</tt>,
  73. * <tt>codepointEnd</tt> contains the inclusive end of the
  74. * iteration range, if <tt>codepoint != IS_STRING</tt>. If
  75. * iterating over code points using <tt>next()</tt>, or if
  76. * <tt>codepoint == IS_STRING</tt>, then the value of
  77. * <tt>codepointEnd</tt> is undefined.
  78. * @stable ICU 2.4
  79. */
  80. UChar32 codepointEnd;
  81. /**
  82. * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
  83. * to the current string. If <tt>codepoint != IS_STRING</tt>, the
  84. * value of <tt>string</tt> is undefined.
  85. * @stable ICU 2.4
  86. */
  87. const UnicodeString* string;
  88. public:
  89. /**
  90. * Create an iterator over the given set. The iterator is valid
  91. * only so long as <tt>set</tt> is valid.
  92. * @param set set to iterate over
  93. * @stable ICU 2.4
  94. */
  95. UnicodeSetIterator(const UnicodeSet& set);
  96. /**
  97. * Create an iterator over nothing. <tt>next()</tt> and
  98. * <tt>nextRange()</tt> return false. This is a convenience
  99. * constructor allowing the target to be set later.
  100. * @stable ICU 2.4
  101. */
  102. UnicodeSetIterator();
  103. /**
  104. * Destructor.
  105. * @stable ICU 2.4
  106. */
  107. virtual ~UnicodeSetIterator();
  108. /**
  109. * Returns true if the current element is a string. If so, the
  110. * caller can retrieve it with <tt>getString()</tt>. If this
  111. * method returns false, the current element is a code point or
  112. * code point range, depending on whether <tt>next()</tt> or
  113. * <tt>nextRange()</tt> was called.
  114. * Elements of types string and codepoint can both be retrieved
  115. * with the function <tt>getString()</tt>.
  116. * Elements of type codepoint can also be retrieved with
  117. * <tt>getCodepoint()</tt>.
  118. * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
  119. * of the range, and <tt>getCodepointEnd()</tt> returns the end
  120. * of the range.
  121. * @stable ICU 2.4
  122. */
  123. inline UBool isString() const;
  124. /**
  125. * Returns the current code point, if <tt>isString()</tt> returned
  126. * false. Otherwise returns an undefined result.
  127. * @stable ICU 2.4
  128. */
  129. inline UChar32 getCodepoint() const;
  130. /**
  131. * Returns the end of the current code point range, if
  132. * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
  133. * called. Otherwise returns an undefined result.
  134. * @stable ICU 2.4
  135. */
  136. inline UChar32 getCodepointEnd() const;
  137. /**
  138. * Returns the current string, if <tt>isString()</tt> returned
  139. * true. If the current iteration item is a code point, a UnicodeString
  140. * containing that single code point is returned.
  141. *
  142. * Ownership of the returned string remains with the iterator.
  143. * The string is guaranteed to remain valid only until the iterator is
  144. * advanced to the next item, or until the iterator is deleted.
  145. *
  146. * @stable ICU 2.4
  147. */
  148. const UnicodeString& getString();
  149. /**
  150. * Advances the iteration position to the next element in the set,
  151. * which can be either a single code point or a string.
  152. * If there are no more elements in the set, return false.
  153. *
  154. * <p>
  155. * If <tt>isString() == TRUE</tt>, the value is a
  156. * string, otherwise the value is a
  157. * single code point. Elements of either type can be retrieved
  158. * with the function <tt>getString()</tt>, while elements
  159. * consisting of a single code point can be retrieved with
  160. * <tt>getCodepoint()</tt>.
  161. *
  162. * <p>The order of iteration is all code points in sorted order,
  163. * followed by all strings in sorted order. Do not mix
  164. * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
  165. * calling <tt>reset()</tt> between them. The results of doing so
  166. * are undefined.
  167. *
  168. * @return true if there was another element in the set.
  169. * @stable ICU 2.4
  170. */
  171. UBool next();
  172. /**
  173. * Returns the next element in the set, either a code point range
  174. * or a string. If there are no more elements in the set, return
  175. * false. If <tt>isString() == TRUE</tt>, the value is a
  176. * string and can be accessed with <tt>getString()</tt>. Otherwise the value is a
  177. * range of one or more code points from <tt>getCodepoint()</tt> to
  178. * <tt>getCodepointEnd()</tt> inclusive.
  179. *
  180. * <p>The order of iteration is all code point ranges in sorted
  181. * order, followed by all strings in sorted order. Ranges are
  182. * disjoint and non-contiguous. The value returned from <tt>getString()</tt>
  183. * is undefined unless <tt>isString() == TRUE</tt>. Do not mix calls to
  184. * <tt>next()</tt> and <tt>nextRange()</tt> without calling
  185. * <tt>reset()</tt> between them. The results of doing so are
  186. * undefined.
  187. *
  188. * @return true if there was another element in the set.
  189. * @stable ICU 2.4
  190. */
  191. UBool nextRange();
  192. /**
  193. * Sets this iterator to visit the elements of the given set and
  194. * resets it to the start of that set. The iterator is valid only
  195. * so long as <tt>set</tt> is valid.
  196. * @param set the set to iterate over.
  197. * @stable ICU 2.4
  198. */
  199. void reset(const UnicodeSet& set);
  200. /**
  201. * Resets this iterator to the start of the set.
  202. * @stable ICU 2.4
  203. */
  204. void reset();
  205. /**
  206. * ICU "poor man's RTTI", returns a UClassID for this class.
  207. *
  208. * @stable ICU 2.4
  209. */
  210. static UClassID U_EXPORT2 getStaticClassID();
  211. /**
  212. * ICU "poor man's RTTI", returns a UClassID for the actual class.
  213. *
  214. * @stable ICU 2.4
  215. */
  216. virtual UClassID getDynamicClassID() const;
  217. // ======================= PRIVATES ===========================
  218. protected:
  219. // endElement and nextElement are really UChar32's, but we keep
  220. // them as signed int32_t's so we can do comparisons with
  221. // endElement set to -1. Leave them as int32_t's.
  222. /** The set
  223. * @stable ICU 2.4
  224. */
  225. const UnicodeSet* set;
  226. /** End range
  227. * @stable ICU 2.4
  228. */
  229. int32_t endRange;
  230. /** Range
  231. * @stable ICU 2.4
  232. */
  233. int32_t range;
  234. /** End element
  235. * @stable ICU 2.4
  236. */
  237. int32_t endElement;
  238. /** Next element
  239. * @stable ICU 2.4
  240. */
  241. int32_t nextElement;
  242. //UBool abbreviated;
  243. /** Next string
  244. * @stable ICU 2.4
  245. */
  246. int32_t nextString;
  247. /** String count
  248. * @stable ICU 2.4
  249. */
  250. int32_t stringCount;
  251. /**
  252. * Points to the string to use when the caller asks for a
  253. * string and the current iteration item is a code point, not a string.
  254. * @internal
  255. */
  256. UnicodeString *cpString;
  257. /** Copy constructor. Disallowed.
  258. * @stable ICU 2.4
  259. */
  260. UnicodeSetIterator(const UnicodeSetIterator&); // disallow
  261. /** Assignment operator. Disallowed.
  262. * @stable ICU 2.4
  263. */
  264. UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
  265. /** Load range
  266. * @stable ICU 2.4
  267. */
  268. virtual void loadRange(int32_t range);
  269. };
  270. inline UBool UnicodeSetIterator::isString() const {
  271. return codepoint == (UChar32)IS_STRING;
  272. }
  273. inline UChar32 UnicodeSetIterator::getCodepoint() const {
  274. return codepoint;
  275. }
  276. inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
  277. return codepointEnd;
  278. }
  279. U_NAMESPACE_END
  280. #endif