translit.h 54 KB


  1. /*
  2. **********************************************************************
  3. * Copyright (C) 1999-2014, International Business Machines
  4. * Corporation and others. All Rights Reserved.
  5. **********************************************************************
  6. * Date Name Description
  7. * 11/17/99 aliu Creation.
  8. **********************************************************************
  9. */
  10. #ifndef TRANSLIT_H
  11. #define TRANSLIT_H
  12. #include "unicode/utypes.h"
  13. /**
  14. * \file
  15. * \brief C++ API: Transforms text from one format to another.
  16. */
  17. #if !UCONFIG_NO_TRANSLITERATION
  18. #include "unicode/uobject.h"
  19. #include "unicode/unistr.h"
  20. #include "unicode/parseerr.h"
  21. #include "unicode/utrans.h" // UTransPosition, UTransDirection
  22. #include "unicode/strenum.h"
  23. U_NAMESPACE_BEGIN
  24. class UnicodeFilter;
  25. class UnicodeSet;
  26. class CompoundTransliterator;
  27. class TransliteratorParser;
  28. class NormalizationTransliterator;
  29. class TransliteratorIDParser;
  30. /**
  31. *
  32. * <code>Transliterator</code> is an abstract class that
  33. * transliterates text from one format to another. The most common
  34. * kind of transliterator is a script, or alphabet, transliterator.
  35. * For example, a Russian to Latin transliterator changes Russian text
  36. * written in Cyrillic characters to phonetically equivalent Latin
  37. * characters. It does not <em>translate</em> Russian to English!
  38. * Transliteration, unlike translation, operates on characters, without
  39. * reference to the meanings of words and sentences.
  40. *
  41. * <p>Although script conversion is its most common use, a
  42. * transliterator can actually perform a more general class of tasks.
  43. * In fact, <code>Transliterator</code> defines a very general API
  44. * which specifies only that a segment of the input text is replaced
  45. * by new text. The particulars of this conversion are determined
  46. * entirely by subclasses of <code>Transliterator</code>.
  47. *
  48. * <p><b>Transliterators are stateless</b>
  49. *
  50. * <p><code>Transliterator</code> objects are <em>stateless</em>; they
  51. * retain no information between calls to
  52. * <code>transliterate()</code>. (However, this does <em>not</em>
  53. * mean that threads may share transliterators without synchronizing
  54. * them. Transliterators are not immutable, so they must be
  55. * synchronized when shared between threads.) This might seem to
  56. * limit the complexity of the transliteration operation. In
  57. * practice, subclasses perform complex transliterations by delaying
  58. * the replacement of text until it is known that no other
  59. * replacements are possible. In other words, although the
  60. * <code>Transliterator</code> objects are stateless, the source text
  61. * itself embodies all the needed information, and delayed operation
  62. * allows arbitrary complexity.
  63. *
  64. * <p><b>Batch transliteration</b>
  65. *
  66. * <p>The simplest way to perform transliteration is all at once, on a
  67. * string of existing text. This is referred to as <em>batch</em>
  68. * transliteration. For example, given a string <code>input</code>
  69. * and a transliterator <code>t</code>, the call
  70. *
  71. * \htmlonly<blockquote>\endhtmlonly<code>String result = t.transliterate(input);
  72. * </code>\htmlonly</blockquote>\endhtmlonly
  73. *
  74. * will transliterate it and return the result. Other methods allow
  75. * the client to specify a substring to be transliterated and to use
  76. * {@link Replaceable } objects instead of strings, in order to
  77. * preserve out-of-band information (such as text styles).
  78. *
  79. * <p><b>Keyboard transliteration</b>
  80. *
  81. * <p>Somewhat more involved is <em>keyboard</em>, or incremental
  82. * transliteration. This is the transliteration of text that is
  83. * arriving from some source (typically the user's keyboard) one
  84. * character at a time, or in some other piecemeal fashion.
  85. *
  86. * <p>In keyboard transliteration, a <code>Replaceable</code> buffer
  87. * stores the text. As text is inserted, as much as possible is
  88. * transliterated on the fly. This means a GUI that displays the
  89. * contents of the buffer may show text being modified as each new
  90. * character arrives.
  91. *
  92. * <p>Consider the simple <code>RuleBasedTransliterator</code>:
  93. *
  94. * \htmlonly<blockquote>\endhtmlonly<code>
  95. * th&gt;{theta}<br>
  96. * t&gt;{tau}
  97. * </code>\htmlonly</blockquote>\endhtmlonly
  98. *
  99. * When the user types 't', nothing will happen, since the
  100. * transliterator is waiting to see if the next character is 'h'. To
  101. * remedy this, we introduce the notion of a cursor, marked by a '|'
  102. * in the output string:
  103. *
  104. * \htmlonly<blockquote>\endhtmlonly<code>
  105. * t&gt;|{tau}<br>
  106. * {tau}h&gt;{theta}
  107. * </code>\htmlonly</blockquote>\endhtmlonly
  108. *
  109. * Now when the user types 't', tau appears, and if the next character
  110. * is 'h', the tau changes to a theta. This is accomplished by
  111. * maintaining a cursor position (independent of the insertion point,
  112. * and invisible in the GUI) across calls to
  113. * <code>transliterate()</code>. Typically, the cursor will
  114. * be coincident with the insertion point, but in a case like the one
  115. * above, it will precede the insertion point.
  116. *
  117. * <p>Keyboard transliteration methods maintain a set of three indices
  118. * that are updated with each call to
  119. * <code>transliterate()</code>, including the cursor, start,
  120. * and limit. Since these indices are changed by the method, they are
  121. * passed in an <code>int[]</code> array. The <code>START</code> index
  122. * marks the beginning of the substring that the transliterator will
  123. * look at. It is advanced as text becomes committed (but it is not
  124. * the committed index; that's the <code>CURSOR</code>). The
  125. * <code>CURSOR</code> index, described above, marks the point at
  126. * which the transliterator last stopped, either because it reached
  127. * the end, or because it required more characters to disambiguate
  128. * between possible inputs. The <code>CURSOR</code> can also be
  129. * explicitly set by rules in a <code>RuleBasedTransliterator</code>.
  130. * Any characters before the <code>CURSOR</code> index are frozen;
  131. * future keyboard transliteration calls within this input sequence
  132. * will not change them. New text is inserted at the
  133. * <code>LIMIT</code> index, which marks the end of the substring that
  134. * the transliterator looks at.
  135. *
  136. * <p>Because keyboard transliteration assumes that more characters
  137. * are to arrive, it is conservative in its operation. It only
  138. * transliterates when it can do so unambiguously. Otherwise it waits
  139. * for more characters to arrive. When the client code knows that no
  140. * more characters are forthcoming, perhaps because the user has
  141. * performed some input termination operation, then it should call
  142. * <code>finishTransliteration()</code> to complete any
  143. * pending transliterations.
  144. *
  145. * <p><b>Inverses</b>
  146. *
  147. * <p>Pairs of transliterators may be inverses of one another. For
  148. * example, if transliterator <b>A</b> transliterates characters by
  149. * incrementing their Unicode value (so "abc" -> "def"), and
  150. * transliterator <b>B</b> decrements character values, then <b>A</b>
  151. * is an inverse of <b>B</b> and vice versa. If we compose <b>A</b>
  152. * with <b>B</b> in a compound transliterator, the result is the
  153. * identity transliterator, that is, a transliterator that does not
  154. * change its input text.
  155. *
  156. * The <code>Transliterator</code> method <code>getInverse()</code>
  157. * returns a transliterator's inverse, if one exists, or
  158. * <code>null</code> otherwise. However, the result of
  159. * <code>getInverse()</code> usually will <em>not</em> be a true
  160. * mathematical inverse. This is because true inverse transliterators
  161. * are difficult to formulate. For example, consider two
  162. * transliterators: <b>AB</b>, which transliterates the character 'A'
  163. * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'. It might
  164. * seem that these are exact inverses, since
  165. *
  166. * \htmlonly<blockquote>\endhtmlonly"A" x <b>AB</b> -> "B"<br>
  167. * "B" x <b>BA</b> -> "A"\htmlonly</blockquote>\endhtmlonly
  168. *
  169. * where 'x' represents transliteration. However,
  170. *
  171. * \htmlonly<blockquote>\endhtmlonly"ABCD" x <b>AB</b> -> "BBCD"<br>
  172. * "BBCD" x <b>BA</b> -> "AACD"\htmlonly</blockquote>\endhtmlonly
  173. *
  174. * so <b>AB</b> composed with <b>BA</b> is not the
  175. * identity. Nonetheless, <b>BA</b> may be usefully considered to be
  176. * <b>AB</b>'s inverse, and it is on this basis that
  177. * <b>AB</b><code>.getInverse()</code> could legitimately return
  178. * <b>BA</b>.
  179. *
  180. * <p><b>IDs and display names</b>
  181. *
  182. * <p>A transliterator is designated by a short identifier string or
  183. * <em>ID</em>. IDs follow the format <em>source-destination</em>,
  184. * where <em>source</em> describes the entity being replaced, and
  185. * <em>destination</em> describes the entity replacing
  186. * <em>source</em>. The entities may be the names of scripts,
  187. * particular sequences of characters, or whatever else it is that the
  188. * transliterator converts to or from. For example, a transliterator
  189. * from Russian to Latin might be named "Russian-Latin". A
  190. * transliterator from keyboard escape sequences to Latin-1 characters
  191. * might be named "KeyboardEscape-Latin1". By convention, system
  192. * entity names are in English, with the initial letters of words
  193. * capitalized; user entity names may follow any format so long as
  194. * they do not contain dashes.
  195. *
  196. * <p>In addition to programmatic IDs, transliterator objects have
  197. * display names for presentation in user interfaces, returned by
  198. * {@link #getDisplayName }.
  199. *
  200. * <p><b>Factory methods and registration</b>
  201. *
  202. * <p>In general, client code should use the factory method
  203. * {@link #createInstance } to obtain an instance of a
  204. * transliterator given its ID. Valid IDs may be enumerated using
  205. * <code>getAvailableIDs()</code>. Since transliterators are mutable,
  206. * multiple calls to {@link #createInstance } with the same ID will
  207. * return distinct objects.
  208. *
  209. * <p>In addition to the system transliterators registered at startup,
  210. * user transliterators may be registered by calling
  211. * <code>registerInstance()</code> at run time. A registered instance
  212. * acts as a template; future calls to {@link #createInstance } with the ID
  213. * of the registered object return clones of that object. Thus any
  214. * object passed to <tt>registerInstance()</tt> must implement
  215. * <tt>clone()</tt> properly. To register a transliterator subclass
  216. * without instantiating it (until it is needed), users may call
  217. * {@link #registerFactory }. In this case, the objects are
  218. * instantiated by invoking the zero-argument public constructor of
  219. * the class.
  220. *
  221. * <p><b>Subclassing</b>
  222. *
  223. * Subclasses must implement the abstract method
  224. * <code>handleTransliterate()</code>. <p>Subclasses should override
  225. * the <code>transliterate()</code> method taking a
  226. * <code>Replaceable</code> and the <code>transliterate()</code>
  227. * method taking a <code>String</code> and <code>StringBuffer</code>
  228. * if the performance of these methods can be improved over the
  229. * performance obtained by the default implementations in this class.
  230. *
  231. * @author Alan Liu
  232. * @stable ICU 2.0
  233. */
  234. class U_I18N_API Transliterator : public UObject {
  235. private:
  236. /**
  237. * Programmatic name, e.g., "Latin-Arabic".
  238. */
  239. UnicodeString ID;
  240. /**
  241. * This transliterator's filter. Any character for which
  242. * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
  243. * altered by this transliterator. If <tt>filter</tt> is
  244. * <tt>null</tt> then no filtering is applied.
  245. */
  246. UnicodeFilter* filter;
  247. int32_t maximumContextLength;
  248. public:
  249. /**
  250. * A context value for a factory function, passed by value. It holds
  251. * either a 32-bit integer or a native pointer, which share storage.
  252. * @stable ICU 2.4
  253. */
  254. union Token {
  255. /**
  256. * This token, interpreted as a 32-bit integer.
  257. * @stable ICU 2.4
  258. */
  259. int32_t integer;
  260. /**
  261. * This token, interpreted as a native pointer.
  262. * @stable ICU 2.4
  263. */
  264. void* pointer;
  265. };
  266. #ifndef U_HIDE_INTERNAL_API
  267. /**
  268. * Return a token containing an integer.
  269. * @return a token containing an integer.
  270. * @internal
  271. */
  272. inline static Token integerToken(int32_t);
  273. /**
  274. * Return a token containing a pointer.
  275. * @return a token containing a pointer.
  276. * @internal
  277. */
  278. inline static Token pointerToken(void*);
  279. #endif /* U_HIDE_INTERNAL_API */
  280. /**
  281. * A function that creates and returns a Transliterator. When
  282. * invoked, it will be passed the ID string that is being
  283. * instantiated, together with the context pointer that was passed
  284. * in when the factory function was first registered. Many
  285. * factory functions will ignore both parameters, however,
  286. * functions that are registered to more than one ID may use the
  287. * ID or the context parameter to parameterize the transliterator
  288. * they create.
  289. * @param ID the string identifier for this transliterator
  290. * @param context a context pointer that will be stored and
  291. * later passed to the factory function when an ID matching
  292. * the registration ID is being instantiated with this factory.
  293. * @stable ICU 2.4
  294. */
  295. typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);
  296. protected:
  297. /**
  298. * Default constructor.
  299. * @param ID the string identifier for this transliterator
  300. * @param adoptedFilter the filter. Any character for which
  301. * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
  302. * altered by this transliterator. If <tt>filter</tt> is
  303. * <tt>null</tt> then no filtering is applied.
  304. * @stable ICU 2.4
  305. */
  306. Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
  307. /**
  308. * Copy constructor.
  309. * @stable ICU 2.4
  310. */
  311. Transliterator(const Transliterator&);
  312. /**
  313. * Assignment operator.
  314. * @stable ICU 2.4
  315. */
  316. Transliterator& operator=(const Transliterator&);
  317. /**
  318. * Create a transliterator from a basic ID. This is an ID
  319. * containing only the forward direction source, target, and
  320. * variant.
  321. * @param id a basic ID of the form S-T or S-T/V.
  322. * @param canon canonical ID to assign to the object, or
  323. * NULL to leave the ID unchanged
  324. * @return a newly created Transliterator or null if the ID is
  325. * invalid.
  326. * @stable ICU 2.4
  327. */
  328. static Transliterator* createBasicInstance(const UnicodeString& id,
  329. const UnicodeString* canon);
  330. friend class TransliteratorParser; // for parseID()
  331. friend class TransliteratorIDParser; // for createBasicInstance()
  332. friend class TransliteratorAlias; // for setID()
  333. public:
  334. /**
  335. * Destructor.
  336. * @stable ICU 2.0
  337. */
  338. virtual ~Transliterator();
  339. /**
  340. * Implements Cloneable.
  341. * All subclasses are encouraged to implement this method if it is
  342. * possible and reasonable to do so. Subclasses that are to be
  343. * registered with the system using <tt>registerInstance()</tt>
  344. * are required to implement this method. If a subclass does not
  345. * implement clone() properly and is registered with the system
  346. * using registerInstance(), then the default clone() implementation
  347. * will return null, and calls to createInstance() will fail.
  348. *
  349. * @return a copy of the object.
  350. * @see #registerInstance
  351. * @stable ICU 2.0
  352. */
  353. virtual Transliterator* clone() const;
  354. /**
  355. * Transliterates a segment of a string, with optional filtering.
  356. *
  357. * @param text the string to be transliterated
  358. * @param start the beginning index, inclusive; <code>0 <= start
  359. * <= limit</code>.
  360. * @param limit the ending index, exclusive; <code>start <= limit
  361. * <= text.length()</code>.
  362. * @return The new limit index. The text previously occupying <code>[start,
  363. * limit)</code> has been transliterated, possibly to a string of a different
  364. * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where
  365. * <em>new-limit</em> is the return value. If the input offsets are out of bounds,
  366. * the returned value is -1 and the input string remains unchanged.
  367. * @stable ICU 2.0
  368. */
  369. virtual int32_t transliterate(Replaceable& text,
  370. int32_t start, int32_t limit) const;
  371. /**
  372. * Transliterates an entire string in place. Convenience method.
  373. * @param text the string to be transliterated
  374. * @stable ICU 2.0
  375. */
  376. virtual void transliterate(Replaceable& text) const;
  377. /**
  378. * Transliterates the portion of the text buffer that can be
  379. * transliterated unambiguously after new text has been inserted,
  380. * typically as a result of a keyboard event. The new text in
  381. * <code>insertion</code> will be inserted into <code>text</code>
  382. * at <code>index.limit</code>, advancing
  383. * <code>index.limit</code> by <code>insertion.length()</code>.
  384. * Then the transliterator will try to transliterate characters of
  385. * <code>text</code> between <code>index.cursor</code> and
  386. * <code>index.limit</code>. Characters before
  387. * <code>index.cursor</code> will not be changed.
  388. *
  389. * <p>Upon return, values in <code>index</code> will be updated.
  390. * <code>index.start</code> will be advanced to the first
  391. * character that future calls to this method will read.
  392. * <code>index.cursor</code> and <code>index.limit</code> will
  393. * be adjusted to delimit the range of text that future calls to
  394. * this method may change.
  395. *
  396. * <p>Typical usage of this method begins with an initial call
  397. * with <code>index.start</code> and <code>index.limit</code>
  398. * set to indicate the portion of <code>text</code> to be
  399. * transliterated, and <code>index.cursor == index.start</code>.
  400. * Thereafter, <code>index</code> can be used without
  401. * modification in future calls, provided that all changes to
  402. * <code>text</code> are made via this method.
  403. *
  404. * <p>This method assumes that future calls may be made that will
  405. * insert new text into the buffer. As a result, it only performs
  406. * unambiguous transliterations. After the last call to this
  407. * method, there may be untransliterated text that is waiting for
  408. * more input to resolve an ambiguity. In order to perform these
  409. * pending transliterations, clients should call {@link
  410. * #finishTransliteration } after the last call to this
  411. * method has been made.
  412. *
  413. * @param text the buffer holding transliterated and untransliterated text
  414. * @param index an array of three integers.
  415. *
  416. * <ul><li><code>index.start</code>: the beginning index,
  417. * inclusive; <code>0 <= index.start <= index.limit</code>.
  418. *
  419. * <li><code>index.limit</code>: the ending index, exclusive;
  420. * <code>index.start <= index.limit <= text.length()</code>.
  421. * <code>insertion</code> is inserted at
  422. * <code>index.limit</code>.
  423. *
  424. * <li><code>index.cursor</code>: the next character to be
  425. * considered for transliteration; <code>index.start <=
  426. * index.cursor <= index.limit</code>. Characters before
  427. * <code>index.cursor</code> will not be changed by future calls
  428. * to this method.</ul>
  429. *
  430. * @param insertion text to be inserted and possibly
  431. * transliterated into the translation buffer at
  432. * <code>index.limit</code>. If <code>null</code> then no text
  433. * is inserted.
  434. * @param status Output param to be filled in with a success or an error.
  435. * @see #handleTransliterate
  436. * @exception IllegalArgumentException if <code>index</code>
  437. * is invalid
  438. * @see UTransPosition
  439. * @stable ICU 2.0
  440. */
  441. virtual void transliterate(Replaceable& text, UTransPosition& index,
  442. const UnicodeString& insertion,
  443. UErrorCode& status) const;
  444. /**
  445. * Transliterates the portion of the text buffer that can be
  446. * transliterated unambiguously after a new character has been
  447. * inserted, typically as a result of a keyboard event. This is a
  448. * convenience method.
  449. * @param text the buffer holding transliterated and
  450. * untransliterated text
  451. * @param index an array of three integers.
  452. * @param insertion text to be inserted and possibly
  453. * transliterated into the translation buffer at
  454. * <code>index.limit</code>.
  455. * @param status Output param to be filled in with a success or an error.
  456. * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const
  457. * @stable ICU 2.0
  458. */
  459. virtual void transliterate(Replaceable& text, UTransPosition& index,
  460. UChar32 insertion,
  461. UErrorCode& status) const;
  462. /**
  463. * Transliterates the portion of the text buffer that can be
  464. * transliterated unambiguously. This is a convenience method; see
  465. * {@link
  466. * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }
  467. * for details.
  468. * @param text the buffer holding transliterated and
  469. * untransliterated text
  470. * @param index an array of three integers. See {@link #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.
  471. * @param status Output param to be filled in with a success or an error.
  472. * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const
  473. * @stable ICU 2.0
  474. */
  475. virtual void transliterate(Replaceable& text, UTransPosition& index,
  476. UErrorCode& status) const;
  477. /**
  478. * Finishes any pending transliterations that were waiting for
  479. * more characters. Clients should call this method as the last
  480. * call after a sequence of one or more calls to
  481. * <code>transliterate()</code>.
  482. * @param text the buffer holding transliterated and
  483. * untransliterated text.
  484. * @param index the array of indices previously passed to {@link
  485. * #transliterate }
  486. * @stable ICU 2.0
  487. */
  488. virtual void finishTransliteration(Replaceable& text,
  489. UTransPosition& index) const;
  490. private:
  491. /**
  492. * This internal method does incremental transliteration. If the
  493. * 'insertion' is non-null then we append it to 'text' before
  494. * proceeding. This method calls through to the pure virtual
  495. * framework method handleTransliterate() to do the actual
  496. * work.
  497. * @param text the buffer holding transliterated and
  498. * untransliterated text
  499. * @param index an array of three integers. See {@link
  500. * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.
  501. * @param insertion text to be inserted and possibly
  502. * transliterated into the translation buffer at
  503. * <code>index.limit</code>.
  504. * @param status Output param to be filled in with a success or an error.
  505. */
  506. void _transliterate(Replaceable& text,
  507. UTransPosition& index,
  508. const UnicodeString* insertion,
  509. UErrorCode &status) const;
  510. protected:
  511. /**
  512. * Abstract method that concrete subclasses define to implement
  513. * their transliteration algorithm. This method handles both
  514. * incremental and non-incremental transliteration. Let
  515. * <code>originalStart</code> refer to the value of
  516. * <code>pos.start</code> upon entry.
  517. *
  518. * <ul>
  519. * <li>If <code>incremental</code> is false, then this method
  520. * should transliterate all characters between
  521. * <code>pos.start</code> and <code>pos.limit</code>. Upon return
  522. * <code>pos.start</code> must == <code> pos.limit</code>.</li>
  523. *
  524. * <li>If <code>incremental</code> is true, then this method
  525. * should transliterate all characters between
  526. * <code>pos.start</code> and <code>pos.limit</code> that can be
  527. * unambiguously transliterated, regardless of future insertions
  528. * of text at <code>pos.limit</code>. Upon return,
  529. * <code>pos.start</code> should be in the range
  530. * [<code>originalStart</code>, <code>pos.limit</code>).
  531. * <code>pos.start</code> should be positioned such that
  532. * characters [<code>originalStart</code>, <code>
  533. * pos.start</code>) will not be changed in the future by this
  534. * transliterator and characters [<code>pos.start</code>,
  535. * <code>pos.limit</code>) are unchanged.</li>
  536. * </ul>
  537. *
  538. * <p>Implementations of this method should also obey the
  539. * following invariants:</p>
  540. *
  541. * <ul>
  542. * <li> <code>pos.limit</code> and <code>pos.contextLimit</code>
  543. * should be updated to reflect changes in length of the text
  544. * between <code>pos.start</code> and <code>pos.limit</code>. The
  545. * difference <code> pos.contextLimit - pos.limit</code> should
  546. * not change.</li>
  547. *
  548. * <li><code>pos.contextStart</code> should not change.</li>
  549. *
  550. * <li>Upon return, neither <code>pos.start</code> nor
  551. * <code>pos.limit</code> should be less than
  552. * <code>originalStart</code>.</li>
  553. *
  554. * <li>Text before <code>originalStart</code> and text after
  555. * <code>pos.limit</code> should not change.</li>
  556. *
  557. * <li>Text before <code>pos.contextStart</code> and text after
  558. * <code> pos.contextLimit</code> should be ignored.</li>
  559. * </ul>
  560. *
  561. * <p>Subclasses may safely assume that all characters in
  562. * [<code>pos.start</code>, <code>pos.limit</code>) are filtered.
  563. * In other words, the filter has already been applied by the time
  564. * this method is called. See
  565. * <code>filteredTransliterate()</code>.
  566. *
  567. * <p>This method is <b>not</b> for public consumption. Calling
  568. * this method directly will transliterate
  569. * [<code>pos.start</code>, <code>pos.limit</code>) without
  570. * applying the filter. End user code should call <code>
  571. * transliterate()</code> instead of this method. Subclass code
  572. * and wrapping transliterators should call
  573. * <code>filteredTransliterate()</code> instead of this method.<p>
  574. *
  575. * @param text the buffer holding transliterated and
  576. * untransliterated text
  577. *
  578. * @param pos the indices indicating the start, limit, context
  579. * start, and context limit of the text.
  580. *
  581. * @param incremental if true, assume more text may be inserted at
  582. * <code>pos.limit</code> and act accordingly. Otherwise,
  583. * transliterate all text between <code>pos.start</code> and
  584. * <code>pos.limit</code> and move <code>pos.start</code> up to
  585. * <code>pos.limit</code>.
  586. *
  587. * @see #transliterate
  588. * @stable ICU 2.4
  589. */
  590. virtual void handleTransliterate(Replaceable& text,
  591. UTransPosition& pos,
  592. UBool incremental) const = 0;
  593. public:
  594. /**
  595. * Transliterate a substring of text, as specified by index, taking filters
  596. * into account. This method is for subclasses that need to delegate to
  597. * another transliterator, such as CompoundTransliterator.
  598. * @param text the text to be transliterated
  599. * @param index the position indices
  600. * @param incremental if TRUE, then assume more characters may be inserted
  601. * at index.limit, and postpone processing to accommodate future incoming
  602. * characters
  603. * @stable ICU 2.4
  604. */
  605. virtual void filteredTransliterate(Replaceable& text,
  606. UTransPosition& index,
  607. UBool incremental) const;
  608. private:
  609. /**
  610. * Top-level transliteration method, handling filtering, incremental and
  611. * non-incremental transliteration, and rollback. All transliteration
  612. * public API methods eventually call this method with a rollback argument
  613. * of TRUE. Other entities may call this method but rollback should be
  614. * FALSE.
  615. *
  616. * <p>If this transliterator has a filter, break up the input text into runs
  617. * of unfiltered characters. Pass each run to
  618. * subclass.handleTransliterate().
  619. *
  620. * <p>In incremental mode, if rollback is TRUE, perform a special
  621. * incremental procedure in which several passes are made over the input
  622. * text, adding one character at a time, and committing successful
  623. * transliterations as they occur. Unsuccessful transliterations are rolled
  624. * back and retried with additional characters to give correct results.
  625. *
  626. * @param text the text to be transliterated
  627. * @param index the position indices
  628. * @param incremental if TRUE, then assume more characters may be inserted
  629. * at index.limit, and postpone processing to accommodate future incoming
  630. * characters
  631. * @param rollback if TRUE and if incremental is TRUE, then perform special
  632. * incremental processing, as described above, and undo partial
  633. * transliterations where necessary. If incremental is FALSE then this
  634. * parameter is ignored.
  635. */
  636. virtual void filteredTransliterate(Replaceable& text,
  637. UTransPosition& index,
  638. UBool incremental,
  639. UBool rollback) const;
  640. public:
  641. /**
  642. * Returns the length of the longest context required by this transliterator.
  643. * This is <em>preceding</em> context. The default implementation supplied
  644. * by <code>Transliterator</code> returns zero; subclasses
  645. * that use preceding context should override this method to return the
  646. * correct value. For example, if a transliterator translates "ddd" (where
  647. * d is any digit) to "555" when preceded by "(ddd)", then the preceding
  648. * context length is 5, the length of "(ddd)".
  649. *
  650. * @return The maximum number of preceding context characters this
  651. * transliterator needs to examine
  652. * @stable ICU 2.0
  653. */
  654. int32_t getMaximumContextLength(void) const;
  655. protected:
  656. /**
  657. * Method for subclasses to use to set the maximum context length.
  658. * @param maxContextLength the new value to be set.
  659. * @see #getMaximumContextLength
  660. * @stable ICU 2.4
  661. */
  662. void setMaximumContextLength(int32_t maxContextLength);
  663. public:
  664. /**
  665. * Returns a programmatic identifier for this transliterator.
  666. * If this identifier is passed to <code>createInstance()</code>, it
  667. * will return this object, if it has been registered.
  668. * @return a programmatic identifier for this transliterator.
  669. * @see #registerInstance
  670. * @see #registerFactory
  671. * @see #getAvailableIDs
  672. * @stable ICU 2.0
  673. */
  674. virtual const UnicodeString& getID(void) const;
  675. /**
  676. * Returns a name for this transliterator that is appropriate for
  677. * display to the user in the default locale. See {@link
  678. * #getDisplayName } for details.
  679. * @param ID the string identifier for this transliterator
  680. * @param result Output param to receive the display name
  681. * @return A reference to 'result'.
  682. * @stable ICU 2.0
  683. */
  684. static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
  685. UnicodeString& result);
  686. /**
  687. * Returns a name for this transliterator that is appropriate for
  688. * display to the user in the given locale. This name is taken
  689. * from the locale resource data in the standard manner of the
  690. * <code>java.text</code> package.
  691. *
  692. * <p>If no localized names exist in the system resource bundles,
  693. * a name is synthesized using a localized
  694. * <code>MessageFormat</code> pattern from the resource data. The
  695. * arguments to this pattern are an integer followed by one or two
  696. * strings. The integer is the number of strings, either 1 or 2.
  697. * The strings are formed by splitting the ID for this
  698. * transliterator at the first '-'. If there is no '-', then the
  699. * entire ID forms the only string.
  700. * @param ID the string identifier for this transliterator
  701. * @param inLocale the Locale in which the display name should be
  702. * localized.
  703. * @param result Output param to receive the display name
  704. * @return A reference to 'result'.
  705. * @stable ICU 2.0
  706. */
  707. static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
  708. const Locale& inLocale,
  709. UnicodeString& result);
  710. /**
  711. * Returns the filter used by this transliterator, or <tt>NULL</tt>
  712. * if this transliterator uses no filter.
  713. * @return the filter used by this transliterator, or <tt>NULL</tt>
  714. * if this transliterator uses no filter.
  715. * @stable ICU 2.0
  716. */
  717. const UnicodeFilter* getFilter(void) const;
  718. /**
  719. * Returns the filter used by this transliterator, or <tt>NULL</tt> if this
  720. * transliterator uses no filter. The caller must eventually delete the
  721. * result. After this call, this transliterator's filter is set to
  722. * <tt>NULL</tt>.
  723. * @return the filter used by this transliterator, or <tt>NULL</tt> if this
  724. * transliterator uses no filter.
  725. * @stable ICU 2.4
  726. */
  727. UnicodeFilter* orphanFilter(void);
  728. /**
  729. * Changes the filter used by this transliterator. If the filter
  730. * is set to <tt>null</tt> then no filtering will occur.
  731. *
  732. * <p>Callers must take care if a transliterator is in use by
  733. * multiple threads. The filter should not be changed by one
  734. * thread while another thread may be transliterating.
  735. * @param adoptedFilter the new filter to be adopted.
  736. * @stable ICU 2.0
  737. */
  738. void adoptFilter(UnicodeFilter* adoptedFilter);
  739. /**
  740. * Returns this transliterator's inverse. See the class
  741. * documentation for details. This implementation simply inverts
  742. * the two entities in the ID and attempts to retrieve the
  743. * resulting transliterator. That is, if <code>getID()</code>
  744. * returns "A-B", then this method will return the result of
  745. * <code>createInstance("B-A")</code>, or <code>null</code> if that
  746. * call fails.
  747. *
  748. * <p>Subclasses with knowledge of their inverse may wish to
  749. * override this method.
  750. *
  751. * @param status Output param to be filled in with a success or an error.
  752. * @return a transliterator that is an inverse, not necessarily
  753. * exact, of this transliterator, or <code>null</code> if no such
  754. * transliterator is registered.
  755. * @see #registerInstance
  756. * @stable ICU 2.0
  757. */
  758. Transliterator* createInverse(UErrorCode& status) const;
  759. /**
  760. * Returns a <code>Transliterator</code> object given its ID.
  761. * The ID must be either a system transliterator ID or an ID registered
  762. * using <code>registerInstance()</code>.
  763. *
  764. * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
  765. * @param dir either FORWARD or REVERSE.
  766. * @param parseError Struct to receive information on position
  767. * of error if an error is encountered
  768. * @param status Output param to be filled in with a success or an error.
  769. * @return A <code>Transliterator</code> object with the given ID
  770. * @see #registerInstance
  771. * @see #getAvailableIDs
  772. * @see #getID
  773. * @stable ICU 2.0
  774. */
  775. static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
  776. UTransDirection dir,
  777. UParseError& parseError,
  778. UErrorCode& status);
  779. /**
  780. * Returns a <code>Transliterator</code> object given its ID.
  781. * The ID must be either a system transliterator ID or an ID registered
  782. * using <code>registerInstance()</code>.
  783. * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
  784. * @param dir either FORWARD or REVERSE.
  785. * @param status Output param to be filled in with a success or an error.
  786. * @return A <code>Transliterator</code> object with the given ID
  787. * @stable ICU 2.0
  788. */
  789. static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
  790. UTransDirection dir,
  791. UErrorCode& status);
  792. /**
  793. * Returns a <code>Transliterator</code> object constructed from
  794. * the given rule string. This will be a RuleBasedTransliterator,
  795. * if the rule string contains only rules, or a
  796. * CompoundTransliterator, if it contains ID blocks, or a
  797. * NullTransliterator, if it contains ID blocks which parse as
  798. * empty for the given direction.
  799. * @param ID the id for the transliterator.
  800. * @param rules rules, separated by ';'
  801. * @param dir either FORWARD or REVERSE.
  802. * @param parseError Struct to receive information on position
  803. * of error if an error is encountered
  804. * @param status Output param set to success/failure code.
  805. * @stable ICU 2.0
  806. */
  807. static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
  808. const UnicodeString& rules,
  809. UTransDirection dir,
  810. UParseError& parseError,
  811. UErrorCode& status);
  812. /**
  813. * Create a rule string that can be passed to createFromRules()
  814. * to recreate this transliterator.
  815. * @param result the string to receive the rules. Previous
  816. * contents will be deleted.
  817. * @param escapeUnprintable if TRUE then convert unprintable
  818. * characters to their hex escape representations, \\uxxxx or
  819. * \\Uxxxxxxxx. Unprintable characters are those other than
  820. * U+000A, U+0020..U+007E.
  821. * @stable ICU 2.0
  822. */
  823. virtual UnicodeString& toRules(UnicodeString& result,
  824. UBool escapeUnprintable) const;
  825. /**
  826. * Return the number of elements that make up this transliterator.
  827. * For example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
  828. * were created, the return value of this method would be 3.
  829. *
  830. * <p>If this transliterator is not composed of other
  831. * transliterators, then this method returns 1.
  832. * @return the number of transliterators that compose this
  833. * transliterator, or 1 if this transliterator is not composed of
  834. * multiple transliterators
  835. * @stable ICU 3.0
  836. */
  837. int32_t countElements() const;
  838. /**
  839. * Return an element that makes up this transliterator. For
  840. * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
  841. * were created, the return value of this method would be one
  842. * of the three transliterator objects that make up that
  843. * transliterator: [NFD, Jamo-Latin, Latin-Greek].
  844. *
  845. * <p>If this transliterator is not composed of other
  846. * transliterators, then this method will return a reference to
  847. * this transliterator when given the index 0.
  848. * @param index a value from 0..countElements()-1 indicating the
  849. * transliterator to return
  850. * @param ec input-output error code
  851. * @return one of the transliterators that makes up this
  852. * transliterator, if this transliterator is made up of multiple
  853. * transliterators, otherwise a reference to this object if given
  854. * an index of 0
  855. * @stable ICU 3.0
  856. */
  857. const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
  858. /**
  859. * Returns the set of all characters that may be modified in the
  860. * input text by this Transliterator. This incorporates this
  861. * object's current filter; if the filter is changed, the return
  862. * value of this function will change. The default implementation
  863. * returns an empty set. Some subclasses may override {@link
  864. * #handleGetSourceSet } to return a more precise result. The
  865. * return result is approximate in any case and is intended for
  866. * use by tests, tools, or utilities.
  867. * @param result receives result set; previous contents lost
  868. * @return a reference to result
  869. * @see #getTargetSet
  870. * @see #handleGetSourceSet
  871. * @stable ICU 2.4
  872. */
  873. UnicodeSet& getSourceSet(UnicodeSet& result) const;
  874. /**
  875. * Framework method that returns the set of all characters that
  876. * may be modified in the input text by this Transliterator,
  877. * ignoring the effect of this object's filter. The base class
  878. * implementation returns the empty set. Subclasses that wish to
  879. * implement this should override this method.
  880. * This method returns nothing; the set of characters that this
  881. * transliterator may modify is delivered through <code>result</code>,
  882. * which is supplied by the caller.
  883. * @param result receives result set; previous contents lost
  884. * @see #getSourceSet
  885. * @see #getTargetSet
  886. * @stable ICU 2.4
  887. */
  888. virtual void handleGetSourceSet(UnicodeSet& result) const;
  889. /**
  890. * Returns the set of all characters that may be generated as
  891. * replacement text by this transliterator. The default
  892. * implementation returns the empty set. Some subclasses may
  893. * override this method to return a more precise result. The
  894. * return result is approximate in any case and is intended for
  895. * use by tests, tools, or utilities requiring such
  896. * meta-information.
  897. * @param result receives result set; previous contents lost
  898. * @return a reference to result
  899. * @see #getSourceSet
  900. * @stable ICU 2.4
  901. */
  902. virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
  903. public:
  904. /**
  905. * Registers a factory function that creates transliterators of
  906. * a given ID.
  907. *
  908. * Because ICU may choose to cache Transliterators internally, this must
  909. * be called at application startup, prior to any calls to
  910. * Transliterator::createXXX to avoid undefined behavior.
  911. *
  912. * @param id the ID being registered
  913. * @param factory a function pointer that will be copied and
  914. * called later when the given ID is passed to createInstance()
  915. * @param context a context pointer that will be stored and
  916. * later passed to the factory function when an ID matching
  917. * the registration ID is being instantiated with this factory.
  918. * @stable ICU 2.0
  919. */
  920. static void U_EXPORT2 registerFactory(const UnicodeString& id,
  921. Factory factory,
  922. Token context);
  923. /**
  924. * Registers an instance <tt>obj</tt> of a subclass of
  925. * <code>Transliterator</code> with the system. When
  926. * <tt>createInstance()</tt> is called with an ID string that is
  927. * equal to <tt>obj->getID()</tt>, then <tt>obj->clone()</tt> is
  928. * returned.
  929. *
  930. * After this call the Transliterator class owns the adoptedObj
  931. * and will delete it.
  932. *
  933. * Because ICU may choose to cache Transliterators internally, this must
  934. * be called at application startup, prior to any calls to
  935. * Transliterator::createXXX to avoid undefined behavior.
  936. *
  937. * @param adoptedObj an instance of subclass of
  938. * <code>Transliterator</code> that defines <tt>clone()</tt>
  939. * @see #createInstance
  940. * @see #registerFactory
  941. * @see #unregister
  942. * @stable ICU 2.0
  943. */
  944. static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
  945. /**
  946. * Registers an ID string as an alias of another ID string.
  947. * That is, after calling this function, <tt>createInstance(aliasID)</tt>
  948. * will return the same thing as <tt>createInstance(realID)</tt>.
  949. * This is generally used to create shorter, more mnemonic aliases
  950. * for long compound IDs.
  951. *
  952. * @param aliasID The new ID being registered.
  953. * @param realID The ID that the new ID is to be an alias for.
  954. * This can be a compound ID and can include filters and should
  955. * refer to transliterators that have already been registered with
  956. * the framework, although this isn't checked.
  957. * @stable ICU 3.6
  958. */
  959. static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
  960. const UnicodeString& realID);
  961. protected:
  962. #ifndef U_HIDE_INTERNAL_API
  963. /**
  964. * @param id the ID being registered
  965. * @param factory a function pointer that will be copied and
  966. * called later when the given ID is passed to createInstance()
  967. * @param context a context pointer that will be stored and
  968. * later passed to the factory function when an ID matching
  969. * the registration ID is being instantiated with this factory.
  970. * @internal
  971. */
  972. static void _registerFactory(const UnicodeString& id,
  973. Factory factory,
  974. Token context);
  975. /**
  976. * @internal
  977. */
  978. static void _registerInstance(Transliterator* adoptedObj);
  979. /**
  980. * @internal
  981. */
  982. static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
  983. /**
  984. * Register two targets as being inverses of one another. For
  985. * example, calling _registerSpecialInverse("NFC", "NFD", true) causes
  986. * Transliterator to form the following inverse relationships:
  987. *
  988. * <pre>NFC => NFD
  989. * Any-NFC => Any-NFD
  990. * NFD => NFC
  991. * Any-NFD => Any-NFC</pre>
  992. *
  993. * (Without the special inverse registration, the inverse of NFC
  994. * would be NFC-Any.) Note that NFD is shorthand for Any-NFD, but
  995. * that the presence or absence of "Any-" is preserved.
  996. *
  997. * <p>The relationship is symmetrical; registering (a, b) is
  998. * equivalent to registering (b, a).
  999. *
  1000. * <p>The relevant IDs must still be registered separately as
  1001. * factories or classes.
  1002. *
  1003. * <p>Only the targets are specified. Special inverses always
  1004. * have the form Any-Target1 <=> Any-Target2. The target should
  1005. * have canonical casing (the casing desired to be produced when
  1006. * an inverse is formed) and should contain no whitespace or other
  1007. * extraneous characters.
  1008. *
  1009. * @param target the target against which to register the inverse
  1010. * @param inverseTarget the inverse of target, that is
  1011. * Any-target.getInverse() => Any-inverseTarget
  1012. * @param bidirectional if true, register the reverse relation
  1013. * as well, that is, Any-inverseTarget.getInverse() => Any-target
  1014. * @internal
  1015. */
  1016. static void _registerSpecialInverse(const UnicodeString& target,
  1017. const UnicodeString& inverseTarget,
  1018. UBool bidirectional);
  1019. #endif /* U_HIDE_INTERNAL_API */
  1020. public:
  1021. /**
  1022. * Unregisters a transliterator or class. This may be either
  1023. * a system transliterator or a user transliterator or class.
  1024. * Any attempt to construct an unregistered transliterator based
  1025. * on its ID will fail.
  1026. *
  1027. * Because ICU may choose to cache Transliterators internally, this should
  1028. * be called during application shutdown, after all calls to
  1029. * Transliterator::createXXX to avoid undefined behavior.
  1030. *
  1031. * @param ID the ID of the transliterator or class
  1032. * This function returns nothing; the object that was registered
  1033. * with <code>ID</code>, if any, is not handed back to the caller.
  1034. * @see #registerInstance
  1035. * @see #registerFactory
  1036. * @stable ICU 2.0
  1037. */
  1038. static void U_EXPORT2 unregister(const UnicodeString& ID);
  1039. public:
  1040. /**
  1041. * Return a StringEnumeration over the IDs available at the time of the
  1042. * call, including user-registered IDs.
  1043. * @param ec input-output error code
  1044. * @return a newly-created StringEnumeration over the transliterators
  1045. * available at the time of the call. The caller should delete this object
  1046. * when done using it.
  1047. * @stable ICU 3.0
  1048. */
  1049. static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec);
  1050. /**
  1051. * Return the number of registered source specifiers.
  1052. * @return the number of registered source specifiers.
  1053. * @stable ICU 2.0
  1054. */
  1055. static int32_t U_EXPORT2 countAvailableSources(void);
  1056. /**
  1057. * Return a registered source specifier.
  1058. * @param index which specifier to return, from 0 to n-1, where
  1059. * n = countAvailableSources()
  1060. * @param result fill-in parameter to receive the source specifier.
  1061. * If index is out of range, result will be empty.
  1062. * @return reference to result
  1063. * @stable ICU 2.0
  1064. */
  1065. static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
  1066. UnicodeString& result);
  1067. /**
  1068. * Return the number of registered target specifiers for a given
  1069. * source specifier.
  1070. * @param source the given source specifier.
  1071. * @return the number of registered target specifiers for a given
  1072. * source specifier.
  1073. * @stable ICU 2.0
  1074. */
  1075. static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
  1076. /**
  1077. * Return a registered target specifier for a given source.
  1078. * @param index which specifier to return, from 0 to n-1, where
  1079. * n = countAvailableTargets(source)
  1080. * @param source the source specifier
  1081. * @param result fill-in parameter to receive the target specifier.
  1082. * If source is invalid or if index is out of range, result will
  1083. * be empty.
  1084. * @return reference to result
  1085. * @stable ICU 2.0
  1086. */
  1087. static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
  1088. const UnicodeString& source,
  1089. UnicodeString& result);
  1090. /**
  1091. * Return the number of registered variant specifiers for a given
  1092. * source-target pair.
  1093. * @param source the source specifiers.
  1094. * @param target the target specifiers.
  1095. * @stable ICU 2.0
  1096. */
  1097. static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
  1098. const UnicodeString& target);
  1099. /**
  1100. * Return a registered variant specifier for a given source-target
  1101. * pair.
  1102. * @param index which specifier to return, from 0 to n-1, where
  1103. * n = countAvailableVariants(source, target)
  1104. * @param source the source specifier
  1105. * @param target the target specifier
  1106. * @param result fill-in parameter to receive the variant
  1107. * specifier. If source is invalid or if target is invalid or if
  1108. * index is out of range, result will be empty.
  1109. * @return reference to result
  1110. * @stable ICU 2.0
  1111. */
  1112. static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
  1113. const UnicodeString& source,
  1114. const UnicodeString& target,
  1115. UnicodeString& result);
  1116. protected:
  1117. #ifndef U_HIDE_INTERNAL_API
  1118. /**
  1119. * Non-mutexed internal method
  1120. * @internal
  1121. */
  1122. static int32_t _countAvailableSources(void);
  1123. /**
  1124. * Non-mutexed internal method
  1125. * @internal
  1126. */
  1127. static UnicodeString& _getAvailableSource(int32_t index,
  1128. UnicodeString& result);
  1129. /**
  1130. * Non-mutexed internal method
  1131. * @internal
  1132. */
  1133. static int32_t _countAvailableTargets(const UnicodeString& source);
  1134. /**
  1135. * Non-mutexed internal method
  1136. * @internal
  1137. */
  1138. static UnicodeString& _getAvailableTarget(int32_t index,
  1139. const UnicodeString& source,
  1140. UnicodeString& result);
  1141. /**
  1142. * Non-mutexed internal method
  1143. * @internal
  1144. */
  1145. static int32_t _countAvailableVariants(const UnicodeString& source,
  1146. const UnicodeString& target);
  1147. /**
  1148. * Non-mutexed internal method
  1149. * @internal
  1150. */
  1151. static UnicodeString& _getAvailableVariant(int32_t index,
  1152. const UnicodeString& source,
  1153. const UnicodeString& target,
  1154. UnicodeString& result);
  1155. #endif /* U_HIDE_INTERNAL_API */
  1156. protected:
  1157. /**
  1158. * Set the ID of this transliterator. Subclasses shouldn't do
  1159. * this, unless the underlying script behavior has changed.
  1160. * @param id the new id to be set.
  1161. * @stable ICU 2.4
  1162. */
  1163. void setID(const UnicodeString& id);
  1164. public:
  1165. /**
  1166. * Return the class ID for this class. This is useful only for
  1167. * comparing to a return value from getDynamicClassID().
  1168. * Note that Transliterator is an abstract base class, and therefore
  1169. * no fully constructed object will have a dynamic
  1170. * UClassID that equals the UClassID returned from
  1171. * Transliterator::getStaticClassID().
  1172. * @return The class ID for class Transliterator.
  1173. * @stable ICU 2.0
  1174. */
  1175. static UClassID U_EXPORT2 getStaticClassID(void);
  1176. /**
  1177. * Returns a unique class ID <b>polymorphically</b>. This method
  1178. * is to implement a simple version of RTTI, since not all C++
  1179. * compilers support genuine RTTI. Polymorphic operator==() and
  1180. * clone() methods call this method.
  1181. *
  1182. * <p>Concrete subclasses of Transliterator must use the
  1183. * UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro from
  1184. * uobject.h to provide the RTTI functions.
  1185. *
  1186. * @return The class ID for this object. All objects of a given
  1187. * class have the same class ID. Objects of other classes have
  1188. * different class IDs.
  1189. * @stable ICU 2.0
  1190. */
  1191. virtual UClassID getDynamicClassID(void) const = 0;
  1192. private:
  1193. static UBool initializeRegistry(UErrorCode &status);
  1194. public:
  1195. #ifndef U_HIDE_OBSOLETE_API
  1196. /**
  1197. * Return the number of IDs currently registered with the system.
  1198. * To retrieve the actual IDs, call getAvailableID(i) with
  1199. * i from 0 to countAvailableIDs() - 1.
  1200. * @return the number of IDs currently registered with the system.
  1201. * @obsolete ICU 3.4 use getAvailableIDs() instead
  1202. */
  1203. static int32_t U_EXPORT2 countAvailableIDs(void);
  1204. /**
  1205. * Return the index-th available ID. index must be between 0
  1206. * and countAvailableIDs() - 1, inclusive. If index is out of
  1207. * range, the result of getAvailableID(0) is returned.
  1208. * @param index the given ID index.
  1209. * @return the index-th available ID. index must be between 0
  1210. * and countAvailableIDs() - 1, inclusive. If index is out of
  1211. * range, the result of getAvailableID(0) is returned.
  1212. * @obsolete ICU 3.4 use getAvailableIDs() instead; this function
  1213. * is not thread safe, since it returns a reference to storage that
  1214. * may become invalid if another thread calls unregister
  1215. */
  1216. static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
  1217. #endif /* U_HIDE_OBSOLETE_API */
  1218. };
  1219. inline int32_t Transliterator::getMaximumContextLength(void) const { // Inline getter; performs no computation.
  1220. return maximumContextLength; // Hands back the maximumContextLength member as-is.
  1221. }
  1222. inline void Transliterator::setID(const UnicodeString& id) { // Replaces this object's ID member with the given id.
  1223. ID = id;
  1224. // NUL-terminate the ID string, which is a non-aliased copy.
  1225. ID.append((UChar)0); // Grow the string by one NUL code unit...
  1226. ID.truncate(ID.length()-1); // ...then cut the length back by one; presumably the NUL stays in the buffer just past the logical end -- confirm against UnicodeString.
  1227. }
  1228. #ifndef U_HIDE_INTERNAL_API
  1229. inline Transliterator::Token Transliterator::integerToken(int32_t i) { // Wraps an int32_t in a Token that is returned by value.
  1230. Token t; // Declared without an initializer; only the field assigned below is set here.
  1231. t.integer = i;
  1232. return t;
  1233. }
  1234. inline Transliterator::Token Transliterator::pointerToken(void* p) { // Wraps a void* in a Token that is returned by value.
  1235. Token t; // Declared without an initializer; only the field assigned below is set here.
  1236. t.pointer = p; // The pointer is stored as given; nothing is copied or dereferenced in this function.
  1237. return t;
  1238. }
  1239. #endif /* U_HIDE_INTERNAL_API */
  1240. U_NAMESPACE_END
  1241. #endif /* #if !UCONFIG_NO_TRANSLITERATION */
  1242. #endif