utext.h 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600
  1. /*
  2. *******************************************************************************
  3. *
  4. * Copyright (C) 2004-2012, International Business Machines
  5. * Corporation and others. All Rights Reserved.
  6. *
  7. *******************************************************************************
  8. * file name: utext.h
  9. * encoding: US-ASCII
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2004oct06
  14. * created by: Markus W. Scherer
  15. */
  16. #ifndef __UTEXT_H__
  17. #define __UTEXT_H__
  18. /**
  19. * \file
  20. * \brief C API: Abstract Unicode Text API
  21. *
  22. * The Text Access API provides a means to allow text that is stored in alternative
  23. * formats to work with ICU services. ICU normally operates on text that is
  24. * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type
  25. * UnicodeString for C++ APIs.
  26. *
  27. * ICU Text Access allows other formats, such as UTF-8 or non-contiguous
  28. * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services.
  29. *
  30. * There are three general classes of usage for UText:
  31. *
  32. * Application Level Use. This is the simplest usage - applications would
  33. * use one of the utext_open() functions on their input text, and pass
  34. * the resulting UText to the desired ICU service.
  35. *
  36. * Second is usage in ICU Services, such as break iteration, that will need to
  37. * operate on input presented to them as a UText. These implementations
  38. * will need to use the iteration and related UText functions to gain
  39. * access to the actual text.
  40. *
  41. * The third class of UText users are "text providers." These are the
  42. * UText implementations for the various text storage formats. An application
  43. * or system with a unique text storage format can implement a set of
  44. * UText provider functions for that format, which will then allow
  45. * ICU services to operate on that format.
  46. *
  47. *
  48. * <em>Iterating over text</em>
  49. *
  50. * Here is sample code for a forward iteration over the contents of a UText
  51. *
  52. * \code
  53. * UChar32 c;
  54. * UText *ut = whatever();
  55. *
  56. * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) {
  57. * // do whatever with the codepoint c here.
  58. * }
  59. * \endcode
  60. *
  61. * And here is similar code to iterate in the reverse direction, from the end
  62. * of the text towards the beginning.
  63. *
  64. * \code
  65. * UChar32 c;
  66. * UText *ut = whatever();
  67. * int64_t textLength = utext_nativeLength(ut);
  68. * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) {
  69. * // do whatever with the codepoint c here.
  70. * }
  71. * \endcode
  72. *
  73. * <em>Characters and Indexing</em>
  74. *
  75. * Indexing into text by UText functions is nearly always in terms of the native
  76. * indexing of the underlying text storage. The storage format could be UTF-8
  77. * or UTF-32, for example. When coding to the UText access API, no assumptions
  78. * can be made regarding the size of characters, or how far an index
  79. * may move when iterating between characters.
  80. *
  81. * All indices supplied to UText functions are pinned to the length of the
  82. * text. An out-of-bounds index is not considered to be an error, but is
  83. * adjusted to be in the range 0 <= index <= length of input text.
  84. *
  85. *
  86. * When an index position is returned from a UText function, it will be
  87. * a native index to the underlying text. In the case of multi-unit characters,
  88. * it will always refer to the first position of the character,
  89. * never to the interior. This is essentially the same thing as saying that
  90. * a returned index will always point to a boundary between characters.
  91. *
  92. * When a native index is supplied to a UText function, all indices that
  93. * refer to any part of a multi-unit character representation are considered
  94. * to be equivalent. In the case of multi-unit characters, an incoming index
  95. * will be logically normalized to refer to the start of the character.
  96. *
  97. * It is possible to test whether a native index is on a code point boundary
  98. * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex().
  99. * If the index is returned unchanged, it was on a code point boundary. If
  100. * an adjusted index is returned, the original index referred to the
  101. * interior of a character.
  102. *
  103. * <em>Conventions for calling UText functions</em>
  104. *
  105. * Most UText access functions have as their first parameter a (UText *) pointer,
  106. * which specifies the UText to be used. Unless otherwise noted, the
  107. * pointer must refer to a valid, open UText. Attempting to
  108. * use a closed UText or passing a NULL pointer is a programming error and
  109. * will produce undefined results or NULL pointer exceptions.
  110. *
  111. * The utext_open family of functions can either open an existing (closed)
  112. * UText, or heap allocate a new UText. Here is sample code for creating
  113. * a stack-allocated UText.
  114. *
  115. * \code
  116. * char *s = whatever(); // A utf-8 string
  117. * UErrorCode status = U_ZERO_ERROR;
  118. * UText ut = UTEXT_INITIALIZER;
  119. * utext_openUTF8(&ut, s, -1, &status);
  120. * if (U_FAILURE(status)) {
  121. * // error handling
  122. * } else {
  123. * // work with the UText
  124. * }
  125. * \endcode
  126. *
  127. * Any existing UText passed to an open function _must_ have been initialized,
  128. * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated
  129. * by an open function. Passing NULL will cause the open function to
  130. * heap-allocate and fully initialize a new UText.
  131. *
  132. */
  133. #include "unicode/utypes.h"
  134. #include "unicode/uchar.h"
  135. #if U_SHOW_CPLUSPLUS_API
  136. #include "unicode/localpointer.h"
  137. #include "unicode/rep.h"
  138. #include "unicode/unistr.h"
  139. #include "unicode/chariter.h"
  140. #endif
  141. U_CDECL_BEGIN
  142. struct UText;
  143. typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */
  144. /***************************************************************************************
  145. *
  146. * C Functions for creating UText wrappers around various kinds of text strings.
  147. *
  148. ****************************************************************************************/
  149. /**
  150. * Close function for UText instances.
  151. * Cleans up, releases any resources being held by an open UText.
  152. * <p>
  153. * If the UText was originally allocated by one of the utext_open functions,
  154. * the storage associated with the utext will also be freed.
  155. * If the UText storage originated with the application, as it would with
  156. * a local or static instance, the storage will not be deleted.
  157. *
  158. * An open UText can be reset to refer to a new string by using one of the utext_open()
  159. * functions without first closing the UText.
  160. *
  161. * @param ut The UText to be closed.
  162. * @return NULL if the UText struct was deleted by the close. If the UText struct
  163. * was originally provided by the caller to the open function, it is
  164. * returned by this function, and may be safely used again in
  165. * a subsequent utext_open.
  166. *
  167. * @stable ICU 3.4
  168. */
  169. U_STABLE UText * U_EXPORT2
  170. utext_close(UText *ut);
  171. #if U_SHOW_CPLUSPLUS_API
  172. U_NAMESPACE_BEGIN
  173. /**
  174. * \class LocalUTextPointer
  175. * "Smart pointer" class, closes a UText via utext_close().
  176. * For most methods see the LocalPointerBase base class.
  177. *
  178. * @see LocalPointerBase
  179. * @see LocalPointer
  180. * @stable ICU 4.4
  181. */
  182. U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close);
  183. U_NAMESPACE_END
  184. #endif
  185. /**
  186. * Open a read-only UText implementation for UTF-8 strings.
  187. *
  188. * \htmlonly
  189. * Any invalid UTF-8 in the input will be handled in this way:
  190. * a sequence of bytes that has the form of a truncated, but otherwise valid,
  191. * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD.
  192. * Any other illegal bytes will each be replaced by a \uFFFD.
  193. * \endhtmlonly
  194. *
  195. * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
  196. * If non-NULL, must refer to an initialized UText struct, which will then
  197. * be reset to reference the specified UTF-8 string.
  198. * @param s A UTF-8 string. Must not be NULL.
  199. * @param length The length of the UTF-8 string in bytes, or -1 if the string is
  200. * zero terminated.
  201. * @param status Errors are returned here.
  202. * @return A pointer to the UText. If a pre-allocated UText was provided, it
  203. * will always be used and returned.
  204. * @stable ICU 3.4
  205. */
  206. U_STABLE UText * U_EXPORT2
  207. utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
  208. /**
  209. * Open a read-only UText for a UChar * (UTF-16) string.
  210. *
  211. * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
  212. * If non-NULL, must refer to an initialized UText struct, which will then
  213. * be reset to reference the specified UChar string.
  214. * @param s A UChar (UTF-16) string
  215. * @param length The number of UChars in the input string, or -1 if the string is
  216. * zero terminated.
  217. * @param status Errors are returned here.
  218. * @return A pointer to the UText. If a pre-allocated UText was provided, it
  219. * will always be used and returned.
  220. * @stable ICU 3.4
  221. */
  222. U_STABLE UText * U_EXPORT2
  223. utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
  224. #if U_SHOW_CPLUSPLUS_API
  225. /**
  226. * Open a writable UText for a non-const UnicodeString.
  227. *
  228. * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
  229. * If non-NULL, must refer to an initialized UText struct, which will then
  230. * be reset to reference the specified input string.
  231. * @param s A UnicodeString.
  232. * @param status Errors are returned here.
  233. * @return Pointer to the UText. If a UText was supplied as input, this
  234. * will always be used and returned.
  235. * @stable ICU 3.4
  236. */
  237. U_STABLE UText * U_EXPORT2
  238. utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status);
  239. /**
  240. * Open a UText for a const UnicodeString. The resulting UText will not be writable.
  241. *
  242. * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
  243. * If non-NULL, must refer to an initialized UText struct, which will then
  244. * be reset to reference the specified input string.
  245. * @param s A const UnicodeString to be wrapped.
  246. * @param status Errors are returned here.
  247. * @return Pointer to the UText. If a UText was supplied as input, this
  248. * will always be used and returned.
  249. * @stable ICU 3.4
  250. */
  251. U_STABLE UText * U_EXPORT2
  252. utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status);
  253. /**
  254. * Open a writable UText implementation for an ICU Replaceable object.
  255. * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
  256. * If non-NULL, must refer to an already existing UText, which will then
  257. * be reset to reference the specified replaceable text.
  258. * @param rep A Replaceable text object.
  259. * @param status Errors are returned here.
  260. * @return Pointer to the UText. If a UText was supplied as input, this
  261. * will always be used and returned.
  262. * @see Replaceable
  263. * @stable ICU 3.4
  264. */
  265. U_STABLE UText * U_EXPORT2
  266. utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status);
  267. /**
  268. * Open a UText implementation over an ICU CharacterIterator.
  269. * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
  270. * If non-NULL, must refer to an already existing UText, which will then
  271. * be reset to reference the specified character iterator.
  272. * @param ci A Character Iterator.
  273. * @param status Errors are returned here.
  274. * @return Pointer to the UText. If a UText was supplied as input, this
  275. * will always be used and returned.
  276. * @see CharacterIterator
  277. * @stable ICU 3.4
  278. */
  279. U_STABLE UText * U_EXPORT2
  280. utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status);
  281. #endif
  282. /**
  283. * Clone a UText. This is much like opening a UText where the source text is itself
  284. * another UText.
  285. *
  286. * A deep clone will copy both the UText data structures and the underlying text.
  287. * The original and cloned UText will operate completely independently; modifications
  288. * made to the text in one will not affect the other. Text providers are not
  289. * required to support deep clones. The user of clone() must check the status return
  290. * and be prepared to handle failures.
  291. *
  292. * The standard UText implementations for UTF8, UChar *, UnicodeString and
  293. * Replaceable all support deep cloning.
  294. *
  295. * The UText returned from a deep clone will be writable, assuming that the text
  296. * provider is able to support writing, even if the source UText had been made
  297. * non-writable by means of utext_freeze().
  298. *
  299. * A shallow clone replicates only the UText data structures; it does not make
  300. * a copy of the underlying text. Shallow clones can be used as an efficient way to
  301. * have multiple iterators active in a single text string that is not being
  302. * modified.
  303. *
  304. * A shallow clone operation will not fail, barring truly exceptional conditions such
  305. * as memory allocation failures.
  306. *
  307. * Shallow UText clones should be avoided if the UText functions that modify the
  308. * text are expected to be used, either on the original or the cloned UText.
  309. * Any such modifications can cause unpredictable behavior. Read Only
  310. * shallow clones provide some protection against errors of this type by
  311. * disabling text modification via the cloned UText.
  312. *
  313. * A shallow clone made with the readOnly parameter == FALSE will preserve the
  314. * utext_isWritable() state of the source object. Note, however, that
  315. * write operations must be avoided while more than one UText exists that refers
  316. * to the same underlying text.
  317. *
  318. * A UText and its clone may be safely concurrently accessed by separate threads.
  319. * This is true for read access only with shallow clones, and for both read and
  320. * write access with deep clones.
  321. * It is the responsibility of the Text Provider to ensure that this thread safety
  322. * constraint is met.
  323. *
  324. * @param dest A UText struct to be filled in with the result of the clone operation,
  325. * or NULL if the clone function should heap-allocate a new UText struct.
  326. * If non-NULL, must refer to an already existing UText, which will then
  327. * be reset to become the clone.
  328. * @param src The UText to be cloned.
  329. * @param deep TRUE to request a deep clone, FALSE for a shallow clone.
  330. * @param readOnly TRUE to request that the cloned UText have read only access to the
  331. * underlying text.
  332. * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
  333. * will be returned if the text provider is unable to clone the
  334. * original text.
  335. * @return The newly created clone, or NULL if the clone operation failed.
  336. * @stable ICU 3.4
  337. */
  338. U_STABLE UText * U_EXPORT2
  339. utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
  340. /**
  341. * Compare two UText objects for equality.
  342. * UTexts are equal if they are iterating over the same text, and
  343. * have the same iteration position within the text.
  344. * If either or both of the parameters are NULL, the comparison is FALSE.
  345. *
  346. * @param a The first of the two UTexts to compare.
  347. * @param b The other UText to be compared.
  348. * @return TRUE if the two UTexts are equal; FALSE otherwise, including when either is NULL.
  349. * @stable ICU 3.6
  350. */
  351. U_STABLE UBool U_EXPORT2
  352. utext_equals(const UText *a, const UText *b);
  353. /*****************************************************************************
  354. *
  355. * Functions to work with the text represented by a UText wrapper
  356. *
  357. *****************************************************************************/
  358. /**
  359. * Get the length of the text. Depending on the characteristics
  360. * of the underlying text representation, this may be expensive.
  361. * @see utext_isLengthExpensive()
  362. *
  363. *
  364. * @param ut the text to be accessed.
  365. * @return the length of the text, expressed in native units.
  366. *
  367. * @stable ICU 3.4
  368. */
  369. U_STABLE int64_t U_EXPORT2
  370. utext_nativeLength(UText *ut);
  371. /**
  372. * Return TRUE if calculating the length of the text could be expensive.
  373. * Finding the length of NUL terminated strings is considered to be expensive.
  374. *
  375. * Note that the value of this function may change
  376. * as the result of other operations on a UText.
  377. * Once the length of a string has been discovered, it will no longer
  378. * be expensive to report it.
  379. *
  380. * @param ut the text to be accessed.
  381. * @return TRUE if determining the length of the text could be time consuming.
  382. * @stable ICU 3.4
  383. */
  384. U_STABLE UBool U_EXPORT2
  385. utext_isLengthExpensive(const UText *ut);
  386. /**
  387. * Returns the code point at the requested index,
  388. * or U_SENTINEL (-1) if it is out of bounds.
  389. *
  390. * If the specified index points to the interior of a multi-unit
  391. * character - one of the trail bytes of a UTF-8 sequence, for example -
  392. * the complete code point will be returned.
  393. *
  394. * The iteration position will be set to the start of the returned code point.
  395. *
  396. * This function is roughly equivalent to the sequence
  397. * utext_setNativeIndex(index);
  398. * utext_current32();
  399. * (There is a subtle difference if the index is out of bounds by being less than zero -
  400. * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32()
  401. * will return the char at zero. utext_char32At(negative index), on the other hand, will
  402. * return the U_SENTINEL value of -1.)
  403. *
  404. * @param ut the text to be accessed
  405. * @param nativeIndex the native index of the character to be accessed. If the index points
  406. * to other than the first unit of a multi-unit character, it will be adjusted
  407. * to the start of the character.
  408. * @return the code point at the specified index, or U_SENTINEL (-1) if the index is out of bounds.
  409. * @stable ICU 3.4
  410. */
  411. U_STABLE UChar32 U_EXPORT2
  412. utext_char32At(UText *ut, int64_t nativeIndex);
  413. /**
  414. *
  415. * Get the code point at the current iteration position,
  416. * or U_SENTINEL (-1) if the iteration has reached the end of
  417. * the input text.
  418. *
  419. * @param ut the text to be accessed.
  420. * @return the Unicode code point at the current iterator position, or U_SENTINEL (-1) at the end of the text.
  421. * @stable ICU 3.4
  422. */
  423. U_STABLE UChar32 U_EXPORT2
  424. utext_current32(UText *ut);
  425. /**
  426. * Get the code point at the current iteration position of the UText, and
  427. * advance the position to the first index following the character.
  428. *
  429. * If the position is at the end of the text (the index following
  430. * the last character, which is also the length of the text),
  431. * return U_SENTINEL (-1) and do not advance the index.
  432. *
  433. * This is a post-increment operation.
  434. *
  435. * An inline macro version of this function, UTEXT_NEXT32(),
  436. * is available for performance critical use.
  437. *
  438. * @param ut the text to be accessed.
  439. * @return the Unicode code point at the iteration position, or U_SENTINEL (-1) at the end of the text.
  440. * @see UTEXT_NEXT32
  441. * @stable ICU 3.4
  442. */
  443. U_STABLE UChar32 U_EXPORT2
  444. utext_next32(UText *ut);
  445. /**
  446. * Move the iterator position to the character (code point) whose
  447. * index precedes the current position, and return that character.
  448. * This is a pre-decrement operation.
  449. *
  450. * If the initial position is at the start of the text (index of 0)
  451. * return U_SENTINEL (-1), and leave the position unchanged.
  452. *
  453. * An inline macro version of this function, UTEXT_PREVIOUS32(),
  454. * is available for performance critical use.
  455. *
  456. * @param ut the text to be accessed.
  457. * @return the previous UChar32 code point, or U_SENTINEL (-1)
  458. * if the iteration has reached the start of the text.
  459. * @see UTEXT_PREVIOUS32
  460. * @stable ICU 3.4
  461. */
  462. U_STABLE UChar32 U_EXPORT2
  463. utext_previous32(UText *ut);
  464. /**
  465. * Set the iteration index and return the code point at that index.
  466. * Leave the iteration index at the start of the following code point.
  467. *
  468. * This function is the most efficient and convenient way to
  469. * begin a forward iteration. The results are identical to those
  470. * from the sequence
  471. * \code
  472. * utext_setNativeIndex();
  473. * utext_next32();
  474. * \endcode
  475. *
  476. * @param ut the text to be accessed.
  477. * @param nativeIndex Iteration index, in the native units of the text provider.
  478. * @return Code point which starts at or before index,
  479. * or U_SENTINEL (-1) if it is out of bounds.
  480. * @stable ICU 3.4
  481. */
  482. U_STABLE UChar32 U_EXPORT2
  483. utext_next32From(UText *ut, int64_t nativeIndex);
  484. /**
  485. * Set the iteration index, and return the code point preceding the
  486. * one specified by the initial index. Leave the iteration position
  487. * at the start of the returned code point.
  488. *
  489. * This function is the most efficient and convenient way to
  490. * begin a backwards iteration.
  491. *
  492. * @param ut the text to be accessed.
  493. * @param nativeIndex Iteration index in the native units of the text provider.
  494. * @return Code point preceding the one at the initial index,
  495. * or U_SENTINEL (-1) if it is out of bounds.
  496. *
  497. * @stable ICU 3.4
  498. */
  499. U_STABLE UChar32 U_EXPORT2
  500. utext_previous32From(UText *ut, int64_t nativeIndex);
  501. /**
  502. * Get the current iterator position, which can range from 0 to
  503. * the length of the text.
  504. * The position is a native index into the input text, in whatever format it
  505. * may have (possibly UTF-8 for example), and may not always be the same as
  506. * the corresponding UChar (UTF-16) index.
  507. * The returned position will always be aligned to a code point boundary.
  508. *
  509. * @param ut the text to be accessed.
  510. * @return the current index position, in the native units of the text provider.
  511. * @stable ICU 3.4
  512. */
  513. U_STABLE int64_t U_EXPORT2
  514. utext_getNativeIndex(const UText *ut);
  515. /**
  516. * Set the current iteration position to the nearest code point
  517. * boundary at or preceding the specified index.
  518. * The index is in the native units of the original input text.
  519. * If the index is out of range, it will be pinned to be within
  520. * the range of the input text.
  521. * <p>
  522. * It will usually be more efficient to begin an iteration
  523. * using the functions utext_next32From() or utext_previous32From()
  524. * rather than utext_setNativeIndex().
  525. * <p>
  526. * Moving the index position to an adjacent character is best done
  527. * with utext_next32(), utext_previous32() or utext_moveIndex32().
  528. * Attempting to do direct arithmetic on the index position is
  529. * complicated by the fact that the size (in native units) of a
  530. * character depends on the underlying representation of the character
  531. * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not
  532. * easily knowable.
  533. *
  534. * @param ut the text to be accessed.
  535. * @param nativeIndex the native unit index of the new iteration position.
  536. * @stable ICU 3.4
  537. */
  538. U_STABLE void U_EXPORT2
  539. utext_setNativeIndex(UText *ut, int64_t nativeIndex);
  540. /**
  541. * Move the iterator position by delta code points. The number of code points
  542. * is a signed number; a negative delta will move the iterator backwards,
  543. * towards the start of the text.
  544. * <p>
  545. * The index is moved by <code>delta</code> code points
  546. * forward or backward, but no further backward than to 0 and
  547. * no further forward than to utext_nativeLength().
  548. * The resulting index value will be in between 0 and length, inclusive.
  549. *
  550. * @param ut the text to be accessed.
  551. * @param delta the signed number of code points to move the iteration position.
  552. * @return TRUE if the position could be moved the requested number of positions while
  553. * staying within the range [0 - text length].
  554. * @stable ICU 3.4
  555. */
  556. U_STABLE UBool U_EXPORT2
  557. utext_moveIndex32(UText *ut, int32_t delta);
  558. /**
  559. * Get the native index of the character preceding the current position.
  560. * If the iteration position is already at the start of the text, zero
  561. * is returned.
  562. * The value returned is the same as that obtained from the following sequence,
  563. * but without the side effect of changing the iteration position.
  564. *
  565. * \code
  566. * UText *ut = whatever;
  567. * ...
  568. * utext_previous32(ut);
  569. * utext_getNativeIndex(ut);
  570. * \endcode
  571. *
  572. * This function is most useful during forwards iteration, where it will get the
  573. * native index of the character most recently returned from utext_next32().
  574. *
  575. * @param ut the text to be accessed
  576. * @return the native index of the character preceding the current index position,
  577. * or zero if the current position is at the start of the text.
  578. * @stable ICU 3.6
  579. */
  580. U_STABLE int64_t U_EXPORT2
  581. utext_getPreviousNativeIndex(UText *ut);
  582. /**
  583. *
  584. * Extract text from a UText into a UChar buffer. The range of text to be extracted
  585. * is specified in the native indices of the UText provider. These may not necessarily
  586. * be UTF-16 indices.
  587. * <p>
  588. * The size (number of 16 bit UChars) of the data to be extracted is returned. The
  589. * full number of UChars is returned, even when the extracted text is truncated
  590. * because the specified buffer size is too small.
  591. * <p>
  592. * The extracted string will (if you are a user) / must (if you are a text provider)
  593. * be NUL-terminated if there is sufficient space in the destination buffer. This
  594. * terminating NUL is not included in the returned length.
  595. * <p>
  596. * The iteration index is left at the position following the last extracted character.
  597. *
  598. * @param ut the UText from which to extract data.
  599. * @param nativeStart the native index of the first character to extract.
  600. * If the specified index is out of range,
  601. * it will be pinned to be within 0 <= index <= textLength.
  602. * @param nativeLimit the native string index of the position following the last
  603. * character to extract. If the specified index is out of range,
  604. * it will be pinned to be within 0 <= index <= textLength.
  605. * nativeLimit must be >= nativeStart.
  606. * @param dest the UChar (UTF-16) buffer into which the extracted text is placed
  607. * @param destCapacity The size, in UChars, of the destination buffer. May be zero
  608. * for precomputing the required size.
  609. * @param status receives any error status.
  610. * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the
  611. * buffer was too small. Returns number of UChars for preflighting.
  612. * @return Number of UChars in the data to be extracted. Does not include a trailing NUL.
  613. *
  614. * @stable ICU 3.4
  615. */
  616. U_STABLE int32_t U_EXPORT2
  617. utext_extract(UText *ut,
  618. int64_t nativeStart, int64_t nativeLimit,
  619. UChar *dest, int32_t destCapacity,
  620. UErrorCode *status);
  621. /************************************************************************************
  622. *
  623. * #define inline versions of selected performance-critical text access functions
  624. * Caution: do not use auto increment++ or decrement-- expressions
  625. * as parameters to these macros.
  626. *
  627. * For most use, where there is no extreme performance constraint, the
  628. * normal, non-inline functions are a better choice. The resulting code
  629. * will be smaller, and, if the need ever arises, easier to debug.
  630. *
  631. * These are implemented as #defines rather than real functions
  632. * because there is no fully portable way to do inline functions in plain C.
  633. *
  634. ************************************************************************************/
  635. #ifndef U_HIDE_INTERNAL_API
  636. /**
  637. * Inline version of utext_current32(), for performance-critical situations.
  638. * The argument ut is expanded more than once; it must not have side effects.
  639. * Get the code point at the current iteration position of the UText.
  640. * Returns U_SENTINEL (-1) if the position is at the end of the
  641. * text.
  642. * The UChar is read straight from the chunk when chunkOffset is inside the chunk and the unit is below 0xd800; otherwise utext_current32() is called.
  643. * @internal ICU 4.4 technology preview
  644. */
  645. #define UTEXT_CURRENT32(ut) \
  646. ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
  647. ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
  648. #endif /* U_HIDE_INTERNAL_API */
  649. /**
  650. * Inline version of utext_next32(), for performance-critical situations.
  651. * The argument ut is expanded more than once; it must not have side effects.
  652. * Get the code point at the current iteration position of the UText, and
  653. * advance the position to the first index following the character.
  654. * This is a post-increment operation.
  655. * Returns U_SENTINEL (-1) if the position is at the end of the
  656. * text.
  657. * The UChar is read from the chunk and chunkOffset incremented when the offset is inside the chunk and the unit is below 0xd800; otherwise utext_next32() is called.
  658. * @stable ICU 3.4
  659. */
  660. #define UTEXT_NEXT32(ut) \
  661. ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
  662. ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
  663. /**
  664. * Inline version of utext_previous32(), for performance-critical situations.
  665. * The argument ut is expanded more than once; it must not have side effects.
  666. * Move the iterator position to the character (code point) whose
  667. * index precedes the current position, and return that character.
  668. * This is a pre-decrement operation.
  669. * Returns U_SENTINEL (-1) if the position is at the start of the text.
  670. * chunkOffset is decremented in place when it is above 0 and the preceding UChar is below 0xd800; otherwise utext_previous32() is called.
  671. * @stable ICU 3.4
  672. */
  673. #define UTEXT_PREVIOUS32(ut) \
  674. ((ut)->chunkOffset > 0 && \
  675. (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
  676. (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
  677. /**
  678. * Inline version of utext_getNativeIndex(), for performance-critical situations.
  679. * The argument ut is expanded more than once; it must not have side effects.
  680. * Get the current iterator position, which can range from 0 to
  681. * the length of the text.
  682. * The position is a native index into the input text, in whatever format it
  683. * may have (possibly UTF-8 for example), and may not always be the same as
  684. * the corresponding UChar (UTF-16) index.
  685. * The returned position will always be aligned to a code point boundary.
  686. * When chunkOffset <= nativeIndexingLimit the result is chunkNativeStart + chunkOffset; otherwise the provider's mapOffsetToNative() is called.
  687. * @stable ICU 3.6
  688. */
  689. #define UTEXT_GETNATIVEINDEX(ut) \
  690. ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
  691. (ut)->chunkNativeStart+(ut)->chunkOffset : \
  692. (ut)->pFuncs->mapOffsetToNative(ut))
  693. /**
  694. * Inline version of utext_setNativeIndex(), for performance-critical situations.
  695. * This macro expands to a brace-enclosed block, not an expression; ut and ix are each expanded more than once and must not have side effects.
  696. * Set the current iteration position to the nearest code point
  697. * boundary at or preceding the specified index.
  698. * The index is in the native units of the original input text.
  699. * If the index is out of range, it will be pinned to be within
  700. * the range of the input text.
  701. * chunkOffset is stored directly only when the index falls below nativeIndexingLimit in the current chunk and the UChar there is below 0xdc00 (so not a trail surrogate); every other case, including the chunk end, is delegated to utext_setNativeIndex(), which keeps the position on a code point boundary.
  702. * @stable ICU 3.8
  703. */
  704. #define UTEXT_SETNATIVEINDEX(ut, ix) \
  705. { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
  706. if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
  707. (ut)->chunkOffset=(int32_t)__offset; \
  708. } else { \
  709. utext_setNativeIndex((ut), (ix)); } }
  710. /************************************************************************************
  711. *
  712. * Functions related to writing or modifying the text.
  713. * These will work only with modifiable UTexts. Attempting to
  714. * modify a read-only UText will return an error status.
  715. *
  716. ************************************************************************************/
  717. /**
  718. * Return TRUE if the text can be written (modified) with utext_replace() or
  719. * utext_copy(). For the text to be writable, the text provider must
  720. * be of a type that supports writing and the UText must not be frozen.
  721. *
  722. * Attempting to modify text when utext_isWritable() is FALSE will fail -
  723. * the text will not be modified, and an error will be returned from the function
  724. * that attempted the modification.
  725. *
  726. * @param ut the UText to be tested.
  727. * @return TRUE if the text is modifiable.
  728. *
  729. * @see utext_freeze()
  730. * @see utext_replace()
  731. * @see utext_copy()
  732. * @stable ICU 3.4
  733. *
  734. */
  735. U_STABLE UBool U_EXPORT2
  736. utext_isWritable(const UText *ut);
  737. /**
  738. * Test whether there is metadata associated with the text.
  739. * @see Replaceable::hasMetaData()
  740. *
  741. * @param ut The UText to be tested
  742. * @return TRUE if the underlying text includes metadata.
  743. * @stable ICU 3.4
  744. */
  745. U_STABLE UBool U_EXPORT2
  746. utext_hasMetaData(const UText *ut);
  747. /**
  748. * Replace a range of the original text with a replacement text.
  749. *
  750. * Leaves the current iteration position at the position following the
  751. * newly inserted replacement text.
  752. *
  753. * This function is only available on UText types that support writing,
  754. * that is, ones where utext_isWritable() returns TRUE.
  755. *
  756. * When using this function, there should be only a single UText opened onto the
  757. * underlying native text string. Behavior after a replace operation
  758. * on a UText is undefined for any other additional UTexts that refer to the
  759. * modified string.
  760. *
  761. * @param ut the UText representing the text to be operated on.
  762. * @param nativeStart the native index of the start of the region to be replaced
  763. * @param nativeLimit the native index of the character following the region to be replaced.
  764. * @param replacementText pointer to the replacement text
  765. * @param replacementLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
  766. * @param status receives any error status. Possible errors include
  767. * U_NO_WRITE_PERMISSION
  768. *
  769. * @return The signed number of (native) storage units by which
  770. * the length of the text expanded or contracted.
  771. *
  772. * @stable ICU 3.4
  773. */
  774. U_STABLE int32_t U_EXPORT2
  775. utext_replace(UText *ut,
  776. int64_t nativeStart, int64_t nativeLimit,
  777. const UChar *replacementText, int32_t replacementLength,
  778. UErrorCode *status);
  779. /**
  780. *
  781. * Copy or move a substring from one position to another within the text,
  782. * while retaining any metadata associated with the text.
  783. * This function is used to duplicate or reorder substrings.
  784. * The destination index must not overlap the source range.
  785. *
  786. * The text to be copied or moved is inserted at destIndex;
  787. * it does not replace or overwrite any existing text.
  788. *
  789. * The iteration position is left following the newly inserted text
  790. * at the destination position.
  791. *
  792. * This function is only available on UText types that support writing,
  793. * that is, ones where utext_isWritable() returns TRUE.
  794. *
  795. * When using this function, there should be only a single UText opened onto the
  796. * underlying native text string. Behavior after a copy operation
  797. * on a UText is undefined for any other additional UTexts that refer to the
  798. * modified string.
  799. *
  800. * @param ut The UText representing the text to be operated on.
  801. * @param nativeStart The native index of the start of the region to be copied or moved
  802. * @param nativeLimit The native index of the character position following the region
  803. * to be copied or moved.
  804. * @param destIndex The native destination index to which the source substring is
  805. * copied or moved.
  806. * @param move If TRUE, then the substring is moved, not copied/duplicated.
  807. * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
  808. *
  809. * @stable ICU 3.4
  810. */
  811. U_STABLE void U_EXPORT2
  812. utext_copy(UText *ut,
  813. int64_t nativeStart, int64_t nativeLimit,
  814. int64_t destIndex,
  815. UBool move,
  816. UErrorCode *status);
  817. /**
  818. * <p>
  819. * Freeze a UText. This prevents any modification to the underlying text itself
  820. * by means of functions operating on this UText.
  821. * </p>
  822. * <p>
  823. * Once frozen, a UText cannot be unfrozen. The intent is to ensure
  824. * that the text underlying a frozen UText wrapper cannot be modified via that UText.
  825. * </p>
  826. * <p>
  827. * Caution: freezing a UText will disable changes made via the specific
  828. * frozen UText wrapper only; it will not have any effect on the ability to
  829. * directly modify the text by bypassing the UText. Any such backdoor modifications
  830. * are always an error while UText access is occurring because the underlying
  831. * text can get out of sync with UText's buffering.
  832. * </p>
  833. *
  834. * @param ut The UText to be frozen.
  835. * @see utext_isWritable()
  836. * @stable ICU 3.6
  837. */
  838. U_STABLE void U_EXPORT2
  839. utext_freeze(UText *ut);
  840. /**
  841. * UText provider properties (bit field indexes, i.e. bit positions rather than masks).
  842. *
  843. * @see UText
  844. * @stable ICU 3.4
  845. */
  846. enum {
  847. /**
  848. * It is potentially time consuming for the provider to determine the length of the text.
  849. * @stable ICU 3.4
  850. */
  851. UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
  852. /**
  853. * Text chunks remain valid and usable until the text object is modified or
  854. * deleted, not just until the next time the access() function is called
  855. * (which is the default).
  856. * @stable ICU 3.4
  857. */
  858. UTEXT_PROVIDER_STABLE_CHUNKS = 2,
  859. /**
  860. * The provider supports modifying the text via the replace() and copy()
  861. * functions.
  862. * @see Replaceable
  863. * @stable ICU 3.4
  864. */
  865. UTEXT_PROVIDER_WRITABLE = 3,
  866. /**
  867. * There is metadata associated with the text.
  868. * @see Replaceable::hasMetaData()
  869. * @stable ICU 3.4
  870. */
  871. UTEXT_PROVIDER_HAS_META_DATA = 4,
  872. /**
  873. * Text provider owns the text storage.
  874. * Generally occurs as the result of a deep clone of the UText.
  875. * When closing the UText, the associated text must
  876. * also be closed/deleted/freed, whatever is appropriate.
  877. * @stable ICU 3.6
  878. */
  879. UTEXT_PROVIDER_OWNS_TEXT = 5
  880. };
  881. /**
  882. * Function type declaration for UText.clone().
  883. *
  884. * Clone a UText. Much like opening a UText where the source text is itself
  885. * another UText.
  886. *
  887. * A deep clone will copy both the UText data structures and the underlying text.
  888. * The original and cloned UText will operate completely independently; modifications
  889. * made to the text in one will not affect the other. Text providers are not
  890. * required to support deep clones. The user of clone() must check the status return
  891. * and be prepared to handle failures.
  892. *
  893. * A shallow clone replicates only the UText data structures; it does not make
  894. * a copy of the underlying text. Shallow clones can be used as an efficient way to
  895. * have multiple iterators active in a single text string that is not being
  896. * modified.
  897. *
  898. * A shallow clone operation must not fail except for truly exceptional conditions such
  899. * as memory allocation failures.
  900. *
  901. * A UText and its clone may be safely concurrently accessed by separate threads.
  902. * This is true for both shallow and deep clones.
  903. * It is the responsibility of the Text Provider to ensure that this thread safety
  904. * constraint is met.
  905. *
  906. * @param dest A UText struct to be filled in with the result of the clone operation,
  907. * or NULL if the clone function should heap-allocate a new UText struct.
  908. * @param src The UText to be cloned.
  909. * @param deep TRUE to request a deep clone, FALSE for a shallow clone.
  910. * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
  911. * should be returned if the text provider is unable to clone the
  912. * original text.
  913. * @return The newly created clone, or NULL if the clone operation failed.
  914. *
  915. * @stable ICU 3.4
  916. */
  917. typedef UText * U_CALLCONV
  918. UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
  919. /**
  920. * Function type declaration for UText.nativeLength().
  921. *
  922. * @param ut the UText whose length is requested.
  923. * @return the length, in the native units of the original text string.
  924. * @see UText
  925. * @stable ICU 3.4
  926. */
  927. typedef int64_t U_CALLCONV
  928. UTextNativeLength(UText *ut);
  929. /**
  930. * Function type declaration for UText.access(). Get the description of the text chunk
  931. * containing the text at a requested native index. The UText's iteration
  932. * position will be left at the requested index. If the index is out
  933. * of bounds, the iteration position will be left at the start or end
  934. * of the string, as appropriate.
  935. *
  936. * Chunks must begin and end on code point boundaries. A single code point
  937. * composed of multiple storage units must never span a chunk boundary.
  938. *
  939. *
  940. * @param ut the UText being accessed.
  941. * @param nativeIndex Requested index of the text to be accessed.
  942. * @param forward If TRUE, then the returned chunk must contain text
  943. * starting from the index, so that start<=index<limit.
  944. * If FALSE, then the returned chunk must contain text
  945. * before the index, so that start<index<=limit.
  946. * @return TRUE if the requested index could be accessed. The chunk
  947. * will contain the requested text.
  948. * FALSE if a chunk cannot be accessed
  949. * (the requested index is out of bounds).
  950. *
  951. * @see UText
  952. * @stable ICU 3.4
  953. */
  954. typedef UBool U_CALLCONV
  955. UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
  956. /**
  957. * Function type declaration for UText.extract().
  958. *
  959. * Extract text from a UText into a UChar buffer. The range of text to be extracted
  960. * is specified in the native indices of the UText provider. These may not necessarily
  961. * be UTF-16 indices.
  962. * <p>
  963. * The size (number of 16 bit UChars) of the data to be extracted is returned. The
  964. * full amount is returned, even when the specified buffer size is smaller.
  965. * <p>
  966. * The extracted string will (if you are a user) / must (if you are a text provider)
  967. * be NUL-terminated if there is sufficient space in the destination buffer.
  968. *
  969. * @param ut the UText from which to extract data.
  970. * @param nativeStart the native index of the first character to extract.
  971. * @param nativeLimit the native string index of the position following the last
  972. * character to extract.
  973. * @param dest the UChar (UTF-16) buffer into which the extracted text is placed
  974. * @param destCapacity The size, in UChars, of the destination buffer. May be zero
  975. * for precomputing the required size.
  976. * @param status receives any error status.
  977. * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
  978. * preflighting.
  979. * @return Number of UChars in the data. Does not include a trailing NUL.
  980. *
  981. * @stable ICU 3.4
  982. */
  983. typedef int32_t U_CALLCONV
  984. UTextExtract(UText *ut,
  985. int64_t nativeStart, int64_t nativeLimit,
  986. UChar *dest, int32_t destCapacity,
  987. UErrorCode *status);
  988. /**
  989. * Function type declaration for UText.replace().
  990. *
  991. * Replace a range of the original text with a replacement text.
  992. *
  993. * Leaves the current iteration position at the position following the
  994. * newly inserted replacement text.
  995. *
  996. * This function need only be implemented for UText types that support writing.
  997. *
  998. * When using this function, there should be only a single UText opened onto the
  999. * underlying native text string. The function is responsible for updating the
  1000. * text chunk within the UText to reflect the updated iteration position,
  1001. * taking into account any changes to the underlying string's structure caused
  1002. * by the replace operation.
  1003. *
  1004. * @param ut the UText representing the text to be operated on.
  1005. * @param nativeStart the index of the start of the region to be replaced
  1006. * @param nativeLimit the index of the character following the region to be replaced.
  1007. * @param replacementText pointer to the replacement text
  1008. * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
  1009. * @param status receives any error status. Possible errors include
  1010. * U_NO_WRITE_PERMISSION
  1011. *
  1012. * @return The signed number of (native) storage units by which
  1013. * the length of the text expanded or contracted.
  1014. *
  1015. * @stable ICU 3.4
  1016. */
  1017. typedef int32_t U_CALLCONV
  1018. UTextReplace(UText *ut,
  1019. int64_t nativeStart, int64_t nativeLimit,
  1020. const UChar *replacementText, int32_t replacmentLength,
  1021. UErrorCode *status);
  1022. /**
  1023. * Function type declaration for UText.copy().
  1024. *
  1025. * Copy or move a substring from one position to another within the text,
  1026. * while retaining any metadata associated with the text.
  1027. * This function is used to duplicate or reorder substrings.
  1028. * The destination index must not overlap the source range.
  1029. *
  1030. * The text to be copied or moved is inserted at nativeDest;
  1031. * it does not replace or overwrite any existing text.
  1032. *
  1033. * This function need only be implemented for UText types that support writing.
  1034. *
  1035. * When using this function, there should be only a single UText opened onto the
  1036. * underlying native text string. The function is responsible for updating the
  1037. * text chunk within the UText to reflect the updated iteration position,
  1038. * taking into account any changes to the underlying string's structure caused
  1039. * by the copy or move operation.
  1040. *
  1041. * @param ut The UText representing the text to be operated on.
  1042. * @param nativeStart The index of the start of the region to be copied or moved
  1043. * @param nativeLimit The index of the character following the region to be copied or moved.
  1044. * @param nativeDest The destination index to which the source substring is copied or moved.
  1045. * @param move If TRUE, then the substring is moved, not copied/duplicated.
  1046. * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
  1047. *
  1048. * @stable ICU 3.4
  1049. */
  1050. typedef void U_CALLCONV
  1051. UTextCopy(UText *ut,
  1052. int64_t nativeStart, int64_t nativeLimit,
  1053. int64_t nativeDest,
  1054. UBool move,
  1055. UErrorCode *status);
  1056. /**
  1057. * Function type declaration for UText.mapOffsetToNative().
  1058. * Map from the current UChar offset (chunkOffset) within the current text chunk to
  1059. * the corresponding native index in the original source text.
  1060. *
  1061. * This is required only for text providers that do not use native UTF-16 indexes.
  1062. *
  1063. * @param ut the UText.
  1064. * @return Absolute (native) index corresponding to chunkOffset in the current chunk.
  1065. * The returned native index should always be on a code point boundary.
  1066. *
  1067. * @stable ICU 3.4
  1068. */
  1069. typedef int64_t U_CALLCONV
  1070. UTextMapOffsetToNative(const UText *ut);
  1071. /**
  1072. * Function type declaration for UText.mapNativeIndexToUTF16().
  1073. * Map from a native index to a UChar offset within a text chunk.
  1074. * Behavior is undefined if the native index does not fall within the
  1075. * current chunk.
  1076. *
  1077. * This function is required only for text providers that do not use native UTF-16 indexes.
  1078. *
  1079. * @param ut The UText containing the text chunk.
  1080. * @param nativeIndex Absolute (native) text index, chunkNativeStart<=index<=chunkNativeLimit.
  1081. * @return Chunk-relative UTF-16 offset corresponding to the specified native
  1082. * index.
  1083. *
  1084. * @stable ICU 3.4
  1085. */
  1086. typedef int32_t U_CALLCONV
  1087. UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
  1088. /**
  1089. * Function type declaration for UText.close().
  1090. *
  1091. * A Text Provider close function is only required for provider types that make
  1092. * allocations in their open function (or other functions) that must be
  1093. * cleaned up when the UText is closed.
  1094. *
  1095. * The allocation of the UText struct itself and any "extra" storage
  1096. * associated with the UText is handled by the common UText implementation
  1097. * and does not require provider specific cleanup in a close function.
  1098. *
  1099. * Most UText provider implementations do not need to implement this function.
  1100. *
  1101. * @param ut A UText object to be closed.
  1102. *
  1103. * @stable ICU 3.4
  1104. */
  1105. typedef void U_CALLCONV
  1106. UTextClose(UText *ut);
  1107. /**
  1108. * (public) Function dispatch table for UText.
  1109. * Conceptually very much like a C++ Virtual Function Table.
  1110. * This struct defines the organization of the table.
  1111. * Each text provider implementation must provide an
  1112. * actual table that is initialized with the appropriate functions
  1113. * for the type of text being handled.
  1114. * @stable ICU 3.6
  1115. */
  1116. struct UTextFuncs {
  1117. /**
  1118. * (public) Function table size, sizeof(UTextFuncs)
  1119. * Intended for use should the table grow to accommodate added
  1120. * functions in the future, to allow tests for older format
  1121. * function tables that do not contain the extensions.
  1122. *
  1123. * Fields are placed for optimal alignment on
  1124. * 32/64/128-bit-pointer machines, by normally grouping together
  1125. * 4 32-bit fields,
  1126. * 4 pointers,
  1127. * 2 64-bit fields
  1128. * in sequence.
  1129. * @stable ICU 3.6
  1130. */
  1131. int32_t tableSize;
  1132. /**
  1133. * (private) Alignment padding.
  1134. * Do not use, reserved for use by the UText framework only.
  1135. * @internal
  1136. */
  1137. int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3;
  1138. /**
  1139. * (public) Function pointer for UTextClone
  1140. *
  1141. * @see UTextClone
  1142. * @stable ICU 3.6
  1143. */
  1144. UTextClone *clone;
  1145. /**
  1146. * (public) Function pointer for UTextNativeLength
  1147. * May be expensive to compute!
  1148. *
  1149. * @see UTextNativeLength
  1150. * @stable ICU 3.6
  1151. */
  1152. UTextNativeLength *nativeLength;
  1153. /**
  1154. * (public) Function pointer for UTextAccess.
  1155. *
  1156. * @see UTextAccess
  1157. * @stable ICU 3.6
  1158. */
  1159. UTextAccess *access;
  1160. /**
  1161. * (public) Function pointer for UTextExtract.
  1162. *
  1163. * @see UTextExtract
  1164. * @stable ICU 3.6
  1165. */
  1166. UTextExtract *extract;
  1167. /**
  1168. * (public) Function pointer for UTextReplace.
  1169. *
  1170. * @see UTextReplace
  1171. * @stable ICU 3.6
  1172. */
  1173. UTextReplace *replace;
  1174. /**
  1175. * (public) Function pointer for UTextCopy.
  1176. *
  1177. * @see UTextCopy
  1178. * @stable ICU 3.6
  1179. */
  1180. UTextCopy *copy;
  1181. /**
  1182. * (public) Function pointer for UTextMapOffsetToNative.
  1183. *
  1184. * @see UTextMapOffsetToNative
  1185. * @stable ICU 3.6
  1186. */
  1187. UTextMapOffsetToNative *mapOffsetToNative;
  1188. /**
  1189. * (public) Function pointer for UTextMapNativeIndexToUTF16.
  1190. *
  1191. * @see UTextMapNativeIndexToUTF16
  1192. * @stable ICU 3.6
  1193. */
  1194. UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;
  1195. /**
  1196. * (public) Function pointer for UTextClose.
  1197. *
  1198. * @see UTextClose
  1199. * @stable ICU 3.6
  1200. */
  1201. UTextClose *close;
  1202. /**
  1203. * (private) Spare function pointer
  1204. * @internal
  1205. */
  1206. UTextClose *spare1;
  1207. /**
  1208. * (private) Spare function pointer
  1209. * @internal
  1210. */
  1211. UTextClose *spare2;
  1212. /**
  1213. * (private) Spare function pointer
  1214. * @internal
  1215. */
  1216. UTextClose *spare3;
  1217. };
  1218. /**
  1219. * Function dispatch table for UText; typedef for struct UTextFuncs.
  1220. * @see UTextFuncs
  1221. */
  1222. typedef struct UTextFuncs UTextFuncs;
  1223. /**
  1224. * UText struct. Provides the interface between the generic UText access code
  1225. * and the UText provider code that works on specific kinds of
  1226. * text (UTF-8, noncontiguous UTF-16, whatever.)
  1227. *
  1228. * Applications that are using predefined types of text providers
  1229. * to pass text data to ICU services will have no need to view the
  1230. * internals of the UText structs that they open.
  1231. *
  1232. * @stable ICU 3.6
  1233. */
  1234. struct UText {
  1235. /**
  1236. * (private) Magic. Used to help detect when UText functions are handed
  1237. * invalid or uninitialized UText structs.
  1238. * utext_openXYZ() functions take an initialized,
  1239. * but not necessarily open, UText struct as an
  1240. * optional fill-in parameter. This magic field
  1241. * is used to check for that initialization.
  1242. * Text provider close functions must NOT clear
  1243. * the magic field because that would prevent
  1244. * reuse of the UText struct.
  1245. * @internal
  1246. */
  1247. uint32_t magic;
  1248. /**
  1249. * (private) Flags for managing the allocation and freeing of
  1250. * memory associated with this UText.
  1251. * @internal
  1252. */
  1253. int32_t flags;
  1254. /**
  1255. * Text provider properties. This set of flags is maintained by the
  1256. * text provider implementation.
  1257. * @stable ICU 3.4
  1258. */
  1259. int32_t providerProperties;
  1260. /**
  1261. * (public) sizeOfStruct=sizeof(UText)
  1262. * Allows possible backward compatible extension.
  1263. *
  1264. * @stable ICU 3.4
  1265. */
  1266. int32_t sizeOfStruct;
  1267. /* ------ 16 byte alignment boundary ----------- */
  1268. /**
  1269. * (protected) Native index of the first character position following
  1270. * the current chunk.
  1271. * @stable ICU 3.6
  1272. */
  1273. int64_t chunkNativeLimit;
  1274. /**
  1275. * (protected) Size in bytes of the extra space (pExtra).
  1276. * @stable ICU 3.4
  1277. */
  1278. int32_t extraSize;
  1279. /**
  1280. * (protected) The highest chunk offset where native indexing and
  1281. * chunk (UTF-16) indexing correspond. For UTF-16 sources, value
  1282. * will be equal to chunkLength.
  1283. *
  1284. * @stable ICU 3.6
  1285. */
  1286. int32_t nativeIndexingLimit;
  1287. /* ---- 16 byte alignment boundary------ */
  1288. /**
  1289. * (protected) Native index of the first character in the text chunk.
  1290. * @stable ICU 3.6
  1291. */
  1292. int64_t chunkNativeStart;
  1293. /**
  1294. * (protected) Current iteration position within the text chunk (UTF-16 buffer).
  1295. * This is the index to the character that will be returned by utext_next32().
  1296. * @stable ICU 3.6
  1297. */
  1298. int32_t chunkOffset;
  1299. /**
  1300. * (protected) Length of the text chunk (UTF-16 buffer), in UChars.
  1301. * @stable ICU 3.6
  1302. */
  1303. int32_t chunkLength;
  1304. /* ---- 16 byte alignment boundary-- */
  1305. /**
  1306. * (protected) pointer to a chunk of text in UTF-16 format.
  1307. * May refer either to original storage of the source of the text, or
  1308. * if conversion was required, to a buffer owned by the UText.
  1309. * @stable ICU 3.6
  1310. */
  1311. const UChar *chunkContents;
  1312. /**
  1313. * (public) Pointer to Dispatch table for accessing functions for this UText.
  1314. * @stable ICU 3.6
  1315. */
  1316. const UTextFuncs *pFuncs;
  1317. /**
  1318. * (protected) Pointer to additional space requested by the
  1319. * text provider during the utext_open operation.
  1320. * @stable ICU 3.4
  1321. */
  1322. void *pExtra;
  1323. /**
  1324. * (protected) Pointer to string or text-containing object or similar.
  1325. * This is the source of the text that this UText is wrapping, in a format
  1326. * that is known to the text provider functions.
  1327. * @stable ICU 3.4
  1328. */
  1329. const void *context;
  1330. /* --- 16 byte alignment boundary--- */
  1331. /**
  1332. * (protected) Pointer fields available for use by the text provider.
  1333. * Not used by UText common code.
  1334. * @stable ICU 3.6
  1335. */
  1336. const void *p;
  1337. /**
  1338. * (protected) Pointer fields available for use by the text provider.
  1339. * Not used by UText common code.
  1340. * @stable ICU 3.6
  1341. */
  1342. const void *q;
  1343. /**
  1344. * (protected) Pointer fields available for use by the text provider.
  1345. * Not used by UText common code.
  1346. * @stable ICU 3.6
  1347. */
  1348. const void *r;
  1349. /**
  1350. * Private field reserved for future use by the UText framework
  1351. * itself. This is not to be touched by the text providers.
  1352. * @internal ICU 3.4
  1353. */
  1354. void *privP;
  1355. /* --- 16 byte alignment boundary--- */
  1356. /**
  1357. * (protected) Integer field reserved for use by the text provider.
  1358. * Not used by the UText framework, or by the client (user) of the UText.
  1359. * @stable ICU 3.4
  1360. */
  1361. int64_t a;
  1362. /**
  1363. * (protected) Integer field reserved for use by the text provider.
  1364. * Not used by the UText framework, or by the client (user) of the UText.
  1365. * @stable ICU 3.4
  1366. */
  1367. int32_t b;
  1368. /**
  1369. * (protected) Integer field reserved for use by the text provider.
  1370. * Not used by the UText framework, or by the client (user) of the UText.
  1371. * @stable ICU 3.4
  1372. */
  1373. int32_t c;
  1374. /* ---- 16 byte alignment boundary---- */
  1375. /**
  1376. * Private field reserved for future use by the UText framework
  1377. * itself. This is not to be touched by the text providers.
  1378. * @internal ICU 3.4
  1379. */
  1380. int64_t privA;
  1381. /**
  1382. * Private field reserved for future use by the UText framework
  1383. * itself. This is not to be touched by the text providers.
  1384. * @internal ICU 3.4
  1385. */
  1386. int32_t privB;
  1387. /**
  1388. * Private field reserved for future use by the UText framework
  1389. * itself. This is not to be touched by the text providers.
  1390. * @internal ICU 3.4
  1391. */
  1392. int32_t privC;
  1393. };
  1394. /**
  1395. * Common function for use by Text Provider implementations to allocate and/or initialize
  1396. * a new UText struct. To be called in the implementation of utext_open() functions.
  1397. * If the supplied UText parameter is null, a new UText struct will be allocated on the heap.
  1398. * If the supplied UText is already open, the provider's close function will be called
  1399. * so that the struct can be reused by the open that is in progress.
  1400. *
  1401. * @param ut pointer to a UText struct to be re-used, or null if a new UText
  1402. * should be allocated.
  1403. * @param extraSpace The amount of additional space to be allocated as part
  1404. * of this UText, for use by types of providers that require
  1405. * additional storage.
  1406. * @param status Errors are returned here.
  1407. * @return pointer to the UText, allocated if necessary, with extra space set up if requested.
  1408. * @stable ICU 3.4
  1409. */
  1410. U_STABLE UText * U_EXPORT2
  1411. utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
  /*
   * Do not wrap this enum in #ifndef U_HIDE_INTERNAL_API: the public, stable
   * UTEXT_INITIALIZER macro expands to UTEXT_MAGIC, so hiding the constant
   * would make UTEXT_INITIALIZER unusable for clients that define
   * U_HIDE_INTERNAL_API.
   */
  /**
   * @internal
   * Value used to help identify correctly initialized UText structs.
   * Note: must be publicly visible so that UTEXT_INITIALIZER can access it.
   */
  enum {
      UTEXT_MAGIC = 0x345ad82c
  };
  /**
   * Brace initializer for local (stack) instances of a UText struct.
   * A UText struct must be initialized before it is passed to one of the
   * utext_open functions.
   *
   * The values are positional; the trailing comment on each row names the
   * struct fields that the row initializes, in order.
   *
   * @stable ICU 3.6
   */
  #define UTEXT_INITIALIZER { \
      UTEXT_MAGIC,      /* magic */ \
      0, 0,             /* flags, providerProps */ \
      sizeof(UText),    /* sizeOfStruct */ \
      0, 0, 0,          /* chunkNativeLimit, extraSize, nativeIndexingLimit */ \
      0, 0, 0,          /* chunkNativeStart, chunkOffset, chunkLength */ \
      NULL, NULL,       /* chunkContents, pFuncs */ \
      NULL, NULL,       /* pExtra, context */ \
      NULL, NULL, NULL, /* p, q, r */ \
      NULL,             /* privP */ \
      0, 0, 0,          /* a, b, c */ \
      0, 0, 0           /* privA, privB, privC */ \
  }
  1449. U_CDECL_END
  1450. #endif