  /*
  ******************************************************************************
  *
  * Copyright (C) 1999-2014, International Business Machines
  * Corporation and others. All Rights Reserved.
  *
  ******************************************************************************
  * file name: udata.h
  * encoding: US-ASCII
  * tab size: 8 (not used)
  * indentation:4
  *
  * created on: 1999oct25
  * created by: Markus W. Scherer
  */
  16. #ifndef __UDATA_H__
  17. #define __UDATA_H__
  18. #include "unicode/utypes.h"
  19. #include "unicode/localpointer.h"
  20. U_CDECL_BEGIN
  /**
   * \file
   * \brief C API: Data loading interface
   *
   * <h2>Information about data loading interface</h2>
   *
   * This API is used to find and efficiently load data for ICU and applications
   * using ICU. It provides an abstract interface that specifies a data type and
   * name to find and load the data. Normally this API is used by other ICU APIs
   * to load required data out of the ICU data library, but it can be used to
   * load data out of other places.
   *
   * See the User Guide Data Management chapter.
   */
  #ifndef U_HIDE_INTERNAL_API

  /**
   * Character used to separate package names from tree names
   * @internal ICU 3.0
   */
  #define U_TREE_SEPARATOR '-'

  /**
   * String used to separate package names from tree names
   * @internal ICU 3.0
   */
  #define U_TREE_SEPARATOR_STRING "-"

  /**
   * Character used to separate parts of entry names
   * @internal ICU 3.0
   */
  #define U_TREE_ENTRY_SEP_CHAR '/'

  /**
   * String used to separate parts of entry names
   * @internal ICU 3.0
   */
  #define U_TREE_ENTRY_SEP_STRING "/"

  /**
   * Alias for standard ICU data
   * @internal ICU 3.0
   */
  #define U_ICUDATA_ALIAS "ICUDATA"

  #endif /* U_HIDE_INTERNAL_API */
  /**
   * UDataInfo contains the properties about the requested data.
   * This is meta data.
   *
   * <p>This structure may grow in the future, indicated by the
   * <code>size</code> field.</p>
   *
   * <p>ICU data must be at least 8-aligned, and should be 16-aligned.
   * The UDataInfo struct begins 4 bytes after the start of the data item,
   * so it is 4-aligned.</p>
   *
   * <p>The platform data property fields help determine if a data
   * file can be efficiently used on a given machine.
   * The particular fields are of importance only if the data
   * is affected by the properties - if there is integer data
   * with word sizes > 1 byte, char* text, or UChar* text.</p>
   *
   * <p>The implementation for the <code>udata_open[Choice]()</code>
   * functions may reject data based on the value in <code>isBigEndian</code>.
   * No other field is used by the <code>udata</code> API implementation.</p>
   *
   * <p>The <code>dataFormat</code> may be used to identify
   * the kind of data, e.g. a converter table.</p>
   *
   * <p>The <code>formatVersion</code> field should be used to
   * make sure that the format can be interpreted.
   * It may be a good idea to check only for the one or two highest
   * of the version elements to allow the data memory to
   * get more or somewhat rearranged contents, for as long
   * as the using code can still interpret the older contents.</p>
   *
   * <p>The <code>dataVersion</code> field is intended to be a
   * common place to store the source version of the data;
   * for data from the Unicode character database, this could
   * reflect the Unicode version.</p>
   *
   * @stable ICU 2.0
   */
  typedef struct {
      /** sizeof(UDataInfo)
       *  @stable ICU 2.0 */
      uint16_t size;

      /** unused, set to 0
       *  @stable ICU 2.0 */
      uint16_t reservedWord;

      /* platform data properties */

      /** 0 for little-endian machine, 1 for big-endian
       *  @stable ICU 2.0 */
      uint8_t isBigEndian;

      /** see U_CHARSET_FAMILY values in utypes.h
       *  @stable ICU 2.0 */
      uint8_t charsetFamily;

      /** sizeof(UChar), one of { 1, 2, 4 }
       *  @stable ICU 2.0 */
      uint8_t sizeofUChar;

      /** unused, set to 0
       *  @stable ICU 2.0 */
      uint8_t reservedByte;

      /** data format identifier
       *  @stable ICU 2.0 */
      uint8_t dataFormat[4];

      /** versions: [0] major [1] minor [2] milli [3] micro
       *  @stable ICU 2.0 */
      uint8_t formatVersion[4];

      /** versions: [0] major [1] minor [2] milli [3] micro
       *  @stable ICU 2.0 */
      uint8_t dataVersion[4];
  } UDataInfo;
  /* API for reading data -----------------------------------------------------*/

  /**
   * Forward declaration of the data memory type.
   * @stable ICU 2.0
   */
  typedef struct UDataMemory UDataMemory;
  136. /**
  137. * Callback function for udata_openChoice().
  138. * @param context parameter passed into <code>udata_openChoice()</code>.
  139. * @param type The type of the data as passed into <code>udata_openChoice()</code>.
  140. * It may be <code>NULL</code>.
  141. * @param name The name of the data as passed into <code>udata_openChoice()</code>.
  142. * @param pInfo A pointer to the <code>UDataInfo</code> structure
  143. * of data that has been loaded and will be returned
  144. * by <code>udata_openChoice()</code> if this function
  145. * returns <code>TRUE</code>.
  146. * @return TRUE if the current data memory is acceptable
  147. * @stable ICU 2.0
  148. */
  149. typedef UBool U_CALLCONV
  150. UDataMemoryIsAcceptable(void *context,
  151. const char *type, const char *name,
  152. const UDataInfo *pInfo);
  153. /**
  154. * Convenience function.
  155. * This function works the same as <code>udata_openChoice</code>
  156. * except that any data that matches the type and name
  157. * is assumed to be acceptable.
  158. * @param path Specifies an absolute path and/or a basename for the
  159. * finding of the data in the file system.
  160. * <code>NULL</code> for ICU data.
  161. * @param type A string that specifies the type of data to be loaded.
  162. * For example, resource bundles are loaded with type "res",
  163. * conversion tables with type "cnv".
  164. * This may be <code>NULL</code> or empty.
  165. * @param name A string that specifies the name of the data.
  166. * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
  167. * @return A pointer (handle) to a data memory object, or <code>NULL</code>
  168. * if an error occurs. Call <code>udata_getMemory()</code>
  169. * to get a pointer to the actual data.
  170. *
  171. * @see udata_openChoice
  172. * @stable ICU 2.0
  173. */
  174. U_STABLE UDataMemory * U_EXPORT2
  175. udata_open(const char *path, const char *type, const char *name,
  176. UErrorCode *pErrorCode);
  177. /**
  178. * Data loading function.
  179. * This function is used to find and load efficiently data for
  180. * ICU and applications using ICU.
  181. * It provides an abstract interface that allows to specify a data
  182. * type and name to find and load the data.
  183. *
  184. * <p>The implementation depends on platform properties and user preferences
  185. * and may involve loading shared libraries (DLLs), mapping
  186. * files into memory, or fopen()/fread() files.
  187. * It may also involve using static memory or database queries etc.
  188. * Several or all data items may be combined into one entity
  189. * (DLL, memory-mappable file).</p>
  190. *
  191. * <p>The data is always preceded by a header that includes
  192. * a <code>UDataInfo</code> structure.
  193. * The caller's <code>isAcceptable()</code> function is called to make
  194. * sure that the data is useful. It may be called several times if it
  195. * rejects the data and there is more than one location with data
  196. * matching the type and name.</p>
  197. *
  198. * <p>If <code>path==NULL</code>, then ICU data is loaded.
  199. * Otherwise, it is separated into a basename and a basename-less directory string.
  200. * The basename is used as the data package name, and the directory is
  201. * logically prepended to the ICU data directory string.</p>
  202. *
  203. * <p>For details about ICU data loading see the User Guide
  204. * Data Management chapter. (http://icu-project.org/userguide/icudata.html)</p>
  205. *
  206. * @param path Specifies an absolute path and/or a basename for the
  207. * finding of the data in the file system.
  208. * <code>NULL</code> for ICU data.
  209. * @param type A string that specifies the type of data to be loaded.
  210. * For example, resource bundles are loaded with type "res",
  211. * conversion tables with type "cnv".
  212. * This may be <code>NULL</code> or empty.
  213. * @param name A string that specifies the name of the data.
  214. * @param isAcceptable This function is called to verify that loaded data
  215. * is useful for the client code. If it returns FALSE
  216. * for all data items, then <code>udata_openChoice()</code>
  217. * will return with an error.
  218. * @param context Arbitrary parameter to be passed into isAcceptable.
  219. * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
  220. * @return A pointer (handle) to a data memory object, or <code>NULL</code>
  221. * if an error occurs. Call <code>udata_getMemory()</code>
  222. * to get a pointer to the actual data.
  223. * @stable ICU 2.0
  224. */
  225. U_STABLE UDataMemory * U_EXPORT2
  226. udata_openChoice(const char *path, const char *type, const char *name,
  227. UDataMemoryIsAcceptable *isAcceptable, void *context,
  228. UErrorCode *pErrorCode);
  229. /**
  230. * Close the data memory.
  231. * This function must be called to allow the system to
  232. * release resources associated with this data memory.
  233. * @param pData The pointer to data memory object
  234. * @stable ICU 2.0
  235. */
  236. U_STABLE void U_EXPORT2
  237. udata_close(UDataMemory *pData);
  #if U_SHOW_CPLUSPLUS_API

  U_NAMESPACE_BEGIN

  /**
   * \class LocalUDataMemoryPointer
   * "Smart pointer" class, closes a UDataMemory via udata_close().
   * For most methods see the LocalPointerBase base class.
   *
   * @see LocalPointerBase
   * @see LocalPointer
   * @stable ICU 4.4
   */
  U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close);

  U_NAMESPACE_END

  #endif /* U_SHOW_CPLUSPLUS_API */
  252. /**
  253. * Get the pointer to the actual data inside the data memory.
  254. * The data is read-only.
  255. *
  256. * ICU data must be at least 8-aligned, and should be 16-aligned.
  257. *
  258. * @param pData The pointer to data memory object
  259. * @stable ICU 2.0
  260. */
  261. U_STABLE const void * U_EXPORT2
  262. udata_getMemory(UDataMemory *pData);
  263. /**
  264. * Get the information from the data memory header.
  265. * This allows to get access to the header containing
  266. * platform data properties etc. which is not part of
  267. * the data itself and can therefore not be accessed
  268. * via the pointer that <code>udata_getMemory()</code> returns.
  269. *
  270. * @param pData pointer to the data memory object
  271. * @param pInfo pointer to a UDataInfo object;
  272. * its <code>size</code> field must be set correctly,
  273. * typically to <code>sizeof(UDataInfo)</code>.
  274. *
  275. * <code>*pInfo</code> will be filled with the UDataInfo structure
  276. * in the data memory object. If this structure is smaller than
  277. * <code>pInfo->size</code>, then the <code>size</code> will be
  278. * adjusted and only part of the structure will be filled.
  279. * @stable ICU 2.0
  280. */
  281. U_STABLE void U_EXPORT2
  282. udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
  283. /**
  284. * This function bypasses the normal ICU data loading process and
  285. * allows you to force ICU's system data to come out of a user-specified
  286. * area in memory.
  287. *
  288. * ICU data must be at least 8-aligned, and should be 16-aligned.
  289. * See http://userguide.icu-project.org/icudata
  290. *
  291. * The format of this data is that of the icu common data file, as is
  292. * generated by the pkgdata tool with mode=common or mode=dll.
  293. * You can read in a whole common mode file and pass the address to the start of the
  294. * data, or (with the appropriate link options) pass in the pointer to
  295. * the data that has been loaded from a dll by the operating system,
  296. * as shown in this code:
  297. *
  298. * extern const char U_IMPORT U_ICUDATA_ENTRY_POINT [];
  299. * // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool
  300. * UErrorCode status = U_ZERO_ERROR;
  301. *
  302. * udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status);
  303. *
  304. * It is important that the declaration be as above. The entry point
  305. * must not be declared as an extern void*.
  306. *
  307. * Starting with ICU 4.4, it is possible to set several data packages,
  308. * one per call to this function.
  309. * udata_open() will look for data in the multiple data packages in the order
  310. * in which they were set.
  311. * The position of the linked-in or default-name ICU .data package in the
  312. * search list depends on when the first data item is loaded that is not contained
  313. * in the already explicitly set packages.
  314. * If data was loaded implicitly before the first call to this function
  315. * (for example, via opening a converter, constructing a UnicodeString
  316. * from default-codepage data, using formatting or collation APIs, etc.),
  317. * then the default data will be first in the list.
  318. *
  319. * This function has no effect on application (non ICU) data. See udata_setAppData()
  320. * for similar functionality for application data.
  321. *
  322. * @param data pointer to ICU common data
  323. * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
  324. * @stable ICU 2.0
  325. */
  326. U_STABLE void U_EXPORT2
  327. udata_setCommonData(const void *data, UErrorCode *err);
  328. /**
  329. * This function bypasses the normal ICU data loading process for application-specific
  330. * data and allows you to force the it to come out of a user-specified
  331. * pointer.
  332. *
  333. * ICU data must be at least 8-aligned, and should be 16-aligned.
  334. * See http://userguide.icu-project.org/icudata
  335. *
  336. * The format of this data is that of the icu common data file, like 'icudt26l.dat'
  337. * or the corresponding shared library (DLL) file.
  338. * The application must read in or otherwise construct an image of the data and then
  339. * pass the address of it to this function.
  340. *
  341. *
  342. * Warning: setAppData will set a U_USING_DEFAULT_WARNING code if
  343. * data with the specifed path that has already been opened, or
  344. * if setAppData with the same path has already been called.
  345. * Any such calls to setAppData will have no effect.
  346. *
  347. *
  348. * @param packageName the package name by which the application will refer
  349. * to (open) this data
  350. * @param data pointer to the data
  351. * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
  352. * @see udata_setCommonData
  353. * @stable ICU 2.0
  354. */
  355. U_STABLE void U_EXPORT2
  356. udata_setAppData(const char *packageName, const void *data, UErrorCode *err);
  /**
   * Possible settings for udata_setFileAccess()
   * @see udata_setFileAccess
   * @stable ICU 3.4
   */
  typedef enum UDataFileAccess {
      /** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */
      UDATA_FILES_FIRST,
      /** An alias for the default access mode. @stable ICU 3.4 */
      UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST,
      /** ICU only loads data from packages, not from single files. @stable ICU 3.4 */
      UDATA_ONLY_PACKAGES,
      /** ICU loads data from packages first, and only from single files
          if the data cannot be found in a package. @stable ICU 3.4 */
      UDATA_PACKAGES_FIRST,
      /** ICU does not access the file system for data loading. @stable ICU 3.4 */
      UDATA_NO_FILES,
      /** Number of real UDataFileAccess values. @stable ICU 3.4 */
      UDATA_FILE_ACCESS_COUNT
  } UDataFileAccess;
  377. /**
  378. * This function may be called to control how ICU loads data. It must be called
  379. * before any ICU data is loaded, including application data loaded with
  380. * ures/ResourceBundle or udata APIs. This function is not multithread safe.
  381. * The results of calling it while other threads are loading data are undefined.
  382. * @param access The type of file access to be used
  383. * @param status Error code.
  384. * @see UDataFileAccess
  385. * @stable ICU 3.4
  386. */
  387. U_STABLE void U_EXPORT2
  388. udata_setFileAccess(UDataFileAccess access, UErrorCode *status);
  389. U_CDECL_END
  390. #endif