usprep.h 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. /*
  2. *******************************************************************************
  3. *
  4. * Copyright (C) 2003-2014, International Business Machines
  5. * Corporation and others. All Rights Reserved.
  6. *
  7. *******************************************************************************
  8. * file name: usprep.h
  9. * encoding: US-ASCII
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2003jul2
  14. * created by: Ram Viswanadha
  15. */
  16. #ifndef __USPREP_H__
  17. #define __USPREP_H__
  18. /**
  19. * \file
  20. * \brief C API: Implements the StringPrep algorithm.
  21. */
  22. #include "unicode/utypes.h"
  23. #include "unicode/localpointer.h"
  24. /**
  25. *
  26. * The StringPrep API implements the StringPrep framework as described by RFC 3454.
  27. * StringPrep prepares Unicode strings for use in network protocols.
  28. * Profiles of StringPrep are sets of rules and data according to which the
  29. * Unicode strings are prepared. Each profile contains tables which describe
  30. * how a code point should be treated. The tables are broadly classified into
  31. * <ul>
  32. * <li> Unassigned Table: Contains code points that are unassigned
  33. * in the Unicode Version supported by StringPrep. Currently
  34. * RFC 3454 supports Unicode 3.2. </li>
  35. * <li> Prohibited Table: Contains code points that are prohibited from
  36. * the output of the StringPrep processing function. </li>
  37. * <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
  38. * </ul>
  39. *
  40. * The procedure for preparing Unicode strings:
  41. * <ol>
  42. * <li> Map: For each character in the input, check if it has a mapping
  43. * and, if so, replace it with its mapping. </li>
  44. * <li> Normalize: Possibly normalize the result of step 1 using Unicode
  45. * normalization. </li>
  46. * <li> Prohibit: Check for any characters that are not allowed in the
  47. * output. If any are found, return an error.</li>
  48. * <li> Check bidi: Possibly check for right-to-left characters, and if
  49. * any are found, make sure that the whole string satisfies the
  50. * requirements for bidirectional strings. If the string does not
  51. * satisfy the requirements for bidirectional strings, return an
  52. * error. </li>
  53. * </ol>
  54. * @author Ram Viswanadha
  55. */
  56. #if !UCONFIG_NO_IDNA
  57. #include "unicode/parseerr.h"
  58. /**
  59. * Handle type for a StringPrep profile. The struct is only forward-declared in this header; profiles are obtained from usprep_open() or usprep_openByType() and released with usprep_close().
  60. * @stable ICU 2.8
  61. */
  62. typedef struct UStringPrepProfile UStringPrepProfile;
  63. /**
  64. * Option to prohibit processing of unassigned code points in the input.
  65. * This is the zero value (no option bits set) of the options bit set taken by usprep_prepare().
  66. * @see usprep_prepare
  67. * @stable ICU 2.8
  68. */
  69. #define USPREP_DEFAULT 0x0000
  70. /**
  71. * Option to allow processing of unassigned code points in the input.
  72. * Passed in the options bit set taken by usprep_prepare().
  73. * @see usprep_prepare
  74. * @stable ICU 2.8
  75. */
  76. #define USPREP_ALLOW_UNASSIGNED 0x0001
  77. /**
  78. * Enum constants for the standard StringPrep profile types
  79. * supported by usprep_openByType().
  80. * @see usprep_openByType
  81. * @stable ICU 4.2
  82. */
  83. typedef enum UStringPrepProfileType {
  84. /**
  85. * RFC3491 Nameprep
  86. * @stable ICU 4.2
  87. */
  88. USPREP_RFC3491_NAMEPREP,
  89. /**
  90. * RFC3530 nfs4_cs_prep
  91. * @stable ICU 4.2
  92. */
  93. USPREP_RFC3530_NFS4_CS_PREP,
  94. /**
  95. * RFC3530 nfs4_cs_prep with the case-insensitive option
  96. * @stable ICU 4.2
  97. */
  98. USPREP_RFC3530_NFS4_CS_PREP_CI,
  99. /**
  100. * RFC3530 nfs4_cis_prep
  101. * @stable ICU 4.2
  102. */
  103. USPREP_RFC3530_NFS4_CIS_PREP,
  104. /**
  105. * RFC3530 nfs4_mixed_prep for the prefix
  106. * @stable ICU 4.2
  107. */
  108. USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX,
  109. /**
  110. * RFC3530 nfs4_mixed_prep for the suffix
  111. * @stable ICU 4.2
  112. */
  113. USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX,
  114. /**
  115. * RFC3722 iSCSI
  116. * @stable ICU 4.2
  117. */
  118. USPREP_RFC3722_ISCSI,
  119. /**
  120. * RFC3920 XMPP Nodeprep
  121. * @stable ICU 4.2
  122. */
  123. USPREP_RFC3920_NODEPREP,
  124. /**
  125. * RFC3920 XMPP Resourceprep
  126. * @stable ICU 4.2
  127. */
  128. USPREP_RFC3920_RESOURCEPREP,
  129. /**
  130. * RFC4011 Policy MIB Stringprep
  131. * @stable ICU 4.2
  132. */
  133. USPREP_RFC4011_MIB,
  134. /**
  135. * RFC4013 SASLprep
  136. * @stable ICU 4.2
  137. */
  138. USPREP_RFC4013_SASLPREP,
  139. /**
  140. * RFC4505 trace
  141. * @stable ICU 4.2
  142. */
  143. USPREP_RFC4505_TRACE,
  144. /**
  145. * RFC4518 LDAP
  146. * @stable ICU 4.2
  147. */
  148. USPREP_RFC4518_LDAP,
  149. /**
  150. * RFC4518 LDAP for the case ignore, numeric and stored prefix
  151. * matching rules
  152. * @stable ICU 4.2
  153. */
  154. USPREP_RFC4518_LDAP_CI
  155. } UStringPrepProfileType;
  156. /**
  157. * Creates a StringPrep profile from the data file.
  158. *
  159. * @param path String containing the full path pointing to the directory
  160. * where the profiles reside, followed by the package name,
  161. * e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system.
  162. * If NULL, ICU default data files will be used.
  163. * @param fileName Name of the profile file to be opened
  164. * @param status ICU error code in/out parameter. Must not be NULL.
  165. * Must fulfill U_SUCCESS before the function call.
  166. * @return Pointer to the UStringPrepProfile that is opened. Should be closed by
  167. * calling usprep_close()
  168. * @see usprep_close()
  169. * @stable ICU 2.8
  170. */
  171. U_STABLE UStringPrepProfile* U_EXPORT2
  172. usprep_open(const char* path,
  173. const char* fileName,
  174. UErrorCode* status);
  175. /**
  176. * Creates a StringPrep profile for the specified profile type.
  177. *
  178. * @param type The standard profile type to open (a UStringPrepProfileType constant)
  179. * @param status ICU error code in/out parameter. Must not be NULL.
  180. * Must fulfill U_SUCCESS before the function call.
  181. * @return Pointer to the UStringPrepProfile that is opened. Should be closed by
  182. * calling usprep_close()
  183. * @see usprep_close()
  184. * @stable ICU 4.2
  185. */
  186. U_STABLE UStringPrepProfile* U_EXPORT2
  187. usprep_openByType(UStringPrepProfileType type,
  188. UErrorCode* status);
  189. /**
  190. * Closes a profile that was opened with usprep_open() or usprep_openByType().
  191. * @param profile The profile to close
  192. * @stable ICU 2.8
  193. */
  194. U_STABLE void U_EXPORT2
  195. usprep_close(UStringPrepProfile* profile);
  196. #if U_SHOW_CPLUSPLUS_API
  197. U_NAMESPACE_BEGIN
  198. /**
  199. * \class LocalUStringPrepProfilePointer
  200. * "Smart pointer" class that closes a UStringPrepProfile via usprep_close().
  201. * For most methods see the LocalPointerBase base class.
  202. *
  203. * @see LocalPointerBase
  204. * @see LocalPointer
  205. * @stable ICU 4.4
  206. */
  207. U_DEFINE_LOCAL_OPEN_POINTER(LocalUStringPrepProfilePointer, UStringPrepProfile, usprep_close);
  208. U_NAMESPACE_END
  209. #endif /* U_SHOW_CPLUSPLUS_API */
  210. /**
  211. * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes (NFKC),
  212. * and checks for prohibited and BiDi characters in the order defined by RFC 3454,
  213. * depending on the options specified in the profile.
  214. *
  215. * @param prep The profile to use
  216. * @param src Pointer to UChar buffer containing the string to prepare
  217. * @param srcLength Number of characters in the source string
  218. * @param dest Pointer to the destination buffer to receive the output
  219. * @param destCapacity The capacity of the destination array
  220. * @param options A bit set of options:
  221. *
  222. * - USPREP_DEFAULT Prohibit processing of unassigned code points in the input
  223. *
  224. * - USPREP_ALLOW_UNASSIGNED Treat the unassigned code points in the input
  225. * as normal Unicode code points.
  226. *
  227. * @param parseError Pointer to UParseError struct to receive information on the position
  228. * of the error if an error is encountered. Can be NULL.
  229. * @param status ICU in/out error code parameter.
  230. * U_INVALID_CHAR_FOUND if src contains
  231. * unmatched single surrogates.
  232. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  233. * too many code points.
  234. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  235. * @return The number of UChars in the destination buffer
  236. * @stable ICU 2.8
  237. */
  238. U_STABLE int32_t U_EXPORT2
  239. usprep_prepare( const UStringPrepProfile* prep,
  240. const UChar* src, int32_t srcLength,
  241. UChar* dest, int32_t destCapacity,
  242. int32_t options,
  243. UParseError* parseError,
  244. UErrorCode* status );
  245. #endif /* #if !UCONFIG_NO_IDNA */
  246. #endif