idna.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. /*
  2. *******************************************************************************
  3. * Copyright (C) 2010-2012, International Business Machines
  4. * Corporation and others. All Rights Reserved.
  5. *******************************************************************************
  6. * file name: idna.h
  7. * encoding: US-ASCII
  8. * tab size: 8 (not used)
  9. * indentation:4
  10. *
  11. * created on: 2010mar05
  12. * created by: Markus W. Scherer
  13. */
  14. #ifndef __IDNA_H__
  15. #define __IDNA_H__
  16. /**
  17. * \file
  18. * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
  19. */
  20. #include "unicode/utypes.h"
  21. #if !UCONFIG_NO_IDNA
  22. #include "unicode/bytestream.h"
  23. #include "unicode/stringpiece.h"
  24. #include "unicode/uidna.h"
  25. #include "unicode/unistr.h"
  26. U_NAMESPACE_BEGIN
  27. class IDNAInfo;
  28. /**
  29. * Abstract base class for IDNA processing.
  30. * See http://www.unicode.org/reports/tr46/
  31. * and http://www.ietf.org/rfc/rfc3490.txt
  32. *
  33. * The IDNA class is not intended for public subclassing.
  34. *
  35. * This C++ API currently only implements UTS #46.
  36. * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
  37. * and IDNA2003 (functions that do not use a service object).
  38. * @stable ICU 4.6
  39. */
  40. class U_COMMON_API IDNA : public UObject {
  41. public:
  42. /**
  43. * Destructor.
  44. * @stable ICU 4.6
  45. */
  46. ~IDNA();
  47. /**
  48. * Returns an IDNA instance which implements UTS #46.
  49. * Returns an unmodifiable instance, owned by the caller.
  50. * Cache it for multiple operations, and delete it when done.
  51. * The instance is thread-safe, that is, it can be used concurrently.
  52. *
  53. * UTS #46 defines Unicode IDNA Compatibility Processing,
  54. * updated to the latest version of Unicode and compatible with both
  55. * IDNA2003 and IDNA2008.
  56. *
  57. * The worker functions use transitional processing, including deviation mappings,
  58. * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
  59. * is used in which case the deviation characters are passed through without change.
  60. *
  61. * Disallowed characters are mapped to U+FFFD.
  62. *
  63. * For available options see the uidna.h header.
  64. * Operations with the UTS #46 instance do not support the
  65. * UIDNA_ALLOW_UNASSIGNED option.
  66. *
  67. * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
  68. * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
  69. * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
  70. *
  71. * @param options Bit set to modify the processing and error checking.
  72. * See option bit set values in uidna.h.
  73. * @param errorCode Standard ICU error code. Its input value must
  74. * pass the U_SUCCESS() test, or else the function returns
  75. * immediately. Check for U_FAILURE() on output or use with
  76. * function chaining. (See User Guide for details.)
  77. * @return the UTS #46 IDNA instance, if successful
  78. * @stable ICU 4.6
  79. */
  80. static IDNA *
  81. createUTS46Instance(uint32_t options, UErrorCode &errorCode);
  82. /**
  83. * Converts a single domain name label into its ASCII form for DNS lookup.
  84. * If any processing step fails, then info.hasErrors() will be TRUE and
  85. * the result might not be an ASCII string.
  86. * The label might be modified according to the types of errors.
  87. * Labels with severe errors will be left in (or turned into) their Unicode form.
  88. *
  89. * The UErrorCode indicates an error only in exceptional cases,
  90. * such as a U_MEMORY_ALLOCATION_ERROR.
  91. *
  92. * @param label Input domain name label
  93. * @param dest Destination string object
  94. * @param info Output container of IDNA processing details.
  95. * @param errorCode Standard ICU error code. Its input value must
  96. * pass the U_SUCCESS() test, or else the function returns
  97. * immediately. Check for U_FAILURE() on output or use with
  98. * function chaining. (See User Guide for details.)
  99. * @return dest
  100. * @stable ICU 4.6
  101. */
  102. virtual UnicodeString &
  103. labelToASCII(const UnicodeString &label, UnicodeString &dest,
  104. IDNAInfo &info, UErrorCode &errorCode) const = 0;
  105. /**
  106. * Converts a single domain name label into its Unicode form for human-readable display.
  107. * If any processing step fails, then info.hasErrors() will be TRUE.
  108. * The label might be modified according to the types of errors.
  109. *
  110. * The UErrorCode indicates an error only in exceptional cases,
  111. * such as a U_MEMORY_ALLOCATION_ERROR.
  112. *
  113. * @param label Input domain name label
  114. * @param dest Destination string object
  115. * @param info Output container of IDNA processing details.
  116. * @param errorCode Standard ICU error code. Its input value must
  117. * pass the U_SUCCESS() test, or else the function returns
  118. * immediately. Check for U_FAILURE() on output or use with
  119. * function chaining. (See User Guide for details.)
  120. * @return dest
  121. * @stable ICU 4.6
  122. */
  123. virtual UnicodeString &
  124. labelToUnicode(const UnicodeString &label, UnicodeString &dest,
  125. IDNAInfo &info, UErrorCode &errorCode) const = 0;
  126. /**
  127. * Converts a whole domain name into its ASCII form for DNS lookup.
  128. * If any processing step fails, then info.hasErrors() will be TRUE and
  129. * the result might not be an ASCII string.
  130. * The domain name might be modified according to the types of errors.
  131. * Labels with severe errors will be left in (or turned into) their Unicode form.
  132. *
  133. * The UErrorCode indicates an error only in exceptional cases,
  134. * such as a U_MEMORY_ALLOCATION_ERROR.
  135. *
  136. * @param name Input domain name
  137. * @param dest Destination string object
  138. * @param info Output container of IDNA processing details.
  139. * @param errorCode Standard ICU error code. Its input value must
  140. * pass the U_SUCCESS() test, or else the function returns
  141. * immediately. Check for U_FAILURE() on output or use with
  142. * function chaining. (See User Guide for details.)
  143. * @return dest
  144. * @stable ICU 4.6
  145. */
  146. virtual UnicodeString &
  147. nameToASCII(const UnicodeString &name, UnicodeString &dest,
  148. IDNAInfo &info, UErrorCode &errorCode) const = 0;
  149. /**
  150. * Converts a whole domain name into its Unicode form for human-readable display.
  151. * If any processing step fails, then info.hasErrors() will be TRUE.
  152. * The domain name might be modified according to the types of errors.
  153. *
  154. * The UErrorCode indicates an error only in exceptional cases,
  155. * such as a U_MEMORY_ALLOCATION_ERROR.
  156. *
  157. * @param name Input domain name
  158. * @param dest Destination string object
  159. * @param info Output container of IDNA processing details.
  160. * @param errorCode Standard ICU error code. Its input value must
  161. * pass the U_SUCCESS() test, or else the function returns
  162. * immediately. Check for U_FAILURE() on output or use with
  163. * function chaining. (See User Guide for details.)
  164. * @return dest
  165. * @stable ICU 4.6
  166. */
  167. virtual UnicodeString &
  168. nameToUnicode(const UnicodeString &name, UnicodeString &dest,
  169. IDNAInfo &info, UErrorCode &errorCode) const = 0;
  170. // UTF-8 versions of the processing methods ---------------------------- ***
  171. /**
  172. * Converts a single domain name label into its ASCII form for DNS lookup.
  173. * UTF-8 version of labelToASCII(), same behavior.
  174. *
  175. * @param label Input domain name label
  176. * @param dest Destination byte sink; Flush()ed if successful
  177. * @param info Output container of IDNA processing details.
  178. * @param errorCode Standard ICU error code. Its input value must
  179. * pass the U_SUCCESS() test, or else the function returns
  180. * immediately. Check for U_FAILURE() on output or use with
  181. * function chaining. (See User Guide for details.)
  182. * @return dest
  183. * @stable ICU 4.6
  184. */
  185. virtual void
  186. labelToASCII_UTF8(const StringPiece &label, ByteSink &dest,
  187. IDNAInfo &info, UErrorCode &errorCode) const;
  188. /**
  189. * Converts a single domain name label into its Unicode form for human-readable display.
  190. * UTF-8 version of labelToUnicode(), same behavior.
  191. *
  192. * @param label Input domain name label
  193. * @param dest Destination byte sink; Flush()ed if successful
  194. * @param info Output container of IDNA processing details.
  195. * @param errorCode Standard ICU error code. Its input value must
  196. * pass the U_SUCCESS() test, or else the function returns
  197. * immediately. Check for U_FAILURE() on output or use with
  198. * function chaining. (See User Guide for details.)
  199. * @return dest
  200. * @stable ICU 4.6
  201. */
  202. virtual void
  203. labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest,
  204. IDNAInfo &info, UErrorCode &errorCode) const;
  205. /**
  206. * Converts a whole domain name into its ASCII form for DNS lookup.
  207. * UTF-8 version of nameToASCII(), same behavior.
  208. *
  209. * @param name Input domain name
  210. * @param dest Destination byte sink; Flush()ed if successful
  211. * @param info Output container of IDNA processing details.
  212. * @param errorCode Standard ICU error code. Its input value must
  213. * pass the U_SUCCESS() test, or else the function returns
  214. * immediately. Check for U_FAILURE() on output or use with
  215. * function chaining. (See User Guide for details.)
  216. * @return dest
  217. * @stable ICU 4.6
  218. */
  219. virtual void
  220. nameToASCII_UTF8(const StringPiece &name, ByteSink &dest,
  221. IDNAInfo &info, UErrorCode &errorCode) const;
  222. /**
  223. * Converts a whole domain name into its Unicode form for human-readable display.
  224. * UTF-8 version of nameToUnicode(), same behavior.
  225. *
  226. * @param name Input domain name
  227. * @param dest Destination byte sink; Flush()ed if successful
  228. * @param info Output container of IDNA processing details.
  229. * @param errorCode Standard ICU error code. Its input value must
  230. * pass the U_SUCCESS() test, or else the function returns
  231. * immediately. Check for U_FAILURE() on output or use with
  232. * function chaining. (See User Guide for details.)
  233. * @return dest
  234. * @stable ICU 4.6
  235. */
  236. virtual void
  237. nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
  238. IDNAInfo &info, UErrorCode &errorCode) const;
  239. };
  240. class UTS46;
  241. /**
  242. * Output container for IDNA processing errors.
  243. * The IDNAInfo class is not suitable for subclassing.
  244. * @stable ICU 4.6
  245. */
  246. class U_COMMON_API IDNAInfo : public UMemory {
  247. public:
  248. /**
  249. * Constructor for stack allocation.
  250. * @stable ICU 4.6
  251. */
  252. IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
  253. /**
  254. * Were there IDNA processing errors?
  255. * @return TRUE if there were processing errors
  256. * @stable ICU 4.6
  257. */
  258. UBool hasErrors() const { return errors!=0; }
  259. /**
  260. * Returns a bit set indicating IDNA processing errors.
  261. * See UIDNA_ERROR_... constants in uidna.h.
  262. * @return bit set of processing errors
  263. * @stable ICU 4.6
  264. */
  265. uint32_t getErrors() const { return errors; }
  266. /**
  267. * Returns TRUE if transitional and nontransitional processing produce different results.
  268. * This is the case when the input label or domain name contains
  269. * one or more deviation characters outside a Punycode label (see UTS #46).
  270. * <ul>
  271. * <li>With nontransitional processing, such characters are
  272. * copied to the destination string.
  273. * <li>With transitional processing, such characters are
  274. * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
  275. * </ul>
  276. * @return TRUE if transitional and nontransitional processing produce different results
  277. * @stable ICU 4.6
  278. */
  279. UBool isTransitionalDifferent() const { return isTransDiff; }
  280. private:
  281. friend class UTS46;
  282. IDNAInfo(const IDNAInfo &other); // no copying
  283. IDNAInfo &operator=(const IDNAInfo &other); // no copying
  284. void reset() {
  285. errors=labelErrors=0;
  286. isTransDiff=FALSE;
  287. isBiDi=FALSE;
  288. isOkBiDi=TRUE;
  289. }
  290. uint32_t errors, labelErrors;
  291. UBool isTransDiff;
  292. UBool isBiDi;
  293. UBool isOkBiDi;
  294. };
  295. U_NAMESPACE_END
  296. #endif // UCONFIG_NO_IDNA
  297. #endif // __IDNA_H__