uidna.h 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770
  1. /*
  2. *******************************************************************************
  3. *
  4. * Copyright (C) 2003-2014, International Business Machines
  5. * Corporation and others. All Rights Reserved.
  6. *
  7. *******************************************************************************
  8. * file name: uidna.h
  9. * encoding: US-ASCII
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2003feb1
  14. * created by: Ram Viswanadha
  15. */
  16. #ifndef __UIDNA_H__
  17. #define __UIDNA_H__
  18. #include "unicode/utypes.h"
  19. #if !UCONFIG_NO_IDNA
  20. #include "unicode/localpointer.h"
  21. #include "unicode/parseerr.h"
  22. /**
  23. * \file
  24. * \brief C API: Internationalizing Domain Names in Applications (IDNA)
  25. *
  26. * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
  27. *
  28. * The C API functions which do take a UIDNA * service object pointer
  29. * implement UTS #46 and IDNA2008.
  30. *
  31. * IDNA2003 is obsolete.
  32. * The C API functions which do not take a service object pointer
  33. * implement IDNA2003. They are all deprecated.
  34. */
  35. /*
  36. * IDNA option bit set values; OR them together to build the options argument (see uidna_openUTS46()).
  37. */
  38. enum {
  39. /**
  40. * Default options value: None of the other options are set.
  41. * For use in static worker and factory methods.
  42. * @stable ICU 2.6
  43. */
  44. UIDNA_DEFAULT=0,
  45. #ifndef U_HIDE_DEPRECATED_API
  46. /**
  47. * Option to allow unassigned code points in domain names and labels.
  48. * For use in static worker and factory methods.
  49. * <p>This option is ignored by the UTS46 implementation.
  50. * (UTS #46 disallows unassigned code points.)
  51. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  52. */
  53. UIDNA_ALLOW_UNASSIGNED=1,
  54. #endif /* U_HIDE_DEPRECATED_API */
  55. /**
  56. * Option to check whether the input conforms to the STD3 ASCII rules,
  57. * for example the restriction of labels to LDH characters
  58. * (ASCII Letters, Digits and Hyphen-Minus).
  59. * For use in static worker and factory methods.
  60. * @stable ICU 2.6
  61. */
  62. UIDNA_USE_STD3_RULES=2,
  63. /**
  64. * IDNA option to check whether the input conforms to the BiDi rules.
  65. * For use in static worker and factory methods.
  66. * <p>This option is ignored by the IDNA2003 implementation.
  67. * (IDNA2003 always performs a BiDi check.)
  68. * @stable ICU 4.6
  69. */
  70. UIDNA_CHECK_BIDI=4,
  71. /**
  72. * IDNA option to check whether the input conforms to the CONTEXTJ rules.
  73. * For use in static worker and factory methods.
  74. * <p>This option is ignored by the IDNA2003 implementation.
  75. * (The CONTEXTJ check is new in IDNA2008.)
  76. * @stable ICU 4.6
  77. */
  78. UIDNA_CHECK_CONTEXTJ=8,
  79. /**
  80. * IDNA option for nontransitional processing in ToASCII().
  81. * For use in static worker and factory methods.
  82. * <p>By default, ToASCII() uses transitional processing.
  83. * <p>This option is ignored by the IDNA2003 implementation.
  84. * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
  85. * @stable ICU 4.6
  86. */
  87. UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
  88. /**
  89. * IDNA option for nontransitional processing in ToUnicode().
  90. * For use in static worker and factory methods.
  91. * <p>By default, ToUnicode() uses transitional processing.
  92. * <p>This option is ignored by the IDNA2003 implementation.
  93. * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
  94. * @stable ICU 4.6
  95. */
  96. UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
  97. /**
  98. * IDNA option to check whether the input conforms to the CONTEXTO rules.
  99. * For use in static worker and factory methods.
  100. * <p>This option is ignored by the IDNA2003 implementation.
  101. * (The CONTEXTO check is new in IDNA2008.)
  102. * <p>This is for use by registries for IDNA2008 conformance.
  103. * UTS #46 does not require the CONTEXTO check.
  104. * @stable ICU 49
  105. */
  106. UIDNA_CHECK_CONTEXTO=0x40
  107. };
  108. /**
  109. * Opaque C service object type for the new IDNA API; create with uidna_openUTS46(), release with uidna_close().
  110. * @stable ICU 4.6
  111. */
  112. struct UIDNA;
  113. typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
  114. /**
  115. * Returns a UIDNA instance which implements UTS #46.
  116. * Returns an unmodifiable instance, owned by the caller.
  117. * Cache it for multiple operations, and uidna_close() it when done.
  118. * The instance is thread-safe, that is, it can be used concurrently.
  119. *
  120. * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
  121. *
  122. * @param options Bit set to modify the processing and error checking.
  123. * See option bit set values in uidna.h.
  124. * @param pErrorCode Standard ICU error code. Its input value must
  125. * pass the U_SUCCESS() test, or else the function returns
  126. * immediately. Check for U_FAILURE() on output or use with
  127. * function chaining. (See User Guide for details.)
  128. * @return the UTS #46 UIDNA instance, if successful; the caller owns it and must uidna_close() it
  129. * @stable ICU 4.6
  130. */
  131. U_STABLE UIDNA * U_EXPORT2
  132. uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
  133. /**
  134. * Closes a UIDNA instance, e.g. one returned by uidna_openUTS46().
  135. * @param idna UIDNA instance to be closed
  136. * @stable ICU 4.6
  137. */
  138. U_STABLE void U_EXPORT2
  139. uidna_close(UIDNA *idna);
  140. #if U_SHOW_CPLUSPLUS_API
  141. U_NAMESPACE_BEGIN
  142. /**
  143. * \class LocalUIDNAPointer
  144. * "Smart pointer" class, closes a UIDNA via uidna_close().
  145. * For most methods see the LocalPointerBase base class.
  146. *
  147. * @see LocalPointerBase
  148. * @see LocalPointer
  149. * @stable ICU 4.6
  150. */
  151. U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
  152. U_NAMESPACE_END
  153. #endif /* U_SHOW_CPLUSPLUS_API */
  154. /**
  155. * Output container for IDNA processing errors.
  156. * Initialize with UIDNA_INFO_INITIALIZER:
  157. * \code
  158. * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  159. * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
  160. * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
  161. * \endcode
  162. * @stable ICU 4.6
  163. */
  164. typedef struct UIDNAInfo {
  165. /** sizeof(UIDNAInfo); filled in by UIDNA_INFO_INITIALIZER. @stable ICU 4.6 */
  166. int16_t size;
  167. /**
  168. * Set to TRUE if transitional and nontransitional processing produce different results.
  169. * For details see C++ IDNAInfo::isTransitionalDifferent().
  170. * @stable ICU 4.6
  171. */
  172. UBool isTransitionalDifferent;
  173. UBool reservedB3; /**< Reserved field, do not use. @internal */
  174. /**
  175. * Bit set indicating IDNA processing errors. 0 if no errors.
  176. * See UIDNA_ERROR_... constants.
  177. * @stable ICU 4.6
  178. */
  179. uint32_t errors;
  180. int32_t reservedI2; /**< Reserved field, do not use. @internal */
  181. int32_t reservedI3; /**< Reserved field, do not use. @internal */
  182. } UIDNAInfo;
  183. /**
  184. * Static initializer for a UIDNAInfo struct: size is set to sizeof(UIDNAInfo), all other fields to FALSE or 0.
  185. * @stable ICU 4.6
  186. */
  187. #define UIDNA_INFO_INITIALIZER { \
  188. (int16_t)sizeof(UIDNAInfo), \
  189. FALSE, FALSE, \
  190. 0, 0, 0 }
  191. /**
  192. * Converts a single domain name label into its ASCII form for DNS lookup.
  193. * If any processing step fails, then pInfo->errors will be non-zero and
  194. * the result might not be an ASCII string.
  195. * The label might be modified according to the types of errors.
  196. * Labels with severe errors will be left in (or turned into) their Unicode form.
  197. *
  198. * The UErrorCode indicates an error only in exceptional cases,
  199. * such as a U_MEMORY_ALLOCATION_ERROR.
  200. *
  201. * @param idna UIDNA instance
  202. * @param label Input domain name label
  203. * @param length Label length, or -1 if NUL-terminated
  204. * @param dest Destination string buffer
  205. * @param capacity Destination buffer capacity
  206. * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
  207. * @param pErrorCode Standard ICU error code. Its input value must
  208. * pass the U_SUCCESS() test, or else the function returns
  209. * immediately. Check for U_FAILURE() on output or use with
  210. * function chaining. (See User Guide for details.)
  211. * @return destination string length
  212. * @stable ICU 4.6
  213. */
  214. U_STABLE int32_t U_EXPORT2
  215. uidna_labelToASCII(const UIDNA *idna,
  216. const UChar *label, int32_t length,
  217. UChar *dest, int32_t capacity,
  218. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  219. /**
  220. * Converts a single domain name label into its Unicode form for human-readable display.
  221. * If any processing step fails, then pInfo->errors will be non-zero.
  222. * The label might be modified according to the types of errors.
  223. *
  224. * The UErrorCode indicates an error only in exceptional cases,
  225. * such as a U_MEMORY_ALLOCATION_ERROR.
  226. *
  227. * @param idna UIDNA instance
  228. * @param label Input domain name label
  229. * @param length Label length, or -1 if NUL-terminated
  230. * @param dest Destination string buffer
  231. * @param capacity Destination buffer capacity
  232. * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
  233. * @param pErrorCode Standard ICU error code. Its input value must
  234. * pass the U_SUCCESS() test, or else the function returns
  235. * immediately. Check for U_FAILURE() on output or use with
  236. * function chaining. (See User Guide for details.)
  237. * @return destination string length
  238. * @stable ICU 4.6
  239. */
  240. U_STABLE int32_t U_EXPORT2
  241. uidna_labelToUnicode(const UIDNA *idna,
  242. const UChar *label, int32_t length,
  243. UChar *dest, int32_t capacity,
  244. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  245. /**
  246. * Converts a whole domain name into its ASCII form for DNS lookup.
  247. * If any processing step fails, then pInfo->errors will be non-zero and
  248. * the result might not be an ASCII string.
  249. * The domain name might be modified according to the types of errors.
  250. * Labels with severe errors will be left in (or turned into) their Unicode form.
  251. *
  252. * The UErrorCode indicates an error only in exceptional cases,
  253. * such as a U_MEMORY_ALLOCATION_ERROR.
  254. *
  255. * @param idna UIDNA instance
  256. * @param name Input domain name
  257. * @param length Domain name length, or -1 if NUL-terminated
  258. * @param dest Destination string buffer
  259. * @param capacity Destination buffer capacity
  260. * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
  261. * @param pErrorCode Standard ICU error code. Its input value must
  262. * pass the U_SUCCESS() test, or else the function returns
  263. * immediately. Check for U_FAILURE() on output or use with
  264. * function chaining. (See User Guide for details.)
  265. * @return destination string length
  266. * @stable ICU 4.6
  267. */
  268. U_STABLE int32_t U_EXPORT2
  269. uidna_nameToASCII(const UIDNA *idna,
  270. const UChar *name, int32_t length,
  271. UChar *dest, int32_t capacity,
  272. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  273. /**
  274. * Converts a whole domain name into its Unicode form for human-readable display.
  275. * If any processing step fails, then pInfo->errors will be non-zero.
  276. * The domain name might be modified according to the types of errors.
  277. *
  278. * The UErrorCode indicates an error only in exceptional cases,
  279. * such as a U_MEMORY_ALLOCATION_ERROR.
  280. *
  281. * @param idna UIDNA instance
  282. * @param name Input domain name
  283. * @param length Domain name length, or -1 if NUL-terminated
  284. * @param dest Destination string buffer
  285. * @param capacity Destination buffer capacity
  286. * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
  287. * @param pErrorCode Standard ICU error code. Its input value must
  288. * pass the U_SUCCESS() test, or else the function returns
  289. * immediately. Check for U_FAILURE() on output or use with
  290. * function chaining. (See User Guide for details.)
  291. * @return destination string length
  292. * @stable ICU 4.6
  293. */
  294. U_STABLE int32_t U_EXPORT2
  295. uidna_nameToUnicode(const UIDNA *idna,
  296. const UChar *name, int32_t length,
  297. UChar *dest, int32_t capacity,
  298. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  299. /* UTF-8 versions of the processing methods --------------------------------- */
  300. /**
  301. * Converts a single domain name label into its ASCII form for DNS lookup.
  302. * UTF-8 version of uidna_labelToASCII(), same behavior.
  303. *
  304. * @param idna UIDNA instance
  305. * @param label Input domain name label
  306. * @param length Label length, or -1 if NUL-terminated
  307. * @param dest Destination string buffer
  308. * @param capacity Destination buffer capacity
  309. * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
  310. * @param pErrorCode Standard ICU error code. Its input value must
  311. * pass the U_SUCCESS() test, or else the function returns
  312. * immediately. Check for U_FAILURE() on output or use with
  313. * function chaining. (See User Guide for details.)
  314. * @return destination string length
  315. * @stable ICU 4.6
  316. */
  317. U_STABLE int32_t U_EXPORT2
  318. uidna_labelToASCII_UTF8(const UIDNA *idna,
  319. const char *label, int32_t length,
  320. char *dest, int32_t capacity,
  321. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  322. /**
  323. * Converts a single domain name label into its Unicode form for human-readable display.
  324. * UTF-8 version of uidna_labelToUnicode(), same behavior.
  325. *
  326. * @param idna UIDNA instance
  327. * @param label Input domain name label
  328. * @param length Label length, or -1 if NUL-terminated
  329. * @param dest Destination string buffer
  330. * @param capacity Destination buffer capacity
  331. * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
  332. * @param pErrorCode Standard ICU error code. Its input value must
  333. * pass the U_SUCCESS() test, or else the function returns
  334. * immediately. Check for U_FAILURE() on output or use with
  335. * function chaining. (See User Guide for details.)
  336. * @return destination string length
  337. * @stable ICU 4.6
  338. */
  339. U_STABLE int32_t U_EXPORT2
  340. uidna_labelToUnicodeUTF8(const UIDNA *idna,
  341. const char *label, int32_t length,
  342. char *dest, int32_t capacity,
  343. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  344. /**
  345. * Converts a whole domain name into its ASCII form for DNS lookup.
  346. * UTF-8 version of uidna_nameToASCII(), same behavior.
  347. *
  348. * @param idna UIDNA instance
  349. * @param name Input domain name
  350. * @param length Domain name length, or -1 if NUL-terminated
  351. * @param dest Destination string buffer
  352. * @param capacity Destination buffer capacity
  353. * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
  354. * @param pErrorCode Standard ICU error code. Its input value must
  355. * pass the U_SUCCESS() test, or else the function returns
  356. * immediately. Check for U_FAILURE() on output or use with
  357. * function chaining. (See User Guide for details.)
  358. * @return destination string length
  359. * @stable ICU 4.6
  360. */
  361. U_STABLE int32_t U_EXPORT2
  362. uidna_nameToASCII_UTF8(const UIDNA *idna,
  363. const char *name, int32_t length,
  364. char *dest, int32_t capacity,
  365. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  366. /**
  367. * Converts a whole domain name into its Unicode form for human-readable display.
  368. * UTF-8 version of uidna_nameToUnicode(), same behavior.
  369. *
  370. * @param idna UIDNA instance
  371. * @param name Input domain name
  372. * @param length Domain name length, or -1 if NUL-terminated
  373. * @param dest Destination string buffer
  374. * @param capacity Destination buffer capacity
  375. * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
  376. * @param pErrorCode Standard ICU error code. Its input value must
  377. * pass the U_SUCCESS() test, or else the function returns
  378. * immediately. Check for U_FAILURE() on output or use with
  379. * function chaining. (See User Guide for details.)
  380. * @return destination string length
  381. * @stable ICU 4.6
  382. */
  383. U_STABLE int32_t U_EXPORT2
  384. uidna_nameToUnicodeUTF8(const UIDNA *idna,
  385. const char *name, int32_t length,
  386. char *dest, int32_t capacity,
  387. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  388. /*
  389. * IDNA error bit set values.
  390. * When a domain name or label fails a processing step or does not meet the
  391. * validity criteria, then one or more of these error bits are set in UIDNAInfo.errors.
  392. */
  393. enum {
  394. /**
  395. * A non-final domain name label (or the whole domain name) is empty.
  396. * @stable ICU 4.6
  397. */
  398. UIDNA_ERROR_EMPTY_LABEL=1,
  399. /**
  400. * A domain name label is longer than 63 bytes.
  401. * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
  402. * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
  403. * @stable ICU 4.6
  404. */
  405. UIDNA_ERROR_LABEL_TOO_LONG=2,
  406. /**
  407. * A domain name is longer than 255 bytes in its storage form.
  408. * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
  409. * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
  410. * @stable ICU 4.6
  411. */
  412. UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
  413. /**
  414. * A label starts with a hyphen-minus ('-').
  415. * @stable ICU 4.6
  416. */
  417. UIDNA_ERROR_LEADING_HYPHEN=8,
  418. /**
  419. * A label ends with a hyphen-minus ('-').
  420. * @stable ICU 4.6
  421. */
  422. UIDNA_ERROR_TRAILING_HYPHEN=0x10,
  423. /**
  424. * A label contains hyphen-minus ('-') in the third and fourth positions.
  425. * @stable ICU 4.6
  426. */
  427. UIDNA_ERROR_HYPHEN_3_4=0x20,
  428. /**
  429. * A label starts with a combining mark.
  430. * @stable ICU 4.6
  431. */
  432. UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
  433. /**
  434. * A label or domain name contains disallowed characters.
  435. * @stable ICU 4.6
  436. */
  437. UIDNA_ERROR_DISALLOWED=0x80,
  438. /**
  439. * A label starts with "xn--" but does not contain valid Punycode.
  440. * That is, an xn-- label failed Punycode decoding.
  441. * @stable ICU 4.6
  442. */
  443. UIDNA_ERROR_PUNYCODE=0x100,
  444. /**
  445. * A label contains a dot=full stop.
  446. * This can occur in an input string for a single-label function.
  447. * @stable ICU 4.6
  448. */
  449. UIDNA_ERROR_LABEL_HAS_DOT=0x200,
  450. /**
  451. * An ACE label does not contain a valid label string.
  452. * The label was successfully ACE (Punycode) decoded but the resulting
  453. * string had severe validation errors. For example,
  454. * it might contain characters that are not allowed in ACE labels,
  455. * or it might not be normalized.
  456. * @stable ICU 4.6
  457. */
  458. UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
  459. /**
  460. * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
  461. * @stable ICU 4.6
  462. */
  463. UIDNA_ERROR_BIDI=0x800,
  464. /**
  465. * A label does not meet the IDNA CONTEXTJ requirements.
  466. * @stable ICU 4.6
  467. */
  468. UIDNA_ERROR_CONTEXTJ=0x1000,
  469. /**
  470. * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
  471. * Some punctuation characters "Would otherwise have been DISALLOWED"
  472. * but are allowed in certain contexts. (RFC 5892)
  473. * @stable ICU 49
  474. */
  475. UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
  476. /**
  477. * A label does not meet the IDNA CONTEXTO requirements for digits.
  478. * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
  479. * @stable ICU 49
  480. */
  481. UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
  482. };
  483. #ifndef U_HIDE_DEPRECATED_API
  484. /* IDNA2003 API ------------------------------------------------------------- */
  485. /**
  486. * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
  487. * This operation is done on <b>single labels</b> before sending it to something that expects
  488. * ASCII names. A label is an individual part of a domain name. Labels are usually
  489. * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
  490. *
  491. * IDNA2003 API Overview:
  492. *
  493. * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
  494. * (http://www.ietf.org/rfc/rfc3490.txt).
  495. * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
  496. * containing non-ASCII code points are processed by the
  497. * ToASCII operation before passing it to resolver libraries. Domain names
  498. * that are obtained from resolver libraries are processed by the
  499. * ToUnicode operation before displaying the domain name to the user.
  500. * IDNA requires that implementations process input strings with Nameprep
  501. * (http://www.ietf.org/rfc/rfc3491.txt),
  502. * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
  503. * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
  504. * Implementations of IDNA MUST fully implement Nameprep and Punycode;
  505. * neither Nameprep nor Punycode are optional.
  506. * The input and output of ToASCII and ToUnicode operations are Unicode
  507. * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
  508. * multiple times to an input string will yield the same result as applying the operation
  509. * once.
  510. * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
  511. * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
  512. *
  513. * @param src Input UChar array containing label in Unicode.
  514. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  515. * @param dest Output UChar array with ASCII (ACE encoded) label.
  516. * @param destCapacity Size of dest.
  517. * @param options A bit set of options:
  518. *
  519. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  520. * and do not use STD3 ASCII rules
  521. * If unassigned code points are found the operation fails with
  522. * U_UNASSIGNED_ERROR error code.
  523. *
  524. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
  525. * If this option is set, the unassigned code points in the input
  526. * are treated as normal Unicode code points.
  527. *
  528. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
  529. * If this option is set and the input does not satisfy STD3 rules,
  530. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  531. *
  532. * @param parseError Pointer to UParseError struct to receive information on position
  533. * of error if an error is encountered. Can be NULL.
  534. * @param status ICU in/out error code parameter.
  535. * U_INVALID_CHAR_FOUND if src contains
  536. * unmatched single surrogates.
  537. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  538. * too many code points.
  539. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  540. * @return The length of the result string, if successful; in case of a buffer overflow,
  541. * the returned length will be greater than destCapacity.
  542. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  543. */
  544. U_DEPRECATED int32_t U_EXPORT2
  545. uidna_toASCII(const UChar* src, int32_t srcLength,
  546. UChar* dest, int32_t destCapacity,
  547. int32_t options,
  548. UParseError* parseError,
  549. UErrorCode* status);
  550. /**
  551. * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
  552. * This operation is done on <b>single labels</b> before sending it to something that expects
  553. * Unicode names. A label is an individual part of a domain name. Labels are usually
  554. * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
  555. *
  556. * @param src Input UChar array containing ASCII (ACE encoded) label.
  557. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  558. * @param dest Output Converted UChar array containing Unicode equivalent of label.
  559. * @param destCapacity Size of dest.
  560. * @param options A bit set of options:
  561. *
  562. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  563. * and do not use STD3 ASCII rules
  564. * If unassigned code points are found the operation fails with
  565. * U_UNASSIGNED_ERROR error code.
  566. *
  567. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
  568. * If this option is set, the unassigned code points in the input
  569. * are treated as normal Unicode code points. <b> Note: </b> This option is
  570. * required on toUnicode operation because the RFC mandates
  571. * verification of decoded ACE input by applying toASCII and comparing
  572. * its output with source
  573. *
  574. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
  575. * If this option is set and the input does not satisfy STD3 rules,
  576. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  577. *
  578. * @param parseError Pointer to UParseError struct to receive information on position
  579. * of error if an error is encountered. Can be NULL.
  580. * @param status ICU in/out error code parameter.
  581. * U_INVALID_CHAR_FOUND if src contains
  582. * unmatched single surrogates.
  583. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  584. * too many code points.
  585. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  586. * @return The length of the result string, if successful; in case of a buffer overflow,
  587. * the returned length will be greater than destCapacity.
  588. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  589. */
  590. U_DEPRECATED int32_t U_EXPORT2
  591. uidna_toUnicode(const UChar* src, int32_t srcLength,
  592. UChar* dest, int32_t destCapacity,
  593. int32_t options,
  594. UParseError* parseError,
  595. UErrorCode* status);
  596. /**
  597. * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
  598. * This operation is done on complete domain names, e.g: "www.example.com".
  599. * It is important to note that this operation can fail. If it fails, then the input
  600. * domain name cannot be used as an Internationalized Domain Name and the application
  601. * should have methods defined to deal with the failure.
  602. *
  603. * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
  604. * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
  605. * and then convert. This function does not offer that level of granularity. The options once
  606. * set will apply to all labels in the domain name.
  607. *
  608. * @param src Input UChar array containing IDN in Unicode.
  609. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  610. * @param dest Output UChar array with ASCII (ACE encoded) IDN.
  611. * @param destCapacity Size of dest.
  612. * @param options A bit set of options:
  613. *
  614. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  615. * and do not use STD3 ASCII rules
  616. * If unassigned code points are found the operation fails with
  617. * U_UNASSIGNED_CODE_POINT_FOUND error code.
  618. *
  619. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
  620. * If this option is set, the unassigned code points in the input
  621. * are treated as normal Unicode code points.
  622. *
  623. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
  624. * If this option is set and the input does not satisfy STD3 rules,
  625. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  626. *
  627. * @param parseError Pointer to UParseError struct to receive information on position
  628. * of error if an error is encountered. Can be NULL.
  629. * @param status ICU in/out error code parameter.
  630. * U_INVALID_CHAR_FOUND if src contains
  631. * unmatched single surrogates.
  632. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  633. * too many code points.
  634. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  635. * @return The length of the result string, if successful; in case of a buffer overflow,
  636. * the returned length will be greater than destCapacity.
  637. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  638. */
  639. U_DEPRECATED int32_t U_EXPORT2
  640. uidna_IDNToASCII( const UChar* src, int32_t srcLength,
  641. UChar* dest, int32_t destCapacity,
  642. int32_t options,
  643. UParseError* parseError,
  644. UErrorCode* status);
  645. /**
  646. * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
  647. * This operation is done on complete domain names, e.g., "www.example.com".
  648. *
  649. * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
  650. * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
  651. * and then convert. This function does not offer that level of granularity. The options once
  652. * set will apply to all labels in the domain name.
  653. *
  654. * @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
  655. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  656. * @param dest Output UChar array containing Unicode equivalent of source IDN.
  657. * @param destCapacity Size of dest.
  658. * @param options A bit set of options:
  659. *
  660. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  661. * and do not use STD3 ASCII rules.
  662. * If unassigned code points are found, the operation fails with
  663. * U_UNASSIGNED_CODE_POINT_FOUND error code.
  664. *
  665. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
  666. * If this option is set, unassigned code points in the input
  667. * are treated as normal Unicode code points.
  668. *
  669. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
  670. * If this option is set and the input does not satisfy STD3 rules,
  671. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
  672. *
  673. * @param parseError Pointer to UParseError struct to receive information on position
  674. * of error if an error is encountered. Can be NULL.
  675. * @param status ICU in/out error code parameter.
  676. * U_INVALID_CHAR_FOUND if src contains
  677. * unmatched single surrogates.
  678. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  679. * too many code points.
  680. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
  681. * @return The length of the result string if successful; in case of a buffer overflow,
  682. * the returned length is greater than destCapacity.
  683. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  684. */
  685. U_DEPRECATED int32_t U_EXPORT2
  686. uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
  687. UChar* dest, int32_t destCapacity,
  688. int32_t options,
  689. UParseError* parseError,
  690. UErrorCode* status);
  691. /**
  692. * IDNA2003: Compare two IDN strings for equivalence.
  693. * This function splits the domain names into labels and compares them.
  694. * According to IDN RFC, whenever two labels are compared, they are
  695. * considered equal if and only if their ASCII forms (obtained by
  696. * applying toASCII) match using a case-insensitive ASCII comparison.
  697. * Two domain names are considered a match if and only if all labels
  698. * match regardless of whether label separators match.
  699. *
  700. * @param s1 First source string.
  701. * @param length1 Length of first source string, or -1 if NUL-terminated.
  702. *
  703. * @param s2 Second source string.
  704. * @param length2 Length of second source string, or -1 if NUL-terminated.
  705. * @param options A bit set of options:
  706. *
  707. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  708. * and do not use STD3 ASCII rules.
  709. * If unassigned code points are found, the operation fails with
  710. * U_UNASSIGNED_CODE_POINT_FOUND error code.
  711. *
  712. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
  713. * If this option is set, unassigned code points in the input
  714. * are treated as normal Unicode code points.
  715. *
  716. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
  717. * If this option is set and the input does not satisfy STD3 rules,
  718. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
  719. *
  720. * @param status ICU error code in/out parameter.
  721. * Must fulfill U_SUCCESS before the function call.
  722. * @return <0 or 0 or >0 as usual for string comparisons.
  723. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  724. */
  725. U_DEPRECATED int32_t U_EXPORT2
  726. uidna_compare( const UChar *s1, int32_t length1,
  727. const UChar *s2, int32_t length2,
  728. int32_t options,
  729. UErrorCode* status);
  730. #endif /* U_HIDE_DEPRECATED_API */
  731. #endif /* #if !UCONFIG_NO_IDNA */
  732. #endif