idn.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2009 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Pierre A. Joye <pierre@php.net> |
  16. | Gustavo Lopes <cataphract@php.net> |
  17. +----------------------------------------------------------------------+
  18. */
  19. /* $Id$ */
  20. /* {{{ includes */
  21. #ifdef HAVE_CONFIG_H
  22. #include "config.h"
  23. #endif
  24. #include <php.h>
  25. #include <unicode/uidna.h>
  26. #include <unicode/ustring.h>
  27. #include "ext/standard/php_string.h"
  28. #include "intl_error.h"
  29. #include "intl_convert.h"
  30. /* }}} */
  31. #ifdef UIDNA_INFO_INITIALIZER
  32. #define HAVE_46_API 1 /* has UTS#46 API (introduced in ICU 4.6) */
  33. #endif
  34. enum {
  35. INTL_IDN_VARIANT_2003 = 0,
  36. INTL_IDN_VARIANT_UTS46
  37. };
  38. /* {{{ grapheme_register_constants
  39. * Register API constants
  40. */
  41. void idn_register_constants( INIT_FUNC_ARGS )
  42. {
  43. /* OPTIONS */
  44. /* Option to prohibit processing of unassigned codepoints in the input and
  45. do not check if the input conforms to STD-3 ASCII rules. */
  46. REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
  47. /* Option to allow processing of unassigned codepoints in the input */
  48. REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
  49. /* Option to check if input conforms to STD-3 ASCII rules */
  50. REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
  51. #ifdef HAVE_46_API
  52. /* Option to check for whether the input conforms to the BiDi rules.
  53. * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
  54. REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
  55. /* Option to check for whether the input conforms to the CONTEXTJ rules.
  56. * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
  57. REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
  58. /* Option for nontransitional processing in ToASCII().
  59. * By default, ToASCII() uses transitional processing.
  60. * Ignored by the IDNA2003 implementation. */
  61. REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
  62. /* Option for nontransitional processing in ToUnicode().
  63. * By default, ToUnicode() uses transitional processing.
  64. * Ignored by the IDNA2003 implementation. */
  65. REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
  66. #endif
  67. /* VARIANTS */
  68. REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT);
  69. #ifdef HAVE_46_API
  70. REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
  71. #endif
  72. #ifdef HAVE_46_API
  73. /* PINFO ERROR CODES */
  74. REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
  75. REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
  76. REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
  77. REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
  78. REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
  79. REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
  80. REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
  81. REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
  82. REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
  83. REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
  84. REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
  85. REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
  86. REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
  87. #endif
  88. }
  89. /* }}} */
  90. enum {
  91. INTL_IDN_TO_ASCII = 0,
  92. INTL_IDN_TO_UTF8
  93. };
  94. /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
  95. static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode TSRMLS_DC)
  96. {
  97. intl_error_set_code(NULL, err TSRMLS_CC);
  98. if (U_FAILURE(err)) {
  99. char *buff;
  100. spprintf(&buff, 0, "%s: %s",
  101. mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : "idn_to_utf8",
  102. msg);
  103. intl_error_set_custom_msg(NULL, buff, 1 TSRMLS_CC);
  104. efree(buff);
  105. return FAILURE;
  106. }
  107. return SUCCESS;
  108. }
  109. static inline void php_intl_bad_args(const char *msg, int mode TSRMLS_DC)
  110. {
  111. php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg, mode TSRMLS_CC);
  112. }
  113. #ifdef HAVE_46_API
  114. static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
  115. const char *domain, int domain_len, uint32_t option, int mode, zval *idna_info)
  116. {
  117. UErrorCode status = U_ZERO_ERROR;
  118. UIDNA *uts46;
  119. int32_t len;
  120. int32_t buffer_capac = 255; /* no domain name may exceed this */
  121. char *buffer = emalloc(buffer_capac);
  122. UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  123. int buffer_used = 0;
  124. uts46 = uidna_openUTS46(option, &status);
  125. if (php_intl_idn_check_status(status, "failed to open UIDNA instance",
  126. mode TSRMLS_CC) == FAILURE) {
  127. efree(buffer);
  128. RETURN_FALSE;
  129. }
  130. if (mode == INTL_IDN_TO_ASCII) {
  131. len = uidna_nameToASCII_UTF8(uts46, domain, (int32_t)domain_len,
  132. buffer, buffer_capac, &info, &status);
  133. } else {
  134. len = uidna_nameToUnicodeUTF8(uts46, domain, (int32_t)domain_len,
  135. buffer, buffer_capac, &info, &status);
  136. }
  137. if (len >= 255 || php_intl_idn_check_status(status, "failed to convert name",
  138. mode TSRMLS_CC) == FAILURE) {
  139. uidna_close(uts46);
  140. efree(buffer);
  141. RETURN_FALSE;
  142. }
  143. buffer[len] = '\0';
  144. if (info.errors == 0) {
  145. RETVAL_STRINGL(buffer, len, 0);
  146. buffer_used = 1;
  147. } else {
  148. RETVAL_FALSE;
  149. }
  150. if (idna_info) {
  151. if (buffer_used) { /* used in return_value then */
  152. zval_addref_p(return_value);
  153. add_assoc_zval_ex(idna_info, "result", sizeof("result"), return_value);
  154. } else {
  155. zval *zv;
  156. ALLOC_INIT_ZVAL(zv);
  157. ZVAL_STRINGL(zv, buffer, len, 0);
  158. buffer_used = 1;
  159. add_assoc_zval_ex(idna_info, "result", sizeof("result"), zv);
  160. }
  161. add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
  162. sizeof("isTransitionalDifferent"), info.isTransitionalDifferent);
  163. add_assoc_long_ex(idna_info, "errors", sizeof("errors"), (long)info.errors);
  164. }
  165. if (!buffer_used) {
  166. efree(buffer);
  167. }
  168. uidna_close(uts46);
  169. }
  170. #endif
  171. static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
  172. const char *domain, int domain_len, uint32_t option, int mode)
  173. {
  174. UChar* ustring = NULL;
  175. int ustring_len = 0;
  176. UErrorCode status;
  177. char *converted_utf8;
  178. int32_t converted_utf8_len;
  179. UChar converted[MAXPATHLEN];
  180. int32_t converted_ret_len;
  181. /* convert the string to UTF-16. */
  182. status = U_ZERO_ERROR;
  183. intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status);
  184. if (U_FAILURE(status)) {
  185. intl_error_set_code(NULL, status TSRMLS_CC);
  186. /* Set error messages. */
  187. intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
  188. if (ustring) {
  189. efree(ustring);
  190. }
  191. RETURN_FALSE;
  192. } else {
  193. UParseError parse_error;
  194. status = U_ZERO_ERROR;
  195. if (mode == INTL_IDN_TO_ASCII) {
  196. converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
  197. } else {
  198. converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
  199. }
  200. efree(ustring);
  201. if (U_FAILURE(status)) {
  202. intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 TSRMLS_CC );
  203. RETURN_FALSE;
  204. }
  205. status = U_ZERO_ERROR;
  206. intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);
  207. if (U_FAILURE(status)) {
  208. /* Set global error code. */
  209. intl_error_set_code(NULL, status TSRMLS_CC);
  210. /* Set error messages. */
  211. intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );
  212. efree(converted_utf8);
  213. RETURN_FALSE;
  214. }
  215. }
  216. /* return the allocated string, not a duplicate */
  217. RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
  218. }
  219. static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
  220. {
  221. char *domain;
  222. int domain_len;
  223. long option = 0,
  224. variant = INTL_IDN_VARIANT_2003;
  225. zval *idna_info = NULL;
  226. intl_error_reset(NULL TSRMLS_CC);
  227. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|llz",
  228. &domain, &domain_len, &option, &variant, &idna_info) == FAILURE) {
  229. php_intl_bad_args("bad arguments", mode TSRMLS_CC);
  230. RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */
  231. }
  232. #ifdef HAVE_46_API
  233. if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) {
  234. php_intl_bad_args("invalid variant, must be one of {"
  235. "INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}", mode TSRMLS_CC);
  236. RETURN_FALSE;
  237. }
  238. #else
  239. if (variant != INTL_IDN_VARIANT_2003) {
  240. php_intl_bad_args("invalid variant, PHP was compiled against "
  241. "an old version of ICU and only supports INTL_IDN_VARIANT_2003",
  242. mode TSRMLS_CC);
  243. RETURN_FALSE;
  244. }
  245. #endif
  246. if (domain_len < 1) {
  247. php_intl_bad_args("empty domain name", mode TSRMLS_CC);
  248. RETURN_FALSE;
  249. }
  250. if (domain_len > INT32_MAX - 1) {
  251. php_intl_bad_args("domain name too large", mode TSRMLS_CC);
  252. RETURN_FALSE;
  253. }
  254. /* don't check options; it wasn't checked before */
  255. if (idna_info != NULL) {
  256. if (variant == INTL_IDN_VARIANT_2003) {
  257. php_error_docref0(NULL TSRMLS_CC, E_NOTICE,
  258. "4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
  259. "takes 3 - extra argument ignored");
  260. } else {
  261. zval_dtor(idna_info);
  262. array_init(idna_info);
  263. }
  264. }
  265. if (variant == INTL_IDN_VARIANT_2003) {
  266. php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU,
  267. domain, domain_len, (uint32_t)option, mode);
  268. }
  269. #ifdef HAVE_46_API
  270. else {
  271. php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, domain_len,
  272. (uint32_t)option, mode, idna_info);
  273. }
  274. #endif
  275. }
  276. /* {{{ proto int idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]])
  277. Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */
  278. PHP_FUNCTION(idn_to_ascii)
  279. {
  280. php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
  281. }
  282. /* }}} */
  283. /* {{{ proto int idn_to_utf8(string domain[, int options[, int variant[, array &idna_info]]])
  284. Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
  285. PHP_FUNCTION(idn_to_utf8)
  286. {
  287. php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
  288. }
  289. /* }}} */
  290. /*
  291. * Local variables:
  292. * tab-width: 4
  293. * c-basic-offset: 4
  294. * End:
  295. * vim600: fdm=marker
  296. * vim: noet sw=4 ts=4
  297. */