idn.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 7 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2009 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Pierre A. Joye <pierre@php.net> |
  16. | Gustavo Lopes <cataphract@php.net> |
  17. +----------------------------------------------------------------------+
  18. */
  19. /* {{{ includes */
  20. #ifdef HAVE_CONFIG_H
  21. #include "config.h"
  22. #endif
  23. #include <php.h>
  24. #include <unicode/uidna.h>
  25. #include <unicode/ustring.h>
  26. #include "ext/standard/php_string.h"
  27. #include "intl_error.h"
  28. #include "intl_convert.h"
  29. /* }}} */
  #if defined(UIDNA_INFO_INITIALIZER)
  /* UIDNA_INFO_INITIALIZER is only defined when ICU provides the UTS#46 API
   * (introduced in ICU 4.6), so it doubles as the feature test. */
  # define HAVE_46_API 1
  #endif

  /* Values accepted for the "variant" argument of idn_to_ascii()/idn_to_utf8(). */
  enum {
      INTL_IDN_VARIANT_2003  = 0, /* IDNA2003 code path */
      INTL_IDN_VARIANT_UTS46 = 1  /* UTS#46 code path */
  };
  37. /* {{{ grapheme_register_constants
  38. * Register API constants
  39. */
  40. void idn_register_constants( INIT_FUNC_ARGS )
  41. {
  42. /* OPTIONS */
  43. /* Option to prohibit processing of unassigned codepoints in the input and
  44. do not check if the input conforms to STD-3 ASCII rules. */
  45. REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
  46. /* Option to allow processing of unassigned codepoints in the input */
  47. REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
  48. /* Option to check if input conforms to STD-3 ASCII rules */
  49. REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
  50. #ifdef HAVE_46_API
  51. /* Option to check for whether the input conforms to the BiDi rules.
  52. * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
  53. REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
  54. /* Option to check for whether the input conforms to the CONTEXTJ rules.
  55. * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
  56. REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
  57. /* Option for nontransitional processing in ToASCII().
  58. * By default, ToASCII() uses transitional processing.
  59. * Ignored by the IDNA2003 implementation. */
  60. REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
  61. /* Option for nontransitional processing in ToUnicode().
  62. * By default, ToUnicode() uses transitional processing.
  63. * Ignored by the IDNA2003 implementation. */
  64. REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
  65. #endif
  66. /* VARIANTS */
  67. REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT);
  68. #ifdef HAVE_46_API
  69. REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
  70. #endif
  71. #ifdef HAVE_46_API
  72. /* PINFO ERROR CODES */
  73. REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
  74. REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
  75. REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
  76. REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
  77. REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
  78. REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
  79. REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
  80. REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
  81. REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
  82. REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
  83. REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
  84. REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
  85. REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
  86. #endif
  87. }
  88. /* }}} */
  /* Conversion direction passed as "mode" to the internal helpers. */
  enum {
      INTL_IDN_TO_ASCII = 0, /* used by idn_to_ascii() */
      INTL_IDN_TO_UTF8  = 1  /* used by idn_to_utf8() */
  };
  93. /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
  94. static int php_intl_idn_check_status(UErrorCode err, const char *msg)
  95. {
  96. intl_error_set_code(NULL, err);
  97. if (U_FAILURE(err)) {
  98. char *buff;
  99. spprintf(&buff, 0, "%s: %s",
  100. get_active_function_name(),
  101. msg);
  102. intl_error_set_custom_msg(NULL, buff, 1);
  103. efree(buff);
  104. return FAILURE;
  105. }
  106. return SUCCESS;
  107. }
  108. static inline void php_intl_bad_args(const char *msg)
  109. {
  110. php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg);
  111. }
  #ifdef HAVE_46_API
  /*
   * Convert domain with the UTS#46 API.
   *
   * domain     input name
   * option     bit set handed to uidna_openUTS46()
   * mode       INTL_IDN_TO_ASCII selects uidna_nameToASCII_UTF8(), anything
   *            else selects uidna_nameToUnicodeUTF8()
   * idna_info  optional array (may be NULL) that receives the keys "result",
   *            "isTransitionalDifferent" and "errors"
   *
   * return_value is set to the converted string when ICU reports no IDNA
   * errors, and to FALSE otherwise. When opening the UIDNA instance or the
   * conversion call itself fails, or the output does not fit the buffer,
   * FALSE is returned and idna_info is left untouched.
   */
  static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
          const zend_string *domain, uint32_t option, int mode, zval *idna_info)
  {
      const int to_ascii = (mode == INTL_IDN_TO_ASCII);
      /* Output buffer capacity in bytes, depending on the direction. */
      const int32_t capacity = to_ascii ? 255 : 252*4;
      UErrorCode err = U_ZERO_ERROR;
      UIDNAInfo info = UIDNA_INFO_INITIALIZER;
      UIDNA *idna;
      zend_string *out;
      int32_t out_len;

      idna = uidna_openUTS46(option, &err);
      if (php_intl_idn_check_status(err, "failed to open UIDNA instance") == FAILURE) {
          RETURN_FALSE;
      }

      out = zend_string_alloc(capacity, 0);
      if (to_ascii) {
          out_len = uidna_nameToASCII_UTF8(idna, ZSTR_VAL(domain), ZSTR_LEN(domain),
                  ZSTR_VAL(out), capacity, &info, &err);
      } else {
          out_len = uidna_nameToUnicodeUTF8(idna, ZSTR_VAL(domain), ZSTR_LEN(domain),
                  ZSTR_VAL(out), capacity, &info, &err);
      }

      /* A result that fills the whole buffer is rejected as well: one byte
       * must remain for the terminator written below. In that case the status
       * helper is not invoked (short-circuit), so no message is recorded. */
      if (out_len >= capacity || php_intl_idn_check_status(err, "failed to convert name") == FAILURE) {
          uidna_close(idna);
          zend_string_efree(out);
          RETURN_FALSE;
      }

      ZSTR_VAL(out)[out_len] = '\0';
      ZSTR_LEN(out) = out_len;

      if (info.errors == 0) {
          /* Clean conversion: the string becomes the return value ... */
          RETVAL_STR(out);
          if (idna_info) {
              /* ... and is shared with the info array, hence the extra ref. */
              zval_addref_p(return_value);
              add_assoc_zval_ex(idna_info, "result", sizeof("result")-1, return_value);
          }
      } else {
          /* IDNA errors: return FALSE, but still expose the partial result
           * through the info array when the caller asked for one. */
          RETVAL_FALSE;
          if (idna_info) {
              zval zv;
              ZVAL_NEW_STR(&zv, out);
              add_assoc_zval_ex(idna_info, "result", sizeof("result")-1, &zv);
          } else {
              /* Nobody took ownership of the buffer. */
              zend_string_efree(out);
          }
      }

      if (idna_info) {
          add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
                  sizeof("isTransitionalDifferent")-1, info.isTransitionalDifferent);
          add_assoc_long_ex(idna_info, "errors", sizeof("errors")-1, (zend_long)info.errors);
      }

      uidna_close(idna);
  }
  #endif
  175. static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
  176. const zend_string *domain, uint32_t option, int mode)
  177. {
  178. UChar* ustring = NULL;
  179. int ustring_len = 0;
  180. UErrorCode status;
  181. zend_string *u8str;
  182. /* convert the string to UTF-16. */
  183. status = U_ZERO_ERROR;
  184. intl_convert_utf8_to_utf16(&ustring, &ustring_len, ZSTR_VAL(domain), ZSTR_LEN(domain), &status);
  185. if (U_FAILURE(status)) {
  186. intl_error_set_code(NULL, status);
  187. /* Set error messages. */
  188. intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
  189. if (ustring) {
  190. efree(ustring);
  191. }
  192. RETURN_FALSE;
  193. } else {
  194. UParseError parse_error;
  195. UChar converted[MAXPATHLEN];
  196. int32_t converted_ret_len;
  197. status = U_ZERO_ERROR;
  198. #if defined(__clang__)
  199. # pragma clang diagnostic push
  200. # pragma clang diagnostic ignored "-Wdeprecated-declarations"
  201. #elif ZEND_GCC_VERSION >= 4008
  202. # pragma GCC diagnostic push
  203. # pragma GCC diagnostic ignored "-Wdeprecated-declarations"
  204. #endif
  205. if (mode == INTL_IDN_TO_ASCII) {
  206. converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
  207. } else {
  208. converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
  209. }
  210. #if defined(__clang__)
  211. # pragma clang diagnostic pop
  212. #elif ZEND_GCC_VERSION >= 4008
  213. # pragma GCC diagnostic pop
  214. #endif
  215. efree(ustring);
  216. if (U_FAILURE(status)) {
  217. intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 );
  218. RETURN_FALSE;
  219. }
  220. status = U_ZERO_ERROR;
  221. u8str = intl_convert_utf16_to_utf8(converted, converted_ret_len, &status);
  222. if (!u8str) {
  223. /* Set global error code. */
  224. intl_error_set_code(NULL, status);
  225. /* Set error messages. */
  226. intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
  227. RETURN_FALSE;
  228. }
  229. }
  230. /* return the allocated string, not a duplicate */
  231. RETVAL_NEW_STR(u8str);
  232. }
  233. static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
  234. {
  235. zend_string *domain;
  236. zend_long option = 0,
  237. variant = INTL_IDN_VARIANT_2003;
  238. zval *idna_info = NULL;
  239. intl_error_reset(NULL);
  240. if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|llz/",
  241. &domain, &option, &variant, &idna_info) == FAILURE) {
  242. php_intl_bad_args("bad arguments");
  243. RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */
  244. }
  245. #ifdef HAVE_46_API
  246. if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) {
  247. php_intl_bad_args("invalid variant, must be one of {"
  248. "INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}");
  249. RETURN_FALSE;
  250. }
  251. #else
  252. if (variant != INTL_IDN_VARIANT_2003) {
  253. php_intl_bad_args("invalid variant, PHP was compiled against "
  254. "an old version of ICU and only supports INTL_IDN_VARIANT_2003");
  255. RETURN_FALSE;
  256. }
  257. #endif
  258. if (ZSTR_LEN(domain) < 1) {
  259. php_intl_bad_args("empty domain name");
  260. RETURN_FALSE;
  261. }
  262. if (ZSTR_LEN(domain) > INT32_MAX - 1) {
  263. php_intl_bad_args("domain name too large");
  264. RETURN_FALSE;
  265. }
  266. /* don't check options; it wasn't checked before */
  267. if (variant == INTL_IDN_VARIANT_2003) {
  268. php_error_docref(NULL, E_DEPRECATED, "INTL_IDNA_VARIANT_2003 is deprecated");
  269. }
  270. if (idna_info != NULL) {
  271. if (variant == INTL_IDN_VARIANT_2003) {
  272. php_error_docref0(NULL, E_NOTICE,
  273. "4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
  274. "takes 3 - extra argument ignored");
  275. } else {
  276. zval_ptr_dtor(idna_info);
  277. array_init(idna_info);
  278. }
  279. }
  280. if (variant == INTL_IDN_VARIANT_2003) {
  281. php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, (uint32_t)option, mode);
  282. }
  283. #ifdef HAVE_46_API
  284. else {
  285. php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, (uint32_t)option, mode, idna_info);
  286. }
  287. #endif
  288. }
  289. /* {{{ proto string idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]])
  290. Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */
  291. PHP_FUNCTION(idn_to_ascii)
  292. {
  293. php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
  294. }
  295. /* }}} */
  296. /* {{{ proto string idn_to_utf8(string domain[, int options[, int variant[, array &idna_info]]])
  297. Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
  298. PHP_FUNCTION(idn_to_utf8)
  299. {
  300. php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
  301. }
  302. /* }}} */
  303. /*
  304. * Local variables:
  305. * tab-width: 4
  306. * c-basic-offset: 4
  307. * End:
  308. * vim600: fdm=marker
  309. * vim: noet sw=4 ts=4
  310. */