idn.c 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Copyright (c) The PHP Group |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | https://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Author: Pierre A. Joye <pierre@php.net> |
  14. | Gustavo Lopes <cataphract@php.net> |
  15. +----------------------------------------------------------------------+
  16. */
  17. /* {{{ includes */
  18. #ifdef HAVE_CONFIG_H
  19. #include "config.h"
  20. #endif
  21. #include <php.h>
  22. #include <unicode/uidna.h>
  23. #include <unicode/ustring.h>
  24. #include "ext/standard/php_string.h"
  25. #include "intl_error.h"
  26. #include "intl_convert.h"
  27. /* }}} */
  /* IDNA variant identifiers accepted by the conversion functions in this file.
   * UTS #46 is the only variant defined here; the value is exported to userland
   * under the name INTL_IDNA_VARIANT_UTS46. */
  enum {
      INTL_IDN_VARIANT_UTS46 = 1
  };
  31. /* {{{ grapheme_register_constants
  32. * Register API constants
  33. */
  34. void idn_register_constants( INIT_FUNC_ARGS )
  35. {
  36. /* OPTIONS */
  37. /* Option to prohibit processing of unassigned codepoints in the input and
  38. do not check if the input conforms to STD-3 ASCII rules. */
  39. REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
  40. /* Option to allow processing of unassigned codepoints in the input */
  41. REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
  42. /* Option to check if input conforms to STD-3 ASCII rules */
  43. REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
  44. /* Option to check for whether the input conforms to the BiDi rules.
  45. * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
  46. REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
  47. /* Option to check for whether the input conforms to the CONTEXTJ rules.
  48. * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
  49. REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
  50. /* Option for nontransitional processing in ToASCII().
  51. * By default, ToASCII() uses transitional processing.
  52. * Ignored by the IDNA2003 implementation. */
  53. REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
  54. /* Option for nontransitional processing in ToUnicode().
  55. * By default, ToUnicode() uses transitional processing.
  56. * Ignored by the IDNA2003 implementation. */
  57. REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
  58. /* VARIANTS */
  59. REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
  60. /* PINFO ERROR CODES */
  61. REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
  62. REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
  63. REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
  64. REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
  65. REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
  66. REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
  67. REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
  68. REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
  69. REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
  70. REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
  71. REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
  72. REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
  73. REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
  74. }
  75. /* }}} */
  /* Conversion direction passed by the public entry points to the shared
   * implementation. */
  enum {
      INTL_IDN_TO_ASCII = 0,
      INTL_IDN_TO_UTF8  = 1
  };
  80. /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
  81. static int php_intl_idn_check_status(UErrorCode err, const char *msg)
  82. {
  83. intl_error_set_code(NULL, err);
  84. if (U_FAILURE(err)) {
  85. char *buff;
  86. spprintf(&buff, 0, "%s: %s",
  87. get_active_function_name(),
  88. msg);
  89. intl_error_set_custom_msg(NULL, buff, 1);
  90. efree(buff);
  91. return FAILURE;
  92. }
  93. return SUCCESS;
  94. }
  95. static inline void php_intl_bad_args(const char *msg)
  96. {
  97. php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg);
  98. }
  99. static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
  100. const zend_string *domain, uint32_t option, int mode, zval *idna_info)
  101. {
  102. UErrorCode status = U_ZERO_ERROR;
  103. UIDNA *uts46;
  104. int32_t len;
  105. zend_string *buffer;
  106. UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  107. uts46 = uidna_openUTS46(option, &status);
  108. if (php_intl_idn_check_status(status, "failed to open UIDNA instance") == FAILURE) {
  109. RETURN_FALSE;
  110. }
  111. if (mode == INTL_IDN_TO_ASCII) {
  112. const int32_t buffer_capac = 255;
  113. buffer = zend_string_alloc(buffer_capac, 0);
  114. len = uidna_nameToASCII_UTF8(uts46, ZSTR_VAL(domain), ZSTR_LEN(domain),
  115. ZSTR_VAL(buffer), buffer_capac, &info, &status);
  116. if (len >= buffer_capac || php_intl_idn_check_status(status, "failed to convert name") == FAILURE) {
  117. uidna_close(uts46);
  118. zend_string_efree(buffer);
  119. RETURN_FALSE;
  120. }
  121. } else {
  122. const int32_t buffer_capac = 252*4;
  123. buffer = zend_string_alloc(buffer_capac, 0);
  124. len = uidna_nameToUnicodeUTF8(uts46, ZSTR_VAL(domain), ZSTR_LEN(domain),
  125. ZSTR_VAL(buffer), buffer_capac, &info, &status);
  126. if (len >= buffer_capac || php_intl_idn_check_status(status, "failed to convert name") == FAILURE) {
  127. uidna_close(uts46);
  128. zend_string_efree(buffer);
  129. RETURN_FALSE;
  130. }
  131. }
  132. ZSTR_VAL(buffer)[len] = '\0';
  133. ZSTR_LEN(buffer) = len;
  134. if (info.errors == 0) {
  135. RETVAL_STR_COPY(buffer);
  136. } else {
  137. RETVAL_FALSE;
  138. }
  139. if (idna_info) {
  140. add_assoc_str_ex(idna_info, "result", sizeof("result")-1, zend_string_copy(buffer));
  141. add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
  142. sizeof("isTransitionalDifferent")-1, info.isTransitionalDifferent);
  143. add_assoc_long_ex(idna_info, "errors", sizeof("errors")-1, (zend_long)info.errors);
  144. }
  145. zend_string_release(buffer);
  146. uidna_close(uts46);
  147. }
  148. static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
  149. {
  150. zend_string *domain;
  151. zend_long option = UIDNA_DEFAULT,
  152. variant = INTL_IDN_VARIANT_UTS46;
  153. zval *idna_info = NULL;
  154. intl_error_reset(NULL);
  155. if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|llz",
  156. &domain, &option, &variant, &idna_info) == FAILURE) {
  157. RETURN_THROWS();
  158. }
  159. if (variant != INTL_IDN_VARIANT_UTS46) {
  160. php_intl_bad_args("invalid variant, must be INTL_IDNA_VARIANT_UTS46");
  161. RETURN_FALSE;
  162. }
  163. if (ZSTR_LEN(domain) < 1) {
  164. php_intl_bad_args("empty domain name");
  165. RETURN_FALSE;
  166. }
  167. if (ZSTR_LEN(domain) > INT32_MAX - 1) {
  168. php_intl_bad_args("domain name too large");
  169. RETURN_FALSE;
  170. }
  171. /* don't check options; it wasn't checked before */
  172. if (idna_info != NULL) {
  173. idna_info = zend_try_array_init(idna_info);
  174. if (!idna_info) {
  175. RETURN_THROWS();
  176. }
  177. }
  178. php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, (uint32_t)option, mode, idna_info);
  179. }
  180. /* {{{ Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */
  181. PHP_FUNCTION(idn_to_ascii)
  182. {
  183. php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
  184. }
  185. /* }}} */
  186. /* {{{ Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
  187. PHP_FUNCTION(idn_to_utf8)
  188. {
  189. php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
  190. }
  191. /* }}} */