intl_convert.c 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. /*
  2. +----------------------------------------------------------------------+
  3. | This source file is subject to version 3.01 of the PHP license, |
  4. | that is bundled with this package in the file LICENSE, and is |
  5. | available through the world-wide-web at the following url: |
  6. | https://www.php.net/license/3_01.txt |
  7. | If you did not receive a copy of the PHP license and are unable to |
  8. | obtain it through the world-wide-web, please send a note to |
  9. | license@php.net so we can mail you a copy immediately. |
  10. +----------------------------------------------------------------------+
  11. | Authors: Vadim Savchuk <vsavchuk@productengine.com> |
  12. | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
  13. +----------------------------------------------------------------------+
  14. */
  15. #ifdef HAVE_CONFIG_H
  16. #include "config.h"
  17. #endif
  18. #include <php.h>
  19. #include "intl_common.h"
  20. #include "intl_convert.h"
  21. /* {{{ intl_convert_utf8_to_utf16
  22. * Convert given string from UTF-8 to UTF-16 to *target buffer.
  23. *
  24. * It *target is NULL then we allocate a large enough buffer,
  25. * store the converted string into it, and make target point to it.
  26. *
  27. * Otherwise, if *target is non-NULL, we assume that it points to a
  28. * dynamically allocated buffer of *target_len bytes length.
  29. * In this case the buffer will be used to store the converted string to,
  30. * and may be resized (made larger) if needed.
  31. *
  32. * Note that ICU uses int32_t as string length and PHP uses size_t. While
  33. * it is not likely in practical situations to have strings longer than
  34. * INT32_MAX, these are different types and need to be handled carefully.
  35. *
  36. * @param target Where to place the result.
  37. * @param target_len Result length.
  38. * @param source String to convert.
  39. * @param source_len Length of the source string.
  40. * @param status Conversion status.
  41. *
  42. * @return void This function does not return anything.
  43. */
  44. void intl_convert_utf8_to_utf16(
  45. UChar** target, int32_t* target_len,
  46. const char* src, size_t src_len,
  47. UErrorCode* status )
  48. {
  49. UChar* dst_buf = NULL;
  50. int32_t dst_len = 0;
  51. /* If *target is NULL determine required destination buffer size (pre-flighting).
  52. * Otherwise, attempt to convert source string; if *target buffer is not large enough
  53. * it will be resized appropriately.
  54. */
  55. *status = U_ZERO_ERROR;
  56. if(src_len > INT32_MAX) {
  57. /* we can not fit this string */
  58. *status = U_BUFFER_OVERFLOW_ERROR;
  59. return;
  60. }
  61. u_strFromUTF8( *target, *target_len, &dst_len, src, (int32_t)src_len, status );
  62. if( *status == U_ZERO_ERROR )
  63. {
  64. /* String is converted successfully */
  65. (*target)[dst_len] = 0;
  66. *target_len = dst_len;
  67. return;
  68. }
  69. /* Bail out if an unexpected error occurred.
  70. * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
  71. * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
  72. */
  73. if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
  74. return;
  75. /* Allocate memory for the destination buffer (it will be zero-terminated). */
  76. dst_buf = eumalloc( dst_len + 1 );
  77. /* Convert source string from UTF-8 to UTF-16. */
  78. *status = U_ZERO_ERROR;
  79. u_strFromUTF8( dst_buf, dst_len+1, NULL, src, src_len, status );
  80. if( U_FAILURE( *status ) )
  81. {
  82. efree( dst_buf );
  83. return;
  84. }
  85. dst_buf[dst_len] = 0;
  86. if( *target )
  87. efree( *target );
  88. *target = dst_buf;
  89. *target_len = dst_len;
  90. }
  91. /* }}} */
  92. /* {{{ intl_convert_utf16_to_utf8
  93. * Convert given string from UTF-16 to UTF-8.
  94. *
  95. * @param source String to convert.
  96. * @param source_len Length of the source string.
  97. * @param status Conversion status.
  98. *
  99. * @return zend_string
  100. */
  101. zend_string* intl_convert_utf16_to_utf8(
  102. const UChar* src, int32_t src_len,
  103. UErrorCode* status )
  104. {
  105. zend_string* dst;
  106. int32_t dst_len;
  107. /* Determine required destination buffer size (pre-flighting). */
  108. *status = U_ZERO_ERROR;
  109. u_strToUTF8( NULL, 0, &dst_len, src, src_len, status );
  110. /* Bail out if an unexpected error occurred.
  111. * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
  112. * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
  113. */
  114. if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
  115. return NULL;
  116. /* Allocate memory for the destination buffer (it will be zero-terminated). */
  117. dst = zend_string_alloc(dst_len, 0);
  118. /* Convert source string from UTF-8 to UTF-16. */
  119. *status = U_ZERO_ERROR;
  120. u_strToUTF8( ZSTR_VAL(dst), dst_len, NULL, src, src_len, status );
  121. if( U_FAILURE( *status ) )
  122. {
  123. zend_string_efree(dst);
  124. return NULL;
  125. }
  126. /* U_STRING_NOT_TERMINATED_WARNING is OK for us => reset 'status'. */
  127. *status = U_ZERO_ERROR;
  128. ZSTR_VAL(dst)[dst_len] = 0;
  129. return dst;
  130. }
  131. /* }}} */