intl_convert.c 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 7 |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | http://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Authors: Vadim Savchuk <vsavchuk@productengine.com> |
  14. | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
  15. +----------------------------------------------------------------------+
  16. */
  17. #ifdef HAVE_CONFIG_H
  18. #include "config.h"
  19. #endif
  20. #include <php.h>
  21. #include "intl_common.h"
  22. #include "intl_convert.h"
  23. /* {{{ intl_convert_utf8_to_utf16
  24. * Convert given string from UTF-8 to UTF-16 to *target buffer.
  25. *
  26. * It *target is NULL then we allocate a large enough buffer,
  27. * store the converted string into it, and make target point to it.
  28. *
  29. * Otherwise, if *target is non-NULL, we assume that it points to a
  30. * dynamically allocated buffer of *target_len bytes length.
  31. * In this case the buffer will be used to store the converted string to,
  32. * and may be resized (made larger) if needed.
  33. *
  34. * Note that ICU uses int32_t as string length and PHP uses size_t. While
  35. * it is not likely in practical situations to have strings longer than
  36. * INT32_MAX, these are different types and need to be handled carefully.
  37. *
  38. * @param target Where to place the result.
  39. * @param target_len Result length.
  40. * @param source String to convert.
  41. * @param source_len Length of the source string.
  42. * @param status Conversion status.
  43. *
  44. * @return void This function does not return anything.
  45. */
  46. void intl_convert_utf8_to_utf16(
  47. UChar** target, int32_t* target_len,
  48. const char* src, size_t src_len,
  49. UErrorCode* status )
  50. {
  51. UChar* dst_buf = NULL;
  52. int32_t dst_len = 0;
  53. /* If *target is NULL determine required destination buffer size (pre-flighting).
  54. * Otherwise, attempt to convert source string; if *target buffer is not large enough
  55. * it will be resized appropriately.
  56. */
  57. *status = U_ZERO_ERROR;
  58. if(src_len > INT32_MAX) {
  59. /* we can not fit this string */
  60. *status = U_BUFFER_OVERFLOW_ERROR;
  61. return;
  62. }
  63. u_strFromUTF8( *target, *target_len, &dst_len, src, (int32_t)src_len, status );
  64. if( *status == U_ZERO_ERROR )
  65. {
  66. /* String is converted successfully */
  67. (*target)[dst_len] = 0;
  68. *target_len = dst_len;
  69. return;
  70. }
  71. /* Bail out if an unexpected error occurred.
  72. * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
  73. * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
  74. */
  75. if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
  76. return;
  77. /* Allocate memory for the destination buffer (it will be zero-terminated). */
  78. dst_buf = eumalloc( dst_len + 1 );
  79. /* Convert source string from UTF-8 to UTF-16. */
  80. *status = U_ZERO_ERROR;
  81. u_strFromUTF8( dst_buf, dst_len+1, NULL, src, src_len, status );
  82. if( U_FAILURE( *status ) )
  83. {
  84. efree( dst_buf );
  85. return;
  86. }
  87. dst_buf[dst_len] = 0;
  88. if( *target )
  89. efree( *target );
  90. *target = dst_buf;
  91. *target_len = dst_len;
  92. }
  93. /* }}} */
  94. /* {{{ intl_convert_utf16_to_utf8
  95. * Convert given string from UTF-16 to UTF-8.
  96. *
  97. * @param source String to convert.
  98. * @param source_len Length of the source string.
  99. * @param status Conversion status.
  100. *
  101. * @return zend_string
  102. */
  103. zend_string* intl_convert_utf16_to_utf8(
  104. const UChar* src, int32_t src_len,
  105. UErrorCode* status )
  106. {
  107. zend_string* dst;
  108. int32_t dst_len;
  109. /* Determine required destination buffer size (pre-flighting). */
  110. *status = U_ZERO_ERROR;
  111. u_strToUTF8( NULL, 0, &dst_len, src, src_len, status );
  112. /* Bail out if an unexpected error occurred.
  113. * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
  114. * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
  115. */
  116. if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
  117. return NULL;
  118. /* Allocate memory for the destination buffer (it will be zero-terminated). */
  119. dst = zend_string_alloc(dst_len, 0);
  120. /* Convert source string from UTF-8 to UTF-16. */
  121. *status = U_ZERO_ERROR;
  122. u_strToUTF8( ZSTR_VAL(dst), dst_len, NULL, src, src_len, status );
  123. if( U_FAILURE( *status ) )
  124. {
  125. zend_string_efree(dst);
  126. return NULL;
  127. }
  128. /* U_STRING_NOT_TERMINATED_WARNING is OK for us => reset 'status'. */
  129. *status = U_ZERO_ERROR;
  130. ZSTR_VAL(dst)[dst_len] = 0;
  131. return dst;
  132. }
  133. /* }}} */
  134. /*
  135. * Local variables:
  136. * tab-width: 4
  137. * c-basic-offset: 4
  138. * End:
  139. * vim600: noet sw=4 ts=4 fdm=marker
  140. * vim<600: noet sw=4 ts=4
  141. */