collator_convert.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 7 |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | http://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Authors: Vadim Savchuk <vsavchuk@productengine.com> |
  14. | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
  15. +----------------------------------------------------------------------+
  16. */
  17. #ifdef HAVE_CONFIG_H
  18. #include "config.h"
  19. #endif
  20. #include "php_intl.h"
  21. #include "collator_class.h"
  22. #include "collator_is_numeric.h"
  23. #include "collator_convert.h"
  24. #include "intl_convert.h"
  25. #include <unicode/ustring.h>
  26. #include <php.h>
  /*
   * Bail-out helper for the conversion routines in this file: applies
   * Z_TRY_ADDREF_P to `retval` and then returns it from the *enclosing*
   * function, so the caller receives the untouched input zval with an extra
   * reference it is expected to release.
   *
   * The body is wrapped in do { ... } while (0) so that an invocation followed
   * by a semicolon is exactly one statement (safe inside an unbraced if/else),
   * and the argument is parenthesized so any pointer expression may be passed.
   * Note that `retval` is evaluated twice; pass a side-effect-free expression.
   */
  #define COLLATOR_CONVERT_RETURN_FAILED(retval) do { \
      Z_TRY_ADDREF_P((retval)); \
      return (retval); \
  } while (0)
  31. /* {{{ collator_convert_hash_item_from_utf8_to_utf16 */
  32. static void collator_convert_hash_item_from_utf8_to_utf16(
  33. HashTable* hash, zval *hashData, zend_string *hashKey, zend_ulong hashIndex,
  34. UErrorCode* status )
  35. {
  36. const char* old_val;
  37. size_t old_val_len;
  38. UChar* new_val = NULL;
  39. int32_t new_val_len = 0;
  40. zval znew_val;
  41. /* Process string values only. */
  42. if( Z_TYPE_P( hashData ) != IS_STRING )
  43. return;
  44. old_val = Z_STRVAL_P( hashData );
  45. old_val_len = Z_STRLEN_P( hashData );
  46. /* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
  47. intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
  48. if( U_FAILURE( *status ) )
  49. return;
  50. /* Update current hash item with the converted value. */
  51. ZVAL_STRINGL( &znew_val, (char*)new_val, UBYTES(new_val_len + 1) );
  52. //???
  53. efree(new_val);
  54. /* hack to fix use of initialized value */
  55. Z_STRLEN(znew_val) = Z_STRLEN(znew_val) - UBYTES(1);
  56. if( hashKey)
  57. {
  58. zend_hash_update( hash, hashKey, &znew_val);
  59. }
  60. else /* hashKeyType == HASH_KEY_IS_LONG */
  61. {
  62. zend_hash_index_update( hash, hashIndex, &znew_val);
  63. }
  64. }
  65. /* }}} */
  66. /* {{{ collator_convert_hash_item_from_utf16_to_utf8 */
  67. static void collator_convert_hash_item_from_utf16_to_utf8(
  68. HashTable* hash, zval * hashData, zend_string* hashKey, zend_ulong hashIndex,
  69. UErrorCode* status )
  70. {
  71. const char* old_val;
  72. size_t old_val_len;
  73. zend_string* u8str;
  74. zval znew_val;
  75. /* Process string values only. */
  76. if( Z_TYPE_P( hashData ) != IS_STRING )
  77. return;
  78. old_val = Z_STRVAL_P( hashData );
  79. old_val_len = Z_STRLEN_P( hashData );
  80. /* Convert it from UTF-16LE to UTF-8 and save the result to new_val[_len]. */
  81. u8str = intl_convert_utf16_to_utf8(
  82. (UChar*)old_val, UCHARS(old_val_len), status );
  83. if( !u8str )
  84. return;
  85. /* Update current hash item with the converted value. */
  86. ZVAL_NEW_STR( &znew_val, u8str);
  87. if( hashKey )
  88. {
  89. zend_hash_update( hash, hashKey, &znew_val);
  90. }
  91. else /* hashKeyType == HASH_KEY_IS_LONG */
  92. {
  93. zend_hash_index_update( hash, hashIndex, &znew_val);
  94. }
  95. }
  96. /* }}} */
  97. /* {{{ collator_convert_hash_from_utf8_to_utf16
  98. * Convert values of the given hash from UTF-8 encoding to UTF-16LE.
  99. */
  100. void collator_convert_hash_from_utf8_to_utf16( HashTable* hash, UErrorCode* status )
  101. {
  102. zend_ulong hashIndex;
  103. zval *hashData;
  104. zend_string *hashKey;
  105. ZEND_HASH_FOREACH_KEY_VAL(hash, hashIndex, hashKey, hashData) {
  106. /* Convert current hash item from UTF-8 to UTF-16LE. */
  107. collator_convert_hash_item_from_utf8_to_utf16(
  108. hash, hashData, hashKey, hashIndex, status );
  109. if( U_FAILURE( *status ) )
  110. return;
  111. } ZEND_HASH_FOREACH_END();
  112. }
  113. /* }}} */
  114. /* {{{ collator_convert_hash_from_utf16_to_utf8
  115. * Convert values of the given hash from UTF-16LE encoding to UTF-8.
  116. */
  117. void collator_convert_hash_from_utf16_to_utf8( HashTable* hash, UErrorCode* status )
  118. {
  119. zend_ulong hashIndex;
  120. zend_string *hashKey;
  121. zval *hashData;
  122. ZEND_HASH_FOREACH_KEY_VAL(hash, hashIndex, hashKey, hashData) {
  123. /* Convert current hash item from UTF-16LE to UTF-8. */
  124. collator_convert_hash_item_from_utf16_to_utf8(
  125. hash, hashData, hashKey, hashIndex, status );
  126. if( U_FAILURE( *status ) ) {
  127. return;
  128. }
  129. } ZEND_HASH_FOREACH_END();
  130. }
  131. /* }}} */
  132. /* {{{ collator_convert_zstr_utf16_to_utf8
  133. *
  134. * Convert string from utf16 to utf8.
  135. *
  136. * @param zval* utf16_zval String to convert.
  137. *
  138. * @return zval* Converted string.
  139. */
  140. zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval, zval *rv )
  141. {
  142. zend_string* u8str;
  143. UErrorCode status = U_ZERO_ERROR;
  144. /* Convert to utf8 then. */
  145. u8str = intl_convert_utf16_to_utf8(
  146. (UChar*) Z_STRVAL_P(utf16_zval), UCHARS( Z_STRLEN_P(utf16_zval) ), &status );
  147. if( !u8str ) {
  148. php_error( E_WARNING, "Error converting utf16 to utf8 in collator_convert_zval_utf16_to_utf8()" );
  149. ZVAL_EMPTY_STRING( rv );
  150. } else {
  151. ZVAL_NEW_STR( rv, u8str );
  152. }
  153. return rv;
  154. }
  155. /* }}} */
  156. /* {{{ collator_convert_zstr_utf8_to_utf16
  157. *
  158. * Convert string from utf8 to utf16.
  159. *
  160. * @param zval* utf8_zval String to convert.
  161. *
  162. * @return zval* Converted string.
  163. */
  164. zval* collator_convert_zstr_utf8_to_utf16( zval* utf8_zval, zval *rv )
  165. {
  166. zval* zstr = NULL;
  167. UChar* ustr = NULL;
  168. int32_t ustr_len = 0;
  169. UErrorCode status = U_ZERO_ERROR;
  170. /* Convert the string to UTF-16. */
  171. intl_convert_utf8_to_utf16(
  172. &ustr, &ustr_len,
  173. Z_STRVAL_P( utf8_zval ), Z_STRLEN_P( utf8_zval ),
  174. &status );
  175. if( U_FAILURE( status ) )
  176. php_error( E_WARNING, "Error casting object to string in collator_convert_zstr_utf8_to_utf16()" );
  177. /* Set string. */
  178. zstr = rv;
  179. ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
  180. //???
  181. efree((char *)ustr);
  182. return zstr;
  183. }
  184. /* }}} */
  185. /* {{{ collator_convert_object_to_string
  186. * Convert object to UTF16-encoded string.
  187. */
  188. zval* collator_convert_object_to_string( zval* obj, zval *rv )
  189. {
  190. zval* zstr = NULL;
  191. UErrorCode status = U_ZERO_ERROR;
  192. UChar* ustr = NULL;
  193. int32_t ustr_len = 0;
  194. /* Bail out if it's not an object. */
  195. if( Z_TYPE_P( obj ) != IS_OBJECT )
  196. {
  197. COLLATOR_CONVERT_RETURN_FAILED( obj );
  198. }
  199. /* Try object's handlers. */
  200. if( Z_OBJ_HT_P(obj)->get )
  201. {
  202. zstr = Z_OBJ_HT_P(obj)->get( obj, rv );
  203. switch( Z_TYPE_P( zstr ) )
  204. {
  205. case IS_OBJECT:
  206. {
  207. /* Bail out. */
  208. zval_ptr_dtor( zstr );
  209. COLLATOR_CONVERT_RETURN_FAILED( obj );
  210. } break;
  211. case IS_STRING:
  212. break;
  213. default:
  214. {
  215. convert_to_string( zstr );
  216. } break;
  217. }
  218. }
  219. else if( Z_OBJ_HT_P(obj)->cast_object )
  220. {
  221. zstr = rv;
  222. if( Z_OBJ_HT_P(obj)->cast_object( obj, zstr, IS_STRING ) == FAILURE )
  223. {
  224. /* cast_object failed => bail out. */
  225. zval_ptr_dtor( zstr );
  226. COLLATOR_CONVERT_RETURN_FAILED( obj );
  227. }
  228. }
  229. /* Object wasn't successfully converted => bail out. */
  230. if( zstr == NULL )
  231. {
  232. COLLATOR_CONVERT_RETURN_FAILED( obj );
  233. }
  234. /* Convert the string to UTF-16. */
  235. intl_convert_utf8_to_utf16(
  236. &ustr, &ustr_len,
  237. Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
  238. &status );
  239. if( U_FAILURE( status ) )
  240. php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
  241. /* Cleanup zstr to hold utf16 string. */
  242. zval_ptr_dtor_str( zstr );
  243. /* Set string. */
  244. ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len));
  245. //???
  246. efree((char *)ustr);
  247. /* Don't free ustr cause it's set in zstr without copy.
  248. * efree( ustr );
  249. */
  250. return zstr;
  251. }
  252. /* }}} */
  253. /* {{{ collator_convert_string_to_number
  254. *
  255. * Convert string to number.
  256. *
  257. * @param zval* str String to convert.
  258. *
  259. * @return zval* Number. If str is not numeric string return number zero.
  260. */
  261. zval* collator_convert_string_to_number( zval* str, zval *rv )
  262. {
  263. zval* num = collator_convert_string_to_number_if_possible( str, rv );
  264. if( num == str )
  265. {
  266. /* String wasn't converted => return zero. */
  267. zval_ptr_dtor( num );
  268. num = rv;
  269. ZVAL_LONG( num, 0 );
  270. }
  271. return num;
  272. }
  273. /* }}} */
  274. /* {{{ collator_convert_string_to_double
  275. *
  276. * Convert string to double.
  277. *
  278. * @param zval* str String to convert.
  279. *
  280. * @return zval* Number. If str is not numeric string return number zero.
  281. */
  282. zval* collator_convert_string_to_double( zval* str, zval *rv )
  283. {
  284. zval* num = collator_convert_string_to_number( str, rv );
  285. if( Z_TYPE_P(num) == IS_LONG )
  286. {
  287. ZVAL_DOUBLE( num, Z_LVAL_P( num ) );
  288. }
  289. return num;
  290. }
  291. /* }}} */
  292. /* {{{ collator_convert_string_to_number_if_possible
  293. *
  294. * Convert string to numer.
  295. *
  296. * @param zval* str String to convert.
  297. *
  298. * @return zval* Number if str is numeric string. Otherwise
  299. * original str param.
  300. */
  301. zval* collator_convert_string_to_number_if_possible( zval* str, zval *rv )
  302. {
  303. int is_numeric = 0;
  304. zend_long lval = 0;
  305. double dval = 0;
  306. if( Z_TYPE_P( str ) != IS_STRING )
  307. {
  308. COLLATOR_CONVERT_RETURN_FAILED( str );
  309. }
  310. if( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, 1 ) ) )
  311. {
  312. if( is_numeric == IS_LONG ) {
  313. ZVAL_LONG(rv, lval);
  314. }
  315. if( is_numeric == IS_DOUBLE )
  316. ZVAL_DOUBLE(rv, dval);
  317. }
  318. else
  319. {
  320. COLLATOR_CONVERT_RETURN_FAILED( str );
  321. }
  322. return rv;
  323. }
  324. /* }}} */
  325. /* {{{ collator_make_printable_zval
  326. *
  327. * Returns string from input zval.
  328. *
  329. * @param zval* arg zval to get string from
  330. *
  331. * @return zval* UTF16 string.
  332. */
  333. zval* collator_make_printable_zval( zval* arg, zval *rv)
  334. {
  335. zval arg_copy;
  336. zval* str = NULL;
  337. if( Z_TYPE_P(arg) != IS_STRING )
  338. {
  339. int use_copy = zend_make_printable_zval(arg, &arg_copy);
  340. if( use_copy )
  341. {
  342. str = collator_convert_zstr_utf8_to_utf16( &arg_copy, rv );
  343. zval_ptr_dtor_str( &arg_copy );
  344. }
  345. else
  346. {
  347. str = collator_convert_zstr_utf8_to_utf16( arg, rv );
  348. }
  349. }
  350. else
  351. {
  352. COLLATOR_CONVERT_RETURN_FAILED( arg );
  353. }
  354. return str;
  355. }
  356. /* }}} */
  357. /* {{{ collator_normalize_sort_argument
  358. *
  359. * Normalize argument to use in sort's compare function.
  360. *
  361. * @param zval* arg Sort's argument to normalize.
  362. *
  363. * @return zval* Normalized copy of arg or unmodified arg
  364. * if normalization is not needed.
  365. */
  366. zval* collator_normalize_sort_argument( zval* arg, zval *rv )
  367. {
  368. zval* n_arg = NULL;
  369. if( Z_TYPE_P( arg ) != IS_STRING )
  370. {
  371. /* If its not a string then nothing to do.
  372. * Return original arg.
  373. */
  374. COLLATOR_CONVERT_RETURN_FAILED( arg );
  375. }
  376. /* Try convert to number. */
  377. n_arg = collator_convert_string_to_number_if_possible( arg, rv );
  378. if( n_arg == arg )
  379. {
  380. /* Conversion to number failed. */
  381. zval_ptr_dtor( n_arg );
  382. /* Convert string to utf8. */
  383. n_arg = collator_convert_zstr_utf16_to_utf8( arg, rv );
  384. }
  385. return n_arg;
  386. }
  387. /* }}} */
  388. /*
  389. * Local variables:
  390. * tab-width: 4
  391. * c-basic-offset: 4
  392. * End:
  393. * vim600: noet sw=4 ts=4 fdm=marker
  394. * vim<600: noet sw=4 ts=4
  395. */