collator_convert.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | http://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Authors: Vadim Savchuk <vsavchuk@productengine.com> |
  14. | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
  15. +----------------------------------------------------------------------+
  16. */
  17. #ifdef HAVE_CONFIG_H
  18. #include "config.h"
  19. #endif
  20. #include "php_intl.h"
  21. #include "collator_class.h"
  22. #include "collator_is_numeric.h"
  23. #include "collator_convert.h"
  24. #include "intl_convert.h"
  25. #include <unicode/ustring.h>
  26. #include <php.h>
  /*
   * CAST_OBJECT_SHOULD_FREE is spliced into the argument list of the
   * cast_object() handler call made in this file. It expands to one extra
   * ",0" argument on PHP 5.0 and 5.1 and to nothing on every other version.
   * NOTE(review): the name suggests the extra argument is the handler's
   * "should_free" flag -- confirm against the Zend headers.
   */
  #if !(PHP_MAJOR_VERSION == 5 && PHP_MINOR_VERSION <= 1)
  #define CAST_OBJECT_SHOULD_FREE
  #else
  #define CAST_OBJECT_SHOULD_FREE ,0
  #endif
  /*
   * Give up on a conversion: add one reference to `retval` and return it
   * from the enclosing function. Callers in this file detect the failure by
   * comparing the returned pointer with the one they passed in.
   *
   * Wrapped in do { } while (0) so the macro behaves as a single statement
   * (safe in an unbraced if/else), and the argument is parenthesized so any
   * lvalue expression can be passed.
   */
  #define COLLATOR_CONVERT_RETURN_FAILED(retval) do { \
          zval_add_ref( &(retval) ); \
          return (retval); \
      } while( 0 )
  36. /* {{{ collator_convert_hash_item_from_utf8_to_utf16 */
  37. static void collator_convert_hash_item_from_utf8_to_utf16(
  38. HashTable* hash, int hashKeyType, char* hashKey, ulong hashIndex,
  39. UErrorCode* status )
  40. {
  41. const char* old_val;
  42. int old_val_len;
  43. UChar* new_val = NULL;
  44. int new_val_len = 0;
  45. zval** hashData = NULL;
  46. zval* znew_val = NULL;
  47. /* Get current hash item. */
  48. zend_hash_get_current_data( hash, (void**) &hashData );
  49. /* Process string values only. */
  50. if( Z_TYPE_P( *hashData ) != IS_STRING )
  51. return;
  52. old_val = Z_STRVAL_P( *hashData );
  53. old_val_len = Z_STRLEN_P( *hashData );
  54. /* Convert it from UTF-8 to UTF-16LE and save the result to new_val[_len]. */
  55. intl_convert_utf8_to_utf16( &new_val, &new_val_len, old_val, old_val_len, status );
  56. if( U_FAILURE( *status ) )
  57. return;
  58. /* Update current hash item with the converted value. */
  59. MAKE_STD_ZVAL( znew_val );
  60. ZVAL_STRINGL( znew_val, (char*)new_val, UBYTES(new_val_len), FALSE );
  61. if( hashKeyType == HASH_KEY_IS_STRING )
  62. {
  63. zend_hash_update( hash, hashKey, strlen( hashKey ) + 1,
  64. (void*) &znew_val, sizeof(zval*), NULL );
  65. }
  66. else /* hashKeyType == HASH_KEY_IS_LONG */
  67. {
  68. zend_hash_index_update( hash, hashIndex,
  69. (void*) &znew_val, sizeof(zval*), NULL );
  70. }
  71. }
  72. /* }}} */
  73. /* {{{ collator_convert_hash_item_from_utf16_to_utf8 */
  74. static void collator_convert_hash_item_from_utf16_to_utf8(
  75. HashTable* hash, int hashKeyType, char* hashKey, ulong hashIndex,
  76. UErrorCode* status )
  77. {
  78. const char* old_val;
  79. int old_val_len;
  80. char* new_val = NULL;
  81. int new_val_len = 0;
  82. zval** hashData = NULL;
  83. zval* znew_val = NULL;
  84. /* Get current hash item. */
  85. zend_hash_get_current_data( hash, (void**) &hashData );
  86. /* Process string values only. */
  87. if( Z_TYPE_P( *hashData ) != IS_STRING )
  88. return;
  89. old_val = Z_STRVAL_P( *hashData );
  90. old_val_len = Z_STRLEN_P( *hashData );
  91. /* Convert it from UTF-16LE to UTF-8 and save the result to new_val[_len]. */
  92. intl_convert_utf16_to_utf8( &new_val, &new_val_len,
  93. (UChar*)old_val, UCHARS(old_val_len), status );
  94. if( U_FAILURE( *status ) )
  95. return;
  96. /* Update current hash item with the converted value. */
  97. MAKE_STD_ZVAL( znew_val );
  98. ZVAL_STRINGL( znew_val, (char*)new_val, new_val_len, FALSE );
  99. if( hashKeyType == HASH_KEY_IS_STRING )
  100. {
  101. zend_hash_update( hash, hashKey, strlen( hashKey ) + 1,
  102. (void*) &znew_val, sizeof(zval*), NULL );
  103. }
  104. else /* hashKeyType == HASH_KEY_IS_LONG */
  105. {
  106. zend_hash_index_update( hash, hashIndex,
  107. (void*) &znew_val, sizeof(zval*), NULL );
  108. }
  109. }
  110. /* }}} */
  111. /* {{{ collator_convert_hash_from_utf8_to_utf16
  112. * Convert values of the given hash from UTF-8 encoding to UTF-16LE.
  113. */
  114. void collator_convert_hash_from_utf8_to_utf16( HashTable* hash, UErrorCode* status )
  115. {
  116. ulong hashIndex = 0;
  117. char* hashKey = NULL;
  118. int hashKeyType = 0;
  119. zend_hash_internal_pointer_reset( hash );
  120. while( ( hashKeyType = zend_hash_get_current_key( hash, &hashKey, &hashIndex, 0 ) )
  121. != HASH_KEY_NON_EXISTENT )
  122. {
  123. /* Convert current hash item from UTF-8 to UTF-16LE. */
  124. collator_convert_hash_item_from_utf8_to_utf16(
  125. hash, hashKeyType, hashKey, hashIndex, status );
  126. if( U_FAILURE( *status ) )
  127. return;
  128. /* Proceed to the next item. */
  129. zend_hash_move_forward( hash );
  130. }
  131. }
  132. /* }}} */
  133. /* {{{ collator_convert_hash_from_utf16_to_utf8
  134. * Convert values of the given hash from UTF-16LE encoding to UTF-8.
  135. */
  136. void collator_convert_hash_from_utf16_to_utf8( HashTable* hash, UErrorCode* status )
  137. {
  138. ulong hashIndex = 0;
  139. char* hashKey = NULL;
  140. int hashKeyType = 0;
  141. zend_hash_internal_pointer_reset( hash );
  142. while( ( hashKeyType = zend_hash_get_current_key( hash, &hashKey, &hashIndex, 0 ) )
  143. != HASH_KEY_NON_EXISTENT )
  144. {
  145. /* Convert current hash item from UTF-16LE to UTF-8. */
  146. collator_convert_hash_item_from_utf16_to_utf8(
  147. hash, hashKeyType, hashKey, hashIndex, status );
  148. if( U_FAILURE( *status ) ) {
  149. return;
  150. }
  151. /* Proceed to the next item. */
  152. zend_hash_move_forward( hash );
  153. }
  154. }
  155. /* }}} */
  156. /* {{{ collator_convert_zstr_utf16_to_utf8
  157. *
  158. * Convert string from utf16 to utf8.
  159. *
  160. * @param zval* utf16_zval String to convert.
  161. *
  162. * @return zval* Converted string.
  163. */
  164. zval* collator_convert_zstr_utf16_to_utf8( zval* utf16_zval )
  165. {
  166. zval* utf8_zval = NULL;
  167. char* str = NULL;
  168. int str_len = 0;
  169. UErrorCode status = U_ZERO_ERROR;
  170. /* Convert to utf8 then. */
  171. intl_convert_utf16_to_utf8( &str, &str_len,
  172. (UChar*) Z_STRVAL_P(utf16_zval), UCHARS( Z_STRLEN_P(utf16_zval) ), &status );
  173. if( U_FAILURE( status ) )
  174. php_error( E_WARNING, "Error converting utf16 to utf8 in collator_convert_zval_utf16_to_utf8()" );
  175. ALLOC_INIT_ZVAL( utf8_zval );
  176. ZVAL_STRINGL( utf8_zval, str, str_len, FALSE );
  177. return utf8_zval;
  178. }
  179. /* }}} */
  180. /* {{{ collator_convert_zstr_utf8_to_utf16
  181. *
  182. * Convert string from utf8 to utf16.
  183. *
  184. * @param zval* utf8_zval String to convert.
  185. *
  186. * @return zval* Converted string.
  187. */
  188. zval* collator_convert_zstr_utf8_to_utf16( zval* utf8_zval )
  189. {
  190. zval* zstr = NULL;
  191. UChar* ustr = NULL;
  192. int ustr_len = 0;
  193. UErrorCode status = U_ZERO_ERROR;
  194. /* Convert the string to UTF-16. */
  195. intl_convert_utf8_to_utf16(
  196. &ustr, &ustr_len,
  197. Z_STRVAL_P( utf8_zval ), Z_STRLEN_P( utf8_zval ),
  198. &status );
  199. if( U_FAILURE( status ) )
  200. php_error( E_WARNING, "Error casting object to string in collator_convert_zstr_utf8_to_utf16()" );
  201. /* Set string. */
  202. ALLOC_INIT_ZVAL( zstr );
  203. ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len), FALSE );
  204. return zstr;
  205. }
  206. /* }}} */
  207. /* {{{ collator_convert_object_to_string
  208. * Convert object to UTF16-encoded string.
  209. */
  210. zval* collator_convert_object_to_string( zval* obj TSRMLS_DC )
  211. {
  212. zval* zstr = NULL;
  213. UErrorCode status = U_ZERO_ERROR;
  214. UChar* ustr = NULL;
  215. int ustr_len = 0;
  216. /* Bail out if it's not an object. */
  217. if( Z_TYPE_P( obj ) != IS_OBJECT )
  218. {
  219. COLLATOR_CONVERT_RETURN_FAILED( obj );
  220. }
  221. /* Try object's handlers. */
  222. if( Z_OBJ_HT_P(obj)->get )
  223. {
  224. zstr = Z_OBJ_HT_P(obj)->get( obj TSRMLS_CC );
  225. switch( Z_TYPE_P( zstr ) )
  226. {
  227. case IS_OBJECT:
  228. {
  229. /* Bail out. */
  230. zval_ptr_dtor( &zstr );
  231. COLLATOR_CONVERT_RETURN_FAILED( obj );
  232. } break;
  233. case IS_STRING:
  234. break;
  235. default:
  236. {
  237. convert_to_string( zstr );
  238. } break;
  239. }
  240. }
  241. else if( Z_OBJ_HT_P(obj)->cast_object )
  242. {
  243. ALLOC_INIT_ZVAL( zstr );
  244. if( Z_OBJ_HT_P(obj)->cast_object( obj, zstr, IS_STRING CAST_OBJECT_SHOULD_FREE TSRMLS_CC ) == FAILURE )
  245. {
  246. /* cast_object failed => bail out. */
  247. zval_ptr_dtor( &zstr );
  248. COLLATOR_CONVERT_RETURN_FAILED( obj );
  249. }
  250. }
  251. /* Object wasn't successfuly converted => bail out. */
  252. if( zstr == NULL )
  253. {
  254. COLLATOR_CONVERT_RETURN_FAILED( obj );
  255. }
  256. /* Convert the string to UTF-16. */
  257. intl_convert_utf8_to_utf16(
  258. &ustr, &ustr_len,
  259. Z_STRVAL_P( zstr ), Z_STRLEN_P( zstr ),
  260. &status );
  261. if( U_FAILURE( status ) )
  262. php_error( E_WARNING, "Error casting object to string in collator_convert_object_to_string()" );
  263. /* Cleanup zstr to hold utf16 string. */
  264. zval_dtor( zstr );
  265. /* Set string. */
  266. ZVAL_STRINGL( zstr, (char*)ustr, UBYTES(ustr_len), FALSE );
  267. /* Don't free ustr cause it's set in zstr without copy.
  268. * efree( ustr );
  269. */
  270. return zstr;
  271. }
  272. /* }}} */
  273. /* {{{ collator_convert_string_to_number
  274. *
  275. * Convert string to number.
  276. *
  277. * @param zval* str String to convert.
  278. *
  279. * @return zval* Number. If str is not numeric string return number zero.
  280. */
  281. zval* collator_convert_string_to_number( zval* str )
  282. {
  283. zval* num = collator_convert_string_to_number_if_possible( str );
  284. if( num == str )
  285. {
  286. /* String wasn't converted => return zero. */
  287. zval_ptr_dtor( &num );
  288. ALLOC_INIT_ZVAL( num );
  289. ZVAL_LONG( num, 0 );
  290. }
  291. return num;
  292. }
  293. /* }}} */
  294. /* {{{ collator_convert_string_to_double
  295. *
  296. * Convert string to double.
  297. *
  298. * @param zval* str String to convert.
  299. *
  300. * @return zval* Number. If str is not numeric string return number zero.
  301. */
  302. zval* collator_convert_string_to_double( zval* str )
  303. {
  304. zval* num = collator_convert_string_to_number( str );
  305. if( Z_TYPE_P(num) == IS_LONG )
  306. {
  307. ZVAL_DOUBLE( num, Z_LVAL_P( num ) );
  308. }
  309. return num;
  310. }
  311. /* }}} */
  312. /* {{{ collator_convert_string_to_number_if_possible
  313. *
  314. * Convert string to numer.
  315. *
  316. * @param zval* str String to convert.
  317. *
  318. * @return zval* Number if str is numeric string. Otherwise
  319. * original str param.
  320. */
  321. zval* collator_convert_string_to_number_if_possible( zval* str )
  322. {
  323. zval* num = NULL;
  324. int is_numeric = 0;
  325. long lval = 0;
  326. double dval = 0;
  327. if( Z_TYPE_P( str ) != IS_STRING )
  328. {
  329. COLLATOR_CONVERT_RETURN_FAILED( str );
  330. }
  331. if( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, 1 ) ) )
  332. {
  333. ALLOC_INIT_ZVAL( num );
  334. if( is_numeric == IS_LONG )
  335. Z_LVAL_P(num) = lval;
  336. if( is_numeric == IS_DOUBLE )
  337. Z_DVAL_P(num) = dval;
  338. Z_TYPE_P(num) = is_numeric;
  339. }
  340. else
  341. {
  342. COLLATOR_CONVERT_RETURN_FAILED( str );
  343. }
  344. return num;
  345. }
  346. /* }}} */
  347. /* {{{ collator_make_printable_zval
  348. *
  349. * Returns string from input zval.
  350. *
  351. * @param zval* arg zval to get string from
  352. *
  353. * @return zval* UTF16 string.
  354. */
  355. zval* collator_make_printable_zval( zval* arg )
  356. {
  357. zval arg_copy;
  358. int use_copy = 0;
  359. zval* str = NULL;
  360. if( Z_TYPE_P(arg) != IS_STRING )
  361. {
  362. zend_make_printable_zval(arg, &arg_copy, &use_copy);
  363. if( use_copy )
  364. {
  365. str = collator_convert_zstr_utf8_to_utf16( &arg_copy );
  366. zval_dtor( &arg_copy );
  367. }
  368. else
  369. {
  370. str = collator_convert_zstr_utf8_to_utf16( arg );
  371. }
  372. }
  373. else
  374. {
  375. COLLATOR_CONVERT_RETURN_FAILED( arg );
  376. }
  377. return str;
  378. }
  379. /* }}} */
  380. /* {{{ collator_normalize_sort_argument
  381. *
  382. * Normalize argument to use in sort's compare function.
  383. *
  384. * @param zval* arg Sort's argument to normalize.
  385. *
  386. * @return zval* Normalized copy of arg or unmodified arg
  387. * if normalization is not needed.
  388. */
  389. zval* collator_normalize_sort_argument( zval* arg )
  390. {
  391. zval* n_arg = NULL;
  392. if( Z_TYPE_P( arg ) != IS_STRING )
  393. {
  394. /* If its not a string then nothing to do.
  395. * Return original arg.
  396. */
  397. COLLATOR_CONVERT_RETURN_FAILED( arg );
  398. }
  399. /* Try convert to number. */
  400. n_arg = collator_convert_string_to_number_if_possible( arg );
  401. if( n_arg == arg )
  402. {
  403. /* Conversion to number failed. */
  404. zval_ptr_dtor( &n_arg );
  405. /* Convert string to utf8. */
  406. n_arg = collator_convert_zstr_utf16_to_utf8( arg );
  407. }
  408. return n_arg;
  409. }
  410. /* }}} */
  411. /*
  412. * Local variables:
  413. * tab-width: 4
  414. * c-basic-offset: 4
  415. * End:
  416. * vim600: noet sw=4 ts=4 fdm=marker
  417. * vim<600: noet sw=4 ts=4
  418. */