/* transliterator_methods.c */
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | http://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Authors: Gustavo Lopes <cataphract@php.net> |
  14. +----------------------------------------------------------------------+
  15. */
  16. #ifdef HAVE_CONFIG_H
  17. #include "config.h"
  18. #endif
  19. #include "php_intl.h"
  20. #include "transliterator.h"
  21. #include "transliterator_class.h"
  22. #include "transliterator_methods.h"
  23. #include "intl_data.h"
  24. #include "intl_convert.h"
  25. #include <zend_exceptions.h>
  26. static int create_transliterator( char *str_id, int str_id_len, long direction, zval *object TSRMLS_DC )
  27. {
  28. Transliterator_object *to;
  29. UChar *ustr_id = NULL;
  30. int32_t ustr_id_len = 0;
  31. UTransliterator *utrans;
  32. UParseError parse_error = {0, -1};
  33. intl_error_reset( NULL TSRMLS_CC );
  34. if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) )
  35. {
  36. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  37. "transliterator_create: invalid direction", 0 TSRMLS_CC );
  38. return FAILURE;
  39. }
  40. object_init_ex( object, Transliterator_ce_ptr );
  41. TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* fetch zend object from zval "object" into "to" */
  42. /* Convert transliterator id to UTF-16 */
  43. intl_convert_utf8_to_utf16( &ustr_id, &ustr_id_len, str_id, str_id_len, TRANSLITERATOR_ERROR_CODE_P( to ) );
  44. if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
  45. {
  46. intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC );
  47. intl_error_set_custom_msg( NULL, "String conversion of id to UTF-16 failed", 0 TSRMLS_CC );
  48. zval_dtor( object );
  49. return FAILURE;
  50. }
  51. /* Open ICU Transliterator. */
  52. utrans = utrans_openU( ustr_id, ustr_id_len, (UTransDirection ) direction,
  53. NULL, -1, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
  54. if (ustr_id) {
  55. efree( ustr_id );
  56. }
  57. if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
  58. {
  59. char *buf = NULL;
  60. intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC );
  61. spprintf( &buf, 0, "transliterator_create: unable to open ICU transliterator"
  62. " with id \"%s\"", str_id );
  63. if( buf == NULL ) {
  64. intl_error_set_custom_msg( NULL,
  65. "transliterator_create: unable to open ICU transliterator", 0 TSRMLS_CC );
  66. }
  67. else
  68. {
  69. intl_error_set_custom_msg( NULL, buf, /* copy message */ 1 TSRMLS_CC );
  70. efree( buf );
  71. }
  72. zval_dtor( object );
  73. return FAILURE;
  74. }
  75. transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) TSRMLS_CC );
  76. /* no need to close the transliterator manually on construction error */
  77. if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
  78. {
  79. intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC );
  80. intl_error_set_custom_msg( NULL,
  81. "transliterator_create: internal constructor call failed", 0 TSRMLS_CC );
  82. zval_dtor( object );
  83. return FAILURE;
  84. }
  85. return SUCCESS;
  86. }
  87. /* {{{ proto Transliterator transliterator_create( string id [, int direction ] )
  88. * proto Transliterator Transliterator::create( string id [, int direction ] )
  89. * Opens a transliterator by id.
  90. */
  91. PHP_FUNCTION( transliterator_create )
  92. {
  93. char *str_id;
  94. int str_id_len;
  95. long direction = TRANSLITERATOR_FORWARD;
  96. int res;
  97. TRANSLITERATOR_METHOD_INIT_VARS;
  98. (void) to; /* unused */
  99. if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|l",
  100. &str_id, &str_id_len, &direction ) == FAILURE )
  101. {
  102. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  103. "transliterator_create: bad arguments", 0 TSRMLS_CC );
  104. RETURN_NULL();
  105. }
  106. object = return_value;
  107. res = create_transliterator( str_id, str_id_len, direction, object TSRMLS_CC );
  108. if( res == FAILURE )
  109. RETURN_NULL();
  110. /* success, leave return_value as it is (set by create_transliterator) */
  111. }
  112. /* }}} */
  113. /* {{{ proto Transliterator transliterator_create_from_rules( string rules [, int direction ] )
  114. * proto Transliterator Transliterator::createFromRules( string rules [, int direction ] )
  115. * Opens a transliterator by id.
  116. */
  117. PHP_FUNCTION( transliterator_create_from_rules )
  118. {
  119. char *str_rules;
  120. int str_rules_len;
  121. UChar *ustr_rules = NULL;
  122. int32_t ustr_rules_len = 0;
  123. long direction = TRANSLITERATOR_FORWARD;
  124. UParseError parse_error = {0, -1};
  125. UTransliterator *utrans;
  126. UChar id[] = {0x52, 0x75, 0x6C, 0x65, 0x73, 0x54, 0x72,
  127. 0x61, 0x6E, 0x73, 0x50, 0x48, 0x50, 0}; /* RulesTransPHP */
  128. TRANSLITERATOR_METHOD_INIT_VARS;
  129. if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|l",
  130. &str_rules, &str_rules_len, &direction ) == FAILURE )
  131. {
  132. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  133. "transliterator_create_from_rules: bad arguments", 0 TSRMLS_CC );
  134. RETURN_NULL();
  135. }
  136. if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) )
  137. {
  138. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  139. "transliterator_create_from_rules: invalid direction", 0 TSRMLS_CC );
  140. RETURN_NULL();
  141. }
  142. object = return_value;
  143. object_init_ex( object, Transliterator_ce_ptr );
  144. TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK;
  145. intl_convert_utf8_to_utf16( &ustr_rules, &ustr_rules_len,
  146. str_rules, str_rules_len, TRANSLITERATOR_ERROR_CODE_P( to ) );
  147. /* (I'm not a big fan of non-obvious flow control macros ).
  148. * This one checks the error value, destroys object and returns false */
  149. INTL_CTOR_CHECK_STATUS( to, "String conversion of rules to UTF-16 failed" );
  150. /* Open ICU Transliterator. */
  151. utrans = utrans_openU( id, ( sizeof( id ) - 1 ) / ( sizeof( *id ) ), (UTransDirection ) direction,
  152. ustr_rules, ustr_rules_len, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
  153. if (ustr_rules) {
  154. efree( ustr_rules );
  155. }
  156. intl_error_set_code( NULL, INTL_DATA_ERROR_CODE( to ) TSRMLS_CC );
  157. if( U_FAILURE( INTL_DATA_ERROR_CODE( to ) ) )
  158. {
  159. char *msg = NULL;
  160. smart_str parse_error_str;
  161. parse_error_str = intl_parse_error_to_string( &parse_error );
  162. spprintf( &msg, 0, "transliterator_create_from_rules: unable to "
  163. "create ICU transliterator from rules (%s)", parse_error_str.c );
  164. smart_str_free( &parse_error_str );
  165. if( msg != NULL )
  166. {
  167. intl_errors_set_custom_msg( INTL_DATA_ERROR_P( to ), msg, 1 TSRMLS_CC );
  168. efree( msg );
  169. }
  170. zval_dtor( return_value );
  171. RETURN_NULL();
  172. }
  173. transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) TSRMLS_CC );
  174. /* no need to close the transliterator manually on construction error */
  175. INTL_CTOR_CHECK_STATUS( to, "transliterator_create_from_rules: internal constructor call failed" );
  176. }
  177. /* }}} */
  178. /* {{{ proto Transliterator transliterator_create_inverse( Transliterator orig_trans )
  179. * proto Transliterator Transliterator::createInverse()
  180. * Opens the inverse transliterator transliterator.
  181. */
  182. PHP_FUNCTION( transliterator_create_inverse )
  183. {
  184. Transliterator_object *to_orig;
  185. UTransliterator *utrans;
  186. TRANSLITERATOR_METHOD_INIT_VARS;
  187. if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "O",
  188. &object, Transliterator_ce_ptr ) == FAILURE )
  189. {
  190. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  191. "transliterator_create_inverse: bad arguments", 0 TSRMLS_CC );
  192. RETURN_NULL();
  193. }
  194. TRANSLITERATOR_METHOD_FETCH_OBJECT;
  195. to_orig = to;
  196. object = return_value;
  197. object_init_ex( object, Transliterator_ce_ptr );
  198. TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* change "to" into new object (from "object" ) */
  199. utrans = utrans_openInverse( to_orig->utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
  200. INTL_CTOR_CHECK_STATUS( to, "transliterator_create_inverse: could not create "
  201. "inverse ICU transliterator" );
  202. transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) TSRMLS_CC );
  203. /* no need to close the transliterator manually on construction error */
  204. INTL_CTOR_CHECK_STATUS( to, "transliterator_create: internal constructor call failed" );
  205. }
  206. /* }}} */
  207. /* {{{ proto array transliterator_list_ids()
  208. * proto array Transliterator::listIDs()
  209. * Return an array with the registered transliterator IDs.
  210. */
  211. PHP_FUNCTION( transliterator_list_ids )
  212. {
  213. UEnumeration *en;
  214. const UChar *elem;
  215. int32_t elem_len;
  216. UErrorCode status = U_ZERO_ERROR;
  217. intl_error_reset( NULL TSRMLS_CC );
  218. if( zend_parse_parameters_none() == FAILURE )
  219. {
  220. /* seems to be the convention in this lib to return false instead of
  221. * null on bad parameter types, except on constructors and factory
  222. * methods */
  223. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  224. "transliterator_list_ids: bad arguments", 0 TSRMLS_CC );
  225. RETURN_FALSE;
  226. }
  227. en = utrans_openIDs( &status );
  228. INTL_CHECK_STATUS( status,
  229. "transliterator_list_ids: Failed to obtain registered transliterators" );
  230. array_init( return_value );
  231. while( (elem = uenum_unext( en, &elem_len, &status )) )
  232. {
  233. char *el_char = NULL;
  234. int el_len = 0;
  235. intl_convert_utf16_to_utf8( &el_char, &el_len, elem, elem_len, &status );
  236. if( U_FAILURE( status ) )
  237. {
  238. efree( el_char );
  239. break;
  240. }
  241. else
  242. {
  243. add_next_index_stringl( return_value, el_char, el_len, 0 );
  244. }
  245. }
  246. uenum_close( en );
  247. intl_error_set_code( NULL, status TSRMLS_CC );
  248. if( U_FAILURE( status ) )
  249. {
  250. zval_dtor( return_value );
  251. RETVAL_FALSE;
  252. intl_error_set_custom_msg( NULL, "transliterator_list_ids: "
  253. "Failed to build array of registered transliterators", 0 TSRMLS_CC );
  254. }
  255. }
  256. /* }}} */
  257. /* {{{ proto string transliterator_transliterate( Transliterator trans, string subject [, int start = 0 [, int end = -1 ]] )
  258. * proto string Transliterator::transliterate( string subject [, int start = 0 [, int end = -1 ]] )
  259. * Transliterate a string. */
  260. PHP_FUNCTION( transliterator_transliterate )
  261. {
  262. char *str;
  263. UChar *ustr = NULL,
  264. *uresult = NULL;
  265. int str_len;
  266. int32_t ustr_len = 0,
  267. capacity,
  268. uresult_len;
  269. long start = 0,
  270. limit = -1;
  271. int success = 0,
  272. temp_trans = 0;
  273. TRANSLITERATOR_METHOD_INIT_VARS;
  274. object = getThis();
  275. if( object == NULL )
  276. {
  277. /* in non-OOP version, accept both a transliterator and a string */
  278. zval **arg1;
  279. if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "Zs|ll",
  280. &arg1, &str, &str_len, &start, &limit ) == FAILURE )
  281. {
  282. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  283. "transliterator_transliterate: bad arguments", 0 TSRMLS_CC );
  284. RETURN_FALSE;
  285. }
  286. if( Z_TYPE_PP( arg1 ) == IS_OBJECT &&
  287. instanceof_function( Z_OBJCE_PP( arg1 ), Transliterator_ce_ptr TSRMLS_CC ) )
  288. {
  289. object = *arg1;
  290. }
  291. else
  292. { /* not a transliterator object as first argument */
  293. int res;
  294. if(Z_TYPE_PP( arg1 ) != IS_STRING )
  295. {
  296. SEPARATE_ZVAL( arg1 );
  297. convert_to_string( *arg1 );
  298. }
  299. ALLOC_INIT_ZVAL( object );
  300. temp_trans = 1;
  301. res = create_transliterator( Z_STRVAL_PP( arg1 ), Z_STRLEN_PP( arg1 ),
  302. TRANSLITERATOR_FORWARD, object TSRMLS_CC );
  303. if( res == FAILURE )
  304. {
  305. char *message = intl_error_get_message( NULL TSRMLS_CC );
  306. php_error_docref0( NULL TSRMLS_CC, E_WARNING, "Could not create "
  307. "transliterator with ID \"%s\" (%s)", Z_STRVAL_PP( arg1 ), message );
  308. efree( message );
  309. /* don't set U_ILLEGAL_ARGUMENT_ERROR to allow fetching of inner error */
  310. goto cleanup;
  311. }
  312. }
  313. }
  314. else if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|ll",
  315. &str, &str_len, &start, &limit ) == FAILURE )
  316. {
  317. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  318. "transliterator_transliterate: bad arguments", 0 TSRMLS_CC );
  319. RETURN_FALSE;
  320. }
  321. if( limit < -1 )
  322. {
  323. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  324. "transliterator_transliterate: \"end\" argument should be "
  325. "either non-negative or -1", 0 TSRMLS_CC );
  326. RETURN_FALSE;
  327. }
  328. if( start < 0 || ((limit != -1 ) && (start > limit )) )
  329. {
  330. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  331. "transliterator_transliterate: \"start\" argument should be "
  332. "non-negative and not bigger than \"end\" (if defined)", 0 TSRMLS_CC );
  333. RETURN_FALSE;
  334. }
  335. /* end argument parsing/validation */
  336. TRANSLITERATOR_METHOD_FETCH_OBJECT;
  337. intl_convert_utf8_to_utf16( &ustr, &ustr_len, str, str_len,
  338. TRANSLITERATOR_ERROR_CODE_P( to ) );
  339. INTL_METHOD_CHECK_STATUS( to, "String conversion of string to UTF-16 failed" );
  340. /* we've started allocating resources, goto from now on */
  341. if( ( start > ustr_len ) || (( limit != -1 ) && (limit > ustr_len ) ) )
  342. {
  343. char *msg;
  344. spprintf( &msg, 0,
  345. "transliterator_transliterate: Neither \"start\" nor the \"end\" "
  346. "arguments can exceed the number of UTF-16 code units "
  347. "(in this case, %d)", (int) ustr_len );
  348. if(msg != NULL )
  349. {
  350. intl_errors_set( TRANSLITERATOR_ERROR_P( to ), U_ILLEGAL_ARGUMENT_ERROR,
  351. msg, 1 TSRMLS_CC );
  352. efree( msg );
  353. }
  354. RETVAL_FALSE;
  355. goto cleanup;
  356. }
  357. uresult = safe_emalloc( ustr_len, sizeof( UChar ), 1 * sizeof( UChar ) );
  358. capacity = ustr_len + 1;
  359. while( 1 )
  360. {
  361. int32_t temp_limit = ( limit == -1 ? ustr_len : (int32_t) limit );
  362. memcpy( uresult, ustr, ustr_len * sizeof( UChar ) );
  363. uresult_len = ustr_len;
  364. utrans_transUChars( to->utrans, uresult, &uresult_len, capacity, (int32_t) start,
  365. &temp_limit, TRANSLITERATOR_ERROR_CODE_P( to ) );
  366. if( TRANSLITERATOR_ERROR_CODE( to ) == U_BUFFER_OVERFLOW_ERROR )
  367. {
  368. efree( uresult );
  369. uresult = safe_emalloc( uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
  370. capacity = uresult_len + 1;
  371. intl_error_reset( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC );
  372. }
  373. else if(TRANSLITERATOR_ERROR_CODE( to ) == U_STRING_NOT_TERMINATED_WARNING )
  374. {
  375. uresult = safe_erealloc( uresult, uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
  376. intl_error_reset( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC );
  377. break;
  378. }
  379. else if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
  380. {
  381. intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC );
  382. intl_errors_set_custom_msg( TRANSLITERATOR_ERROR_P( to ),
  383. "transliterator_transliterate: transliteration failed", 0 TSRMLS_CC );
  384. goto cleanup;
  385. }
  386. else
  387. break;
  388. }
  389. uresult[uresult_len] = (UChar) 0;
  390. success = 1;
  391. cleanup:
  392. if( ustr )
  393. efree( ustr );
  394. if( success ) {
  395. /* frees uresult even on error */
  396. INTL_METHOD_RETVAL_UTF8( to, uresult, uresult_len, 1 );
  397. }
  398. else
  399. {
  400. if( uresult )
  401. efree( uresult );
  402. RETVAL_FALSE;
  403. }
  404. if (temp_trans )
  405. zval_ptr_dtor( &object );
  406. }
  407. /* }}} */
  408. PHP_METHOD( Transliterator, __construct )
  409. {
  410. /* this constructor shouldn't be called as it's private */
  411. zend_throw_exception( NULL,
  412. "An object of this type cannot be created with the new operator.",
  413. 0 TSRMLS_CC );
  414. }
  415. /* {{{ proto int transliterator_get_error_code( Transliterator trans )
  416. * proto int Transliterator::getErrorCode()
  417. * Get the last error code for this transliterator.
  418. */
  419. PHP_FUNCTION( transliterator_get_error_code )
  420. {
  421. TRANSLITERATOR_METHOD_INIT_VARS
  422. if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "O",
  423. &object, Transliterator_ce_ptr ) == FAILURE )
  424. {
  425. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  426. "transliterator_get_error_code: unable to parse input params", 0 TSRMLS_CC );
  427. RETURN_FALSE;
  428. }
  429. /* Fetch the object (without resetting its last error code ). */
  430. to = zend_object_store_get_object( object TSRMLS_CC );
  431. if (to == NULL )
  432. RETURN_FALSE;
  433. RETURN_LONG( (long) TRANSLITERATOR_ERROR_CODE( to ) );
  434. }
  435. /* }}} */
  436. /* {{{ proto string transliterator_get_error_message( Transliterator trans )
  437. * proto string Transliterator::getErrorMessage()
  438. * Get the last error message for this transliterator.
  439. */
  440. PHP_FUNCTION( transliterator_get_error_message )
  441. {
  442. const char* message = NULL;
  443. TRANSLITERATOR_METHOD_INIT_VARS
  444. if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "O",
  445. &object, Transliterator_ce_ptr ) == FAILURE )
  446. {
  447. intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  448. "transliterator_get_error_message: unable to parse input params", 0 TSRMLS_CC );
  449. RETURN_FALSE;
  450. }
  451. /* Fetch the object (without resetting its last error code ). */
  452. to = zend_object_store_get_object( object TSRMLS_CC );
  453. if (to == NULL )
  454. RETURN_FALSE;
  455. /* Return last error message. */
  456. message = intl_error_get_message( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC );
  457. RETURN_STRING( message, 0 );
  458. }
  459. /* }}} */
  460. /*
  461. * Local variables:
  462. * tab-width: 4
  463. * c-basic-offset: 4
  464. * End:
  465. * vim600: noet sw=4 ts=4 fdm=marker
  466. * vim<600: noet sw=4 ts=4
  467. */