converter.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031
  1. /*
  2. +----------------------------------------------------------------------+
  3. | This source file is subject to version 3.01 of the PHP license, |
  4. | that is bundled with this package in the file LICENSE, and is |
  5. | available through the world-wide-web at the following url: |
  6. | https://www.php.net/license/3_01.txt |
  7. | If you did not receive a copy of the PHP license and are unable to |
  8. | obtain it through the world-wide-web, please send a note to |
  9. | license@php.net so we can mail you a copy immediately. |
  10. +----------------------------------------------------------------------+
  11. | Authors: Sara Golemon <pollita@php.net> |
  12. +----------------------------------------------------------------------+
  13. */
  14. #include "converter.h"
  15. #include "converter_arginfo.h"
  16. #include "zend_exceptions.h"
  17. #include <unicode/utypes.h>
  18. #include <unicode/utf8.h>
  19. #include <unicode/utf16.h>
  20. #include <unicode/ucnv.h>
  21. #include <unicode/ustring.h>
  22. #include "../intl_error.h"
  23. #include "../intl_common.h"
  24. typedef struct _php_converter_object {
  25. UConverter *src, *dest;
  26. zend_fcall_info to_cb, from_cb;
  27. zend_fcall_info_cache to_cache, from_cache;
  28. intl_error error;
  29. zend_object obj;
  30. } php_converter_object;
  31. static inline php_converter_object *php_converter_fetch_object(zend_object *obj) {
  32. return (php_converter_object *)((char*)(obj) - XtOffsetOf(php_converter_object, obj));
  33. }
  34. #define Z_INTL_CONVERTER_P(zv) php_converter_fetch_object(Z_OBJ_P(zv))
  35. static zend_class_entry *php_converter_ce;
  36. static zend_object_handlers php_converter_object_handlers;
  37. #define CONV_GET(pzv) (Z_INTL_CONVERTER_P((pzv)))
  38. #define THROW_UFAILURE(obj, fname, error) php_converter_throw_failure(obj, error, \
  39. fname "() returned error " ZEND_LONG_FMT ": %s", (zend_long)error, u_errorName(error))
  40. /* {{{ php_converter_throw_failure */
  41. static inline void php_converter_throw_failure(php_converter_object *objval, UErrorCode error, const char *format, ...) {
  42. intl_error *err = objval ? &(objval->error) : NULL;
  43. char message[1024];
  44. va_list vargs;
  45. va_start(vargs, format);
  46. vsnprintf(message, sizeof(message), format, vargs);
  47. va_end(vargs);
  48. intl_errors_set(err, error, message, 1);
  49. }
  50. /* }}} */
  51. /* {{{ php_converter_default_callback */
  52. static void php_converter_default_callback(zval *return_value, zval *zobj, zend_long reason, zval *error) {
  53. /* Basic functionality so children can call parent::toUCallback() */
  54. switch (reason) {
  55. case UCNV_UNASSIGNED:
  56. case UCNV_ILLEGAL:
  57. case UCNV_IRREGULAR:
  58. {
  59. php_converter_object *objval = (php_converter_object*)CONV_GET(zobj);
  60. char chars[127];
  61. int8_t chars_len = sizeof(chars);
  62. UErrorCode uerror = U_ZERO_ERROR;
  63. if(!objval->src) {
  64. php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Source Converter has not been initialized yet");
  65. chars[0] = 0x1A;
  66. chars[1] = 0;
  67. chars_len = 1;
  68. ZEND_TRY_ASSIGN_REF_LONG(error, U_INVALID_STATE_ERROR);
  69. RETVAL_STRINGL(chars, chars_len);
  70. return;
  71. }
  72. /* Yes, this is fairly wasteful at first glance,
  73. * but considering that the alternative is to store
  74. * what's sent into setSubstChars() and the fact
  75. * that this is an extremely unlikely codepath
  76. * I'd rather take the CPU hit here, than waste time
  77. * storing a value I'm unlikely to use.
  78. */
  79. ucnv_getSubstChars(objval->src, chars, &chars_len, &uerror);
  80. if (U_FAILURE(uerror)) {
  81. THROW_UFAILURE(objval, "ucnv_getSubstChars", uerror);
  82. chars[0] = 0x1A;
  83. chars[1] = 0;
  84. chars_len = 1;
  85. }
  86. ZEND_TRY_ASSIGN_REF_LONG(error, uerror);
  87. RETVAL_STRINGL(chars, chars_len);
  88. }
  89. }
  90. }
  91. /* }}} */
  92. /* {{{ */
  93. PHP_METHOD(UConverter, toUCallback) {
  94. zend_long reason;
  95. zend_string *source, *codeUnits;
  96. zval *error;
  97. if (zend_parse_parameters(ZEND_NUM_ARGS(), "lSSz",
  98. &reason, &source, &codeUnits, &error) == FAILURE) {
  99. RETURN_THROWS();
  100. }
  101. php_converter_default_callback(return_value, ZEND_THIS, reason, error);
  102. }
  103. /* }}} */
  104. /* {{{ */
  105. PHP_METHOD(UConverter, fromUCallback) {
  106. zend_long reason;
  107. zval *source, *error;
  108. zend_long codePoint;
  109. if (zend_parse_parameters(ZEND_NUM_ARGS(), "lalz",
  110. &reason, &source, &codePoint, &error) == FAILURE) {
  111. RETURN_THROWS();
  112. }
  113. php_converter_default_callback(return_value, ZEND_THIS, reason, error);
  114. }
  115. /* }}} */
  116. /* {{{ php_converter_check_limits */
  117. static inline bool php_converter_check_limits(php_converter_object *objval, zend_long available, zend_long needed) {
  118. if (available < needed) {
  119. php_converter_throw_failure(objval, U_BUFFER_OVERFLOW_ERROR, "Buffer overrun " ZEND_LONG_FMT " bytes needed, " ZEND_LONG_FMT " available", needed, available);
  120. return 0;
  121. }
  122. return 1;
  123. }
  124. /* }}} */
  125. #define TARGET_CHECK(cnvargs, needed) php_converter_check_limits(objval, cnvargs->targetLimit - cnvargs->target, needed)
  126. /* {{{ php_converter_append_toUnicode_target */
  127. static void php_converter_append_toUnicode_target(zval *val, UConverterToUnicodeArgs *args, php_converter_object *objval) {
  128. switch (Z_TYPE_P(val)) {
  129. case IS_NULL:
  130. /* Code unit is being skipped */
  131. return;
  132. case IS_LONG:
  133. {
  134. zend_long lval = Z_LVAL_P(val);
  135. if ((lval < 0) || (lval > 0x10FFFF)) {
  136. php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR, "Invalid codepoint U+%04lx", lval);
  137. return;
  138. }
  139. if (lval > 0xFFFF) {
  140. /* Supplemental planes U+010000 - U+10FFFF */
  141. if (TARGET_CHECK(args, 2)) {
  142. /* TODO: Find the ICU call which does this properly */
  143. *(args->target++) = (UChar)(((lval - 0x10000) >> 10) | 0xD800);
  144. *(args->target++) = (UChar)(((lval - 0x10000) & 0x3FF) | 0xDC00);
  145. }
  146. return;
  147. }
  148. /* Non-suggogate BMP codepoint */
  149. if (TARGET_CHECK(args, 1)) {
  150. *(args->target++) = (UChar)lval;
  151. }
  152. return;
  153. }
  154. case IS_STRING:
  155. {
  156. const char *strval = Z_STRVAL_P(val);
  157. int i = 0, strlen = Z_STRLEN_P(val);
  158. while((i != strlen) && TARGET_CHECK(args, 1)) {
  159. UChar c;
  160. U8_NEXT(strval, i, strlen, c);
  161. *(args->target++) = c;
  162. }
  163. return;
  164. }
  165. case IS_ARRAY:
  166. {
  167. HashTable *ht = Z_ARRVAL_P(val);
  168. zval *tmpzval;
  169. ZEND_HASH_FOREACH_VAL(ht, tmpzval) {
  170. php_converter_append_toUnicode_target(tmpzval, args, objval);
  171. } ZEND_HASH_FOREACH_END();
  172. return;
  173. }
  174. default:
  175. php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR,
  176. "toUCallback() specified illegal type for substitution character");
  177. }
  178. }
  179. /* }}} */
  180. /* {{{ php_converter_to_u_callback */
  181. static void php_converter_to_u_callback(const void *context,
  182. UConverterToUnicodeArgs *args,
  183. const char *codeUnits, int32_t length,
  184. UConverterCallbackReason reason,
  185. UErrorCode *pErrorCode) {
  186. php_converter_object *objval = (php_converter_object*)context;
  187. zval retval;
  188. zval zargs[4];
  189. ZVAL_LONG(&zargs[0], reason);
  190. if (args->source) {
  191. ZVAL_STRINGL(&zargs[1], args->source, args->sourceLimit - args->source);
  192. } else {
  193. ZVAL_EMPTY_STRING(&zargs[1]);
  194. }
  195. if (codeUnits) {
  196. ZVAL_STRINGL(&zargs[2], codeUnits, length);
  197. } else {
  198. ZVAL_EMPTY_STRING(&zargs[2]);
  199. }
  200. ZVAL_LONG(&zargs[3], *pErrorCode);
  201. ZVAL_MAKE_REF(&zargs[3]);
  202. objval->to_cb.param_count = 4;
  203. objval->to_cb.params = zargs;
  204. objval->to_cb.retval = &retval;
  205. if (zend_call_function(&(objval->to_cb), &(objval->to_cache)) == FAILURE) {
  206. /* Unlikely */
  207. php_converter_throw_failure(objval, U_INTERNAL_PROGRAM_ERROR, "Unexpected failure calling toUCallback()");
  208. } else if (!Z_ISUNDEF(retval)) {
  209. php_converter_append_toUnicode_target(&retval, args, objval);
  210. zval_ptr_dtor(&retval);
  211. }
  212. if (Z_TYPE(zargs[3]) == IS_LONG) {
  213. *pErrorCode = Z_LVAL(zargs[3]);
  214. } else if (Z_ISREF(zargs[3]) && Z_TYPE_P(Z_REFVAL(zargs[3])) == IS_LONG) {
  215. *pErrorCode = Z_LVAL_P(Z_REFVAL(zargs[3]));
  216. }
  217. zval_ptr_dtor(&zargs[0]);
  218. zval_ptr_dtor(&zargs[1]);
  219. zval_ptr_dtor(&zargs[2]);
  220. zval_ptr_dtor(&zargs[3]);
  221. }
  222. /* }}} */
  223. /* {{{ php_converter_append_fromUnicode_target */
  224. static void php_converter_append_fromUnicode_target(zval *val, UConverterFromUnicodeArgs *args, php_converter_object *objval) {
  225. switch (Z_TYPE_P(val)) {
  226. case IS_NULL:
  227. /* Ignore */
  228. return;
  229. case IS_LONG:
  230. if (TARGET_CHECK(args, 1)) {
  231. *(args->target++) = Z_LVAL_P(val);
  232. }
  233. return;
  234. case IS_STRING:
  235. {
  236. size_t vallen = Z_STRLEN_P(val);
  237. if (TARGET_CHECK(args, vallen)) {
  238. memcpy(args->target, Z_STRVAL_P(val), vallen);
  239. args->target += vallen;
  240. }
  241. return;
  242. }
  243. case IS_ARRAY:
  244. {
  245. HashTable *ht = Z_ARRVAL_P(val);
  246. zval *tmpzval;
  247. ZEND_HASH_FOREACH_VAL(ht, tmpzval) {
  248. php_converter_append_fromUnicode_target(tmpzval, args, objval);
  249. } ZEND_HASH_FOREACH_END();
  250. return;
  251. }
  252. default:
  253. php_converter_throw_failure(objval, U_ILLEGAL_ARGUMENT_ERROR, "fromUCallback() specified illegal type for substitution character");
  254. }
  255. }
  256. /* }}} */
  257. /* {{{ php_converter_from_u_callback */
  258. static void php_converter_from_u_callback(const void *context,
  259. UConverterFromUnicodeArgs *args,
  260. const UChar *codeUnits, int32_t length, UChar32 codePoint,
  261. UConverterCallbackReason reason,
  262. UErrorCode *pErrorCode) {
  263. php_converter_object *objval = (php_converter_object*)context;
  264. zval retval;
  265. zval zargs[4];
  266. int i;
  267. ZVAL_LONG(&zargs[0], reason);
  268. array_init(&zargs[1]);
  269. i = 0;
  270. while (i < length) {
  271. UChar32 c;
  272. U16_NEXT(codeUnits, i, length, c);
  273. add_next_index_long(&zargs[1], c);
  274. }
  275. ZVAL_LONG(&zargs[2], codePoint);
  276. ZVAL_LONG(&zargs[3], *pErrorCode);
  277. ZVAL_MAKE_REF(&zargs[3]);
  278. objval->from_cb.param_count = 4;
  279. objval->from_cb.params = zargs;
  280. objval->from_cb.retval = &retval;
  281. if (zend_call_function(&(objval->from_cb), &(objval->from_cache)) == FAILURE) {
  282. /* Unlikely */
  283. php_converter_throw_failure(objval, U_INTERNAL_PROGRAM_ERROR, "Unexpected failure calling fromUCallback()");
  284. } else if (!Z_ISUNDEF(retval)) {
  285. php_converter_append_fromUnicode_target(&retval, args, objval);
  286. zval_ptr_dtor(&retval);
  287. }
  288. if (Z_TYPE(zargs[3]) == IS_LONG) {
  289. *pErrorCode = Z_LVAL(zargs[3]);
  290. } else if (Z_ISREF(zargs[3]) && Z_TYPE_P(Z_REFVAL(zargs[3])) == IS_LONG) {
  291. *pErrorCode = Z_LVAL_P(Z_REFVAL(zargs[3]));
  292. }
  293. zval_ptr_dtor(&zargs[0]);
  294. zval_ptr_dtor(&zargs[1]);
  295. zval_ptr_dtor(&zargs[2]);
  296. zval_ptr_dtor(&zargs[3]);
  297. }
  298. /* }}} */
  299. /* {{{ php_converter_set_callbacks */
  300. static inline bool php_converter_set_callbacks(php_converter_object *objval, UConverter *cnv) {
  301. bool ret = 1;
  302. UErrorCode error = U_ZERO_ERROR;
  303. if (objval->obj.ce == php_converter_ce) {
  304. /* Short-circuit having to go through method calls and data marshalling
  305. * when we're using default behavior
  306. */
  307. return 1;
  308. }
  309. ucnv_setToUCallBack(cnv, (UConverterToUCallback)php_converter_to_u_callback, (const void*)objval,
  310. NULL, NULL, &error);
  311. if (U_FAILURE(error)) {
  312. THROW_UFAILURE(objval, "ucnv_setToUCallBack", error);
  313. ret = 0;
  314. }
  315. error = U_ZERO_ERROR;
  316. ucnv_setFromUCallBack(cnv, (UConverterFromUCallback)php_converter_from_u_callback, (const void*)objval,
  317. NULL, NULL, &error);
  318. if (U_FAILURE(error)) {
  319. THROW_UFAILURE(objval, "ucnv_setFromUCallBack", error);
  320. ret = 0;
  321. }
  322. return ret;
  323. }
  324. /* }}} */
  325. /* {{{ php_converter_set_encoding */
  326. static bool php_converter_set_encoding(php_converter_object *objval,
  327. UConverter **pcnv,
  328. const char *enc, size_t enc_len) {
  329. UErrorCode error = U_ZERO_ERROR;
  330. UConverter *cnv = ucnv_open(enc, &error);
  331. if (error == U_AMBIGUOUS_ALIAS_WARNING) {
  332. UErrorCode getname_error = U_ZERO_ERROR;
  333. const char *actual_encoding = ucnv_getName(cnv, &getname_error);
  334. if (U_FAILURE(getname_error)) {
  335. /* Should never happen */
  336. actual_encoding = "(unknown)";
  337. }
  338. php_error_docref(NULL, E_WARNING, "Ambiguous encoding specified, using %s", actual_encoding);
  339. } else if (U_FAILURE(error)) {
  340. if (objval) {
  341. THROW_UFAILURE(objval, "ucnv_open", error);
  342. } else {
  343. php_error_docref(NULL, E_WARNING, "Error setting encoding: %d - %s", (int)error, u_errorName(error));
  344. }
  345. return 0;
  346. }
  347. if (objval && !php_converter_set_callbacks(objval, cnv)) {
  348. return 0;
  349. }
  350. if (*pcnv) {
  351. ucnv_close(*pcnv);
  352. }
  353. *pcnv = cnv;
  354. return 1;
  355. }
  356. /* }}} */
  357. /* {{{ php_converter_do_set_encoding */
  358. static void php_converter_do_set_encoding(UConverter **pcnv, INTERNAL_FUNCTION_PARAMETERS) {
  359. php_converter_object *objval = CONV_GET(ZEND_THIS);
  360. char *enc;
  361. size_t enc_len;
  362. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &enc, &enc_len) == FAILURE) {
  363. RETURN_THROWS();
  364. }
  365. intl_errors_reset(&objval->error);
  366. RETURN_BOOL(php_converter_set_encoding(objval, pcnv, enc, enc_len));
  367. }
  368. /* }}} */
  369. /* {{{ */
  370. PHP_METHOD(UConverter, setSourceEncoding) {
  371. php_converter_object *objval = CONV_GET(ZEND_THIS);
  372. php_converter_do_set_encoding(&(objval->src), INTERNAL_FUNCTION_PARAM_PASSTHRU);
  373. }
  374. /* }}} */
  375. /* {{{ */
  376. PHP_METHOD(UConverter, setDestinationEncoding) {
  377. php_converter_object *objval = CONV_GET(ZEND_THIS);
  378. php_converter_do_set_encoding(&(objval->dest), INTERNAL_FUNCTION_PARAM_PASSTHRU);
  379. }
  380. /* }}} */
  381. /* {{{ php_converter_do_get_encoding */
  382. static void php_converter_do_get_encoding(php_converter_object *objval, UConverter *cnv, INTERNAL_FUNCTION_PARAMETERS) {
  383. const char *name;
  384. if (zend_parse_parameters_none() == FAILURE) {
  385. RETURN_THROWS();
  386. }
  387. intl_errors_reset(&objval->error);
  388. if (!cnv) {
  389. RETURN_NULL();
  390. }
  391. name = ucnv_getName(cnv, &objval->error.code);
  392. if (U_FAILURE(objval->error.code)) {
  393. THROW_UFAILURE(objval, "ucnv_getName()", objval->error.code);
  394. RETURN_FALSE;
  395. }
  396. RETURN_STRING(name);
  397. }
  398. /* }}} */
  399. /* {{{ */
  400. PHP_METHOD(UConverter, getSourceEncoding) {
  401. php_converter_object *objval = CONV_GET(ZEND_THIS);
  402. php_converter_do_get_encoding(objval, objval->src, INTERNAL_FUNCTION_PARAM_PASSTHRU);
  403. }
  404. /* }}} */
  405. /* {{{ */
  406. PHP_METHOD(UConverter, getDestinationEncoding) {
  407. php_converter_object *objval = CONV_GET(ZEND_THIS);
  408. php_converter_do_get_encoding(objval, objval->dest, INTERNAL_FUNCTION_PARAM_PASSTHRU);
  409. }
  410. /* }}} */
  411. /* {{{ php_converter_do_get_type */
  412. static void php_converter_do_get_type(php_converter_object *objval, UConverter *cnv, INTERNAL_FUNCTION_PARAMETERS) {
  413. UConverterType t;
  414. if (zend_parse_parameters_none() == FAILURE) {
  415. RETURN_THROWS();
  416. }
  417. intl_errors_reset(&objval->error);
  418. if (!cnv) {
  419. RETURN_NULL();
  420. }
  421. t = ucnv_getType(cnv);
  422. if (U_FAILURE(objval->error.code)) {
  423. THROW_UFAILURE(objval, "ucnv_getType", objval->error.code);
  424. RETURN_FALSE;
  425. }
  426. RETURN_LONG(t);
  427. }
  428. /* }}} */
  429. /* {{{ */
  430. PHP_METHOD(UConverter, getSourceType) {
  431. php_converter_object *objval = CONV_GET(ZEND_THIS);
  432. php_converter_do_get_type(objval, objval->src, INTERNAL_FUNCTION_PARAM_PASSTHRU);
  433. }
  434. /* }}} */
  435. /* {{{ */
  436. PHP_METHOD(UConverter, getDestinationType) {
  437. php_converter_object *objval = CONV_GET(ZEND_THIS);
  438. php_converter_do_get_type(objval, objval->dest, INTERNAL_FUNCTION_PARAM_PASSTHRU);
  439. }
  440. /* }}} */
  441. /* {{{ php_converter_resolve_callback */
  442. static void php_converter_resolve_callback(zval *zobj,
  443. php_converter_object *objval,
  444. const char *callback_name,
  445. zend_fcall_info *finfo,
  446. zend_fcall_info_cache *fcache) {
  447. char *errstr = NULL;
  448. zval caller;
  449. array_init(&caller);
  450. Z_ADDREF_P(zobj);
  451. add_index_zval(&caller, 0, zobj);
  452. add_index_string(&caller, 1, callback_name);
  453. if (zend_fcall_info_init(&caller, 0, finfo, fcache, NULL, &errstr) == FAILURE) {
  454. php_converter_throw_failure(objval, U_INTERNAL_PROGRAM_ERROR, "Error setting converter callback: %s", errstr);
  455. }
  456. zend_array_destroy(Z_ARR(caller));
  457. ZVAL_UNDEF(&finfo->function_name);
  458. if (errstr) {
  459. efree(errstr);
  460. }
  461. }
  462. /* }}} */
  463. /* {{{ */
  464. PHP_METHOD(UConverter, __construct) {
  465. php_converter_object *objval = CONV_GET(ZEND_THIS);
  466. char *src = "utf-8";
  467. size_t src_len = sizeof("utf-8") - 1;
  468. char *dest = src;
  469. size_t dest_len = src_len;
  470. intl_error_reset(NULL);
  471. if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!s!", &dest, &dest_len, &src, &src_len) == FAILURE) {
  472. RETURN_THROWS();
  473. }
  474. php_converter_set_encoding(objval, &(objval->src), src, src_len );
  475. php_converter_set_encoding(objval, &(objval->dest), dest, dest_len);
  476. php_converter_resolve_callback(ZEND_THIS, objval, "toUCallback", &(objval->to_cb), &(objval->to_cache));
  477. php_converter_resolve_callback(ZEND_THIS, objval, "fromUCallback", &(objval->from_cb), &(objval->from_cache));
  478. }
  479. /* }}} */
  480. /* {{{ */
  481. PHP_METHOD(UConverter, setSubstChars) {
  482. php_converter_object *objval = CONV_GET(ZEND_THIS);
  483. char *chars;
  484. size_t chars_len;
  485. int ret = 1;
  486. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &chars, &chars_len) == FAILURE) {
  487. RETURN_THROWS();
  488. }
  489. intl_errors_reset(&objval->error);
  490. if (objval->src) {
  491. UErrorCode error = U_ZERO_ERROR;
  492. ucnv_setSubstChars(objval->src, chars, chars_len, &error);
  493. if (U_FAILURE(error)) {
  494. THROW_UFAILURE(objval, "ucnv_setSubstChars", error);
  495. ret = 0;
  496. }
  497. } else {
  498. php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Source Converter has not been initialized yet");
  499. ret = 0;
  500. }
  501. if (objval->dest) {
  502. UErrorCode error = U_ZERO_ERROR;
  503. ucnv_setSubstChars(objval->dest, chars, chars_len, &error);
  504. if (U_FAILURE(error)) {
  505. THROW_UFAILURE(objval, "ucnv_setSubstChars", error);
  506. ret = 0;
  507. }
  508. } else {
  509. php_converter_throw_failure(objval, U_INVALID_STATE_ERROR, "Destination Converter has not been initialized yet");
  510. ret = 0;
  511. }
  512. RETURN_BOOL(ret);
  513. }
  514. /* }}} */
  515. /* {{{ */
  516. PHP_METHOD(UConverter, getSubstChars) {
  517. php_converter_object *objval = CONV_GET(ZEND_THIS);
  518. char chars[127];
  519. int8_t chars_len = sizeof(chars);
  520. UErrorCode error = U_ZERO_ERROR;
  521. if (zend_parse_parameters_none() == FAILURE) {
  522. RETURN_THROWS();
  523. }
  524. intl_errors_reset(&objval->error);
  525. if (!objval->src) {
  526. RETURN_NULL();
  527. }
  528. /* src and dest get the same subst chars set,
  529. * so it doesn't really matter which one we read from
  530. */
  531. ucnv_getSubstChars(objval->src, chars, &chars_len, &error);
  532. if (U_FAILURE(error)) {
  533. THROW_UFAILURE(objval, "ucnv_getSubstChars", error);
  534. RETURN_FALSE;
  535. }
  536. RETURN_STRINGL(chars, chars_len);
  537. }
  538. /* }}} */
  539. /* {{{ php_converter_do_convert */
  540. static zend_string* php_converter_do_convert(UConverter *dest_cnv,
  541. UConverter *src_cnv, const char *src, int32_t src_len,
  542. php_converter_object *objval
  543. ) {
  544. UErrorCode error = U_ZERO_ERROR;
  545. int32_t temp_len, ret_len;
  546. zend_string *ret;
  547. UChar *temp;
  548. if (!src_cnv || !dest_cnv) {
  549. php_converter_throw_failure(objval, U_INVALID_STATE_ERROR,
  550. "Internal converters not initialized");
  551. return NULL;
  552. }
  553. /* Get necessary buffer size first */
  554. temp_len = 1 + ucnv_toUChars(src_cnv, NULL, 0, src, src_len, &error);
  555. if (U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR) {
  556. THROW_UFAILURE(objval, "ucnv_toUChars", error);
  557. return NULL;
  558. }
  559. temp = safe_emalloc(sizeof(UChar), temp_len, sizeof(UChar));
  560. /* Convert to intermediate UChar* array */
  561. error = U_ZERO_ERROR;
  562. temp_len = ucnv_toUChars(src_cnv, temp, temp_len, src, src_len, &error);
  563. if (U_FAILURE(error)) {
  564. THROW_UFAILURE(objval, "ucnv_toUChars", error);
  565. efree(temp);
  566. return NULL;
  567. }
  568. temp[temp_len] = 0;
  569. /* Get necessary output buffer size */
  570. ret_len = ucnv_fromUChars(dest_cnv, NULL, 0, temp, temp_len, &error);
  571. if (U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR) {
  572. THROW_UFAILURE(objval, "ucnv_fromUChars", error);
  573. efree(temp);
  574. return NULL;
  575. }
  576. ret = zend_string_alloc(ret_len, 0);
  577. /* Convert to final encoding */
  578. error = U_ZERO_ERROR;
  579. ZSTR_LEN(ret) = ucnv_fromUChars(dest_cnv, ZSTR_VAL(ret), ret_len+1, temp, temp_len, &error);
  580. efree(temp);
  581. if (U_FAILURE(error)) {
  582. THROW_UFAILURE(objval, "ucnv_fromUChars", error);
  583. zend_string_efree(ret);
  584. return NULL;
  585. }
  586. return ret;
  587. }
  588. /* }}} */
  589. /* {{{ */
  590. #define UCNV_REASON_CASE(v) case (UCNV_ ## v) : RETURN_STRINGL( "REASON_" #v , sizeof( "REASON_" #v ) - 1);
  591. PHP_METHOD(UConverter, reasonText) {
  592. zend_long reason;
  593. if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &reason) == FAILURE) {
  594. RETURN_THROWS();
  595. }
  596. intl_error_reset(NULL);
  597. switch (reason) {
  598. UCNV_REASON_CASE(UNASSIGNED)
  599. UCNV_REASON_CASE(ILLEGAL)
  600. UCNV_REASON_CASE(IRREGULAR)
  601. UCNV_REASON_CASE(RESET)
  602. UCNV_REASON_CASE(CLOSE)
  603. UCNV_REASON_CASE(CLONE)
  604. default:
  605. zend_argument_value_error(1, "must be a UConverter::REASON_* constant");
  606. RETURN_THROWS();
  607. }
  608. }
  609. /* }}} */
  610. /* {{{ */
  611. PHP_METHOD(UConverter, convert) {
  612. php_converter_object *objval = CONV_GET(ZEND_THIS);
  613. char *str;
  614. size_t str_len;
  615. zend_string *ret;
  616. bool reverse = 0;
  617. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|b",
  618. &str, &str_len, &reverse) == FAILURE) {
  619. RETURN_THROWS();
  620. }
  621. intl_errors_reset(&objval->error);
  622. ret = php_converter_do_convert(reverse ? objval->src : objval->dest,
  623. reverse ? objval->dest : objval->src,
  624. str, str_len,
  625. objval);
  626. if (ret) {
  627. RETURN_NEW_STR(ret);
  628. } else {
  629. RETURN_FALSE;
  630. }
  631. }
  632. /* }}} */
  633. /* {{{ */
  634. PHP_METHOD(UConverter, transcode) {
  635. char *str, *src, *dest;
  636. size_t str_len, src_len, dest_len;
  637. zval *options = NULL;
  638. UConverter *src_cnv = NULL, *dest_cnv = NULL;
  639. if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|a!",
  640. &str, &str_len, &dest, &dest_len, &src, &src_len, &options) == FAILURE) {
  641. RETURN_THROWS();
  642. }
  643. intl_error_reset(NULL);
  644. if (php_converter_set_encoding(NULL, &src_cnv, src, src_len) &&
  645. php_converter_set_encoding(NULL, &dest_cnv, dest, dest_len)) {
  646. zend_string *ret;
  647. UErrorCode error = U_ZERO_ERROR;
  648. if (options && zend_hash_num_elements(Z_ARRVAL_P(options))) {
  649. zval *tmpzval;
  650. if (U_SUCCESS(error) &&
  651. (tmpzval = zend_hash_str_find(Z_ARRVAL_P(options), "from_subst", sizeof("from_subst") - 1)) != NULL &&
  652. Z_TYPE_P(tmpzval) == IS_STRING) {
  653. error = U_ZERO_ERROR;
  654. ucnv_setSubstChars(src_cnv, Z_STRVAL_P(tmpzval), Z_STRLEN_P(tmpzval) & 0x7F, &error);
  655. }
  656. if (U_SUCCESS(error) &&
  657. (tmpzval = zend_hash_str_find(Z_ARRVAL_P(options), "to_subst", sizeof("to_subst") - 1)) != NULL &&
  658. Z_TYPE_P(tmpzval) == IS_STRING) {
  659. error = U_ZERO_ERROR;
  660. ucnv_setSubstChars(dest_cnv, Z_STRVAL_P(tmpzval), Z_STRLEN_P(tmpzval) & 0x7F, &error);
  661. }
  662. }
  663. if (U_SUCCESS(error) &&
  664. (ret = php_converter_do_convert(dest_cnv, src_cnv, str, str_len, NULL)) != NULL) {
  665. RETVAL_NEW_STR(ret);
  666. }
  667. if (U_FAILURE(error)) {
  668. THROW_UFAILURE(NULL, "transcode", error);
  669. RETVAL_FALSE;
  670. }
  671. } else {
  672. RETVAL_FALSE;
  673. }
  674. if (src_cnv) {
  675. ucnv_close(src_cnv);
  676. }
  677. if (dest_cnv) {
  678. ucnv_close(dest_cnv);
  679. }
  680. }
  681. /* }}} */
  682. /* {{{ */
  683. PHP_METHOD(UConverter, getErrorCode) {
  684. php_converter_object *objval = CONV_GET(ZEND_THIS);
  685. if (zend_parse_parameters_none() == FAILURE) {
  686. RETURN_THROWS();
  687. }
  688. RETURN_LONG(intl_error_get_code(&(objval->error)));
  689. }
  690. /* }}} */
  691. /* {{{ */
  692. PHP_METHOD(UConverter, getErrorMessage) {
  693. php_converter_object *objval = CONV_GET(ZEND_THIS);
  694. if (zend_parse_parameters_none() == FAILURE) {
  695. RETURN_THROWS();
  696. }
  697. zend_string *message = intl_error_get_message(&(objval->error));
  698. if (message) {
  699. RETURN_STR(message);
  700. } else {
  701. RETURN_NULL();
  702. }
  703. }
  704. /* }}} */
  705. /* {{{ */
  706. PHP_METHOD(UConverter, getAvailable) {
  707. int32_t i,
  708. count = ucnv_countAvailable();
  709. if (zend_parse_parameters_none() == FAILURE) {
  710. RETURN_THROWS();
  711. }
  712. intl_error_reset(NULL);
  713. array_init(return_value);
  714. for(i = 0; i < count; i++) {
  715. const char *name = ucnv_getAvailableName(i);
  716. add_next_index_string(return_value, name);
  717. }
  718. }
  719. /* }}} */
  720. /* {{{ */
  721. PHP_METHOD(UConverter, getAliases) {
  722. char *name;
  723. size_t name_len;
  724. UErrorCode error = U_ZERO_ERROR;
  725. uint16_t i, count;
  726. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
  727. RETURN_THROWS();
  728. }
  729. intl_error_reset(NULL);
  730. count = ucnv_countAliases(name, &error);
  731. if (U_FAILURE(error)) {
  732. THROW_UFAILURE(NULL, "ucnv_countAliases", error);
  733. RETURN_FALSE;
  734. }
  735. array_init(return_value);
  736. for(i = 0; i < count; i++) {
  737. const char *alias;
  738. error = U_ZERO_ERROR;
  739. alias = ucnv_getAlias(name, i, &error);
  740. if (U_FAILURE(error)) {
  741. THROW_UFAILURE(NULL, "ucnv_getAlias", error);
  742. zend_array_destroy(Z_ARR_P(return_value));
  743. RETURN_NULL();
  744. }
  745. add_next_index_string(return_value, alias);
  746. }
  747. }
  748. /* }}} */
  749. /* {{{ */
  750. PHP_METHOD(UConverter, getStandards) {
  751. uint16_t i, count;
  752. if (zend_parse_parameters_none() == FAILURE) {
  753. RETURN_THROWS();
  754. }
  755. intl_error_reset(NULL);
  756. array_init(return_value);
  757. count = ucnv_countStandards();
  758. for(i = 0; i < count; i++) {
  759. UErrorCode error = U_ZERO_ERROR;
  760. const char *name = ucnv_getStandard(i, &error);
  761. if (U_FAILURE(error)) {
  762. THROW_UFAILURE(NULL, "ucnv_getStandard", error);
  763. zend_array_destroy(Z_ARR_P(return_value));
  764. RETURN_NULL();
  765. }
  766. add_next_index_string(return_value, name);
  767. }
  768. }
  769. /* }}} */
  770. /* {{{ Converter create/clone/destroy */
  771. static void php_converter_free_object(zend_object *obj) {
  772. php_converter_object *objval = php_converter_fetch_object(obj);
  773. if (objval->src) {
  774. ucnv_close(objval->src);
  775. }
  776. if (objval->dest) {
  777. ucnv_close(objval->dest);
  778. }
  779. intl_error_reset(&objval->error);
  780. zend_object_std_dtor(obj);
  781. }
  782. static zend_object *php_converter_object_ctor(zend_class_entry *ce, php_converter_object **pobjval) {
  783. php_converter_object *objval;
  784. objval = zend_object_alloc(sizeof(php_converter_object), ce);
  785. zend_object_std_init(&objval->obj, ce);
  786. object_properties_init(&objval->obj, ce);
  787. intl_error_init(&(objval->error));
  788. objval->obj.handlers = &php_converter_object_handlers;
  789. *pobjval = objval;
  790. return &objval->obj;
  791. }
  792. static zend_object *php_converter_create_object(zend_class_entry *ce) {
  793. php_converter_object *objval = NULL;
  794. zend_object *retval = php_converter_object_ctor(ce, &objval);
  795. object_properties_init(&(objval->obj), ce);
  796. return retval;
  797. }
  798. static zend_object *php_converter_clone_object(zend_object *object) {
  799. php_converter_object *objval, *oldobj = php_converter_fetch_object(object);
  800. zend_object *retval = php_converter_object_ctor(object->ce, &objval);
  801. UErrorCode error = U_ZERO_ERROR;
  802. intl_errors_reset(&oldobj->error);
  803. #if U_ICU_VERSION_MAJOR_NUM > 70
  804. objval->src = ucnv_clone(oldobj->src, &error);
  805. #else
  806. objval->src = ucnv_safeClone(oldobj->src, NULL, NULL, &error);
  807. #endif
  808. if (U_SUCCESS(error)) {
  809. error = U_ZERO_ERROR;
  810. #if U_ICU_VERSION_MAJOR_NUM > 70
  811. objval->dest = ucnv_clone(oldobj->dest, &error);
  812. #else
  813. objval->dest = ucnv_safeClone(oldobj->dest, NULL, NULL, &error);
  814. #endif
  815. }
  816. if (U_FAILURE(error)) {
  817. zend_string *err_msg;
  818. THROW_UFAILURE(oldobj, "ucnv_safeClone", error);
  819. err_msg = intl_error_get_message(&oldobj->error);
  820. zend_throw_exception(NULL, ZSTR_VAL(err_msg), 0);
  821. zend_string_release_ex(err_msg, 0);
  822. return retval;
  823. }
  824. /* Update contexts for converter error handlers */
  825. php_converter_set_callbacks(objval, objval->src );
  826. php_converter_set_callbacks(objval, objval->dest);
  827. zend_objects_clone_members(&(objval->obj), &(oldobj->obj));
  828. /* Newly cloned object deliberately does not inherit error state from original object */
  829. return retval;
  830. }
  831. /* }}} */
  /* Declare a long class constant named "REASON_<v>" on php_converter_ce
   * whose value is UCNV_<v>. */
  #define CONV_REASON_CONST(v) \
      zend_declare_class_constant_long(php_converter_ce, "REASON_" #v, sizeof("REASON_" #v) - 1, UCNV_ ## v)

  /* Declare a long class constant named "<v>" on php_converter_ce whose
   * value is UCNV_<v>. */
  #define CONV_TYPE_CONST(v) \
      zend_declare_class_constant_long(php_converter_ce, #v , sizeof(#v) - 1, UCNV_ ## v)
  834. /* {{{ php_converter_minit */
  835. int php_converter_minit(INIT_FUNC_ARGS) {
  836. php_converter_ce = register_class_UConverter();
  837. php_converter_ce->create_object = php_converter_create_object;
  838. memcpy(&php_converter_object_handlers, &std_object_handlers, sizeof(zend_object_handlers));
  839. php_converter_object_handlers.offset = XtOffsetOf(php_converter_object, obj);
  840. php_converter_object_handlers.clone_obj = php_converter_clone_object;
  841. php_converter_object_handlers.free_obj = php_converter_free_object;
  842. /* enum UConverterCallbackReason */
  843. CONV_REASON_CONST(UNASSIGNED);
  844. CONV_REASON_CONST(ILLEGAL);
  845. CONV_REASON_CONST(IRREGULAR);
  846. CONV_REASON_CONST(RESET);
  847. CONV_REASON_CONST(CLOSE);
  848. CONV_REASON_CONST(CLONE);
  849. /* enum UConverterType */
  850. CONV_TYPE_CONST(UNSUPPORTED_CONVERTER);
  851. CONV_TYPE_CONST(SBCS);
  852. CONV_TYPE_CONST(DBCS);
  853. CONV_TYPE_CONST(MBCS);
  854. CONV_TYPE_CONST(LATIN_1);
  855. CONV_TYPE_CONST(UTF8);
  856. CONV_TYPE_CONST(UTF16_BigEndian);
  857. CONV_TYPE_CONST(UTF16_LittleEndian);
  858. CONV_TYPE_CONST(UTF32_BigEndian);
  859. CONV_TYPE_CONST(UTF32_LittleEndian);
  860. CONV_TYPE_CONST(EBCDIC_STATEFUL);
  861. CONV_TYPE_CONST(ISO_2022);
  862. CONV_TYPE_CONST(LMBCS_1);
  863. CONV_TYPE_CONST(LMBCS_2);
  864. CONV_TYPE_CONST(LMBCS_3);
  865. CONV_TYPE_CONST(LMBCS_4);
  866. CONV_TYPE_CONST(LMBCS_5);
  867. CONV_TYPE_CONST(LMBCS_6);
  868. CONV_TYPE_CONST(LMBCS_8);
  869. CONV_TYPE_CONST(LMBCS_11);
  870. CONV_TYPE_CONST(LMBCS_16);
  871. CONV_TYPE_CONST(LMBCS_17);
  872. CONV_TYPE_CONST(LMBCS_18);
  873. CONV_TYPE_CONST(LMBCS_19);
  874. CONV_TYPE_CONST(LMBCS_LAST);
  875. CONV_TYPE_CONST(HZ);
  876. CONV_TYPE_CONST(SCSU);
  877. CONV_TYPE_CONST(ISCII);
  878. CONV_TYPE_CONST(US_ASCII);
  879. CONV_TYPE_CONST(UTF7);
  880. CONV_TYPE_CONST(BOCU1);
  881. CONV_TYPE_CONST(UTF16);
  882. CONV_TYPE_CONST(UTF32);
  883. CONV_TYPE_CONST(CESU8);
  884. CONV_TYPE_CONST(IMAP_MAILBOX);
  885. return SUCCESS;
  886. }
  887. /* }}} */