uchar.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781
  1. #include "uchar.h"
  2. #include "intl_data.h"
  3. #include "intl_convert.h"
  4. #include <unicode/uchar.h>
  5. #if U_ICU_VERSION_MAJOR_NUM >= 49
  6. #include <unicode/utf8.h>
  7. #endif
  8. #define IC_METHOD(mname) PHP_METHOD(IntlChar, mname)
  9. static inline int convert_cp(UChar32* pcp, zval *zcp) {
  10. zend_long cp = -1;
  11. if (Z_TYPE_P(zcp) == IS_LONG) {
  12. cp = Z_LVAL_P(zcp);
  13. } else if (Z_TYPE_P(zcp) == IS_STRING) {
  14. int32_t i = 0;
  15. size_t zcp_len = Z_STRLEN_P(zcp);
  16. if (ZEND_SIZE_T_INT_OVFL(zcp_len)) {
  17. intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
  18. intl_error_set_custom_msg(NULL, "Input string is too long.", 0);
  19. return FAILURE;
  20. }
  21. U8_NEXT(Z_STRVAL_P(zcp), i, zcp_len, cp);
  22. if ((size_t)i != zcp_len) {
  23. intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
  24. intl_error_set_custom_msg(NULL, "Passing a UTF-8 character for codepoint requires a string which is exactly one UTF-8 codepoint long.", 0);
  25. return FAILURE;
  26. }
  27. } else {
  28. intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
  29. intl_error_set_custom_msg(NULL, "Invalid parameter for unicode point. Must be either integer or UTF-8 sequence.", 0);
  30. return FAILURE;
  31. }
  32. if ((cp < UCHAR_MIN_VALUE) || (cp > UCHAR_MAX_VALUE)) {
  33. intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
  34. intl_error_set_custom_msg(NULL, "Codepoint out of range", 0);
  35. return FAILURE;
  36. }
  37. *pcp = (UChar32)cp;
  38. return SUCCESS;
  39. }
  40. /* {{{ proto string IntlChar::chr(int|string $codepoint)
  41. * Converts a numeric codepoint to UTF-8
  42. * Acts as an identify function when given a valid UTF-8 encoded codepoint
  43. */
  44. ZEND_BEGIN_ARG_INFO_EX(chr_arginfo, 0, ZEND_RETURN_VALUE, 1)
  45. ZEND_ARG_INFO(0, codepoint)
  46. ZEND_END_ARG_INFO();
  47. IC_METHOD(chr) {
  48. UChar32 cp;
  49. zval *zcp;
  50. char buffer[5];
  51. int buffer_len = 0;
  52. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
  53. (convert_cp(&cp, zcp) == FAILURE)) {
  54. return;
  55. }
  56. /* We can use unsafe because we know the codepoint is in valid range
  57. * and that 4 bytes is enough for any unicode point
  58. */
  59. U8_APPEND_UNSAFE(buffer, buffer_len, cp);
  60. buffer[buffer_len] = 0;
  61. RETURN_STRINGL(buffer, buffer_len);
  62. }
  63. /* }}} */
  64. /* {{{ proto int IntlChar::ord(int|string $character)
  65. * Converts a UTf-8 encoded codepoint to its integer U32 value
  66. * Acts as an identity function when passed a valid integer codepoint
  67. */
  68. ZEND_BEGIN_ARG_INFO_EX(ord_arginfo, 0, ZEND_RETURN_VALUE, 1)
  69. ZEND_ARG_INFO(0, character)
  70. ZEND_END_ARG_INFO();
  71. IC_METHOD(ord) {
  72. UChar32 cp;
  73. zval *zcp;
  74. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
  75. (convert_cp(&cp, zcp) == FAILURE)) {
  76. return;
  77. }
  78. RETURN_LONG(cp);
  79. }
  80. /* }}} */
  81. /* {{{ proto bool IntlChar::hasBinaryProperty(int|string $codepoint, int $property) */
  82. ZEND_BEGIN_ARG_INFO_EX(hasBinaryProperty_arginfo, 0, ZEND_RETURN_VALUE, 2)
  83. ZEND_ARG_INFO(0, codepoint)
  84. ZEND_ARG_INFO(0, property)
  85. ZEND_END_ARG_INFO();
  86. IC_METHOD(hasBinaryProperty) {
  87. UChar32 cp;
  88. zend_long prop;
  89. zval *zcp;
  90. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zl", &zcp, &prop) == FAILURE) ||
  91. (convert_cp(&cp, zcp) == FAILURE)) {
  92. return;
  93. }
  94. RETURN_BOOL(u_hasBinaryProperty(cp, (UProperty)prop));
  95. }
  96. /* }}} */
  97. /* {{{ proto int IntlChar::getIntPropertyValue(int|string $codepoint, int $property) */
  98. ZEND_BEGIN_ARG_INFO_EX(getIntPropertyValue_arginfo, 0, ZEND_RETURN_VALUE, 2)
  99. ZEND_ARG_INFO(0, codepoint)
  100. ZEND_ARG_INFO(0, property)
  101. ZEND_END_ARG_INFO();
  102. IC_METHOD(getIntPropertyValue) {
  103. UChar32 cp;
  104. zend_long prop;
  105. zval *zcp;
  106. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zl", &zcp, &prop) == FAILURE) ||
  107. (convert_cp(&cp, zcp) == FAILURE)) {
  108. return;
  109. }
  110. RETURN_LONG(u_getIntPropertyValue(cp, (UProperty)prop));
  111. }
  112. /* }}} */
  113. /* {{{ proto int IntlChar::getIntPropertyMinValue(int $property) */
  114. ZEND_BEGIN_ARG_INFO_EX(getIntPropertyMinValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
  115. ZEND_ARG_INFO(0, property)
  116. ZEND_END_ARG_INFO();
  117. IC_METHOD(getIntPropertyMinValue) {
  118. zend_long prop;
  119. if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &prop) == FAILURE) {
  120. return;
  121. }
  122. RETURN_LONG(u_getIntPropertyMinValue((UProperty)prop));
  123. }
  124. /* }}} */
  125. /* {{{ proto int IntlChar::getIntPropertyMaxValue(int $property) */
  126. ZEND_BEGIN_ARG_INFO_EX(getIntPropertyMaxValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
  127. ZEND_ARG_INFO(0, property)
  128. ZEND_END_ARG_INFO();
  129. IC_METHOD(getIntPropertyMaxValue) {
  130. zend_long prop;
  131. if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &prop) == FAILURE) {
  132. return;
  133. }
  134. RETURN_LONG(u_getIntPropertyMaxValue((UProperty)prop));
  135. }
  136. /* }}} */
  137. /* {{{ proto float IntlChar::getNumericValue(int|string $codepoint) */
  138. ZEND_BEGIN_ARG_INFO_EX(getNumericValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
  139. ZEND_ARG_INFO(0, codepoint)
  140. ZEND_END_ARG_INFO();
  141. IC_METHOD(getNumericValue) {
  142. UChar32 cp;
  143. zval *zcp;
  144. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
  145. (convert_cp(&cp, zcp) == FAILURE)) {
  146. return;
  147. }
  148. RETURN_DOUBLE(u_getNumericValue(cp));
  149. }
  150. /* }}} */
  151. /* {{{ proto void IntlChar::enumCharTypes(callable $callback) */
  152. ZEND_BEGIN_ARG_INFO_EX(enumCharTypes_arginfo, 0, ZEND_RETURN_VALUE, 0)
  153. ZEND_ARG_INFO(0, callback)
  154. ZEND_END_ARG_INFO();
  155. typedef struct _enumCharType_data {
  156. zend_fcall_info fci;
  157. zend_fcall_info_cache fci_cache;
  158. } enumCharType_data;
  159. static UBool enumCharType_callback(enumCharType_data *context,
  160. UChar32 start, UChar32 limit,
  161. UCharCategory type) {
  162. zval retval;
  163. zval args[3];
  164. ZVAL_NULL(&retval);
  165. /* Note that $start is INclusive, while $limit is EXclusive
  166. * Therefore (0, 32, 15) means CPs 0..31 are of type 15
  167. */
  168. ZVAL_LONG(&args[0], start);
  169. ZVAL_LONG(&args[1], limit);
  170. ZVAL_LONG(&args[2], type);
  171. context->fci.retval = &retval;
  172. context->fci.param_count = 3;
  173. context->fci.params = args;
  174. if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
  175. intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
  176. intl_errors_set_custom_msg(NULL, "enumCharTypes callback failed", 0);
  177. zval_ptr_dtor(&retval);
  178. return 0;
  179. }
  180. zval_ptr_dtor(&retval);
  181. return 1;
  182. }
  183. IC_METHOD(enumCharTypes) {
  184. enumCharType_data context;
  185. if (zend_parse_parameters(ZEND_NUM_ARGS(), "f", &context.fci, &context.fci_cache) == FAILURE) {
  186. return;
  187. }
  188. u_enumCharTypes((UCharEnumTypeRange*)enumCharType_callback, &context);
  189. }
  190. /* }}} */
  191. /* {{{ proto int IntlChar::getBlockCode(int|string $codepoint) */
  192. ZEND_BEGIN_ARG_INFO_EX(getBlockCode_arginfo, 0, ZEND_RETURN_VALUE, 1)
  193. ZEND_ARG_INFO(0, codepoint)
  194. ZEND_END_ARG_INFO()
  195. IC_METHOD(getBlockCode) {
  196. UChar32 cp;
  197. zval *zcp;
  198. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
  199. (convert_cp(&cp, zcp) == FAILURE)) {
  200. return;
  201. }
  202. RETURN_LONG(ublock_getCode(cp));
  203. }
  204. /* }}} */
  205. /* {{{ proto string IntlChar::charName(int|string $codepoint, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
  206. ZEND_BEGIN_ARG_INFO_EX(charName_arginfo, 0, ZEND_RETURN_VALUE, 1)
  207. ZEND_ARG_INFO(0, codepoint)
  208. ZEND_ARG_INFO(0, nameChoice)
  209. ZEND_END_ARG_INFO()
  210. IC_METHOD(charName) {
  211. UChar32 cp;
  212. zval *zcp;
  213. UErrorCode error = U_ZERO_ERROR;
  214. zend_long nameChoice = U_UNICODE_CHAR_NAME;
  215. zend_string *buffer = NULL;
  216. int32_t buffer_len;
  217. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &nameChoice) == FAILURE) ||
  218. (convert_cp(&cp, zcp) == FAILURE)) {
  219. RETURN_NULL();
  220. }
  221. buffer_len = u_charName(cp, (UCharNameChoice)nameChoice, NULL, 0, &error);
  222. buffer = zend_string_alloc(buffer_len, 0);
  223. error = U_ZERO_ERROR;
  224. buffer_len = u_charName(cp, (UCharNameChoice)nameChoice, ZSTR_VAL(buffer), ZSTR_LEN(buffer) + 1, &error);
  225. if (U_FAILURE(error)) {
  226. zend_string_efree(buffer);
  227. INTL_CHECK_STATUS_OR_NULL(error, "Failure getting character name");
  228. }
  229. RETURN_NEW_STR(buffer);
  230. }
  231. /* }}} */
  232. /* {{{ proto int IntlChar::charFromName(string $characterName, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
  233. ZEND_BEGIN_ARG_INFO_EX(charFromName_arginfo, 0, ZEND_RETURN_VALUE, 1)
  234. ZEND_ARG_INFO(0, characterName)
  235. ZEND_ARG_INFO(0, nameChoice)
  236. ZEND_END_ARG_INFO()
  237. IC_METHOD(charFromName) {
  238. char *name;
  239. size_t name_len;
  240. zend_long nameChoice = U_UNICODE_CHAR_NAME;
  241. UChar32 ret;
  242. UErrorCode error = U_ZERO_ERROR;
  243. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &name, &name_len, &nameChoice) == FAILURE) {
  244. RETURN_NULL();
  245. }
  246. ret = u_charFromName((UCharNameChoice)nameChoice, name, &error);
  247. INTL_CHECK_STATUS_OR_NULL(error, NULL);
  248. RETURN_LONG(ret);
  249. }
  250. /* }}} */
  251. /* {{{ void void IntlChar::enumCharNames(int|string $start, int|string $limit, callable $callback, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
  252. ZEND_BEGIN_ARG_INFO_EX(enumCharNames_arginfo, 0, ZEND_RETURN_VALUE, 3)
  253. ZEND_ARG_INFO(0, start)
  254. ZEND_ARG_INFO(0, limit)
  255. ZEND_ARG_INFO(0, callback)
  256. ZEND_ARG_INFO(0, nameChoice)
  257. ZEND_END_ARG_INFO();
  258. typedef struct _enumCharNames_data {
  259. zend_fcall_info fci;
  260. zend_fcall_info_cache fci_cache;
  261. } enumCharNames_data;
  262. static UBool enumCharNames_callback(enumCharNames_data *context,
  263. UChar32 code, UCharNameChoice nameChoice,
  264. const char *name, int32_t length) {
  265. zval retval;
  266. zval args[3];
  267. ZVAL_NULL(&retval);
  268. ZVAL_LONG(&args[0], code);
  269. ZVAL_LONG(&args[1], nameChoice);
  270. ZVAL_STRINGL(&args[2], name, length);
  271. context->fci.retval = &retval;
  272. context->fci.param_count = 3;
  273. context->fci.params = args;
  274. if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
  275. intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
  276. intl_error_set_custom_msg(NULL, "enumCharNames callback failed", 0);
  277. zval_ptr_dtor(&retval);
  278. zval_ptr_dtor_str(&args[2]);
  279. return 0;
  280. }
  281. zval_ptr_dtor(&retval);
  282. zval_ptr_dtor_str(&args[2]);
  283. return 1;
  284. }
  285. IC_METHOD(enumCharNames) {
  286. UChar32 start, limit;
  287. zval *zstart, *zlimit;
  288. enumCharNames_data context;
  289. zend_long nameChoice = U_UNICODE_CHAR_NAME;
  290. UErrorCode error = U_ZERO_ERROR;
  291. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zzf|l", &zstart, &zlimit, &context.fci, &context.fci_cache, &nameChoice) == FAILURE) ||
  292. (convert_cp(&start, zstart) == FAILURE) ||
  293. (convert_cp(&limit, zlimit) == FAILURE)) {
  294. return;
  295. }
  296. u_enumCharNames(start, limit, (UEnumCharNamesFn*)enumCharNames_callback, &context, nameChoice, &error);
  297. INTL_CHECK_STATUS(error, NULL);
  298. }
  299. /* }}} */
  300. /* {{{ proto string IntlChar::getPropertyName(int $property, int $nameChoice = IntlChar::LONG_PROPERTY_NAME) */
  301. ZEND_BEGIN_ARG_INFO_EX(getPropertyName_arginfo, 0, ZEND_RETURN_VALUE, 1)
  302. ZEND_ARG_INFO(0, property)
  303. ZEND_ARG_INFO(0, nameChoice)
  304. ZEND_END_ARG_INFO();
  305. IC_METHOD(getPropertyName) {
  306. zend_long property;
  307. zend_long nameChoice = U_LONG_PROPERTY_NAME;
  308. const char *ret;
  309. if (zend_parse_parameters(ZEND_NUM_ARGS(), "l|l", &property, &nameChoice) == FAILURE) {
  310. return;
  311. }
  312. ret = u_getPropertyName((UProperty)property, (UPropertyNameChoice)nameChoice);
  313. if (ret) {
  314. RETURN_STRING(ret);
  315. } else {
  316. intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
  317. intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
  318. RETURN_FALSE;
  319. }
  320. }
  321. /* }}} */
  322. /* {{{ proto int IntlChar::getPropertyEnum(string $alias) */
  323. ZEND_BEGIN_ARG_INFO_EX(getPropertyEnum_arginfo, 0, ZEND_RETURN_VALUE, 1)
  324. ZEND_ARG_INFO(0, alias)
  325. ZEND_END_ARG_INFO();
  326. IC_METHOD(getPropertyEnum) {
  327. char *alias;
  328. size_t alias_len;
  329. if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &alias, &alias_len) == FAILURE) {
  330. return;
  331. }
  332. RETURN_LONG(u_getPropertyEnum(alias));
  333. }
  334. /* }}} */
  335. /* {{{ proto string IntlChar::getPropertyValueName(int $property, int $value[, int $nameChoice = IntlChar::LONG_PROPERTY_NAME) */
  336. ZEND_BEGIN_ARG_INFO_EX(getPropertyValueName_arginfo, 0, ZEND_RETURN_VALUE, 2)
  337. ZEND_ARG_INFO(0, property)
  338. ZEND_ARG_INFO(0, value)
  339. ZEND_ARG_INFO(0, nameChoice)
  340. ZEND_END_ARG_INFO();
  341. IC_METHOD(getPropertyValueName) {
  342. zend_long property, value, nameChoice = U_LONG_PROPERTY_NAME;
  343. const char *ret;
  344. if (zend_parse_parameters(ZEND_NUM_ARGS(), "ll|l", &property, &value, &nameChoice) == FAILURE) {
  345. return;
  346. }
  347. ret = u_getPropertyValueName((UProperty)property, value, (UPropertyNameChoice)nameChoice);
  348. if (ret) {
  349. RETURN_STRING(ret);
  350. } else {
  351. intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
  352. intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
  353. RETURN_FALSE;
  354. }
  355. }
  356. /* }}} */
  357. /* {{{ proto int IntlChar::getPropertyValueEnum(int $property, string $name) */
  358. ZEND_BEGIN_ARG_INFO_EX(getPropertyValueEnum_arginfo, 0, ZEND_RETURN_VALUE, 2)
  359. ZEND_ARG_INFO(0, property)
  360. ZEND_ARG_INFO(0, name)
  361. ZEND_END_ARG_INFO();
  362. IC_METHOD(getPropertyValueEnum) {
  363. zend_long property;
  364. char *name;
  365. size_t name_len;
  366. if (zend_parse_parameters(ZEND_NUM_ARGS(), "ls", &property, &name, &name_len) == FAILURE) {
  367. return;
  368. }
  369. RETURN_LONG(u_getPropertyValueEnum((UProperty)property, name));
  370. }
  371. /* }}} */
  372. /* {{{ proto int|string IntlChar::foldCase(int|string $codepoint, int $options = IntlChar::FOLD_CASE_DEFAULT) */
  373. ZEND_BEGIN_ARG_INFO_EX(foldCase_arginfo, 0, ZEND_RETURN_VALUE, 1)
  374. ZEND_ARG_INFO(0, codepoint)
  375. ZEND_ARG_INFO(0, options)
  376. ZEND_END_ARG_INFO();
  377. IC_METHOD(foldCase) {
  378. UChar32 cp, ret;
  379. zval *zcp;
  380. zend_long options = U_FOLD_CASE_DEFAULT;
  381. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &options) == FAILURE) ||
  382. (convert_cp(&cp, zcp) == FAILURE)) {
  383. return;
  384. }
  385. ret = u_foldCase(cp, options);
  386. if (Z_TYPE_P(zcp) == IS_STRING) {
  387. char buffer[5];
  388. int buffer_len = 0;
  389. U8_APPEND_UNSAFE(buffer, buffer_len, ret);
  390. buffer[buffer_len] = 0;
  391. RETURN_STRINGL(buffer, buffer_len);
  392. } else {
  393. RETURN_LONG(ret);
  394. }
  395. }
  396. /* }}} */
  397. /* {{{ proto int IntlChar::digit(int|string $codepoint[, int $radix = 10]) */
  398. ZEND_BEGIN_ARG_INFO_EX(digit_arginfo, 0, ZEND_RETURN_VALUE, 1)
  399. ZEND_ARG_INFO(0, codepoint)
  400. ZEND_ARG_INFO(0, radix)
  401. ZEND_END_ARG_INFO();
  402. IC_METHOD(digit) {
  403. UChar32 cp;
  404. zval *zcp;
  405. zend_long radix = 10;
  406. int ret;
  407. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &radix) == FAILURE) ||
  408. (convert_cp(&cp, zcp) == FAILURE)) {
  409. return;
  410. }
  411. ret = u_digit(cp, radix);
  412. if (ret < 0) {
  413. intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
  414. intl_error_set_custom_msg(NULL, "Invalid digit", 0);
  415. RETURN_FALSE;
  416. }
  417. RETURN_LONG(ret);
  418. }
  419. /* }}} */
  420. /* {{{ proto int IntlChar::forDigit(int $digit[, int $radix = 10]) */
  421. ZEND_BEGIN_ARG_INFO_EX(forDigit_arginfo, 0, ZEND_RETURN_VALUE, 1)
  422. ZEND_ARG_INFO(0, digit)
  423. ZEND_ARG_INFO(0, radix)
  424. ZEND_END_ARG_INFO();
  425. IC_METHOD(forDigit) {
  426. zend_long digit, radix = 10;
  427. if (zend_parse_parameters(ZEND_NUM_ARGS(), "l|l", &digit, &radix) == FAILURE) {
  428. return;
  429. }
  430. RETURN_LONG(u_forDigit(digit, radix));
  431. }
  432. /* }}} */
  433. /* {{{ proto array IntlChar::charAge(int|string $codepoint) */
  434. ZEND_BEGIN_ARG_INFO_EX(charAge_arginfo, 0, ZEND_RETURN_VALUE, 1)
  435. ZEND_ARG_INFO(0, codepoint)
  436. ZEND_END_ARG_INFO();
  437. IC_METHOD(charAge) {
  438. UChar32 cp;
  439. zval *zcp;
  440. UVersionInfo version;
  441. int i;
  442. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
  443. (convert_cp(&cp, zcp) == FAILURE)) {
  444. return;
  445. }
  446. u_charAge(cp, version);
  447. array_init(return_value);
  448. for(i = 0; i < U_MAX_VERSION_LENGTH; ++i) {
  449. add_next_index_long(return_value, version[i]);
  450. }
  451. }
  452. /* }}} */
  453. /* {{{ proto array IntlChar::getUnicodeVersion() */
  454. ZEND_BEGIN_ARG_INFO_EX(getUnicodeVersion_arginfo, 0, ZEND_RETURN_VALUE, 0)
  455. ZEND_END_ARG_INFO();
  456. IC_METHOD(getUnicodeVersion) {
  457. UVersionInfo version;
  458. int i;
  459. u_getUnicodeVersion(version);
  460. array_init(return_value);
  461. for(i = 0; i < U_MAX_VERSION_LENGTH; ++i) {
  462. add_next_index_long(return_value, version[i]);
  463. }
  464. }
  465. /* }}} */
  466. /* {{{ proto string IntlChar::getFC_NFKC_Closure(int|string $codepoint) */
  467. ZEND_BEGIN_ARG_INFO_EX(getFC_NFKC_Closure_arginfo, 0, ZEND_RETURN_VALUE, 1)
  468. ZEND_ARG_INFO(0, codepoint)
  469. ZEND_END_ARG_INFO();
  470. IC_METHOD(getFC_NFKC_Closure) {
  471. UChar32 cp;
  472. zval *zcp;
  473. UChar *closure;
  474. zend_string *u8str;
  475. int32_t closure_len;
  476. UErrorCode error = U_ZERO_ERROR;
  477. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
  478. (convert_cp(&cp, zcp) == FAILURE)) {
  479. return;
  480. }
  481. closure_len = u_getFC_NFKC_Closure(cp, NULL, 0, &error);
  482. if (closure_len == 0) {
  483. RETURN_EMPTY_STRING();
  484. }
  485. closure = safe_emalloc(sizeof(UChar), closure_len + 1, 0);
  486. error = U_ZERO_ERROR;
  487. closure_len = u_getFC_NFKC_Closure(cp, closure, closure_len, &error);
  488. if (U_FAILURE(error)) {
  489. efree(closure);
  490. INTL_CHECK_STATUS(error, "Failed getting closure");
  491. }
  492. error = U_ZERO_ERROR;
  493. u8str = intl_convert_utf16_to_utf8(closure, closure_len, &error);
  494. INTL_CHECK_STATUS(error, "Failed converting output to UTF8");
  495. efree(closure);
  496. RETVAL_NEW_STR(u8str);
  497. }
  498. /* }}} */
  499. /* {{{ proto bool IntlChar::<name>(int|string $codepoint) */
  500. #define IC_BOOL_METHOD_CHAR(name) \
  501. ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
  502. ZEND_ARG_INFO(0, codepoint) \
  503. ZEND_END_ARG_INFO(); \
  504. IC_METHOD(name) { \
  505. UChar32 cp; zval *zcp; \
  506. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
  507. (convert_cp(&cp, zcp) == FAILURE)) { return; } \
  508. RETURN_BOOL(u_##name(cp)); \
  509. }
  510. IC_BOOL_METHOD_CHAR(isUAlphabetic)
  511. IC_BOOL_METHOD_CHAR(isULowercase)
  512. IC_BOOL_METHOD_CHAR(isUUppercase)
  513. IC_BOOL_METHOD_CHAR(isUWhiteSpace)
  514. IC_BOOL_METHOD_CHAR(islower)
  515. IC_BOOL_METHOD_CHAR(isupper)
  516. IC_BOOL_METHOD_CHAR(istitle)
  517. IC_BOOL_METHOD_CHAR(isdigit)
  518. IC_BOOL_METHOD_CHAR(isalpha)
  519. IC_BOOL_METHOD_CHAR(isalnum)
  520. IC_BOOL_METHOD_CHAR(isxdigit)
  521. IC_BOOL_METHOD_CHAR(ispunct)
  522. IC_BOOL_METHOD_CHAR(isgraph)
  523. IC_BOOL_METHOD_CHAR(isblank)
  524. IC_BOOL_METHOD_CHAR(isdefined)
  525. IC_BOOL_METHOD_CHAR(isspace)
  526. IC_BOOL_METHOD_CHAR(isJavaSpaceChar)
  527. IC_BOOL_METHOD_CHAR(isWhitespace)
  528. IC_BOOL_METHOD_CHAR(iscntrl)
  529. IC_BOOL_METHOD_CHAR(isISOControl)
  530. IC_BOOL_METHOD_CHAR(isprint)
  531. IC_BOOL_METHOD_CHAR(isbase)
  532. IC_BOOL_METHOD_CHAR(isMirrored)
  533. IC_BOOL_METHOD_CHAR(isIDStart)
  534. IC_BOOL_METHOD_CHAR(isIDPart)
  535. IC_BOOL_METHOD_CHAR(isIDIgnorable)
  536. IC_BOOL_METHOD_CHAR(isJavaIDStart)
  537. IC_BOOL_METHOD_CHAR(isJavaIDPart)
  538. #undef IC_BOOL_METHOD_CHAR
  539. /* }}} */
  540. /* {{{ proto int IntlChar::<name>(int|string $codepoint) */
  541. #define IC_INT_METHOD_CHAR(name) \
  542. ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
  543. ZEND_ARG_INFO(0, codepoint) \
  544. ZEND_END_ARG_INFO(); \
  545. IC_METHOD(name) { \
  546. UChar32 cp; zval *zcp; \
  547. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
  548. (convert_cp(&cp, zcp) == FAILURE)) { return; } \
  549. RETURN_LONG(u_##name(cp)); \
  550. }
  551. IC_INT_METHOD_CHAR(charDirection)
  552. IC_INT_METHOD_CHAR(charType)
  553. IC_INT_METHOD_CHAR(getCombiningClass)
  554. IC_INT_METHOD_CHAR(charDigitValue)
  555. #undef IC_INT_METHOD_CHAR
  556. /* }}} */
  557. /* {{{ proto int|string IntlChar::<name>(int|string $codepoint)
  558. * Returns a utf-8 character if codepoint was passed as a utf-8 sequence
  559. * Returns an int otherwise
  560. */
  561. #define IC_CHAR_METHOD_CHAR(name) \
  562. ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
  563. ZEND_ARG_INFO(0, codepoint) \
  564. ZEND_END_ARG_INFO(); \
  565. IC_METHOD(name) { \
  566. UChar32 cp, ret; zval *zcp; \
  567. if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
  568. (convert_cp(&cp, zcp) == FAILURE)) { return; } \
  569. ret = u_##name(cp); \
  570. if (Z_TYPE_P(zcp) == IS_STRING) { \
  571. char buffer[5]; \
  572. int buffer_len = 0; \
  573. U8_APPEND_UNSAFE(buffer, buffer_len, ret); \
  574. buffer[buffer_len] = 0; \
  575. RETURN_STRINGL(buffer, buffer_len); \
  576. } else { \
  577. RETURN_LONG(ret); \
  578. } \
  579. }
  580. IC_CHAR_METHOD_CHAR(charMirror)
  581. IC_CHAR_METHOD_CHAR(tolower)
  582. IC_CHAR_METHOD_CHAR(toupper)
  583. IC_CHAR_METHOD_CHAR(totitle)
  584. #if U_ICU_VERSION_MAJOR_NUM >= 52
  585. IC_CHAR_METHOD_CHAR(getBidiPairedBracket)
  586. #endif /* ICU >= 52 */
  587. #undef IC_CHAR_METHOD_CHAR
  588. /* }}} */
  589. static const zend_function_entry intlchar_methods[] = {
  590. #define IC_ME(mname) PHP_ME(IntlChar, mname, mname##_arginfo, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)
  591. IC_ME(chr)
  592. IC_ME(ord)
  593. IC_ME(hasBinaryProperty)
  594. IC_ME(isUAlphabetic)
  595. IC_ME(isULowercase)
  596. IC_ME(isUUppercase)
  597. IC_ME(isUWhiteSpace)
  598. IC_ME(getIntPropertyValue)
  599. IC_ME(getIntPropertyMinValue)
  600. IC_ME(getIntPropertyMaxValue)
  601. IC_ME(getNumericValue)
  602. IC_ME(islower)
  603. IC_ME(isupper)
  604. IC_ME(istitle)
  605. IC_ME(isdigit)
  606. IC_ME(isalpha)
  607. IC_ME(isalnum)
  608. IC_ME(isxdigit)
  609. IC_ME(ispunct)
  610. IC_ME(isgraph)
  611. IC_ME(isblank)
  612. IC_ME(isdefined)
  613. IC_ME(isspace)
  614. IC_ME(isJavaSpaceChar)
  615. IC_ME(isWhitespace)
  616. IC_ME(iscntrl)
  617. IC_ME(isISOControl)
  618. IC_ME(isprint)
  619. IC_ME(isbase)
  620. IC_ME(charDirection)
  621. IC_ME(isMirrored)
  622. IC_ME(charMirror)
  623. #if U_ICU_VERSION_MAJOR_NUM >= 52
  624. IC_ME(getBidiPairedBracket)
  625. #endif /* ICU >= 52 */
  626. IC_ME(charType)
  627. IC_ME(enumCharTypes)
  628. IC_ME(getCombiningClass)
  629. IC_ME(charDigitValue)
  630. IC_ME(getBlockCode)
  631. IC_ME(charName)
  632. IC_ME(charFromName)
  633. IC_ME(enumCharNames)
  634. IC_ME(getPropertyName)
  635. IC_ME(getPropertyEnum)
  636. IC_ME(getPropertyValueName)
  637. IC_ME(getPropertyValueEnum)
  638. IC_ME(isIDStart)
  639. IC_ME(isIDPart)
  640. IC_ME(isIDIgnorable)
  641. IC_ME(isJavaIDStart)
  642. IC_ME(isJavaIDPart)
  643. IC_ME(tolower)
  644. IC_ME(toupper)
  645. IC_ME(totitle)
  646. IC_ME(foldCase)
  647. IC_ME(digit)
  648. IC_ME(forDigit)
  649. IC_ME(charAge)
  650. IC_ME(getUnicodeVersion)
  651. IC_ME(getFC_NFKC_Closure)
  652. #undef IC_ME
  653. PHP_FE_END
  654. };
  655. int php_uchar_minit(INIT_FUNC_ARGS) {
  656. zend_class_entry tmp, *ce;
  657. INIT_CLASS_ENTRY(tmp, "IntlChar", intlchar_methods);
  658. ce = zend_register_internal_class(&tmp);
  659. #define IC_CONSTL(name, val) \
  660. zend_declare_class_constant_long(ce, name, strlen(name), val);
  661. zend_declare_class_constant_string(ce, "UNICODE_VERSION", sizeof("UNICODE_VERISON")-1, U_UNICODE_VERSION);
  662. IC_CONSTL("CODEPOINT_MIN", UCHAR_MIN_VALUE)
  663. IC_CONSTL("CODEPOINT_MAX", UCHAR_MAX_VALUE)
  664. zend_declare_class_constant_double(ce, "NO_NUMERIC_VALUE", sizeof("NO_NUMERIC_VALUE")-1, U_NO_NUMERIC_VALUE);
  665. /* All enums used by the uchar APIs. There are a LOT of them,
  666. * so they're separated out into include files,
  667. * leaving this source file for actual implementation.
  668. */
  669. #define UPROPERTY(name) IC_CONSTL("PROPERTY_" #name, UCHAR_##name)
  670. #include "uproperty-enum.h"
  671. #undef UPROPERTY
  672. #define UCHARCATEGORY(name) IC_CONSTL("CHAR_CATEGORY_" #name, U_##name)
  673. #include "ucharcategory-enum.h"
  674. #undef UCHARCATEGORY
  675. #define UCHARDIRECTION(name) IC_CONSTL("CHAR_DIRECTION_" #name, U_##name)
  676. #include "uchardirection-enum.h"
  677. #undef UCHARDIRECTION
  678. #define UBLOCKCODE(name) IC_CONSTL("BLOCK_CODE_" #name, UBLOCK_##name)
  679. #include "ublockcode-enum.h"
  680. #undef UBLOCKCODE
  681. /* Smaller, self-destribing enums */
  682. #define UOTHER(name) IC_CONSTL(#name, U_##name)
  683. #include "uother-enum.h"
  684. #undef UOTHER
  685. #undef IC_CONSTL
  686. #undef IC_CONSTS
  687. return SUCCESS;
  688. }