rulebasedbreakiterator_methods.cpp 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 7 |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | http://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Authors: Gustavo Lopes <cataphract@php.net> |
  14. +----------------------------------------------------------------------+
  15. */
  16. #include <unicode/rbbi.h>
  17. extern "C" {
  18. #define USE_BREAKITERATOR_POINTER 1
  19. #include "breakiterator_class.h"
  20. #include <zend_exceptions.h>
  21. #include <limits.h>
  22. }
  23. #include "../intl_convertcpp.h"
  24. #include "../intl_common.h"
  25. using icu::RuleBasedBreakIterator;
  26. using icu::Locale;
  27. static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
  28. return (RuleBasedBreakIterator*)bio->biter;
  29. }
  30. static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)
  31. {
  32. char *rules;
  33. size_t rules_len;
  34. zend_bool compiled = 0;
  35. UErrorCode status = U_ZERO_ERROR;
  36. intl_error_reset(NULL);
  37. if (zend_parse_parameters_throw(ZEND_NUM_ARGS(), "s|b",
  38. &rules, &rules_len, &compiled) == FAILURE) {
  39. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
  40. "rbbi_create_instance: bad arguments", 0);
  41. return;
  42. }
  43. // instantiation of ICU object
  44. RuleBasedBreakIterator *rbbi;
  45. if (!compiled) {
  46. UnicodeString rulesStr;
  47. UParseError parseError = UParseError();
  48. if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
  49. == FAILURE) {
  50. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
  51. "rbbi_create_instance: rules were not a valid UTF-8 string",
  52. 0);
  53. RETURN_NULL();
  54. }
  55. rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
  56. intl_error_set_code(NULL, status);
  57. if (U_FAILURE(status)) {
  58. char *msg;
  59. smart_str parse_error_str;
  60. parse_error_str = intl_parse_error_to_string(&parseError);
  61. spprintf(&msg, 0, "rbbi_create_instance: unable to create "
  62. "RuleBasedBreakIterator from rules (%s)", parse_error_str.s? ZSTR_VAL(parse_error_str.s) : "");
  63. smart_str_free(&parse_error_str);
  64. intl_error_set_custom_msg(NULL, msg, 1);
  65. efree(msg);
  66. delete rbbi;
  67. return;
  68. }
  69. } else { // compiled
  70. #if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
  71. rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
  72. if (U_FAILURE(status)) {
  73. intl_error_set(NULL, status, "rbbi_create_instance: unable to "
  74. "create instance from compiled rules", 0);
  75. delete rbbi;
  76. return;
  77. }
  78. #else
  79. intl_error_set(NULL, U_UNSUPPORTED_ERROR, "rbbi_create_instance: "
  80. "compiled rules require ICU >= 4.8", 0);
  81. return;
  82. #endif
  83. }
  84. breakiterator_object_create(return_value, rbbi, 0);
  85. }
  86. U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
  87. {
  88. zend_error_handling error_handling;
  89. zend_replace_error_handling(EH_THROW, IntlException_ce_ptr, &error_handling);
  90. return_value = getThis();
  91. _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU);
  92. zend_restore_error_handling(&error_handling);
  93. }
  94. U_CFUNC PHP_FUNCTION(rbbi_get_rules)
  95. {
  96. BREAKITER_METHOD_INIT_VARS;
  97. object = getThis();
  98. if (zend_parse_parameters_none() == FAILURE) {
  99. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
  100. "rbbi_get_rules: bad arguments", 0);
  101. RETURN_FALSE;
  102. }
  103. BREAKITER_METHOD_FETCH_OBJECT;
  104. zend_string *u8str;
  105. const UnicodeString rules = fetch_rbbi(bio)->getRules();
  106. u8str = intl_charFromString(rules, BREAKITER_ERROR_CODE_P(bio));
  107. if (!u8str)
  108. {
  109. intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
  110. "rbbi_hash_code: Error converting result to UTF-8 string",
  111. 0);
  112. RETURN_FALSE;
  113. }
  114. RETVAL_STR(u8str);
  115. }
  116. U_CFUNC PHP_FUNCTION(rbbi_get_rule_status)
  117. {
  118. BREAKITER_METHOD_INIT_VARS;
  119. object = getThis();
  120. if (zend_parse_parameters_none() == FAILURE) {
  121. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
  122. "rbbi_get_rule_status: bad arguments", 0);
  123. RETURN_FALSE;
  124. }
  125. BREAKITER_METHOD_FETCH_OBJECT;
  126. RETURN_LONG(fetch_rbbi(bio)->getRuleStatus());
  127. }
  128. U_CFUNC PHP_FUNCTION(rbbi_get_rule_status_vec)
  129. {
  130. BREAKITER_METHOD_INIT_VARS;
  131. object = getThis();
  132. if (zend_parse_parameters_none() == FAILURE) {
  133. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
  134. "rbbi_get_rule_status_vec: bad arguments", 0);
  135. RETURN_FALSE;
  136. }
  137. BREAKITER_METHOD_FETCH_OBJECT;
  138. int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0,
  139. BREAKITER_ERROR_CODE(bio));
  140. if (BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR) {
  141. BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
  142. } else {
  143. // should not happen
  144. INTL_METHOD_CHECK_STATUS(bio, "rbbi_get_rule_status_vec: failed "
  145. " determining the number of status values");
  146. }
  147. int32_t *rules = new int32_t[num_rules];
  148. num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules, num_rules,
  149. BREAKITER_ERROR_CODE(bio));
  150. if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
  151. delete[] rules;
  152. intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
  153. "rbbi_get_rule_status_vec: failed obtaining the status values",
  154. 0);
  155. RETURN_FALSE;
  156. }
  157. array_init_size(return_value, num_rules);
  158. for (int32_t i = 0; i < num_rules; i++) {
  159. add_next_index_long(return_value, rules[i]);
  160. }
  161. delete[] rules;
  162. }
  #if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
  /*
   * rbbi_get_binary_rules(): return the bytes obtained from the iterator's
   * getBinaryRules() as a PHP string (only built against ICU >= 4.8).
   *
   * Takes no PHP arguments. Returns false (with an intl error set) when
   * arguments are passed or when the reported length exceeds INT_MAX - 1.
   */
  U_CFUNC PHP_FUNCTION(rbbi_get_binary_rules)
  {
      BREAKITER_METHOD_INIT_VARS;
      object = getThis();

      if (FAILURE == zend_parse_parameters_none()) {
          intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
              "rbbi_get_binary_rules: bad arguments", 0);
          RETURN_FALSE;
      }

      BREAKITER_METHOD_FETCH_OBJECT;

      // getBinaryRules() reports the blob length through its argument.
      uint32_t blob_len;
      const uint8_t *blob = fetch_rbbi(bio)->getBinaryRules(blob_len);

      if (blob_len > INT_MAX - 1) {
          intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
              "rbbi_get_binary_rules: the rules are too large",
              0);
          RETURN_FALSE;
      }

      // Copy the bytes into a fresh zend_string and NUL-terminate it.
      zend_string *copy = zend_string_alloc(blob_len, 0);
      char *dst = ZSTR_VAL(copy);

      memcpy(dst, blob, blob_len);
      dst[blob_len] = '\0';

      RETVAL_STR(copy);
  }
  #endif
  187. #endif