rulebasedbreakiterator_methods.cpp 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | http://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Authors: Gustavo Lopes <cataphract@php.net> |
  14. +----------------------------------------------------------------------+
  15. */
  16. #include <unicode/rbbi.h>
  17. extern "C" {
  18. #define USE_BREAKITERATOR_POINTER 1
  19. #include "breakiterator_class.h"
  20. #include <zend_exceptions.h>
  21. #include <limits.h>
  22. }
  23. #include "../intl_convertcpp.h"
  24. static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
  25. return (RuleBasedBreakIterator*)bio->biter;
  26. }
  27. static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)
  28. {
  29. zval *object = getThis();
  30. char *rules;
  31. int rules_len;
  32. zend_bool compiled = 0;
  33. UErrorCode status = U_ZERO_ERROR;
  34. intl_error_reset(NULL TSRMLS_CC);
  35. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|b",
  36. &rules, &rules_len, &compiled) == FAILURE) {
  37. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
  38. "rbbi_create_instance: bad arguments", 0 TSRMLS_CC);
  39. RETURN_NULL();
  40. }
  41. // instantiation of ICU object
  42. RuleBasedBreakIterator *rbbi;
  43. if (!compiled) {
  44. UnicodeString rulesStr;
  45. UParseError parseError = UParseError();
  46. if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
  47. == FAILURE) {
  48. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
  49. "rbbi_create_instance: rules were not a valid UTF-8 string",
  50. 0 TSRMLS_CC);
  51. RETURN_NULL();
  52. }
  53. rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
  54. intl_error_set_code(NULL, status TSRMLS_CC);
  55. if (U_FAILURE(status)) {
  56. char *msg;
  57. smart_str parse_error_str;
  58. parse_error_str = intl_parse_error_to_string(&parseError);
  59. spprintf(&msg, 0, "rbbi_create_instance: unable to create "
  60. "RuleBasedBreakIterator from rules (%s)", parse_error_str.c);
  61. smart_str_free(&parse_error_str);
  62. intl_error_set_custom_msg(NULL, msg, 1 TSRMLS_CC);
  63. efree(msg);
  64. delete rbbi;
  65. RETURN_NULL();
  66. }
  67. } else { // compiled
  68. #if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
  69. rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
  70. if (U_FAILURE(status)) {
  71. intl_error_set(NULL, status, "rbbi_create_instance: unable to "
  72. "create instance from compiled rules", 0 TSRMLS_CC);
  73. delete rbbi;
  74. RETURN_NULL();
  75. }
  76. #else
  77. intl_error_set(NULL, U_UNSUPPORTED_ERROR, "rbbi_create_instance: "
  78. "compiled rules require ICU >= 4.8", 0 TSRMLS_CC);
  79. RETURN_NULL();
  80. #endif
  81. }
  82. breakiterator_object_create(return_value, rbbi TSRMLS_CC);
  83. }
  84. U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
  85. {
  86. zval orig_this = *getThis();
  87. return_value = getThis();
  88. //changes this to IS_NULL (without first destroying) if there's an error
  89. _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU);
  90. if (Z_TYPE_P(return_value) == IS_NULL) {
  91. zend_object_store_ctor_failed(&orig_this TSRMLS_CC);
  92. zval_dtor(&orig_this);
  93. }
  94. }
  95. U_CFUNC PHP_FUNCTION(rbbi_get_rules)
  96. {
  97. BREAKITER_METHOD_INIT_VARS;
  98. object = getThis();
  99. if (zend_parse_parameters_none() == FAILURE) {
  100. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
  101. "rbbi_get_rules: bad arguments", 0 TSRMLS_CC);
  102. RETURN_FALSE;
  103. }
  104. BREAKITER_METHOD_FETCH_OBJECT;
  105. const UnicodeString rules = fetch_rbbi(bio)->getRules();
  106. Z_TYPE_P(return_value) = IS_STRING;
  107. if (intl_charFromString(rules, &Z_STRVAL_P(return_value),
  108. &Z_STRLEN_P(return_value), BREAKITER_ERROR_CODE_P(bio)) == FAILURE)
  109. {
  110. intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
  111. "rbbi_hash_code: Error converting result to UTF-8 string",
  112. 0 TSRMLS_CC);
  113. RETURN_FALSE;
  114. }
  115. }
  116. U_CFUNC PHP_FUNCTION(rbbi_get_rule_status)
  117. {
  118. BREAKITER_METHOD_INIT_VARS;
  119. object = getThis();
  120. if (zend_parse_parameters_none() == FAILURE) {
  121. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
  122. "rbbi_get_rule_status: bad arguments", 0 TSRMLS_CC);
  123. RETURN_FALSE;
  124. }
  125. BREAKITER_METHOD_FETCH_OBJECT;
  126. RETURN_LONG(fetch_rbbi(bio)->getRuleStatus());
  127. }
  128. U_CFUNC PHP_FUNCTION(rbbi_get_rule_status_vec)
  129. {
  130. BREAKITER_METHOD_INIT_VARS;
  131. object = getThis();
  132. if (zend_parse_parameters_none() == FAILURE) {
  133. intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
  134. "rbbi_get_rule_status_vec: bad arguments", 0 TSRMLS_CC);
  135. RETURN_FALSE;
  136. }
  137. BREAKITER_METHOD_FETCH_OBJECT;
  138. int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0,
  139. BREAKITER_ERROR_CODE(bio));
  140. if (BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR) {
  141. BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
  142. } else {
  143. // should not happen
  144. INTL_METHOD_CHECK_STATUS(bio, "rbbi_get_rule_status_vec: failed "
  145. " determining the number of status values");
  146. }
  147. int32_t *rules = new int32_t[num_rules];
  148. num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules, num_rules,
  149. BREAKITER_ERROR_CODE(bio));
  150. if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
  151. delete[] rules;
  152. intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
  153. "rbbi_get_rule_status_vec: failed obtaining the status values",
  154. 0 TSRMLS_CC);
  155. RETURN_FALSE;
  156. }
  157. array_init_size(return_value, num_rules);
  158. for (int32_t i = 0; i < num_rules; i++) {
  159. add_next_index_long(return_value, rules[i]);
  160. }
  161. delete[] rules;
  162. }
  #if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
  /*
   * rbbi_get_binary_rules
   *
   * Takes no arguments. Returns a copy of the bytes obtained from
   * RuleBasedBreakIterator::getBinaryRules() as a PHP string, or FALSE on bad
   * arguments or when the data is too large to fit an int-sized string length.
   *
   * Only compiled when the ICU version is at least 4.8.
   */
  U_CFUNC PHP_FUNCTION(rbbi_get_binary_rules)
  {
      BREAKITER_METHOD_INIT_VARS;
      object = getThis();

      if (zend_parse_parameters_none() == FAILURE) {
          intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
              "rbbi_get_binary_rules: bad arguments", 0 TSRMLS_CC);
          RETURN_FALSE;
      }

      BREAKITER_METHOD_FETCH_OBJECT;

      uint32_t rules_len = 0;
      const uint8_t *rules = fetch_rbbi(bio)->getBinaryRules(rules_len);

      if (rules_len > INT_MAX - 1) {
          // getBinaryRules() takes no status argument, so the object's stored
          // error code does not describe this failure; report an explicit
          // failure code instead of whatever is currently stored there.
          intl_errors_set(BREAKITER_ERROR_P(bio), U_BUFFER_OVERFLOW_ERROR,
              "rbbi_get_binary_rules: the rules are too large",
              0 TSRMLS_CC);
          RETURN_FALSE;
      }

      // +1 for the terminating NUL appended below
      char *ret_rules = static_cast<char*>(emalloc(rules_len + 1));

      // memcpy() must not be given a NULL source, even for a zero length
      if (rules != NULL && rules_len > 0) {
          memcpy(ret_rules, rules, rules_len);
      }
      ret_rules[rules_len] = '\0';

      // final 0: the emalloc'ed buffer is handed over rather than duplicated
      RETURN_STRINGL(ret_rules, rules_len, 0);
  }
  #endif