codepointiterator_internal.cpp 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 7 |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | http://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Authors: Gustavo Lopes <cataphract@php.net> |
  14. +----------------------------------------------------------------------+
  15. */
  16. #include "codepointiterator_internal.h"
  17. #include <unicode/uchriter.h>
  18. #include <typeinfo>
  19. #include "php.h"
  20. //copied from cmemory.h, which is not public
  21. typedef union {
  22. zend_long t1;
  23. double t2;
  24. void *t3;
  25. } UAlignedMemory;
  26. #define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask))
  27. #define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)
  28. #define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
  29. using namespace PHP;
  30. using icu::UCharCharacterIterator;
  31. UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CodePointBreakIterator)
  32. CodePointBreakIterator::CodePointBreakIterator()
  33. : BreakIterator(), fCharIter(NULL), lastCodePoint(U_SENTINEL)
  34. {
  35. UErrorCode uec = UErrorCode();
  36. this->fText = utext_openUChars(NULL, NULL, 0, &uec);
  37. }
  38. CodePointBreakIterator::CodePointBreakIterator(const PHP::CodePointBreakIterator &other)
  39. : BreakIterator(other), fText(NULL), fCharIter(NULL), lastCodePoint(U_SENTINEL)
  40. {
  41. *this = other;
  42. }
  43. CodePointBreakIterator& CodePointBreakIterator::operator=(const CodePointBreakIterator& that)
  44. {
  45. UErrorCode uec = UErrorCode();
  46. if (this == &that) {
  47. return *this;
  48. }
  49. this->fText = utext_clone(this->fText, that.fText, FALSE, TRUE, &uec);
  50. //don't bother copying the character iterator, getText() is deprecated
  51. clearCurrentCharIter();
  52. this->lastCodePoint = that.lastCodePoint;
  53. return *this;
  54. }
  55. CodePointBreakIterator::~CodePointBreakIterator()
  56. {
  57. if (this->fText) {
  58. utext_close(this->fText);
  59. }
  60. clearCurrentCharIter();
  61. }
  62. UBool CodePointBreakIterator::operator==(const BreakIterator& that) const
  63. {
  64. if (typeid(*this) != typeid(that)) {
  65. return FALSE;
  66. }
  67. const CodePointBreakIterator& that2 =
  68. static_cast<const CodePointBreakIterator&>(that);
  69. if (!utext_equals(this->fText, that2.fText)) {
  70. return FALSE;
  71. }
  72. return TRUE;
  73. }
  74. CodePointBreakIterator* CodePointBreakIterator::clone(void) const
  75. {
  76. return new CodePointBreakIterator(*this);
  77. }
  78. CharacterIterator& CodePointBreakIterator::getText(void) const
  79. {
  80. if (this->fCharIter == NULL) {
  81. //this method is deprecated anyway; setup bogus iterator
  82. static const UChar c = 0;
  83. this->fCharIter = new UCharCharacterIterator(&c, 0);
  84. }
  85. return *this->fCharIter;
  86. }
  87. UText *CodePointBreakIterator::getUText(UText *fillIn, UErrorCode &status) const
  88. {
  89. return utext_clone(fillIn, this->fText, FALSE, TRUE, &status);
  90. }
  91. void CodePointBreakIterator::setText(const UnicodeString &text)
  92. {
  93. UErrorCode uec = UErrorCode();
  94. //this closes the previous utext, if any
  95. this->fText = utext_openConstUnicodeString(this->fText, &text, &uec);
  96. clearCurrentCharIter();
  97. }
  98. void CodePointBreakIterator::setText(UText *text, UErrorCode &status)
  99. {
  100. if (U_FAILURE(status)) {
  101. return;
  102. }
  103. this->fText = utext_clone(this->fText, text, FALSE, TRUE, &status);
  104. clearCurrentCharIter();
  105. }
  106. void CodePointBreakIterator::adoptText(CharacterIterator* it)
  107. {
  108. UErrorCode uec = UErrorCode();
  109. clearCurrentCharIter();
  110. this->fCharIter = it;
  111. this->fText = utext_openCharacterIterator(this->fText, it, &uec);
  112. }
  113. int32_t CodePointBreakIterator::first(void)
  114. {
  115. UTEXT_SETNATIVEINDEX(this->fText, 0);
  116. this->lastCodePoint = U_SENTINEL;
  117. return 0;
  118. }
  119. int32_t CodePointBreakIterator::last(void)
  120. {
  121. int32_t pos = (int32_t)utext_nativeLength(this->fText);
  122. UTEXT_SETNATIVEINDEX(this->fText, pos);
  123. this->lastCodePoint = U_SENTINEL;
  124. return pos;
  125. }
  126. int32_t CodePointBreakIterator::previous(void)
  127. {
  128. this->lastCodePoint = UTEXT_PREVIOUS32(this->fText);
  129. if (this->lastCodePoint == U_SENTINEL) {
  130. return BreakIterator::DONE;
  131. }
  132. return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
  133. }
  134. int32_t CodePointBreakIterator::next(void)
  135. {
  136. this->lastCodePoint = UTEXT_NEXT32(this->fText);
  137. if (this->lastCodePoint == U_SENTINEL) {
  138. return BreakIterator::DONE;
  139. }
  140. return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
  141. }
  142. int32_t CodePointBreakIterator::current(void) const
  143. {
  144. return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
  145. }
  146. int32_t CodePointBreakIterator::following(int32_t offset)
  147. {
  148. this->lastCodePoint = utext_next32From(this->fText, offset);
  149. if (this->lastCodePoint == U_SENTINEL) {
  150. return BreakIterator::DONE;
  151. }
  152. return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
  153. }
  154. int32_t CodePointBreakIterator::preceding(int32_t offset)
  155. {
  156. this->lastCodePoint = utext_previous32From(this->fText, offset);
  157. if (this->lastCodePoint == U_SENTINEL) {
  158. return BreakIterator::DONE;
  159. }
  160. return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
  161. }
  162. UBool CodePointBreakIterator::isBoundary(int32_t offset)
  163. {
  164. //this function has side effects, and it's supposed to
  165. utext_setNativeIndex(this->fText, offset);
  166. return (offset == utext_getNativeIndex(this->fText));
  167. }
  168. int32_t CodePointBreakIterator::next(int32_t n)
  169. {
  170. UBool res = utext_moveIndex32(this->fText, n);
  171. #ifndef UTEXT_CURRENT32
  172. #define UTEXT_CURRENT32 utext_current32
  173. #endif
  174. if (res) {
  175. this->lastCodePoint = UTEXT_CURRENT32(this->fText);
  176. return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
  177. } else {
  178. this->lastCodePoint = U_SENTINEL;
  179. return BreakIterator::DONE;
  180. }
  181. }
  182. CodePointBreakIterator *CodePointBreakIterator::createBufferClone(
  183. void *stackBuffer, int32_t &bufferSize, UErrorCode &status)
  184. {
  185. //see implementation of RuleBasedBreakIterator::createBufferClone()
  186. if (U_FAILURE(status)) {
  187. return NULL;
  188. }
  189. if (bufferSize <= 0) {
  190. bufferSize = sizeof(CodePointBreakIterator) + U_ALIGNMENT_OFFSET_UP(0);
  191. return NULL;
  192. }
  193. char *buf = (char*)stackBuffer;
  194. uint32_t s = bufferSize;
  195. if (stackBuffer == NULL) {
  196. s = 0;
  197. }
  198. if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
  199. uint32_t offsetUp = (uint32_t)U_ALIGNMENT_OFFSET_UP(buf);
  200. s -= offsetUp;
  201. buf += offsetUp;
  202. }
  203. if (s < sizeof(CodePointBreakIterator)) {
  204. CodePointBreakIterator *clonedBI = new CodePointBreakIterator(*this);
  205. if (clonedBI == NULL) {
  206. status = U_MEMORY_ALLOCATION_ERROR;
  207. } else {
  208. status = U_SAFECLONE_ALLOCATED_WARNING;
  209. }
  210. return clonedBI;
  211. }
  212. return new(buf) CodePointBreakIterator(*this);
  213. }
  214. CodePointBreakIterator &CodePointBreakIterator::refreshInputText(UText *input, UErrorCode &status)
  215. {
  216. //see implementation of RuleBasedBreakIterator::createBufferClone()
  217. if (U_FAILURE(status)) {
  218. return *this;
  219. }
  220. if (input == NULL) {
  221. status = U_ILLEGAL_ARGUMENT_ERROR;
  222. return *this;
  223. }
  224. int64_t pos = utext_getNativeIndex(this->fText);
  225. this->fText = utext_clone(this->fText, input, FALSE, TRUE, &status);
  226. if (U_FAILURE(status)) {
  227. return *this;
  228. }
  229. utext_setNativeIndex(this->fText, pos);
  230. if (utext_getNativeIndex(fText) != pos) {
  231. status = U_ILLEGAL_ARGUMENT_ERROR;
  232. }
  233. return *this;
  234. }