codepointiterator_internal.cpp 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | http://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Authors: Gustavo Lopes <cataphract@php.net> |
  14. +----------------------------------------------------------------------+
  15. */
  16. #include "codepointiterator_internal.h"
  17. #include <unicode/uchriter.h>
  18. #include <typeinfo>
  // Alignment helpers duplicated from ICU's cmemory.h, which is not a public header.

  // Union of a long, a double and a pointer; its size is the alignment unit
  // used by the macros below.
  union UAlignedMemory {
      long t1;
      double t2;
      void *t3;
  };

  // Bits of the pointer value `ptr` selected by `mask`.
  #define U_POINTER_MASK_LSB(ptr, mask) \
      (((ptrdiff_t)(char *)(ptr)) & (mask))

  // Low bits of `ptr` below sizeof(UAlignedMemory), i.e. how far `ptr` lies past
  // the previous multiple of that size (assuming the size is a power of two).
  #define U_ALIGNMENT_OFFSET(ptr) \
      U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)

  // sizeof(UAlignedMemory) minus the offset above. Note that this yields a full
  // unit, not zero, when the offset is zero.
  #define U_ALIGNMENT_OFFSET_UP(ptr) \
      (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
  28. using namespace PHP;
  29. UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CodePointBreakIterator);
  30. CodePointBreakIterator::CodePointBreakIterator()
  31. : BreakIterator(), fCharIter(NULL), lastCodePoint(U_SENTINEL)
  32. {
  33. UErrorCode uec = UErrorCode();
  34. this->fText = utext_openUChars(NULL, NULL, 0, &uec);
  35. }
  36. CodePointBreakIterator::CodePointBreakIterator(const PHP::CodePointBreakIterator &other)
  37. : BreakIterator(other), fText(NULL), fCharIter(NULL), lastCodePoint(U_SENTINEL)
  38. {
  39. *this = other;
  40. }
  41. CodePointBreakIterator& CodePointBreakIterator::operator=(const CodePointBreakIterator& that)
  42. {
  43. UErrorCode uec = UErrorCode();
  44. UText *ut_clone = NULL;
  45. if (this == &that) {
  46. return *this;
  47. }
  48. this->fText = utext_clone(this->fText, that.fText, FALSE, TRUE, &uec);
  49. //don't bother copying the character iterator, getText() is deprecated
  50. clearCurrentCharIter();
  51. this->lastCodePoint = that.lastCodePoint;
  52. return *this;
  53. }
  54. CodePointBreakIterator::~CodePointBreakIterator()
  55. {
  56. if (this->fText) {
  57. utext_close(this->fText);
  58. }
  59. clearCurrentCharIter();
  60. }
  61. UBool CodePointBreakIterator::operator==(const BreakIterator& that) const
  62. {
  63. if (typeid(*this) != typeid(that)) {
  64. return FALSE;
  65. }
  66. const CodePointBreakIterator& that2 =
  67. static_cast<const CodePointBreakIterator&>(that);
  68. if (!utext_equals(this->fText, that2.fText)) {
  69. return FALSE;
  70. }
  71. return TRUE;
  72. }
  73. CodePointBreakIterator* CodePointBreakIterator::clone(void) const
  74. {
  75. return new CodePointBreakIterator(*this);
  76. }
  77. CharacterIterator& CodePointBreakIterator::getText(void) const
  78. {
  79. if (this->fCharIter == NULL) {
  80. //this method is deprecated anyway; setup bogus iterator
  81. static const UChar c = 0;
  82. this->fCharIter = new UCharCharacterIterator(&c, 0);
  83. }
  84. return *this->fCharIter;
  85. }
  86. UText *CodePointBreakIterator::getUText(UText *fillIn, UErrorCode &status) const
  87. {
  88. return utext_clone(fillIn, this->fText, FALSE, TRUE, &status);
  89. }
  90. void CodePointBreakIterator::setText(const UnicodeString &text)
  91. {
  92. UErrorCode uec = UErrorCode();
  93. //this closes the previous utext, if any
  94. this->fText = utext_openConstUnicodeString(this->fText, &text, &uec);
  95. clearCurrentCharIter();
  96. }
  97. void CodePointBreakIterator::setText(UText *text, UErrorCode &status)
  98. {
  99. if (U_FAILURE(status)) {
  100. return;
  101. }
  102. this->fText = utext_clone(this->fText, text, FALSE, TRUE, &status);
  103. clearCurrentCharIter();
  104. }
  105. void CodePointBreakIterator::adoptText(CharacterIterator* it)
  106. {
  107. UErrorCode uec = UErrorCode();
  108. clearCurrentCharIter();
  109. this->fCharIter = it;
  110. this->fText = utext_openCharacterIterator(this->fText, it, &uec);
  111. }
  112. int32_t CodePointBreakIterator::first(void)
  113. {
  114. UTEXT_SETNATIVEINDEX(this->fText, 0);
  115. this->lastCodePoint = U_SENTINEL;
  116. return 0;
  117. }
  118. int32_t CodePointBreakIterator::last(void)
  119. {
  120. int32_t pos = (int32_t)utext_nativeLength(this->fText);
  121. UTEXT_SETNATIVEINDEX(this->fText, pos);
  122. this->lastCodePoint = U_SENTINEL;
  123. return pos;
  124. }
  125. int32_t CodePointBreakIterator::previous(void)
  126. {
  127. this->lastCodePoint = UTEXT_PREVIOUS32(this->fText);
  128. if (this->lastCodePoint == U_SENTINEL) {
  129. return BreakIterator::DONE;
  130. }
  131. return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
  132. }
  133. int32_t CodePointBreakIterator::next(void)
  134. {
  135. this->lastCodePoint = UTEXT_NEXT32(this->fText);
  136. if (this->lastCodePoint == U_SENTINEL) {
  137. return BreakIterator::DONE;
  138. }
  139. return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
  140. }
  141. int32_t CodePointBreakIterator::current(void) const
  142. {
  143. return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
  144. }
  145. int32_t CodePointBreakIterator::following(int32_t offset)
  146. {
  147. this->lastCodePoint = utext_next32From(this->fText, offset);
  148. if (this->lastCodePoint == U_SENTINEL) {
  149. return BreakIterator::DONE;
  150. }
  151. return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
  152. }
  153. int32_t CodePointBreakIterator::preceding(int32_t offset)
  154. {
  155. this->lastCodePoint = utext_previous32From(this->fText, offset);
  156. if (this->lastCodePoint == U_SENTINEL) {
  157. return BreakIterator::DONE;
  158. }
  159. return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
  160. }
  161. UBool CodePointBreakIterator::isBoundary(int32_t offset)
  162. {
  163. //this function has side effects, and it's supposed to
  164. utext_setNativeIndex(this->fText, offset);
  165. return (offset == utext_getNativeIndex(this->fText));
  166. }
  167. int32_t CodePointBreakIterator::next(int32_t n)
  168. {
  169. UBool res = utext_moveIndex32(this->fText, n);
  170. #ifndef UTEXT_CURRENT32
  171. #define UTEXT_CURRENT32 utext_current32
  172. #endif
  173. if (res) {
  174. this->lastCodePoint = UTEXT_CURRENT32(this->fText);
  175. return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
  176. } else {
  177. this->lastCodePoint = U_SENTINEL;
  178. return BreakIterator::DONE;
  179. }
  180. }
  181. CodePointBreakIterator *CodePointBreakIterator::createBufferClone(
  182. void *stackBuffer, int32_t &bufferSize, UErrorCode &status)
  183. {
  184. //see implementation of RuleBasedBreakIterator::createBufferClone()
  185. if (U_FAILURE(status)) {
  186. return NULL;
  187. }
  188. if (bufferSize <= 0) {
  189. bufferSize = sizeof(CodePointBreakIterator) + U_ALIGNMENT_OFFSET_UP(0);
  190. return NULL;
  191. }
  192. char *buf = (char*)stackBuffer;
  193. uint32_t s = bufferSize;
  194. if (stackBuffer == NULL) {
  195. s = 0;
  196. }
  197. if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
  198. uint32_t offsetUp = (uint32_t)U_ALIGNMENT_OFFSET_UP(buf);
  199. s -= offsetUp;
  200. buf += offsetUp;
  201. }
  202. if (s < sizeof(CodePointBreakIterator)) {
  203. CodePointBreakIterator *clonedBI = new CodePointBreakIterator(*this);
  204. if (clonedBI == NULL) {
  205. status = U_MEMORY_ALLOCATION_ERROR;
  206. } else {
  207. status = U_SAFECLONE_ALLOCATED_WARNING;
  208. }
  209. return clonedBI;
  210. }
  211. return new(buf) CodePointBreakIterator(*this);
  212. }
  213. CodePointBreakIterator &CodePointBreakIterator::refreshInputText(UText *input, UErrorCode &status)
  214. {
  215. //see implementation of RuleBasedBreakIterator::createBufferClone()
  216. if (U_FAILURE(status)) {
  217. return *this;
  218. }
  219. if (input == NULL) {
  220. status = U_ILLEGAL_ARGUMENT_ERROR;
  221. return *this;
  222. }
  223. int64_t pos = utext_getNativeIndex(this->fText);
  224. this->fText = utext_clone(this->fText, input, FALSE, TRUE, &status);
  225. if (U_FAILURE(status)) {
  226. return *this;
  227. }
  228. utext_setNativeIndex(this->fText, pos);
  229. if (utext_getNativeIndex(fText) != pos) {
  230. status = U_ILLEGAL_ARGUMENT_ERROR;
  231. }
  232. return *this;
  233. }