unistr.h 170 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659
  1. /*
  2. **********************************************************************
  3. * Copyright (C) 1998-2016, International Business Machines
  4. * Corporation and others. All Rights Reserved.
  5. **********************************************************************
  6. *
  7. * File unistr.h
  8. *
  9. * Modification History:
  10. *
  11. * Date Name Description
  12. * 09/25/98 stephen Creation.
  13. * 11/11/98 stephen Changed per 11/9 code review.
  14. * 04/20/99 stephen Overhauled per 4/16 code review.
  15. * 11/18/99 aliu Made to inherit from Replaceable. Added method
  16. * handleReplaceBetween(); other methods unchanged.
  17. * 06/25/01 grhoten Remove dependency on iostream.
  18. ******************************************************************************
  19. */
  20. #ifndef UNISTR_H
  21. #define UNISTR_H
  22. /**
  23. * \file
  24. * \brief C++ API: Unicode String
  25. */
  26. #include "unicode/utypes.h"
  27. #include "unicode/rep.h"
  28. #include "unicode/std_string.h"
  29. #include "unicode/stringpiece.h"
  30. #include "unicode/bytestream.h"
  31. #include "unicode/ucasemap.h"
  32. struct UConverter; // unicode/ucnv.h
  33. #ifndef U_COMPARE_CODE_POINT_ORDER
  34. /* Defined here only if not already defined; see also ustring.h and unorm.h */
  35. /**
  36. * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.:
  37. * Compare strings in code point order instead of code unit order.
  38. * @stable ICU 2.2
  39. */
  40. #define U_COMPARE_CODE_POINT_ORDER 0x8000
  41. #endif /* U_COMPARE_CODE_POINT_ORDER */
  42. #ifndef USTRING_H /* declare u_strlen() here only while the USTRING_H guard is undefined */
  43. /**
  44. * \ingroup ustring_ustrlen
  45. */
  46. U_STABLE int32_t U_EXPORT2
  47. u_strlen(const UChar *s);
  48. #endif /* USTRING_H */
  49. /**
  50. * \def U_STRING_CASE_MAPPER_DEFINED
  51. * @internal
  52. */
  53. #ifndef U_STRING_CASE_MAPPER_DEFINED
  54. #define U_STRING_CASE_MAPPER_DEFINED /* marks the typedef below as declared so it is not repeated */
  55. /**
  56. * Internal string case mapping function type (a function type, not a function pointer type).
  57. * @internal
  58. */
  59. typedef int32_t U_CALLCONV
  60. UStringCaseMapper(const UCaseMap *csm,
  61. UChar *dest, int32_t destCapacity,
  62. const UChar *src, int32_t srcLength,
  63. UErrorCode *pErrorCode);
  64. #endif /* U_STRING_CASE_MAPPER_DEFINED */
  65. U_NAMESPACE_BEGIN
  66. class BreakIterator; // unicode/brkiter.h
  67. class Locale; // unicode/locid.h
  68. class StringCharacterIterator;
  69. class UnicodeStringAppendable; // unicode/appendable.h
  70. /* The <iostream> include has been moved to unicode/ustream.h */
  71. /**
  72. * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
  73. * which constructs a Unicode string from an invariant-character char * string.
  74. * For details about invariant characters, see utypes.h.
  75. * This constructor has no runtime dependency on conversion code and is
  76. * therefore recommended over ones taking a charset name string
  77. * (where the empty string "" indicates invariant-character conversion).
  78. * The macro expands to icu::UnicodeString::kInvariant.
  79. * @stable ICU 3.2
  80. */
  81. #define US_INV icu::UnicodeString::kInvariant
  82. /**
  83. * Unicode String literals in C++.
  84. * Dependent on the platform properties, different UnicodeString
  85. * constructors should be used to create a UnicodeString object from
  86. * a string literal.
  87. * The macros are defined for maximum performance.
  88. * They work only for strings that contain "invariant characters", i.e.,
  89. * only Latin letters, digits, and some punctuation.
  90. * See utypes.h for details.
  91. *
  92. * The string parameter must be a C string literal.
  93. * The length of the string, not including the terminating
  94. * <code>NUL</code>, must be specified as a constant.
  95. * The U_STRING_DECL macro should be invoked exactly once for one
  96. * such string variable before it is used.
  97. * @stable ICU 2.0
  98. */
  99. #if defined(U_DECLARE_UTF16)
  100. # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
  101. #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
  102. # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
  103. #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
  104. # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
  105. #else /* none of the literal-casting forms above applies: use the (cs, _length, US_INV) constructor */
  106. # define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
  107. #endif /* UNICODE_STRING */
  108. /**
  109. * Unicode String literals in C++.
  110. * Dependent on the platform properties, different UnicodeString
  111. * constructors should be used to create a UnicodeString object from
  112. * a string literal.
  113. * The macros are defined for improved performance.
  114. * They work only for strings that contain "invariant characters", i.e.,
  115. * only Latin letters, digits, and some punctuation.
  116. * See utypes.h for details.
  117. *
  118. * The string parameter must be a C string literal; it is forwarded to UNICODE_STRING with a length of -1.
  119. * @stable ICU 2.0
  120. */
  121. #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
  122. /**
  123. * \def UNISTR_FROM_CHAR_EXPLICIT
  124. * This can be defined to be empty or "explicit".
  125. * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
  126. * constructors are marked as explicit, preventing their inadvertent use.
  127. * @stable ICU 49
  128. */
  129. #ifndef UNISTR_FROM_CHAR_EXPLICIT
  130. # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
  131. // Auto-"explicit" in ICU library code.
  132. # define UNISTR_FROM_CHAR_EXPLICIT explicit
  133. # else
  134. // Empty by default for source code compatibility.
  135. # define UNISTR_FROM_CHAR_EXPLICIT
  136. # endif /* any U_*_IMPLEMENTATION macro defined */
  137. #endif /* UNISTR_FROM_CHAR_EXPLICIT */
  138. /**
  139. * \def UNISTR_FROM_STRING_EXPLICIT
  140. * This can be defined to be empty or "explicit".
  141. * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
  142. * constructors are marked as explicit, preventing their inadvertent use.
  143. *
  144. * In particular, this helps prevent accidentally depending on ICU conversion code
  145. * by passing a string literal into an API with a const UnicodeString & parameter.
  146. * @stable ICU 49
  147. */
  148. #ifndef UNISTR_FROM_STRING_EXPLICIT
  149. # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
  150. // Auto-"explicit" in ICU library code.
  151. # define UNISTR_FROM_STRING_EXPLICIT explicit
  152. # else
  153. // Empty by default for source code compatibility.
  154. # define UNISTR_FROM_STRING_EXPLICIT
  155. # endif /* any U_*_IMPLEMENTATION macro defined */
  156. #endif /* UNISTR_FROM_STRING_EXPLICIT */
  157. /* The following cannot be wrapped in #ifndef U_HIDE_DRAFT_API because
  158. it is used to construct other non-internal constants. */
  159. /**
  160. * \def UNISTR_OBJECT_SIZE
  161. * Desired sizeof(UnicodeString) in bytes.
  162. * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
  163. * The object size may want to be a multiple of 16 bytes,
  164. * which is a common granularity for heap allocation.
  165. *
  166. * Any space inside the object beyond sizeof(vtable pointer) + 2
  167. * is available for storing short strings inside the object.
  168. * The bigger the object, the longer a string that can be stored inside the object,
  169. * without additional heap allocation.
  170. *
  171. * Depending on a platform's pointer size, pointer alignment requirements,
  172. * and struct padding, the compiler will usually round up sizeof(UnicodeString)
  173. * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
  174. * to hold the fields for heap-allocated strings.
  175. * Such a minimum size also ensures that the object is easily large enough
  176. * to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH).
  177. *
  178. * sizeof(UnicodeString) >= 48 should work for all known platforms.
  179. *
  180. * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
  181. * sizeof(UnicodeString) = 64 would leave space for
  182. * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
  183. * UChars stored inside the object.
  184. *
  185. * The minimum object size on a 64-bit machine would be
  186. * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
  187. * and the internal buffer would hold up to 11 UChars in that case.
  188. *
  189. * @see U16_MAX_LENGTH
  190. * @draft ICU 56
  191. */
  192. #ifndef UNISTR_OBJECT_SIZE
  193. # define UNISTR_OBJECT_SIZE 64 /* default; used only when UNISTR_OBJECT_SIZE is not already defined */
  194. #endif /* UNISTR_OBJECT_SIZE */
  195. /**
  196. * UnicodeString is a string class that stores Unicode characters directly and provides
  197. * functionality similar to that of the Java String and StringBuffer/StringBuilder classes.
  198. * It is a concrete implementation of the abstract class Replaceable (for transliteration).
  199. *
  200. * A UnicodeString may also "alias" an external array of characters
  201. * (that is, point to it, rather than own the array)
  202. * whose lifetime must then at least match the lifetime of the aliasing object.
  203. * This aliasing may be preserved when returning a UnicodeString by value,
  204. * depending on the compiler and the function implementation,
  205. * via Return Value Optimization (RVO) or the move assignment operator.
  206. * (However, the copy assignment operator does not preserve aliasing.)
  207. * For details see the description of storage models at the end of the class API docs
  208. * and in the User Guide chapter linked from there.
  209. *
  210. * The UnicodeString class is not suitable for subclassing.
  211. *
  212. * <p>For an overview of Unicode strings in C and C++ see the
  213. * <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p>
  214. *
  215. * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
  216. * A Unicode character may be stored with either one code unit
  217. * (the most common case) or with a matched pair of special code units
  218. * ("surrogates"). The data type for code units is UChar.
  219. * For single-character handling, a Unicode character code <em>point</em> is a value
  220. * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
  221. *
  222. * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
  223. * This is the same as with multi-byte char* strings in traditional string handling.
  224. * Operations on partial strings typically do not test for code point boundaries.
  225. * If necessary, the user needs to take care of such boundaries by testing for the code unit
  226. * values or by using functions like
  227. * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
  228. * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
  229. *
  230. * UnicodeString methods are more lenient with regard to input parameter values
  231. * than other ICU APIs. In particular:
  232. * - If indexes are out of bounds for a UnicodeString object
  233. * (<0 or >length()) then they are "pinned" to the nearest boundary.
  234. * - If primitive string pointer values (e.g., const UChar * or char *)
  235. * for input strings are NULL, then those input string parameters are treated
  236. * as if they pointed to an empty string.
  237. * However, this is <em>not</em> the case for char * parameters for charset names
  238. * or other IDs.
  239. * - Most UnicodeString methods do not take a UErrorCode parameter because
  240. * there are usually very few opportunities for failure other than a shortage
  241. * of memory, error codes in low-level C++ string methods would be inconvenient,
  242. * and the error code as the last parameter (ICU convention) would prevent
  243. * the use of default parameter values.
  244. * Instead, such methods set the UnicodeString into a "bogus" state
  245. * (see isBogus()) if an error occurs.
  246. *
  247. * In string comparisons, two UnicodeString objects that are both "bogus"
  248. * compare equal (to be transitive and prevent endless loops in sorting),
  249. * and a "bogus" string compares less than any non-"bogus" one.
  250. *
  251. * Const UnicodeString methods are thread-safe. Multiple threads can use
  252. * const methods on the same UnicodeString object simultaneously,
  253. * but non-const methods must not be called concurrently (in multiple threads)
  254. * with any other (const or non-const) methods.
  255. *
  256. * Similarly, const UnicodeString & parameters are thread-safe.
  257. * One object may be passed in as such a parameter concurrently in multiple threads.
  258. * This includes the const UnicodeString & parameters for
  259. * copy construction, assignment, and cloning.
  260. *
  261. * <p>UnicodeString uses several storage methods.
  262. * String contents can be stored inside the UnicodeString object itself,
  263. * in an allocated and shared buffer, or in an outside buffer that is "aliased".
  264. * Most of this is done transparently, but careful aliasing in particular provides
  265. * significant performance improvements.
  266. * Also, the internal buffer is accessible via special functions.
  267. * For details see the
  268. * <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p>
  269. *
  270. * @see utf.h
  271. * @see CharacterIterator
  272. * @stable ICU 2.0
  273. */
  274. class U_COMMON_API UnicodeString : public Replaceable
  275. {
  276. public:
  277. /**
  278. * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
  279. * which constructs a Unicode string from an invariant-character char * string.
  280. * Use the macro US_INV instead of the full qualification for this value.
  281. *
  282. * @see US_INV
  283. * @stable ICU 3.2
  284. */
  285. enum EInvariant {
  286. /**
  287. * @see EInvariant
  288. * @stable ICU 3.2
  289. */
  290. kInvariant
  291. };
  292. //========================================
  293. // Read-only operations
  294. //========================================
  295. /* Comparison - bitwise only - for international comparison use collation */
  296. /**
  297. * Equality operator. Performs only bitwise comparison.
  298. * @param text The UnicodeString to compare to this one.
  299. * @return TRUE if <TT>text</TT> contains the same characters as this one,
  300. * FALSE otherwise.
  301. * @stable ICU 2.0
  302. */
  303. inline UBool operator== (const UnicodeString& text) const;
  304. /**
  305. * Inequality operator. Performs only bitwise comparison.
  306. * @param text The UnicodeString to compare to this one.
  307. * @return FALSE if <TT>text</TT> contains the same characters as this one,
  308. * TRUE otherwise.
  309. * @stable ICU 2.0
  310. */
  311. inline UBool operator!= (const UnicodeString& text) const;
  312. /**
  313. * Greater than operator. Performs only bitwise comparison.
  314. * @param text The UnicodeString to compare to this one.
  315. * @return TRUE if the characters in this are bitwise
  316. * greater than the characters in <code>text</code>, FALSE otherwise
  317. * @stable ICU 2.0
  318. */
  319. inline UBool operator> (const UnicodeString& text) const;
  320. /**
  321. * Less than operator. Performs only bitwise comparison.
  322. * @param text The UnicodeString to compare to this one.
  323. * @return TRUE if the characters in this are bitwise
  324. * less than the characters in <code>text</code>, FALSE otherwise
  325. * @stable ICU 2.0
  326. */
  327. inline UBool operator< (const UnicodeString& text) const;
  328. /**
  329. * Greater than or equal operator. Performs only bitwise comparison.
  330. * @param text The UnicodeString to compare to this one.
  331. * @return TRUE if the characters in this are bitwise
  332. * greater than or equal to the characters in <code>text</code>, FALSE otherwise
  333. * @stable ICU 2.0
  334. */
  335. inline UBool operator>= (const UnicodeString& text) const;
  336. /**
  337. * Less than or equal operator. Performs only bitwise comparison.
  338. * @param text The UnicodeString to compare to this one.
  339. * @return TRUE if the characters in this are bitwise
  340. * less than or equal to the characters in <code>text</code>, FALSE otherwise
  341. * @stable ICU 2.0
  342. */
  343. inline UBool operator<= (const UnicodeString& text) const;
  344. /**
  345. * Compare the characters bitwise in this UnicodeString to
  346. * the characters in <code>text</code>.
  347. * @param text The UnicodeString to compare to this one.
  348. * @return The result of bitwise character comparison: 0 if this
  349. * contains the same characters as <code>text</code>, -1 if the characters in
  350. * this are bitwise less than the characters in <code>text</code>, +1 if the
  351. * characters in this are bitwise greater than the characters
  352. * in <code>text</code>.
  353. * @stable ICU 2.0
  354. */
  355. inline int8_t compare(const UnicodeString& text) const;
  356. /**
  357. * Compare the characters bitwise in the range
  358. * [<TT>start</TT>, <TT>start + length</TT>) with the characters
  359. * in the <b>entire string</b> <TT>text</TT>.
  360. * (The parameters "start" and "length" are not applied to the other text "text".)
  361. * @param start the offset at which the compare operation begins
  362. * @param length the number of characters of text to compare.
  363. * @param text the other text to be compared against this string.
  364. * @return The result of bitwise character comparison: 0 if this
  365. * contains the same characters as <code>text</code>, -1 if the characters in
  366. * this are bitwise less than the characters in <code>text</code>, +1 if the
  367. * characters in this are bitwise greater than the characters
  368. * in <code>text</code>.
  369. * @stable ICU 2.0
  370. */
  371. inline int8_t compare(int32_t start,
  372. int32_t length,
  373. const UnicodeString& text) const;
  374. /**
  375. * Compare the characters bitwise in the range
  376. * [<TT>start</TT>, <TT>start + length</TT>) with the characters
  377. * in <TT>srcText</TT> in the range
  378. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  379. * @param start the offset at which the compare operation begins
  380. * @param length the number of characters in this to compare.
  381. * @param srcText the text to be compared
  382. * @param srcStart the offset into <TT>srcText</TT> to start comparison
  383. * @param srcLength the number of characters in <TT>srcText</TT> to compare
  384. * @return The result of bitwise character comparison: 0 if this
  385. * contains the same characters as <code>srcText</code>, -1 if the characters in
  386. * this are bitwise less than the characters in <code>srcText</code>, +1 if the
  387. * characters in this are bitwise greater than the characters
  388. * in <code>srcText</code>.
  389. * @stable ICU 2.0
  390. */
  391. inline int8_t compare(int32_t start,
  392. int32_t length,
  393. const UnicodeString& srcText,
  394. int32_t srcStart,
  395. int32_t srcLength) const;
  396. /**
  397. * Compare the characters bitwise in this UnicodeString with the first
  398. * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
  399. * @param srcChars The characters to compare to this UnicodeString.
  400. * @param srcLength the number of characters in <TT>srcChars</TT> to compare
  401. * @return The result of bitwise character comparison: 0 if this
  402. * contains the same characters as <code>srcChars</code>, -1 if the characters in
  403. * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
  404. * characters in this are bitwise greater than the characters
  405. * in <code>srcChars</code>.
  406. * @stable ICU 2.0
  407. */
  408. inline int8_t compare(const UChar *srcChars,
  409. int32_t srcLength) const;
  410. /**
  411. * Compare the characters bitwise in the range
  412. * [<TT>start</TT>, <TT>start + length</TT>) with the first
  413. * <TT>length</TT> characters in <TT>srcChars</TT>
  414. * @param start the offset at which the compare operation begins
  415. * @param length the number of characters to compare.
  416. * @param srcChars the characters to be compared
  417. * @return The result of bitwise character comparison: 0 if this
  418. * contains the same characters as <code>srcChars</code>, -1 if the characters in
  419. * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
  420. * characters in this are bitwise greater than the characters
  421. * in <code>srcChars</code>.
  422. * @stable ICU 2.0
  423. */
  424. inline int8_t compare(int32_t start,
  425. int32_t length,
  426. const UChar *srcChars) const;
  427. /**
  428. * Compare the characters bitwise in the range
  429. * [<TT>start</TT>, <TT>start + length</TT>) with the characters
  430. * in <TT>srcChars</TT> in the range
  431. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  432. * @param start the offset at which the compare operation begins
  433. * @param length the number of characters in this to compare
  434. * @param srcChars the characters to be compared
  435. * @param srcStart the offset into <TT>srcChars</TT> to start comparison
  436. * @param srcLength the number of characters in <TT>srcChars</TT> to compare
  437. * @return The result of bitwise character comparison: 0 if this
  438. * contains the same characters as <code>srcChars</code>, -1 if the characters in
  439. * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
  440. * characters in this are bitwise greater than the characters
  441. * in <code>srcChars</code>.
  442. * @stable ICU 2.0
  443. */
  444. inline int8_t compare(int32_t start,
  445. int32_t length,
  446. const UChar *srcChars,
  447. int32_t srcStart,
  448. int32_t srcLength) const;
  449. /**
  450. * Compare the characters bitwise in the range
  451. * [<TT>start</TT>, <TT>limit</TT>) with the characters
  452. * in <TT>srcText</TT> in the range
  453. * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
  454. * @param start the offset at which the compare operation begins
  455. * @param limit the offset immediately following the compare operation
  456. * @param srcText the text to be compared
  457. * @param srcStart the offset into <TT>srcText</TT> to start comparison
  458. * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
  459. * @return The result of bitwise character comparison: 0 if this
  460. * contains the same characters as <code>srcText</code>, -1 if the characters in
  461. * this are bitwise less than the characters in <code>srcText</code>, +1 if the
  462. * characters in this are bitwise greater than the characters
  463. * in <code>srcText</code>.
  464. * @stable ICU 2.0
  465. */
  466. inline int8_t compareBetween(int32_t start,
  467. int32_t limit,
  468. const UnicodeString& srcText,
  469. int32_t srcStart,
  470. int32_t srcLimit) const;
  471. /**
  472. * Compare two Unicode strings in code point order.
  473. * The result may be different from the results of compare(), operator<, etc.
  474. * if supplementary characters are present:
  475. *
  476. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  477. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  478. * which means that they compare as less than some other BMP characters like U+feff.
  479. * This function compares Unicode strings in code point order.
  480. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  481. *
  482. * @param text Another string to compare this one to.
  483. * @return a negative/zero/positive integer corresponding to whether
  484. * this string is less than/equal to/greater than the second one
  485. * in code point order
  486. * @stable ICU 2.0
  487. */
  488. inline int8_t compareCodePointOrder(const UnicodeString& text) const;
  489. /**
  490. * Compare two Unicode strings in code point order.
  491. * The result may be different from the results of compare(), operator<, etc.
  492. * if supplementary characters are present:
  493. *
  494. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  495. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  496. * which means that they compare as less than some other BMP characters like U+feff.
  497. * This function compares Unicode strings in code point order.
  498. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  499. *
  500. * @param start The start offset in this string at which the compare operation begins.
  501. * @param length The number of code units from this string to compare.
  502. * @param srcText Another string to compare this one to.
  503. * @return a negative/zero/positive integer corresponding to whether
  504. * this string is less than/equal to/greater than the second one
  505. * in code point order
  506. * @stable ICU 2.0
  507. */
  508. inline int8_t compareCodePointOrder(int32_t start,
  509. int32_t length,
  510. const UnicodeString& srcText) const;
  511. /**
  512. * Compare two Unicode strings in code point order.
  513. * The result may be different from the results of compare(), operator<, etc.
  514. * if supplementary characters are present:
  515. *
  516. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  517. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  518. * which means that they compare as less than some other BMP characters like U+feff.
  519. * This function compares Unicode strings in code point order.
  520. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  521. *
  522. * @param start The start offset in this string at which the compare operation begins.
  523. * @param length The number of code units from this string to compare.
  524. * @param srcText Another string to compare this one to.
  525. * @param srcStart The start offset in that string at which the compare operation begins.
  526. * @param srcLength The number of code units from that string to compare.
  527. * @return a negative/zero/positive integer corresponding to whether
  528. * this string is less than/equal to/greater than the second one
  529. * in code point order
  530. * @stable ICU 2.0
  531. */
  532. inline int8_t compareCodePointOrder(int32_t start,
  533. int32_t length,
  534. const UnicodeString& srcText,
  535. int32_t srcStart,
  536. int32_t srcLength) const;
  537. /**
  538. * Compare two Unicode strings in code point order.
  539. * The result may be different from the results of compare(), operator<, etc.
  540. * if supplementary characters are present:
  541. *
  542. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  543. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  544. * which means that they compare as less than some other BMP characters like U+feff.
  545. * This function compares Unicode strings in code point order.
  546. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  547. *
  548. * @param srcChars A pointer to another string to compare this one to.
  549. * @param srcLength The number of code units from that string to compare.
  550. * @return a negative/zero/positive integer corresponding to whether
  551. * this string is less than/equal to/greater than the second one
  552. * in code point order
  553. * @stable ICU 2.0
  554. */
  555. inline int8_t compareCodePointOrder(const UChar *srcChars,
  556. int32_t srcLength) const;
  557. /**
  558. * Compare two Unicode strings in code point order.
  559. * The result may be different from the results of compare(), operator<, etc.
  560. * if supplementary characters are present:
  561. *
  562. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  563. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  564. * which means that they compare as less than some other BMP characters like U+feff.
  565. * This function compares Unicode strings in code point order.
  566. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  567. *
  568. * @param start The start offset in this string at which the compare operation begins.
  569. * @param length The number of code units from this string to compare.
  570. * @param srcChars A pointer to another string to compare this one to.
  571. * @return a negative/zero/positive integer corresponding to whether
  572. * this string is less than/equal to/greater than the second one
  573. * in code point order
  574. * @stable ICU 2.0
  575. */
  576. inline int8_t compareCodePointOrder(int32_t start,
  577. int32_t length,
  578. const UChar *srcChars) const;
  579. /**
  580. * Compare two Unicode strings in code point order.
  581. * The result may be different from the results of compare(), operator<, etc.
  582. * if supplementary characters are present:
  583. *
  584. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  585. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  586. * which means that they compare as less than some other BMP characters like U+feff.
  587. * This function compares Unicode strings in code point order.
  588. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  589. *
  590. * @param start The start offset in this string at which the compare operation begins.
  591. * @param length The number of code units from this string to compare.
  592. * @param srcChars A pointer to another string to compare this one to.
  593. * @param srcStart The start offset in that string at which the compare operation begins.
  594. * @param srcLength The number of code units from that string to compare.
  595. * @return a negative/zero/positive integer corresponding to whether
  596. * this string is less than/equal to/greater than the second one
  597. * in code point order
  598. * @stable ICU 2.0
  599. */
  600. inline int8_t compareCodePointOrder(int32_t start,
  601. int32_t length,
  602. const UChar *srcChars,
  603. int32_t srcStart,
  604. int32_t srcLength) const;
  605. /**
  606. * Compare two Unicode strings in code point order.
  607. * The result may be different from the results of compare(), operator<, etc.
  608. * if supplementary characters are present:
  609. *
  610. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  611. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  612. * which means that they compare as less than some other BMP characters like U+feff.
  613. * This function compares Unicode strings in code point order.
  614. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  615. *
  616. * @param start The start offset in this string at which the compare operation begins.
  617. * @param limit The offset after the last code unit from this string to compare.
  618. * @param srcText Another string to compare this one to.
  619. * @param srcStart The start offset in that string at which the compare operation begins.
  620. * @param srcLimit The offset after the last code unit from that string to compare.
  621. * @return a negative/zero/positive integer corresponding to whether
  622. * this string is less than/equal to/greater than the second one
  623. * in code point order
  624. * @stable ICU 2.0
  625. */
  626. inline int8_t compareCodePointOrderBetween(int32_t start,
  627. int32_t limit,
  628. const UnicodeString& srcText,
  629. int32_t srcStart,
  630. int32_t srcLimit) const;
  631. /**
  632. * Compare two strings case-insensitively using full case folding.
  633. * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
  634. *
  635. * @param text Another string to compare this one to.
  636. * @param options A bit set of options:
  637. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  638. * Comparison in code unit order with default case folding.
  639. *
  640. * - U_COMPARE_CODE_POINT_ORDER
  641. * Set to choose code point order instead of code unit order
  642. * (see u_strCompare for details).
  643. *
  644. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  645. *
  646. * @return A negative, zero, or positive integer indicating the comparison result.
  647. * @stable ICU 2.0
  648. */
  649. inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
  650. /**
  651. * Compare two strings case-insensitively using full case folding.
  652. * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
  653. *
  654. * @param start The start offset in this string at which the compare operation begins.
  655. * @param length The number of code units from this string to compare.
  656. * @param srcText Another string to compare this one to.
  657. * @param options A bit set of options:
  658. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  659. * Comparison in code unit order with default case folding.
  660. *
  661. * - U_COMPARE_CODE_POINT_ORDER
  662. * Set to choose code point order instead of code unit order
  663. * (see u_strCompare for details).
  664. *
  665. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  666. *
  667. * @return A negative, zero, or positive integer indicating the comparison result.
  668. * @stable ICU 2.0
  669. */
  670. inline int8_t caseCompare(int32_t start,
  671. int32_t length,
  672. const UnicodeString& srcText,
  673. uint32_t options) const;
  674. /**
  675. * Compare two strings case-insensitively using full case folding.
  676. * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
  677. *
  678. * @param start The start offset in this string at which the compare operation begins.
  679. * @param length The number of code units from this string to compare.
  680. * @param srcText Another string to compare this one to.
  681. * @param srcStart The start offset in that string at which the compare operation begins.
  682. * @param srcLength The number of code units from that string to compare.
  683. * @param options A bit set of options:
  684. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  685. * Comparison in code unit order with default case folding.
  686. *
  687. * - U_COMPARE_CODE_POINT_ORDER
  688. * Set to choose code point order instead of code unit order
  689. * (see u_strCompare for details).
  690. *
  691. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  692. *
  693. * @return A negative, zero, or positive integer indicating the comparison result.
  694. * @stable ICU 2.0
  695. */
  696. inline int8_t caseCompare(int32_t start,
  697. int32_t length,
  698. const UnicodeString& srcText,
  699. int32_t srcStart,
  700. int32_t srcLength,
  701. uint32_t options) const;
  702. /**
  703. * Compare two strings case-insensitively using full case folding.
  704. * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
  705. *
  706. * @param srcChars A pointer to another string to compare this one to.
  707. * @param srcLength The number of code units from that string to compare.
  708. * @param options A bit set of options:
  709. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  710. * Comparison in code unit order with default case folding.
  711. *
  712. * - U_COMPARE_CODE_POINT_ORDER
  713. * Set to choose code point order instead of code unit order
  714. * (see u_strCompare for details).
  715. *
  716. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  717. *
  718. * @return A negative, zero, or positive integer indicating the comparison result.
  719. * @stable ICU 2.0
  720. */
  721. inline int8_t caseCompare(const UChar *srcChars,
  722. int32_t srcLength,
  723. uint32_t options) const;
  724. /**
  725. * Compare two strings case-insensitively using full case folding.
  726. * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
  727. *
  728. * @param start The start offset in this string at which the compare operation begins.
  729. * @param length The number of code units from this string to compare.
  730. * @param srcChars A pointer to another string to compare this one to.
  731. * @param options A bit set of options:
  732. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  733. * Comparison in code unit order with default case folding.
  734. *
  735. * - U_COMPARE_CODE_POINT_ORDER
  736. * Set to choose code point order instead of code unit order
  737. * (see u_strCompare for details).
  738. *
  739. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  740. *
  741. * @return A negative, zero, or positive integer indicating the comparison result.
  742. * @stable ICU 2.0
  743. */
  744. inline int8_t caseCompare(int32_t start,
  745. int32_t length,
  746. const UChar *srcChars,
  747. uint32_t options) const;
  748. /**
  749. * Compare two strings case-insensitively using full case folding.
  750. * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
  751. *
  752. * @param start The start offset in this string at which the compare operation begins.
  753. * @param length The number of code units from this string to compare.
  754. * @param srcChars A pointer to another string to compare this one to.
  755. * @param srcStart The start offset in that string at which the compare operation begins.
  756. * @param srcLength The number of code units from that string to compare.
  757. * @param options A bit set of options:
  758. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  759. * Comparison in code unit order with default case folding.
  760. *
  761. * - U_COMPARE_CODE_POINT_ORDER
  762. * Set to choose code point order instead of code unit order
  763. * (see u_strCompare for details).
  764. *
  765. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  766. *
  767. * @return A negative, zero, or positive integer indicating the comparison result.
  768. * @stable ICU 2.0
  769. */
  770. inline int8_t caseCompare(int32_t start,
  771. int32_t length,
  772. const UChar *srcChars,
  773. int32_t srcStart,
  774. int32_t srcLength,
  775. uint32_t options) const;
  776. /**
  777. * Compare two strings case-insensitively using full case folding.
  778. * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
  779. *
  780. * @param start The start offset in this string at which the compare operation begins.
  781. * @param limit The offset after the last code unit from this string to compare.
  782. * @param srcText Another string to compare this one to.
  783. * @param srcStart The start offset in that string at which the compare operation begins.
  784. * @param srcLimit The offset after the last code unit from that string to compare.
  785. * @param options A bit set of options:
  786. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  787. * Comparison in code unit order with default case folding.
  788. *
  789. * - U_COMPARE_CODE_POINT_ORDER
  790. * Set to choose code point order instead of code unit order
  791. * (see u_strCompare for details).
  792. *
  793. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  794. *
  795. * @return A negative, zero, or positive integer indicating the comparison result.
  796. * @stable ICU 2.0
  797. */
  798. inline int8_t caseCompareBetween(int32_t start,
  799. int32_t limit,
  800. const UnicodeString& srcText,
  801. int32_t srcStart,
  802. int32_t srcLimit,
  803. uint32_t options) const;
  804. /**
  805. * Determine if this starts with the characters in <TT>text</TT>
  806. * @param text The text to match.
  807. * @return TRUE if this starts with the characters in <TT>text</TT>,
  808. * FALSE otherwise
  809. * @stable ICU 2.0
  810. */
  811. inline UBool startsWith(const UnicodeString& text) const;
  812. /**
  813. * Determine if this starts with the characters in <TT>srcText</TT>
  814. * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  815. * @param srcText The text to match.
  816. * @param srcStart the offset into <TT>srcText</TT> to start matching
  817. * @param srcLength the number of characters in <TT>srcText</TT> to match
  818. * @return TRUE if this starts with the characters in that range of <TT>srcText</TT>,
  819. * FALSE otherwise
  820. * @stable ICU 2.0
  821. */
  822. inline UBool startsWith(const UnicodeString& srcText,
  823. int32_t srcStart,
  824. int32_t srcLength) const;
  825. /**
  826. * Determine if this starts with the characters in <TT>srcChars</TT>
  827. * @param srcChars The characters to match.
  828. * @param srcLength the number of characters in <TT>srcChars</TT>
  829. * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
  830. * FALSE otherwise
  831. * @stable ICU 2.0
  832. */
  833. inline UBool startsWith(const UChar *srcChars,
  834. int32_t srcLength) const;
  835. /**
  836. * Determine if this starts with the characters in <TT>srcChars</TT>
  837. * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  838. * @param srcChars The characters to match.
  839. * @param srcStart the offset into <TT>srcChars</TT> to start matching
  840. * @param srcLength the number of characters in <TT>srcChars</TT> to match
  841. * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
  842. * @stable ICU 2.0
  843. */
  844. inline UBool startsWith(const UChar *srcChars,
  845. int32_t srcStart,
  846. int32_t srcLength) const;
  847. /**
  848. * Determine if this ends with the characters in <TT>text</TT>
  849. * @param text The text to match.
  850. * @return TRUE if this ends with the characters in <TT>text</TT>,
  851. * FALSE otherwise
  852. * @stable ICU 2.0
  853. */
  854. inline UBool endsWith(const UnicodeString& text) const;
  855. /**
  856. * Determine if this ends with the characters in <TT>srcText</TT>
  857. * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  858. * @param srcText The text to match.
  859. * @param srcStart the offset into <TT>srcText</TT> to start matching
  860. * @param srcLength the number of characters in <TT>srcText</TT> to match
  861. * @return TRUE if this ends with the characters in that range of <TT>srcText</TT>,
  862. * FALSE otherwise
  863. * @stable ICU 2.0
  864. */
  865. inline UBool endsWith(const UnicodeString& srcText,
  866. int32_t srcStart,
  867. int32_t srcLength) const;
  868. /**
  869. * Determine if this ends with the characters in <TT>srcChars</TT>
  870. * @param srcChars The characters to match.
  871. * @param srcLength the number of characters in <TT>srcChars</TT>
  872. * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
  873. * FALSE otherwise
  874. * @stable ICU 2.0
  875. */
  876. inline UBool endsWith(const UChar *srcChars,
  877. int32_t srcLength) const;
  878. /**
  879. * Determine if this ends with the characters in <TT>srcChars</TT>
  880. * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  881. * @param srcChars The characters to match.
  882. * @param srcStart the offset into <TT>srcChars</TT> to start matching
  883. * @param srcLength the number of characters in <TT>srcChars</TT> to match
  884. * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
  885. * FALSE otherwise
  886. * @stable ICU 2.0
  887. */
  888. inline UBool endsWith(const UChar *srcChars,
  889. int32_t srcStart,
  890. int32_t srcLength) const;
  891. /* Searching - bitwise only */
  892. /**
  893. * Locate in this the first occurrence of the characters in <TT>text</TT>,
  894. * using bitwise comparison.
  895. * @param text The text to search for.
  896. * @return The offset into this of the start of <TT>text</TT>,
  897. * or -1 if not found.
  898. * @stable ICU 2.0
  899. */
  900. inline int32_t indexOf(const UnicodeString& text) const;
  901. /**
  902. * Locate in this the first occurrence of the characters in <TT>text</TT>
  903. * starting at offset <TT>start</TT>, using bitwise comparison.
  904. * @param text The text to search for.
  905. * @param start The offset at which searching will start.
  906. * @return The offset into this of the start of <TT>text</TT>,
  907. * or -1 if not found.
  908. * @stable ICU 2.0
  909. */
  910. inline int32_t indexOf(const UnicodeString& text,
  911. int32_t start) const;
  912. /**
  913. * Locate in this the first occurrence in the range
  914. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  915. * in <TT>text</TT>, using bitwise comparison.
  916. * @param text The text to search for.
  917. * @param start The offset at which searching will start.
  918. * @param length The number of characters to search
  919. * @return The offset into this of the start of <TT>text</TT>,
  920. * or -1 if not found.
  921. * @stable ICU 2.0
  922. */
  923. inline int32_t indexOf(const UnicodeString& text,
  924. int32_t start,
  925. int32_t length) const;
  926. /**
  927. * Locate in this the first occurrence in the range
  928. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  929. * in <TT>srcText</TT> in the range
  930. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  931. * using bitwise comparison.
  932. * @param srcText The text to search for.
  933. * @param srcStart the offset into <TT>srcText</TT> at which
  934. * to start matching
  935. * @param srcLength the number of characters in <TT>srcText</TT> to match
  936. * @param start the offset into this at which to start matching
  937. * @param length the number of characters in this to search
  938. * @return The offset into this of the start of the matched <TT>srcText</TT> characters,
  939. * or -1 if not found.
  940. * @stable ICU 2.0
  941. */
  942. inline int32_t indexOf(const UnicodeString& srcText,
  943. int32_t srcStart,
  944. int32_t srcLength,
  945. int32_t start,
  946. int32_t length) const;
  947. /**
  948. * Locate in this the first occurrence of the characters in
  949. * <TT>srcChars</TT>
  950. * starting at offset <TT>start</TT>, using bitwise comparison.
  951. * @param srcChars The text to search for.
  952. * @param srcLength the number of characters in <TT>srcChars</TT> to match
  953. * @param start the offset into this at which to start matching
  954. * @return The offset into this of the start of <TT>srcChars</TT>,
  955. * or -1 if not found.
  956. * @stable ICU 2.0
  957. */
  958. inline int32_t indexOf(const UChar *srcChars,
  959. int32_t srcLength,
  960. int32_t start) const;
  961. /**
  962. * Locate in this the first occurrence in the range
  963. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  964. * in <TT>srcChars</TT>, using bitwise comparison.
  965. * @param srcChars The text to search for.
  966. * @param srcLength the number of characters in <TT>srcChars</TT>
  967. * @param start The offset at which searching will start.
  968. * @param length The number of characters to search
  969. * @return The offset into this of the start of <TT>srcChars</TT>,
  970. * or -1 if not found.
  971. * @stable ICU 2.0
  972. */
  973. inline int32_t indexOf(const UChar *srcChars,
  974. int32_t srcLength,
  975. int32_t start,
  976. int32_t length) const;
  977. /**
  978. * Locate in this the first occurrence in the range
  979. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  980. * in <TT>srcChars</TT> in the range
  981. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  982. * using bitwise comparison.
  983. * @param srcChars The text to search for.
  984. * @param srcStart the offset into <TT>srcChars</TT> at which
  985. * to start matching
  986. * @param srcLength the number of characters in <TT>srcChars</TT> to match
  987. * @param start the offset into this at which to start matching
  988. * @param length the number of characters in this to search
  989. * @return The offset into this of the start of <TT>srcChars</TT>,
  990. * or -1 if not found.
  991. * @stable ICU 2.0
  992. */
  993. int32_t indexOf(const UChar *srcChars,
  994. int32_t srcStart,
  995. int32_t srcLength,
  996. int32_t start,
  997. int32_t length) const;
  998. /**
  999. * Locate in this the first occurrence of the BMP code point <code>c</code>,
  1000. * using bitwise comparison.
  1001. * @param c The code unit to search for.
  1002. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1003. * @stable ICU 2.0
  1004. */
  1005. inline int32_t indexOf(UChar c) const;
  1006. /**
  1007. * Locate in this the first occurrence of the code point <TT>c</TT>,
  1008. * using bitwise comparison.
  1009. *
  1010. * @param c The code point to search for.
  1011. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1012. * @stable ICU 2.0
  1013. */
  1014. inline int32_t indexOf(UChar32 c) const;
  1015. /**
  1016. * Locate in this the first occurrence of the BMP code point <code>c</code>,
  1017. * starting at offset <TT>start</TT>, using bitwise comparison.
  1018. * @param c The code unit to search for.
  1019. * @param start The offset at which searching will start.
  1020. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1021. * @stable ICU 2.0
  1022. */
  1023. inline int32_t indexOf(UChar c,
  1024. int32_t start) const;
  1025. /**
  1026. * Locate in this the first occurrence of the code point <TT>c</TT>
  1027. * starting at offset <TT>start</TT>, using bitwise comparison.
  1028. *
  1029. * @param c The code point to search for.
  1030. * @param start The offset at which searching will start.
  1031. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1032. * @stable ICU 2.0
  1033. */
  1034. inline int32_t indexOf(UChar32 c,
  1035. int32_t start) const;
  1036. /**
  1037. * Locate in this the first occurrence of the BMP code point <code>c</code>
  1038. * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1039. * using bitwise comparison.
  1040. * @param c The code unit to search for.
  1041. * @param start the offset into this at which to start matching
  1042. * @param length the number of characters in this to search
  1043. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1044. * @stable ICU 2.0
  1045. */
  1046. inline int32_t indexOf(UChar c,
  1047. int32_t start,
  1048. int32_t length) const;
  1049. /**
  1050. * Locate in this the first occurrence of the code point <TT>c</TT>
  1051. * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1052. * using bitwise comparison.
  1053. *
  1054. * @param c The code point to search for.
  1055. * @param start the offset into this at which to start matching
  1056. * @param length the number of characters in this to search
  1057. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1058. * @stable ICU 2.0
  1059. */
  1060. inline int32_t indexOf(UChar32 c,
  1061. int32_t start,
  1062. int32_t length) const;
  1063. /**
  1064. * Locate in this the last occurrence of the characters in <TT>text</TT>,
  1065. * using bitwise comparison.
  1066. * @param text The text to search for.
  1067. * @return The offset into this of the start of <TT>text</TT>,
  1068. * or -1 if not found.
  1069. * @stable ICU 2.0
  1070. */
  1071. inline int32_t lastIndexOf(const UnicodeString& text) const;
  1072. /**
  1073. * Locate in this the last occurrence of the characters in <TT>text</TT>
  1074. * starting at offset <TT>start</TT>, using bitwise comparison.
  1075. * @param text The text to search for.
  1076. * @param start The offset at which searching will start.
  1077. * @return The offset into this of the start of <TT>text</TT>,
  1078. * or -1 if not found.
  1079. * @stable ICU 2.0
  1080. */
  1081. inline int32_t lastIndexOf(const UnicodeString& text,
  1082. int32_t start) const;
  1083. /**
  1084. * Locate in this the last occurrence in the range
  1085. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1086. * in <TT>text</TT>, using bitwise comparison.
  1087. * @param text The text to search for.
  1088. * @param start The offset at which searching will start.
  1089. * @param length The number of characters to search
  1090. * @return The offset into this of the start of <TT>text</TT>,
  1091. * or -1 if not found.
  1092. * @stable ICU 2.0
  1093. */
  1094. inline int32_t lastIndexOf(const UnicodeString& text,
  1095. int32_t start,
  1096. int32_t length) const;
  1097. /**
  1098. * Locate in this the last occurrence in the range
  1099. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1100. * in <TT>srcText</TT> in the range
  1101. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  1102. * using bitwise comparison.
  1103. * @param srcText The text to search for.
  1104. * @param srcStart the offset into <TT>srcText</TT> at which
  1105. * to start matching
  1106. * @param srcLength the number of characters in <TT>srcText</TT> to match
  1107. * @param start the offset into this at which to start matching
  1108. * @param length the number of characters in this to search
  1109. * @return The offset into this of the start of <TT>srcText</TT>,
  1110. * or -1 if not found.
  1111. * @stable ICU 2.0
  1112. */
  1113. inline int32_t lastIndexOf(const UnicodeString& srcText,
  1114. int32_t srcStart,
  1115. int32_t srcLength,
  1116. int32_t start,
  1117. int32_t length) const;
  1118. /**
  1119. * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
  1120. * starting at offset <TT>start</TT>, using bitwise comparison.
  1121. * @param srcChars The text to search for.
  1122. * @param srcLength the number of characters in <TT>srcChars</TT> to match
  1123. * @param start the offset into this at which to start matching
  1124. * @return The offset into this of the start of <TT>srcChars</TT>,
  1125. * or -1 if not found.
  1126. * @stable ICU 2.0
  1127. */
  1128. inline int32_t lastIndexOf(const UChar *srcChars,
  1129. int32_t srcLength,
  1130. int32_t start) const;
  1131. /**
  1132. * Locate in this the last occurrence in the range
  1133. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1134. * in <TT>srcChars</TT>, using bitwise comparison.
  1135. * @param srcChars The text to search for.
  1136. * @param srcLength the number of characters in <TT>srcChars</TT>
  1137. * @param start The offset at which searching will start.
  1138. * @param length The number of characters to search
  1139. * @return The offset into this of the start of <TT>srcChars</TT>,
  1140. * or -1 if not found.
  1141. * @stable ICU 2.0
  1142. */
  1143. inline int32_t lastIndexOf(const UChar *srcChars,
  1144. int32_t srcLength,
  1145. int32_t start,
  1146. int32_t length) const;
  1147. /**
  1148. * Locate in this the last occurrence in the range
  1149. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1150. * in <TT>srcChars</TT> in the range
  1151. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  1152. * using bitwise comparison.
  1153. * @param srcChars The text to search for.
  1154. * @param srcStart the offset into <TT>srcChars</TT> at which
  1155. * to start matching
  1156. * @param srcLength the number of characters in <TT>srcChars</TT> to match
  1157. * @param start the offset into this at which to start matching
  1158. * @param length the number of characters in this to search
  1159. * @return The offset into this of the start of <TT>srcChars</TT>,
  1160. * or -1 if not found.
  1161. * @stable ICU 2.0
  1162. */
  1163. int32_t lastIndexOf(const UChar *srcChars,
  1164. int32_t srcStart,
  1165. int32_t srcLength,
  1166. int32_t start,
  1167. int32_t length) const;
  1168. /**
  1169. * Locate in this the last occurrence of the BMP code point <code>c</code>,
  1170. * using bitwise comparison.
  1171. * @param c The code unit to search for.
  1172. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1173. * @stable ICU 2.0
  1174. */
  1175. inline int32_t lastIndexOf(UChar c) const;
  1176. /**
  1177. * Locate in this the last occurrence of the code point <TT>c</TT>,
  1178. * using bitwise comparison.
  1179. *
  1180. * @param c The code point to search for.
  1181. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1182. * @stable ICU 2.0
  1183. */
  1184. inline int32_t lastIndexOf(UChar32 c) const;
  1185. /**
  1186. * Locate in this the last occurrence of the BMP code point <code>c</code>
  1187. * starting at offset <TT>start</TT>, using bitwise comparison.
  1188. * @param c The code unit to search for.
  1189. * @param start The offset at which searching will start.
  1190. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1191. * @stable ICU 2.0
  1192. */
  1193. inline int32_t lastIndexOf(UChar c,
  1194. int32_t start) const;
  1195. /**
  1196. * Locate in this the last occurrence of the code point <TT>c</TT>
  1197. * starting at offset <TT>start</TT>, using bitwise comparison.
  1198. *
  1199. * @param c The code point to search for.
  1200. * @param start The offset at which searching will start.
  1201. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1202. * @stable ICU 2.0
  1203. */
  1204. inline int32_t lastIndexOf(UChar32 c,
  1205. int32_t start) const;
  1206. /**
  1207. * Locate in this the last occurrence of the BMP code point <code>c</code>
  1208. * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1209. * using bitwise comparison.
  1210. * @param c The code unit to search for.
  1211. * @param start the offset into this at which to start matching
  1212. * @param length the number of characters in this to search
  1213. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1214. * @stable ICU 2.0
  1215. */
  1216. inline int32_t lastIndexOf(UChar c,
  1217. int32_t start,
  1218. int32_t length) const;
  1219. /**
  1220. * Locate in this the last occurrence of the code point <TT>c</TT>
  1221. * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1222. * using bitwise comparison.
  1223. *
  1224. * @param c The code point to search for.
  1225. * @param start the offset into this at which to start matching
  1226. * @param length the number of characters in this to search
  1227. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1228. * @stable ICU 2.0
  1229. */
  1230. inline int32_t lastIndexOf(UChar32 c,
  1231. int32_t start,
  1232. int32_t length) const;
  1233. /* Character access */
  1234. /**
  1235. * Return the code unit at offset <tt>offset</tt>.
  1236. * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1237. * @param offset a valid offset into the text
  1238. * @return the code unit at offset <tt>offset</tt>
  1239. * or 0xffff if the offset is not valid for this string
  1240. * @stable ICU 2.0
  1241. */
  1242. inline UChar charAt(int32_t offset) const;
  1243. /**
  1244. * Return the code unit at offset <tt>offset</tt>.
  1245. * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1246. * @param offset a valid offset into the text
  1247. * @return the code unit at offset <tt>offset</tt>
  1248. * @stable ICU 2.0
  1249. */
  1250. inline UChar operator[] (int32_t offset) const;
  1251. /**
  1252. * Return the code point that contains the code unit
  1253. * at offset <tt>offset</tt>.
  1254. * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1255. * @param offset a valid offset into the text
  1256. * that indicates the text offset of any of the code units
  1257. * that will be assembled into a code point (21-bit value) and returned
  1258. * @return the code point of text at <tt>offset</tt>
  1259. * or 0xffff if the offset is not valid for this string
  1260. * @stable ICU 2.0
  1261. */
  1262. UChar32 char32At(int32_t offset) const;
  1263. /**
  1264. * Adjust a random-access offset so that
  1265. * it points to the beginning of a Unicode character.
  1266. * The offset that is passed in points to
  1267. * any code unit of a code point,
  1268. * while the returned offset will point to the first code unit
  1269. * of the same code point.
  1270. * In UTF-16, if the input offset points to a second surrogate
  1271. * of a surrogate pair, then the returned offset will point
  1272. * to the first surrogate.
  1273. * @param offset a valid offset into one code point of the text
  1274. * @return offset of the first code unit of the same code point
  1275. * @see U16_SET_CP_START
  1276. * @stable ICU 2.0
  1277. */
  1278. int32_t getChar32Start(int32_t offset) const;
  1279. /**
  1280. * Adjust a random-access offset so that
  1281. * it points behind a Unicode character.
  1282. * The offset that is passed in points behind
  1283. * any code unit of a code point,
  1284. * while the returned offset will point behind the last code unit
  1285. * of the same code point.
  1286. * In UTF-16, if the input offset points behind the first surrogate
  1287. * (i.e., to the second surrogate)
  1288. * of a surrogate pair, then the returned offset will point
  1289. * behind the second surrogate (i.e., to the offset just past the pair).
  1290. * @param offset a valid offset after any code unit of a code point of the text
  1291. * @return offset of the first code unit after the same code point
  1292. * @see U16_SET_CP_LIMIT
  1293. * @stable ICU 2.0
  1294. */
  1295. int32_t getChar32Limit(int32_t offset) const;
  1296. /**
  1297. * Move the code unit index along the string by delta code points.
  1298. * Interpret the input index as a code unit-based offset into the string,
  1299. * move the index forward or backward by delta code points, and
  1300. * return the resulting index.
  1301. * The input index should point to the first code unit of a code point,
  1302. * if there is more than one.
  1303. *
  1304. * Both input and output indexes are code unit-based as for all
  1305. * string indexes/offsets in ICU (and other libraries, like MBCS char*).
  1306. * If delta<0 then the index is moved backward (toward the start of the string).
  1307. * If delta>0 then the index is moved forward (toward the end of the string).
  1308. *
  1309. * This behaves like CharacterIterator::move32(delta, kCurrent).
  1310. *
  1311. * Behavior for out-of-bounds indexes:
  1312. * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
  1313. * if the input index<0 then it is pinned to 0;
  1314. * if it is index>length() then it is pinned to length().
  1315. * Afterwards, the index is moved by <code>delta</code> code points
  1316. * forward or backward,
  1317. * but no further backward than to 0 and no further forward than to length().
  1318. * The resulting index return value will be in between 0 and length(), inclusively.
  1319. *
  1320. * Examples:
  1321. * <pre>
  1322. * // s has code points 'a' U+10000 'b' U+10ffff U+2029
  1323. * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
  1324. *
  1325. * // initial index: position of U+10000
  1326. * int32_t index=1;
  1327. *
  1328. * // the following examples will all result in index==4, position of U+10ffff
  1329. *
  1330. * // skip 2 code points from some position in the string
  1331. * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
  1332. *
  1333. * // go to the 3rd code point from the start of s (0-based)
  1334. * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
  1335. *
  1336. * // go to the next-to-last code point of s
  1337. * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
  1338. * </pre>
  1339. *
  1340. * @param index input code unit index
  1341. * @param delta (signed) code point count to move the index forward or backward
  1342. * in the string
  1343. * @return the resulting code unit index
  1344. * @stable ICU 2.0
  1345. */
  1346. int32_t moveIndex32(int32_t index, int32_t delta) const;
  1347. /* Substring extraction */
  1348. /**
  1349. * Copy the characters in the range
  1350. * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
  1351. * beginning at <tt>dstStart</tt>.
  1352. * If the string aliases to <code>dst</code> itself as an external buffer,
  1353. * then extract() will not copy the contents.
  1354. *
  1355. * @param start offset of first character which will be copied into the array
  1356. * @param length the number of characters to extract
  1357. * @param dst array in which to copy characters. The length of <tt>dst</tt>
  1358. * must be at least (<tt>dstStart + length</tt>).
  1359. * @param dstStart the offset in <TT>dst</TT> where the first character
  1360. * will be extracted
  1361. * @stable ICU 2.0
  1362. */
  1363. inline void extract(int32_t start,
  1364. int32_t length,
  1365. UChar *dst,
  1366. int32_t dstStart = 0) const;
  1367. /**
  1368. * Copy the contents of the string into dest.
  1369. * This is a convenience function that
  1370. * checks if there is enough space in dest,
  1371. * extracts the entire string if possible,
  1372. * and NUL-terminates dest if possible.
  1373. *
  1374. * If the string fits into dest but cannot be NUL-terminated
  1375. * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
  1376. * If the string itself does not fit into dest
  1377. * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
  1378. *
  1379. * If the string aliases to <code>dest</code> itself as an external buffer,
  1380. * then extract() will not copy the contents.
  1381. *
  1382. * @param dest Destination string buffer.
  1383. * @param destCapacity Number of UChars available at dest.
  1384. * @param errorCode ICU error code.
  1385. * @return length()
  1386. * @stable ICU 2.0
  1387. */
  1388. int32_t
  1389. extract(UChar *dest, int32_t destCapacity,
  1390. UErrorCode &errorCode) const;
  1391. /**
  1392. * Copy the characters in the range
  1393. * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
  1394. * <tt>target</tt>.
  1395. * @param start offset of first character which will be copied
  1396. * @param length the number of characters to extract
  1397. * @param target UnicodeString into which to copy characters.
  1398. * This function returns nothing; the characters are copied into <TT>target</TT>.
  1399. * @stable ICU 2.0
  1400. */
  1401. inline void extract(int32_t start,
  1402. int32_t length,
  1403. UnicodeString& target) const;
  1404. /**
  1405. * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
  1406. * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
  1407. * @param start offset of first character which will be copied into the array
  1408. * @param limit offset immediately following the last character to be copied
  1409. * @param dst array in which to copy characters. The length of <tt>dst</tt>
  1410. * must be at least (<tt>dstStart + (limit - start)</tt>).
  1411. * @param dstStart the offset in <TT>dst</TT> where the first character
  1412. * will be extracted
  1413. * @stable ICU 2.0
  1414. */
  1415. inline void extractBetween(int32_t start,
  1416. int32_t limit,
  1417. UChar *dst,
  1418. int32_t dstStart = 0) const;
  1419. /**
  1420. * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
  1421. * into the UnicodeString <tt>target</tt>. Replaceable API.
  1422. * @param start offset of first character which will be copied
  1423. * @param limit offset immediately following the last character to be copied
  1424. * @param target UnicodeString into which to copy characters.
  1425. * This function returns nothing; the characters are copied into <TT>target</TT>.
  1426. * @stable ICU 2.0
  1427. */
  1428. virtual void extractBetween(int32_t start,
  1429. int32_t limit,
  1430. UnicodeString& target) const;
  1431. /**
  1432. * Copy the characters in the range
  1433. * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters.
  1434. * All characters must be invariant (see utypes.h).
  1435. * Use US_INV as the last, signature-distinguishing parameter.
  1436. *
  1437. * This function does not write any more than <code>targetCapacity</code>
  1438. * characters but returns the length of the entire output string
  1439. * so that one can allocate a larger buffer and call the function again
  1440. * if necessary.
  1441. * The output string is NUL-terminated if possible.
  1442. *
  1443. * @param start offset of first character which will be copied
  1444. * @param startLength the number of characters to extract
  1445. * @param target the target buffer for extraction, can be NULL
  1446. * if targetCapacity is 0
  1447. * @param targetCapacity the length of the target buffer
  1448. * @param inv Signature-distinguishing parameter, use US_INV.
  1449. * @return the output string length, not including the terminating NUL
  1450. * @stable ICU 3.2
  1451. */
  1452. int32_t extract(int32_t start,
  1453. int32_t startLength,
  1454. char *target,
  1455. int32_t targetCapacity,
  1456. enum EInvariant inv) const;
  1457. #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
  1458. /**
  1459. * Copy the characters in the range
  1460. * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
  1461. * in the platform's default codepage.
  1462. * This function does not write any more than <code>targetLength</code>
  1463. * characters but returns the length of the entire output string
  1464. * so that one can allocate a larger buffer and call the function again
  1465. * if necessary.
  1466. * The output string is NUL-terminated if possible.
  1467. *
  1468. * @param start offset of first character which will be copied
  1469. * @param startLength the number of characters to extract
  1470. * @param target the target buffer for extraction
  1471. * @param targetLength the length of the target buffer
  1472. * If <TT>target</TT> is NULL, then the number of bytes required for
  1473. * <TT>target</TT> is returned.
  1474. * @return the output string length, not including the terminating NUL
  1475. * @stable ICU 2.0
  1476. */
  1477. int32_t extract(int32_t start,
  1478. int32_t startLength,
  1479. char *target,
  1480. uint32_t targetLength) const;
  1481. #endif
  1482. #if !UCONFIG_NO_CONVERSION
  1483. /**
  1484. * Copy the characters in the range
  1485. * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
  1486. * in a specified codepage.
  1487. * The output string is NUL-terminated.
  1488. *
  1489. * Recommendation: For invariant-character strings use
  1490. * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1491. * because it avoids object code dependencies of UnicodeString on
  1492. * the conversion code.
  1493. *
  1494. * @param start offset of first character which will be copied
  1495. * @param startLength the number of characters to extract
  1496. * @param target the target buffer for extraction
  1497. * @param codepage the desired codepage for the characters. 0 has
  1498. * the special meaning of the default codepage
  1499. * If <code>codepage</code> is an empty string (<code>""</code>),
  1500. * then a simple conversion is performed on the codepage-invariant
  1501. * subset ("invariant characters") of the platform encoding. See utypes.h.
  1502. * If <TT>target</TT> is NULL, then the number of bytes required for
  1503. * <TT>target</TT> is returned. It is assumed that the target is big enough
  1504. * to fit all of the characters.
  1505. * @return the output string length, not including the terminating NUL
  1506. * @stable ICU 2.0
  1507. */
  1508. inline int32_t extract(int32_t start,
  1509. int32_t startLength,
  1510. char *target,
  1511. const char *codepage = 0) const;
  1512. /**
  1513. * Copy the characters in the range
  1514. * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
  1515. * in a specified codepage.
  1516. * This function does not write any more than <code>targetLength</code>
  1517. * characters but returns the length of the entire output string
  1518. * so that one can allocate a larger buffer and call the function again
  1519. * if necessary.
  1520. * The output string is NUL-terminated if possible.
  1521. *
  1522. * Recommendation: For invariant-character strings use
  1523. * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1524. * because it avoids object code dependencies of UnicodeString on
  1525. * the conversion code.
  1526. *
  1527. * @param start offset of first character which will be copied
  1528. * @param startLength the number of characters to extract
  1529. * @param target the target buffer for extraction
  1530. * @param targetLength the length of the target buffer
  1531. * @param codepage the desired codepage for the characters. 0 has
  1532. * the special meaning of the default codepage
  1533. * If <code>codepage</code> is an empty string (<code>""</code>),
  1534. * then a simple conversion is performed on the codepage-invariant
  1535. * subset ("invariant characters") of the platform encoding. See utypes.h.
  1536. * If <TT>target</TT> is NULL, then the number of bytes required for
  1537. * <TT>target</TT> is returned.
  1538. * @return the output string length, not including the terminating NUL
  1539. * @stable ICU 2.0
  1540. */
  1541. int32_t extract(int32_t start,
  1542. int32_t startLength,
  1543. char *target,
  1544. uint32_t targetLength,
  1545. const char *codepage) const;
  1546. /**
  1547. * Convert the UnicodeString into a codepage string using an existing UConverter.
  1548. * The output string is NUL-terminated if possible.
  1549. *
  1550. * This function avoids the overhead of opening and closing a converter if
  1551. * multiple strings are extracted.
  1552. *
  1553. * @param dest destination string buffer, can be NULL if destCapacity==0
  1554. * @param destCapacity the number of chars available at dest
  1555. * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
  1556. * or NULL for the default converter
  1557. * @param errorCode normal ICU error code
  1558. * @return the length of the output string, not counting the terminating NUL;
  1559. * if the length is greater than destCapacity, then the string will not fit
  1560. * and a buffer of the indicated length would need to be passed in
  1561. * @stable ICU 2.0
  1562. */
  1563. int32_t extract(char *dest, int32_t destCapacity,
  1564. UConverter *cnv,
  1565. UErrorCode &errorCode) const;
  1566. #endif
  1567. /**
  1568. * Create a temporary substring for the specified range.
  1569. * Unlike the substring constructor and setTo() functions,
  1570. * the object returned here will be a read-only alias (using getBuffer())
  1571. * rather than copying the text.
  1572. * As a result, this substring operation is much faster but requires
  1573. * that the original string not be modified or deleted during the lifetime
  1574. * of the returned substring object.
  1575. * @param start offset of the first character visible in the substring
  1576. * @param length length of the substring
  1577. * @return a read-only alias UnicodeString object for the substring
  1578. * @stable ICU 4.4
  1579. */
  1580. UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
  1581. /**
  1582. * Create a temporary substring for the specified range.
  1583. * Same as tempSubString(start, length) except that the substring range
  1584. * is specified as a (start, limit) pair (with an exclusive limit index)
  1585. * rather than a (start, length) pair.
  1586. * @param start offset of the first character visible in the substring
  1587. * @param limit offset immediately following the last character visible in the substring
  1588. * @return a read-only alias UnicodeString object for the substring
  1589. * @stable ICU 4.4
  1590. */
  1591. inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
  1592. /**
  1593. * Convert the UnicodeString to UTF-8 and write the result
  1594. * to a ByteSink. This is called by toUTF8String().
  1595. * Unpaired surrogates are replaced with U+FFFD.
  1596. * Calls u_strToUTF8WithSub().
  1597. *
  1598. * @param sink A ByteSink to which the UTF-8 version of the string is written.
  1599. * sink.Flush() is called at the end.
  1600. * @stable ICU 4.2
  1601. * @see toUTF8String
  1602. */
  1603. void toUTF8(ByteSink &sink) const;
  1604. #if U_HAVE_STD_STRING
  1605. /**
  1606. * Convert the UnicodeString to UTF-8 and append the result
  1607. * to a standard string.
  1608. * Unpaired surrogates are replaced with U+FFFD.
  1609. * Calls toUTF8().
  1610. *
  1611. * @param result A standard string (or a compatible object)
  1612. * to which the UTF-8 version of the string is appended.
  1613. * @return The string object.
  1614. * @stable ICU 4.2
  1615. * @see toUTF8
  1616. */
  1617. template<typename StringClass>
  1618. StringClass &toUTF8String(StringClass &result) const {
  1619. StringByteSink<StringClass> utf8Sink(&result); // sink constructed over the caller's string
  1620. this->toUTF8(utf8Sink); // emit this string's UTF-8 form through the sink
  1621. return result; // hand the same string object back to the caller
  1622. }
  1623. #endif
  1624. /**
  1625. * Convert the UnicodeString to UTF-32.
  1626. * Unpaired surrogates are replaced with U+FFFD.
  1627. * Calls u_strToUTF32WithSub().
  1628. *
  1629. * @param utf32 destination string buffer, can be NULL if capacity==0
  1630. * @param capacity the number of UChar32s available at utf32
  1631. * @param errorCode Standard ICU error code. Its input value must
  1632. * pass the U_SUCCESS() test, or else the function returns
  1633. * immediately. Check for U_FAILURE() on output or use with
  1634. * function chaining. (See User Guide for details.)
  1635. * @return The length of the UTF-32 string.
  1636. * @see fromUTF32
  1637. * @stable ICU 4.2
  1638. */
  1639. int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
  1640. /* Length operations */
  1641. /**
  1642. * Return the length of the UnicodeString object.
  1643. * The length is the number of UChar code units that are in the UnicodeString.
  1644. * If you want the number of code points, please use countChar32().
  1645. * @return the length of the UnicodeString object
  1646. * @see countChar32
  1647. * @stable ICU 2.0
  1648. */
  1649. inline int32_t length(void) const;
  1650. /**
  1651. * Count Unicode code points in the length UChar code units of the string.
  1652. * A code point may occupy either one or two UChar code units.
  1653. * Counting code points involves reading all code units.
  1654. *
  1655. * This function is basically the inverse of moveIndex32().
  1656. *
  1657. * @param start the index of the first code unit to check
  1658. * @param length the number of UChar code units to check
  1659. * @return the number of code points in the specified code units
  1660. * @see length
  1661. * @stable ICU 2.0
  1662. */
  1663. int32_t
  1664. countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
  1665. /**
  1666. * Check if the length UChar code units of the string
  1667. * contain more Unicode code points than a certain number.
  1668. * This is more efficient than counting all code points in this part of the string
  1669. * and comparing that number with a threshold.
  1670. * This function may not need to scan the string at all if the length
  1671. * falls within a certain range, and
  1672. * never needs to count more than 'number+1' code points.
  1673. * Logically equivalent to (countChar32(start, length)>number).
  1674. * A Unicode code point may occupy either one or two UChar code units.
  1675. *
  1676. * @param start the index of the first code unit to check (0 for the entire string)
  1677. * @param length the number of UChar code units to check
  1678. * (use INT32_MAX for the entire string; remember that start/length
  1679. * values are pinned)
  1680. * @param number The number of code points in the (sub)string is compared against
  1681. * the 'number' parameter.
  1682. * @return Boolean value for whether the string contains more Unicode code points
  1683. * than 'number'. Same as (u_countChar32(s, length)>number).
  1684. * @see countChar32
  1685. * @see u_strHasMoreChar32Than
  1686. * @stable ICU 2.4
  1687. */
  1688. UBool
  1689. hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
  1690. /**
  1691. * Determine if this string is empty.
  1692. * @return TRUE if this string contains 0 characters, FALSE otherwise.
  1693. * @stable ICU 2.0
  1694. */
  1695. inline UBool isEmpty(void) const;
  1696. /**
  1697. * Return the capacity of the internal buffer of the UnicodeString object.
  1698. * This is useful together with the getBuffer functions.
  1699. * See there for details.
  1700. *
  1701. * @return the number of UChars available in the internal buffer
  1702. * @see getBuffer
  1703. * @stable ICU 2.0
  1704. */
  1705. inline int32_t getCapacity(void) const;
  1706. /* Other operations */
  1707. /**
  1708. * Generate a hash code for this object.
  1709. * @return The hash code of this UnicodeString.
  1710. * @stable ICU 2.0
  1711. */
  1712. inline int32_t hashCode(void) const;
  1713. /**
  1714. * Determine if this object contains a valid string.
  1715. * A bogus string has no value. It is different from an empty string,
  1716. * although in both cases isEmpty() returns TRUE and length() returns 0.
  1717. * setToBogus() and isBogus() can be used to indicate that no string value is available.
  1718. * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
  1719. * length() returns 0.
  1720. *
  1721. * @return TRUE if the string is bogus/invalid, FALSE otherwise
  1722. * @see setToBogus()
  1723. * @stable ICU 2.0
  1724. */
  1725. inline UBool isBogus(void) const;
  1726. //========================================
  1727. // Write operations
  1728. //========================================
  1729. /* Assignment operations */
  1730. /**
  1731. * Assignment operator. Replace the characters in this UnicodeString
  1732. * with the characters from <TT>srcText</TT>.
  1733. *
  1734. * Starting with ICU 2.4, the assignment operator and the copy constructor
  1735. * allocate a new buffer and copy the buffer contents even for readonly aliases.
  1736. * By contrast, the fastCopyFrom() function implements the old,
  1737. * more efficient but less safe behavior
  1738. * of making this string also a readonly alias to the same buffer.
  1739. *
  1740. * If the source object has an "open" buffer from getBuffer(minCapacity),
  1741. * then the copy is an empty string.
  1742. *
  1743. * @param srcText The text containing the characters to replace
  1744. * @return a reference to this
  1745. * @stable ICU 2.0
  1746. * @see fastCopyFrom
  1747. */
  1748. UnicodeString &operator=(const UnicodeString &srcText);
  1749. /**
  1750. * Almost the same as the assignment operator.
  1751. * Replace the characters in this UnicodeString
  1752. * with the characters from <code>srcText</code>.
  1753. *
  1754. * This function works the same as the assignment operator
  1755. * for all strings except for ones that are readonly aliases.
  1756. *
  1757. * Starting with ICU 2.4, the assignment operator and the copy constructor
  1758. * allocate a new buffer and copy the buffer contents even for readonly aliases.
  1759. * This function implements the old, more efficient but less safe behavior
  1760. * of making this string also a readonly alias to the same buffer.
  1761. *
  1762. * The fastCopyFrom function must be used only if it is known that the lifetime of
  1763. * this UnicodeString does not exceed the lifetime of the aliased buffer
  1764. * including its contents, for example for strings from resource bundles
  1765. * or aliases to string constants.
  1766. *
  1767. * If the source object has an "open" buffer from getBuffer(minCapacity),
  1768. * then the copy is an empty string.
  1769. *
  1770. * @param src The text containing the characters to replace.
  1771. * @return a reference to this
  1772. * @stable ICU 2.4
  1773. */
  1774. UnicodeString &fastCopyFrom(const UnicodeString &src);
  1775. #ifndef U_HIDE_DRAFT_API
  1776. #if U_HAVE_RVALUE_REFERENCES
  1777. /**
  1778. * Move assignment operator, might leave src in bogus state.
  1779. * This string will have the same contents and state that the source string had.
  1780. * The behavior is undefined if *this and src are the same object.
  1781. * @param src source string
  1782. * @return *this
  1783. * @draft ICU 56
  1784. */
  1785. UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT {
  1786. return moveFrom(src);
  1787. }
  1788. #endif
  1789. /**
  1790. * Move assignment, might leave src in bogus state.
  1791. * This string will have the same contents and state that the source string had.
  1792. * The behavior is undefined if *this and src are the same object.
  1793. *
  1794. * Can be called explicitly, does not need C++11 support.
  1795. * @param src source string
  1796. * @return *this
  1797. * @draft ICU 56
  1798. */
  1799. UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT;
  1800. /**
  1801. * Swap strings.
  1802. * @param other other string
  1803. * @draft ICU 56
  1804. */
  1805. void swap(UnicodeString &other) U_NOEXCEPT;
  1806. #endif /* U_HIDE_DRAFT_API */
  1807. /**
  1808. * Non-member UnicodeString swap function.
  1809. * @param s1 will get s2's contents and state
  1810. * @param s2 will get s1's contents and state
  1811. * @draft ICU 56
  1812. */
  1813. friend U_COMMON_API inline void U_EXPORT2
  1814. swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
  1815. s1.swap(s2);
  1816. }
  1817. /**
  1818. * Assignment operator. Replace the characters in this UnicodeString
  1819. * with the code unit <TT>ch</TT>.
  1820. * @param ch the code unit to replace
  1821. * @return a reference to this
  1822. * @stable ICU 2.0
  1823. */
  1824. inline UnicodeString& operator= (UChar ch);
  1825. /**
  1826. * Assignment operator. Replace the characters in this UnicodeString
  1827. * with the code point <TT>ch</TT>.
  1828. * @param ch the code point to replace
  1829. * @return a reference to this
  1830. * @stable ICU 2.0
  1831. */
  1832. inline UnicodeString& operator= (UChar32 ch);
  1833. /**
  1834. * Set the text in the UnicodeString object to the characters
  1835. * in <TT>srcText</TT> in the range
  1836. * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
  1837. * <TT>srcText</TT> is not modified.
  1838. * @param srcText the source for the new characters
  1839. * @param srcStart the offset into <TT>srcText</TT> where new characters
  1840. * will be obtained
  1841. * @return a reference to this
  1842. * @stable ICU 2.2
  1843. */
  1844. inline UnicodeString& setTo(const UnicodeString& srcText,
  1845. int32_t srcStart);
  1846. /**
  1847. * Set the text in the UnicodeString object to the characters
  1848. * in <TT>srcText</TT> in the range
  1849. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  1850. * <TT>srcText</TT> is not modified.
  1851. * @param srcText the source for the new characters
  1852. * @param srcStart the offset into <TT>srcText</TT> where new characters
  1853. * will be obtained
  1854. * @param srcLength the number of characters in <TT>srcText</TT> in the
  1855. * replace string.
  1856. * @return a reference to this
  1857. * @stable ICU 2.0
  1858. */
  1859. inline UnicodeString& setTo(const UnicodeString& srcText,
  1860. int32_t srcStart,
  1861. int32_t srcLength);
  1862. /**
  1863. * Set the text in the UnicodeString object to the characters in
  1864. * <TT>srcText</TT>.
  1865. * <TT>srcText</TT> is not modified.
  1866. * @param srcText the source for the new characters
  1867. * @return a reference to this
  1868. * @stable ICU 2.0
  1869. */
  1870. inline UnicodeString& setTo(const UnicodeString& srcText);
  1871. /**
  1872. * Set the characters in the UnicodeString object to the characters
  1873. * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
  1874. * @param srcChars the source for the new characters
  1875. * @param srcLength the number of Unicode characters in srcChars.
  1876. * @return a reference to this
  1877. * @stable ICU 2.0
  1878. */
  1879. inline UnicodeString& setTo(const UChar *srcChars,
  1880. int32_t srcLength);
  1881. /**
  1882. * Set the characters in the UnicodeString object to the code unit
  1883. * <TT>srcChar</TT>.
  1884. * @param srcChar the code unit which becomes the UnicodeString's character
  1885. * content
  1886. * @return a reference to this
  1887. * @stable ICU 2.0
  1888. */
  1889. UnicodeString& setTo(UChar srcChar);
  1890. /**
  1891. * Set the characters in the UnicodeString object to the code point
  1892. * <TT>srcChar</TT>.
  1893. * @param srcChar the code point which becomes the UnicodeString's character
  1894. * content
  1895. * @return a reference to this
  1896. * @stable ICU 2.0
  1897. */
  1898. UnicodeString& setTo(UChar32 srcChar);
  1899. /**
  1900. * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
  1901. * The text will be used for the UnicodeString object, but
  1902. * it will not be released when the UnicodeString is destroyed.
  1903. * This has copy-on-write semantics:
  1904. * When the string is modified, then the buffer is first copied into
  1905. * newly allocated memory.
  1906. * The aliased buffer is never modified.
  1907. *
  1908. * In an assignment to another UnicodeString, when using the copy constructor
  1909. * or the assignment operator, the text will be copied.
  1910. * When using fastCopyFrom(), the text will be aliased again,
  1911. * so that both strings then alias the same readonly-text.
  1912. *
  1913. * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
  1914. * This must be true if <code>textLength==-1</code>.
  1915. * @param text The characters to alias for the UnicodeString.
  1916. * @param textLength The number of Unicode characters in <code>text</code> to alias.
  1917. * If -1, then this function will determine the length
  1918. * by calling <code>u_strlen()</code>.
  1919. * @return a reference to this
  1920. * @stable ICU 2.0
  1921. */
  1922. UnicodeString &setTo(UBool isTerminated,
  1923. const UChar *text,
  1924. int32_t textLength);
  1925. /**
  1926. * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
  1927. * The text will be used for the UnicodeString object, but
  1928. * it will not be released when the UnicodeString is destroyed.
  1929. * This has write-through semantics:
  1930. * For as long as the capacity of the buffer is sufficient, write operations
  1931. * will directly affect the buffer. When more capacity is necessary, then
  1932. * a new buffer will be allocated and the contents copied as with regularly
  1933. * constructed strings.
  1934. * In an assignment to another UnicodeString, the buffer will be copied.
  1935. * The extract(UChar *dst) function detects whether the dst pointer is the same
  1936. * as the string buffer itself and will in this case not copy the contents.
  1937. *
  1938. * @param buffer The characters to alias for the UnicodeString.
  1939. * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
  1940. * @param buffCapacity The size of <code>buffer</code> in UChars.
  1941. * @return a reference to this
  1942. * @stable ICU 2.0
  1943. */
  1944. UnicodeString &setTo(UChar *buffer,
  1945. int32_t buffLength,
  1946. int32_t buffCapacity);
  1947. /**
  1948. * Make this UnicodeString object invalid.
  1949. * The string will test TRUE with isBogus().
  1950. *
  1951. * A bogus string has no value. It is different from an empty string.
  1952. * It can be used to indicate that no string value is available.
  1953. * getBuffer() and getTerminatedBuffer() return NULL, and
  1954. * length() returns 0.
  1955. *
  1956. * This utility function is used throughout the UnicodeString
  1957. * implementation to indicate that a UnicodeString operation failed,
  1958. * and may be used in other functions,
  1959. * especially but not exclusively when such functions do not
  1960. * take a UErrorCode for simplicity.
  1961. *
  1962. * The following methods, and no others, will clear a string object's bogus flag:
  1963. * - remove()
  1964. * - remove(0, INT32_MAX)
  1965. * - truncate(0)
  1966. * - operator=() (assignment operator)
  1967. * - setTo(...)
  1968. *
  1969. * The simplest way to turn a bogus string into an empty one
  1970. * is to use the remove() function.
  1971. * Examples for other functions that are equivalent to "set to empty string":
  1972. * \code
  1973. * if(s.isBogus()) {
  1974. * s.remove(); // set to an empty string (remove all), or
  1975. * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
  1976. * s.truncate(0); // set to an empty string (complete truncation), or
  1977. * s=UnicodeString(); // assign an empty string, or
  1978. * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
  1979. * static const UChar nul=0;
  1980. * s.setTo(&nul, 0); // set to an empty C Unicode string
  1981. * }
  1982. * \endcode
  1983. *
  1984. * @see isBogus()
  1985. * @stable ICU 2.0
  1986. */
  1987. void setToBogus();
  1988. /**
  1989. * Set the character at the specified offset to the specified character.
  1990. * @param offset A valid offset into the text of the character to set
  1991. * @param ch The new character
  1992. * @return A reference to this
  1993. * @stable ICU 2.0
  1994. */
  1995. UnicodeString& setCharAt(int32_t offset,
  1996. UChar ch);
  1997. /* Append operations */
  1998. /**
  1999. * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
  2000. * object.
  2001. * @param ch the code unit to be appended
  2002. * @return a reference to this
  2003. * @stable ICU 2.0
  2004. */
  2005. inline UnicodeString& operator+= (UChar ch);
  2006. /**
  2007. * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
  2008. * object.
  2009. * @param ch the code point to be appended
  2010. * @return a reference to this
  2011. * @stable ICU 2.0
  2012. */
  2013. inline UnicodeString& operator+= (UChar32 ch);
  2014. /**
  2015. * Append operator. Append the characters in <TT>srcText</TT> to the
  2016. * UnicodeString object. <TT>srcText</TT> is not modified.
  2017. * @param srcText the source for the new characters
  2018. * @return a reference to this
  2019. * @stable ICU 2.0
  2020. */
  2021. inline UnicodeString& operator+= (const UnicodeString& srcText);
  2022. /**
  2023. * Append the characters
  2024. * in <TT>srcText</TT> in the range
  2025. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
  2026. * end of the UnicodeString object. <TT>srcText</TT>
  2027. * is not modified.
  2028. * @param srcText the source for the new characters
  2029. * @param srcStart the offset into <TT>srcText</TT> where new characters
  2030. * will be obtained
  2031. * @param srcLength the number of characters in <TT>srcText</TT> in
  2032. * the append string
  2033. * @return a reference to this
  2034. * @stable ICU 2.0
  2035. */
  2036. inline UnicodeString& append(const UnicodeString& srcText,
  2037. int32_t srcStart,
  2038. int32_t srcLength);
  2039. /**
  2040. * Append the characters in <TT>srcText</TT> to the UnicodeString object.
  2041. * <TT>srcText</TT> is not modified.
  2042. * @param srcText the source for the new characters
  2043. * @return a reference to this
  2044. * @stable ICU 2.0
  2045. */
  2046. inline UnicodeString& append(const UnicodeString& srcText);
  2047. /**
  2048. * Append the characters in <TT>srcChars</TT> in the range
  2049. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of the
  2050. * UnicodeString object.
  2051. * <TT>srcChars</TT> is not modified.
  2052. * @param srcChars the source for the new characters
  2053. * @param srcStart the offset into <TT>srcChars</TT> where new characters
  2054. * will be obtained
  2055. * @param srcLength the number of characters in <TT>srcChars</TT> in
  2056. * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
  2057. * @return a reference to this
  2058. * @stable ICU 2.0
  2059. */
  2060. inline UnicodeString& append(const UChar *srcChars,
  2061. int32_t srcStart,
  2062. int32_t srcLength);
  2063. /**
  2064. * Append the characters in <TT>srcChars</TT> to the end of the UnicodeString
  2065. * object. <TT>srcChars</TT> is not modified.
  2066. * @param srcChars the source for the new characters
  2067. * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
  2068. * can be -1 if <TT>srcChars</TT> is NUL-terminated
  2069. * @return a reference to this
  2070. * @stable ICU 2.0
  2071. */
  2072. inline UnicodeString& append(const UChar *srcChars,
  2073. int32_t srcLength);
  2074. /**
  2075. * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
  2076. * @param srcChar the code unit to append
  2077. * @return a reference to this
  2078. * @stable ICU 2.0
  2079. */
  2080. inline UnicodeString& append(UChar srcChar);
  2081. /**
  2082. * Append the code point <TT>srcChar</TT> to the UnicodeString object.
  2083. * @param srcChar the code point to append
  2084. * @return a reference to this
  2085. * @stable ICU 2.0
  2086. */
  2087. UnicodeString& append(UChar32 srcChar);
  2088. /* Insert operations */
  2089. /**
  2090. * Insert the characters in <TT>srcText</TT> in the range
  2091. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
  2092. * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
  2093. * @param start the offset where the insertion begins
  2094. * @param srcText the source for the new characters
  2095. * @param srcStart the offset into <TT>srcText</TT> where new characters
  2096. * will be obtained
  2097. * @param srcLength the number of characters in <TT>srcText</TT> in
  2098. * the insert string
  2099. * @return a reference to this
  2100. * @stable ICU 2.0
  2101. */
  2102. inline UnicodeString& insert(int32_t start,
  2103. const UnicodeString& srcText,
  2104. int32_t srcStart,
  2105. int32_t srcLength);
  2106. /**
  2107. * Insert the characters in <TT>srcText</TT> into the UnicodeString object
  2108. * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
  2109. * @param start the offset where the insertion begins
  2110. * @param srcText the source for the new characters
  2111. * @return a reference to this
  2112. * @stable ICU 2.0
  2113. */
  2114. inline UnicodeString& insert(int32_t start,
  2115. const UnicodeString& srcText);
  2116. /**
  2117. * Insert the characters in <TT>srcChars</TT> in the range
  2118. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
  2119. * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
  2120. * @param start the offset at which the insertion begins
  2121. * @param srcChars the source for the new characters
  2122. * @param srcStart the offset into <TT>srcChars</TT> where new characters
  2123. * will be obtained
  2124. * @param srcLength the number of characters in <TT>srcChars</TT>
  2125. * in the insert string
  2126. * @return a reference to this
  2127. * @stable ICU 2.0
  2128. */
  2129. inline UnicodeString& insert(int32_t start,
  2130. const UChar *srcChars,
  2131. int32_t srcStart,
  2132. int32_t srcLength);
  2133. /**
  2134. * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
  2135. * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
  2136. * @param start the offset where the insertion begins
  2137. * @param srcChars the source for the new characters
  2138. * @param srcLength the number of Unicode characters in srcChars.
  2139. * @return a reference to this
  2140. * @stable ICU 2.0
  2141. */
  2142. inline UnicodeString& insert(int32_t start,
  2143. const UChar *srcChars,
  2144. int32_t srcLength);
  2145. /**
  2146. * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
  2147. * offset <TT>start</TT>.
  2148. * @param start the offset at which the insertion occurs
  2149. * @param srcChar the code unit to insert
  2150. * @return a reference to this
  2151. * @stable ICU 2.0
  2152. */
  2153. inline UnicodeString& insert(int32_t start,
  2154. UChar srcChar);
  2155. /**
  2156. * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
  2157. * offset <TT>start</TT>.
  2158. * @param start the offset at which the insertion occurs
  2159. * @param srcChar the code point to insert
  2160. * @return a reference to this
  2161. * @stable ICU 2.0
  2162. */
  2163. inline UnicodeString& insert(int32_t start,
  2164. UChar32 srcChar);
  2165. /* Replace operations */
  2166. /**
  2167. * Replace the characters in the range
  2168. * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  2169. * <TT>srcText</TT> in the range
  2170. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  2171. * <TT>srcText</TT> is not modified.
  2172. * @param start the offset at which the replace operation begins
  2173. * @param length the number of characters to replace. The character at
  2174. * <TT>start + length</TT> is not modified.
  2175. * @param srcText the source for the new characters
  2176. * @param srcStart the offset into <TT>srcText</TT> where new characters
  2177. * will be obtained
  2178. * @param srcLength the number of characters in <TT>srcText</TT> in
  2179. * the replace string
  2180. * @return a reference to this
  2181. * @stable ICU 2.0
  2182. */
  2183. UnicodeString& replace(int32_t start,
  2184. int32_t length,
  2185. const UnicodeString& srcText,
  2186. int32_t srcStart,
  2187. int32_t srcLength);
  2188. /**
  2189. * Replace the characters in the range
  2190. * [<TT>start</TT>, <TT>start + length</TT>)
  2191. * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
  2192. * not modified.
  2193. * @param start the offset at which the replace operation begins
  2194. * @param length the number of characters to replace. The character at
  2195. * <TT>start + length</TT> is not modified.
  2196. * @param srcText the source for the new characters
  2197. * @return a reference to this
  2198. * @stable ICU 2.0
  2199. */
  2200. UnicodeString& replace(int32_t start,
  2201. int32_t length,
  2202. const UnicodeString& srcText);
  2203. /**
  2204. * Replace the characters in the range
  2205. * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  2206. * <TT>srcChars</TT> in the range
  2207. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
  2208. * is not modified.
  2209. * @param start the offset at which the replace operation begins
  2210. * @param length the number of characters to replace. The character at
  2211. * <TT>start + length</TT> is not modified.
  2212. * @param srcChars the source for the new characters
  2213. * @param srcStart the offset into <TT>srcChars</TT> where new characters
  2214. * will be obtained
  2215. * @param srcLength the number of characters in <TT>srcChars</TT>
  2216. * in the replace string
  2217. * @return a reference to this
  2218. * @stable ICU 2.0
  2219. */
  2220. UnicodeString& replace(int32_t start,
  2221. int32_t length,
  2222. const UChar *srcChars,
  2223. int32_t srcStart,
  2224. int32_t srcLength);
  2225. /**
  2226. * Replace the characters in the range
  2227. * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  2228. * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
  2229. * @param start the offset at which the replace operation begins
  2230. * @param length number of characters to replace. The character at
  2231. * <TT>start + length</TT> is not modified.
  2232. * @param srcChars the source for the new characters
  2233. * @param srcLength the number of Unicode characters in srcChars
  2234. * @return a reference to this
  2235. * @stable ICU 2.0
  2236. */
  2237. inline UnicodeString& replace(int32_t start,
  2238. int32_t length,
  2239. const UChar *srcChars,
  2240. int32_t srcLength);
  2241. /**
  2242. * Replace the characters in the range
  2243. * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
  2244. * <TT>srcChar</TT>.
  2245. * @param start the offset at which the replace operation begins
  2246. * @param length the number of characters to replace. The character at
  2247. * <TT>start + length</TT> is not modified.
  2248. * @param srcChar the new code unit
  2249. * @return a reference to this
  2250. * @stable ICU 2.0
  2251. */
  2252. inline UnicodeString& replace(int32_t start,
  2253. int32_t length,
  2254. UChar srcChar);
  2255. /**
  2256. * Replace the characters in the range
  2257. * [<TT>start</TT>, <TT>start + length</TT>) with the code point
  2258. * <TT>srcChar</TT>.
  2259. * @param start the offset at which the replace operation begins
  2260. * @param length the number of characters to replace. The character at
  2261. * <TT>start + length</TT> is not modified.
  2262. * @param srcChar the new code point
  2263. * @return a reference to this
  2264. * @stable ICU 2.0
  2265. */
  2266. UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
  2267. /**
  2268. * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
  2269. * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
  2270. * @param start the offset at which the replace operation begins
  2271. * @param limit the offset immediately following the replace range
  2272. * @param srcText the source for the new characters
  2273. * @return a reference to this
  2274. * @stable ICU 2.0
  2275. */
  2276. inline UnicodeString& replaceBetween(int32_t start,
  2277. int32_t limit,
  2278. const UnicodeString& srcText);
  2279. /**
  2280. * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
  2281. * with the characters in <TT>srcText</TT> in the range
  2282. * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
  2283. * @param start the offset at which the replace operation begins
  2284. * @param limit the offset immediately following the replace range
  2285. * @param srcText the source for the new characters
  2286. * @param srcStart the offset into <TT>srcText</TT> where new characters
  2287. * will be obtained
  2288. * @param srcLimit the offset immediately following the range to copy
  2289. * in <TT>srcText</TT>
  2290. * @return a reference to this
  2291. * @stable ICU 2.0
  2292. */
  2293. inline UnicodeString& replaceBetween(int32_t start,
  2294. int32_t limit,
  2295. const UnicodeString& srcText,
  2296. int32_t srcStart,
  2297. int32_t srcLimit);
  2298. /**
  2299. * Replace a substring of this object with the given text.
  2300. * @param start the beginning index, inclusive; <code>0 <= start
  2301. * <= limit</code>.
  2302. * @param limit the ending index, exclusive; <code>start <= limit
  2303. * <= length()</code>.
  2304. * @param text the text to replace characters <code>start</code>
  2305. * to <code>limit - 1</code>
  2306. * @stable ICU 2.0
  2307. */
  2308. virtual void handleReplaceBetween(int32_t start,
  2309. int32_t limit,
  2310. const UnicodeString& text);
  2311. /**
  2312. * Replaceable API
  2313. * @return TRUE if it has MetaData
  2314. * @stable ICU 2.4
  2315. */
  2316. virtual UBool hasMetaData() const;
  2317. /**
  2318. * Copy a substring of this object, retaining attribute (out-of-band)
  2319. * information. This method is used to duplicate or reorder substrings.
  2320. * The destination index must not overlap the source range.
  2321. *
  2322. * @param start the beginning index, inclusive; <code>0 <= start <=
  2323. * limit</code>.
  2324. * @param limit the ending index, exclusive; <code>start <= limit <=
  2325. * length()</code>.
  2326. * @param dest the destination index. The characters from
  2327. * <code>start..limit-1</code> will be copied to <code>dest</code>.
  2328. * Implementations of this method may assume that <code>dest <= start ||
  2329. * dest >= limit</code>.
  2330. * @stable ICU 2.0
  2331. */
  2332. virtual void copy(int32_t start, int32_t limit, int32_t dest);
  2333. /* Search and replace operations */
  2334. /**
  2335. * Replace all occurrences of characters in oldText with the characters
  2336. * in newText
  2337. * @param oldText the text containing the search text
  2338. * @param newText the text containing the replacement text
  2339. * @return a reference to this
  2340. * @stable ICU 2.0
  2341. */
  2342. inline UnicodeString& findAndReplace(const UnicodeString& oldText,
  2343. const UnicodeString& newText);
  2344. /**
  2345. * Replace all occurrences of characters in oldText with characters
  2346. * in newText
  2347. * in the range [<TT>start</TT>, <TT>start + length</TT>).
  2348. * @param start the start of the range in which replace will be performed
  2349. * @param length the length of the range in which replace will be performed
  2350. * @param oldText the text containing the search text
  2351. * @param newText the text containing the replacement text
  2352. * @return a reference to this
  2353. * @stable ICU 2.0
  2354. */
  2355. inline UnicodeString& findAndReplace(int32_t start,
  2356. int32_t length,
  2357. const UnicodeString& oldText,
  2358. const UnicodeString& newText);
  2359. /**
  2360. * Replace all occurrences of characters in oldText in the range
  2361. * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
  2362. * in newText in the range
  2363. * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
  2364. * in the range [<TT>start</TT>, <TT>start + length</TT>).
  2365. * @param start the start of the range in which replace will be performed
  2366. * @param length the length of the range in which replace will be performed
  2367. * @param oldText the text containing the search text
  2368. * @param oldStart the start of the search range in <TT>oldText</TT>
  2369. * @param oldLength the length of the search range in <TT>oldText</TT>
  2370. * @param newText the text containing the replacement text
  2371. * @param newStart the start of the replacement range in <TT>newText</TT>
  2372. * @param newLength the length of the replacement range in <TT>newText</TT>
  2373. * @return a reference to this
  2374. * @stable ICU 2.0
  2375. */
  2376. UnicodeString& findAndReplace(int32_t start,
  2377. int32_t length,
  2378. const UnicodeString& oldText,
  2379. int32_t oldStart,
  2380. int32_t oldLength,
  2381. const UnicodeString& newText,
  2382. int32_t newStart,
  2383. int32_t newLength);
  2384. /* Remove operations */
  2385. /**
  2386. * Remove all characters from the UnicodeString object.
  2387. * @return a reference to this
  2388. * @stable ICU 2.0
  2389. */
  2390. inline UnicodeString& remove(void);
  2391. /**
  2392. * Remove the characters in the range
  2393. * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
  2394. * @param start the offset of the first character to remove
  2395. * @param length the number of characters to remove; defaults to INT32_MAX
  2396. * @return a reference to this
  2397. * @stable ICU 2.0
  2398. */
  2399. inline UnicodeString& remove(int32_t start,
  2400. int32_t length = (int32_t)INT32_MAX);
  2401. /**
  2402. * Remove the characters in the range
  2403. * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
  2404. * @param start the offset of the first character to remove
  2405. * @param limit the offset immediately following the range to remove; defaults to INT32_MAX
  2406. * @return a reference to this
  2407. * @stable ICU 2.0
  2408. */
  2409. inline UnicodeString& removeBetween(int32_t start,
  2410. int32_t limit = (int32_t)INT32_MAX);
  2411. /**
  2412. * Retain only the characters in the range
  2413. * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
  2414. * Removes characters before <code>start</code> and at and after <code>limit</code>.
  2415. * @param start the offset of the first character to retain
  2416. * @param limit the offset immediately following the range to retain; defaults to INT32_MAX
  2417. * @return a reference to this
  2418. * @stable ICU 4.4
  2419. */
  2420. inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
  2421. /* Length operations */
  2422. /**
  2423. * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
  2424. * If the length of this UnicodeString is less than targetLength,
  2425. * targetLength - length() copies of padChar will be added to the
  2426. * beginning of this UnicodeString.
  2427. * @param targetLength the desired length of the string
  2428. * @param padChar the character to use for padding. Defaults to
  2429. * space (U+0020)
  2430. * @return TRUE if the text was padded, FALSE otherwise.
  2431. * @stable ICU 2.0
  2432. */
  2433. UBool padLeading(int32_t targetLength,
  2434. UChar padChar = 0x0020);
  2435. /**
  2436. * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
  2437. * If the length of this UnicodeString is less than targetLength,
  2438. * targetLength - length() copies of padChar will be added to the
  2439. * end of this UnicodeString.
  2440. * @param targetLength the desired length of the string
  2441. * @param padChar the character to use for padding. Defaults to
  2442. * space (U+0020)
  2443. * @return TRUE if the text was padded, FALSE otherwise.
  2444. * @stable ICU 2.0
  2445. */
  2446. UBool padTrailing(int32_t targetLength,
  2447. UChar padChar = 0x0020);
  2448. /**
  2449. * Truncate this UnicodeString to a length of <TT>targetLength</TT>.
  2450. * @param targetLength the desired length of this UnicodeString.
  2451. * @return TRUE if the text was truncated, FALSE otherwise
  2452. * @stable ICU 2.0
  2453. */
  2454. inline UBool truncate(int32_t targetLength);
  2455. /**
  2456. * Trim leading and trailing whitespace from this UnicodeString.
  2457. * @return a reference to this
  2458. * @stable ICU 2.0
  2459. */
  2460. UnicodeString& trim(void);
  2461. /* Miscellaneous operations */
  2462. /**
  2463. * Reverse this UnicodeString in place.
  2464. * @return a reference to this
  2465. * @stable ICU 2.0
  2466. */
  2467. inline UnicodeString& reverse(void);
  2468. /**
  2469. * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
  2470. * this UnicodeString.
  2471. * @param start the start of the range to reverse
  2472. * @param length the number of characters to reverse
  2473. * @return a reference to this
  2474. * @stable ICU 2.0
  2475. */
  2476. inline UnicodeString& reverse(int32_t start,
  2477. int32_t length);
  2478. /**
  2479. * Convert the characters in this string to upper case following the conventions of
  2480. * the default locale.
  2481. * @return A reference to this.
  2482. * @stable ICU 2.0
  2483. */
  2484. UnicodeString& toUpper(void);
  2485. /**
  2486. * Convert the characters in this string to upper case following the conventions of
  2487. * a specific locale.
  2488. * @param locale The locale containing the conventions to use.
  2489. * @return A reference to this.
  2490. * @stable ICU 2.0
  2491. */
  2492. UnicodeString& toUpper(const Locale& locale);
  2493. /**
  2494. * Convert the characters in this string to lower case following the conventions of
  2495. * the default locale.
  2496. * @return A reference to this.
  2497. * @stable ICU 2.0
  2498. */
  2499. UnicodeString& toLower(void);
  2500. /**
  2501. * Convert the characters in this string to lower case following the conventions of
  2502. * a specific locale.
  2503. * @param locale The locale containing the conventions to use.
  2504. * @return A reference to this.
  2505. * @stable ICU 2.0
  2506. */
  2507. UnicodeString& toLower(const Locale& locale);
  2508. #if !UCONFIG_NO_BREAK_ITERATION
  2509. /**
  2510. * Titlecase this string; convenience function that uses the default locale.
  2511. *
  2512. * Casing is locale-dependent and context-sensitive.
  2513. * Titlecasing uses a break iterator to find the first characters of words
  2514. * that are to be titlecased. It titlecases those characters and lowercases
  2515. * all others.
  2516. *
  2517. * The titlecase break iterator can be provided to customize for arbitrary
  2518. * styles, using rules and dictionaries beyond the standard iterators.
  2519. * It may be more efficient to always provide an iterator to avoid
  2520. * opening and closing one for each string.
  2521. * The standard titlecase iterator for the root locale implements the
  2522. * algorithm of Unicode TR 21.
  2523. *
  2524. * This function uses only the setText(), first() and next() methods of the
  2525. * provided break iterator.
  2526. *
  2527. * @param titleIter A break iterator to find the first characters of words
  2528. * that are to be titlecased.
  2529. * If none is provided (NULL), then a standard titlecase
  2530. * break iterator is opened.
  2531. * Otherwise the provided iterator is set to the string's text.
  2532. * @return A reference to this.
  2533. * @stable ICU 2.1
  2534. */
  2535. UnicodeString &toTitle(BreakIterator *titleIter);
  2536. /**
  2537. * Titlecase this string.
  2538. *
  2539. * Casing is locale-dependent and context-sensitive.
  2540. * Titlecasing uses a break iterator to find the first characters of words
  2541. * that are to be titlecased. It titlecases those characters and lowercases
  2542. * all others.
  2543. *
  2544. * The titlecase break iterator can be provided to customize for arbitrary
  2545. * styles, using rules and dictionaries beyond the standard iterators.
  2546. * It may be more efficient to always provide an iterator to avoid
  2547. * opening and closing one for each string.
  2548. * The standard titlecase iterator for the root locale implements the
  2549. * algorithm of Unicode TR 21.
  2550. *
  2551. * This function uses only the setText(), first() and next() methods of the
  2552. * provided break iterator.
  2553. *
  2554. * @param titleIter A break iterator to find the first characters of words
  2555. * that are to be titlecased.
  2556. * If none is provided (NULL), then a standard titlecase
  2557. * break iterator is opened.
  2558. * Otherwise the provided iterator is set to the string's text.
  2559. * @param locale The locale to consider.
  2560. * @return A reference to this.
  2561. * @stable ICU 2.1
  2562. */
  2563. UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
  2564. /**
  2565. * Titlecase this string, with options.
  2566. *
  2567. * Casing is locale-dependent and context-sensitive.
  2568. * Titlecasing uses a break iterator to find the first characters of words
  2569. * that are to be titlecased. It titlecases those characters and lowercases
  2570. * all others. (This can be modified with options.)
  2571. *
  2572. * The titlecase break iterator can be provided to customize for arbitrary
  2573. * styles, using rules and dictionaries beyond the standard iterators.
  2574. * It may be more efficient to always provide an iterator to avoid
  2575. * opening and closing one for each string.
  2576. * The standard titlecase iterator for the root locale implements the
  2577. * algorithm of Unicode TR 21.
  2578. *
  2579. * This function uses only the setText(), first() and next() methods of the
  2580. * provided break iterator.
  2581. *
  2582. * @param titleIter A break iterator to find the first characters of words
  2583. * that are to be titlecased.
  2584. * If none is provided (NULL), then a standard titlecase
  2585. * break iterator is opened.
  2586. * Otherwise the provided iterator is set to the string's text.
  2587. * @param locale The locale to consider.
  2588. * @param options Options bit set, see ucasemap_open().
  2589. * @return A reference to this.
  2590. * @see U_TITLECASE_NO_LOWERCASE
  2591. * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
  2592. * @see ucasemap_open
  2593. * @stable ICU 3.8
  2594. */
  2595. UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
  2596. #endif
  2597. /**
  2598. * Case-folds the characters in this string.
  2599. *
  2600. * Case-folding is locale-independent and not context-sensitive,
  2601. * but there is an option for whether to include or exclude mappings for dotted I
  2602. * and dotless i that are marked with 'T' in CaseFolding.txt.
  2603. *
  2604. * The result may be longer or shorter than the original.
  2605. *
  2606. * @param options Either U_FOLD_CASE_DEFAULT (the default) or U_FOLD_CASE_EXCLUDE_SPECIAL_I
  2607. * @return A reference to this.
  2608. * @stable ICU 2.0
  2609. */
  2610. UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
  2611. //========================================
  2612. // Access to the internal buffer
  2613. //========================================
  2614. /**
  2615. * Get a read/write pointer to the internal buffer.
  2616. * The buffer is guaranteed to be large enough for at least minCapacity UChars,
  2617. * writable, and is still owned by the UnicodeString object.
  2618. * Calls to getBuffer(minCapacity) must not be nested, and
  2619. * must be matched with calls to releaseBuffer(newLength).
  2620. * If the string buffer was read-only or shared,
  2621. * then it will be reallocated and copied.
  2622. *
  2623. * An attempted nested call will return 0, and will not further modify the
  2624. * state of the UnicodeString object.
  2625. * It also returns 0 if the string is bogus.
  2626. *
  2627. * The actual capacity of the string buffer may be larger than minCapacity.
  2628. * getCapacity() returns the actual capacity.
  2629. * For many operations, the full capacity should be used to avoid reallocations.
  2630. *
  2631. * While the buffer is "open" between getBuffer(minCapacity)
  2632. * and releaseBuffer(newLength), the following applies:
  2633. * - The string length is set to 0.
  2634. * - Any read API call on the UnicodeString object will behave like on a 0-length string.
  2635. * - Any write API call on the UnicodeString object is disallowed and will have no effect.
  2636. * - You can read from and write to the returned buffer.
  2637. * - The previous string contents will still be in the buffer;
  2638. * if you want to use it, then you need to call length() before getBuffer(minCapacity).
  2639. * If the length() was greater than minCapacity, then any contents after minCapacity
  2640. * may be lost.
  2641. * The buffer contents are not NUL-terminated by getBuffer().
  2642. * If length()<getCapacity() then you can terminate it by writing a NUL
  2643. * at index length().
  2644. * - You must call releaseBuffer(newLength) in order to
  2645. * return to normal UnicodeString operation.
  2646. *
  2647. * @param minCapacity the minimum number of UChars that are to be available
  2648. * in the buffer, starting at the returned pointer;
  2649. * defaults to the current string capacity if minCapacity==-1
  2650. * @return a writable pointer to the internal string buffer,
  2651. * or 0 if an error occurs (nested calls, bogus string, out of memory)
  2652. *
  2653. * @see releaseBuffer
  2654. * @see getTerminatedBuffer()
  2655. * @stable ICU 2.0
  2656. */
  2657. UChar *getBuffer(int32_t minCapacity);
  2658. /**
  2659. * Release a read/write buffer on a UnicodeString object with an
  2660. * "open" getBuffer(minCapacity).
  2661. * This function must be called in a matched pair with getBuffer(minCapacity).
  2662. * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
  2663. *
  2664. * It will set the string length to newLength, at most to the current capacity.
  2665. * If newLength==-1 then it will set the length according to the
  2666. * first NUL in the buffer, or to the capacity if there is no NUL.
  2667. *
  2668. * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
  2669. *
  2670. * @param newLength the new length of the UnicodeString object;
  2671. * limited to the current capacity if newLength is greater than that;
  2672. * if newLength==-1 (the default), it is set to u_strlen(buffer) but not more than
  2673. * the current capacity of the string
  2674. *
  2675. * @see getBuffer(int32_t minCapacity)
  2676. * @stable ICU 2.0
  2677. */
  2678. void releaseBuffer(int32_t newLength=-1);
  2679. /**
  2680. * Get a read-only pointer to the internal buffer.
  2681. * This can be called at any time on a valid UnicodeString.
  2682. *
  2683. * It returns 0 if the string is bogus, or
  2684. * during an "open" getBuffer(minCapacity).
  2685. *
  2686. * It can be called as many times as desired.
  2687. * The pointer that it returns will remain valid until the UnicodeString object is modified,
  2688. * at which time the pointer is semantically invalidated and must not be used any more.
  2689. *
  2690. * The capacity of the buffer can be determined with getCapacity().
  2691. * The part after length() may or may not be initialized and valid,
  2692. * depending on the history of the UnicodeString object.
  2693. *
  2694. * The buffer contents are (probably) not NUL-terminated.
  2695. * You can check if it is with
  2696. * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
  2697. * (See getTerminatedBuffer().)
  2698. *
  2699. * The buffer may reside in read-only memory. Its contents must not
  2700. * be modified.
  2701. *
  2702. * @return a read-only pointer to the internal string buffer,
  2703. * or 0 if the string is empty or bogus, or during an "open" getBuffer(minCapacity)
  2704. *
  2705. * @see getBuffer(int32_t minCapacity)
  2706. * @see getTerminatedBuffer()
  2707. * @stable ICU 2.0
  2708. */
  2709. inline const UChar *getBuffer() const;
  2710. /**
  2711. * Get a read-only pointer to the internal buffer,
  2712. * making sure that it is NUL-terminated.
  2713. * This can be called at any time on a valid UnicodeString.
  2714. *
  2715. * It returns 0 if the string is bogus, or
  2716. * during an "open" getBuffer(minCapacity), or if the buffer cannot
  2717. * be NUL-terminated (because memory allocation failed).
  2718. *
  2719. * It can be called as many times as desired.
  2720. * The pointer that it returns will remain valid until the UnicodeString object is modified,
  2721. * at which time the pointer is semantically invalidated and must not be used any more.
  2722. *
  2723. * The capacity of the buffer can be determined with getCapacity().
  2724. * The part after length()+1 may or may not be initialized and valid,
  2725. * depending on the history of the UnicodeString object.
  2726. *
  2727. * The buffer contents are guaranteed to be NUL-terminated.
  2728. * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
  2729. * is written.
  2730. * For this reason, this function is not const, unlike getBuffer().
  2731. * Note that a UnicodeString may also contain NUL characters as part of its contents.
  2732. *
  2733. * The buffer may reside in read-only memory. Its contents must not
  2734. * be modified.
  2735. *
  2736. * @return a read-only pointer to the internal string buffer,
  2737. * or 0 if the string is empty or bogus, during an "open" getBuffer(minCapacity), or if NUL-termination fails
  2738. *
  2739. * @see getBuffer(int32_t minCapacity)
  2740. * @see getBuffer()
  2741. * @stable ICU 2.2
  2742. */
  2743. const UChar *getTerminatedBuffer();
  2744. //========================================
  2745. // Constructors
  2746. //========================================
  2747. /** Construct an empty UnicodeString.
  2748. * @stable ICU 2.0
  2749. */
  2750. inline UnicodeString();
  2751. /**
  2752. * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars.
  2753. * @param capacity the number of UChars this UnicodeString should hold
  2754. * before a resize is necessary; if count is greater than 0 and count
  2755. * code points c take up more space than capacity, then capacity is adjusted
  2756. * accordingly.
  2757. * @param c the code point used to initially fill the string
  2758. * @param count the number of copies of the code point c to write into the
  2759. * string
  2760. * @stable ICU 2.0
  2761. */
  2762. UnicodeString(int32_t capacity, UChar32 c, int32_t count);
  2763. /**
  2764. * Single UChar (code unit) constructor.
  2765. *
  2766. * It is recommended to mark this constructor "explicit" by
  2767. * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
  2768. * on the compiler command line or similar.
  2769. * @param ch the code unit to place in the UnicodeString
  2770. * @stable ICU 2.0
  2771. */
  2772. UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
  2773. /**
  2774. * Single UChar32 (code point) constructor.
  2775. *
  2776. * It is recommended to mark this constructor "explicit" by
  2777. * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
  2778. * on the compiler command line or similar.
  2779. * @param ch the code point to place in the UnicodeString
  2780. * @stable ICU 2.0
  2781. */
  2782. UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
  2783. /**
  2784. * UChar* constructor.
  2785. *
  2786. * It is recommended to mark this constructor "explicit" by
  2787. * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
  2788. * on the compiler command line or similar.
  2789. * @param text The characters to place in the UnicodeString. <TT>text</TT>
  2790. * must be NUL (U+0000) terminated.
  2791. * @stable ICU 2.0
  2792. */
  2793. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
  2794. /**
  2795. * UChar* constructor.
  2796. * @param text The characters to place in the UnicodeString.
  2797. * @param textLength The number of UChars in <TT>text</TT>
  2798. * to copy.
  2799. * @stable ICU 2.0
  2800. */
  2801. UnicodeString(const UChar *text,
  2802. int32_t textLength);
  2803. /**
  2804. * Readonly-aliasing UChar* constructor.
  2805. * The text will be used for the UnicodeString object, but
  2806. * it will not be released when the UnicodeString is destroyed.
  2807. * This has copy-on-write semantics:
  2808. * When the string is modified, then the buffer is first copied into
  2809. * newly allocated memory.
  2810. * The aliased buffer is never modified.
  2811. *
  2812. * In an assignment to another UnicodeString, when using the copy constructor
  2813. * or the assignment operator, the text will be copied.
  2814. * When using fastCopyFrom(), the text will be aliased again,
  2815. * so that both strings then alias the same read-only text.
  2816. *
  2817. * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
  2818. * This must be true if <code>textLength==-1</code>.
  2819. * @param text The characters to alias for the UnicodeString.
  2820. * @param textLength The number of UChars in <code>text</code> to alias.
  2821. * If -1, then this constructor will determine the length
  2822. * by calling <code>u_strlen()</code>.
  2823. * @stable ICU 2.0
  2824. */
  2825. UnicodeString(UBool isTerminated,
  2826. const UChar *text,
  2827. int32_t textLength);
  2828. /**
  2829. * Writable-aliasing UChar* constructor.
  2830. * The text will be used for the UnicodeString object, but
  2831. * it will not be released when the UnicodeString is destroyed.
  2832. * This has write-through semantics:
  2833. * For as long as the capacity of the buffer is sufficient, write operations
  2834. * will directly affect the buffer. When more capacity is necessary, then
  2835. * a new buffer will be allocated and the contents copied as with regularly
  2836. * constructed strings.
  2837. * In an assignment to another UnicodeString, the buffer will be copied.
  2838. * The extract(UChar *dst) function detects whether the dst pointer is the same
  2839. * as the string buffer itself and will in this case not copy the contents.
  2840. *
  2841. * @param buffer The characters to alias for the UnicodeString.
  2842. * @param buffLength The number of UChars in <code>buffer</code> to alias.
  2843. * @param buffCapacity The size of <code>buffer</code> in UChars.
  2844. * @stable ICU 2.0
  2845. */
  2846. UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
  2847. #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
  2848. /**
  2849. * char* constructor.
  2850. * Uses the default converter (and thus depends on the ICU conversion code)
  2851. * unless U_CHARSET_IS_UTF8 is set to 1.
  2852. *
  2853. * For ASCII (really "invariant character") strings it is more efficient to use
  2854. * the constructor that takes a US_INV (for its enum EInvariant).
  2855. * For ASCII (invariant-character) string literals, see UNICODE_STRING and
  2856. * UNICODE_STRING_SIMPLE.
  2857. *
  2858. * It is recommended to mark this constructor "explicit" by
  2859. * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
  2860. * on the compiler command line or similar.
  2861. * @param codepageData an array of bytes, NUL-terminated,
  2862. * in the platform's default codepage.
  2863. * @stable ICU 2.0
  2864. * @see UNICODE_STRING
  2865. * @see UNICODE_STRING_SIMPLE
  2866. */
  2867. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
  2868. /**
  2869. * char* constructor.
  2870. * Uses the default converter (and thus depends on the ICU conversion code)
  2871. * unless U_CHARSET_IS_UTF8 is set to 1.
  2872. * @param codepageData an array of bytes in the platform's default codepage.
  2873. * @param dataLength The number of bytes in <TT>codepageData</TT>.
  2874. * @stable ICU 2.0
  2875. */
  2876. UnicodeString(const char *codepageData, int32_t dataLength);
  2877. #endif
  2878. #if !UCONFIG_NO_CONVERSION
  2879. /**
  2880. * char* constructor.
  2881. * @param codepageData an array of bytes, NUL-terminated
  2882. * @param codepage the encoding of <TT>codepageData</TT>. The special
  2883. * value 0 for <TT>codepage</TT> indicates that the text is in the
  2884. * platform's default codepage.
  2885. *
  2886. * If <code>codepage</code> is an empty string (<code>""</code>),
  2887. * then a simple conversion is performed on the codepage-invariant
  2888. * subset ("invariant characters") of the platform encoding. See utypes.h.
  2889. * Recommendation: For invariant-character strings use the constructor
  2890. * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  2891. * because it avoids object code dependencies of UnicodeString on
  2892. * the conversion code.
  2893. *
  2894. * @stable ICU 2.0
  2895. */
  2896. UnicodeString(const char *codepageData, const char *codepage);
  2897. /**
  2898. * char* constructor.
  2899. * @param codepageData an array of bytes.
  2900. * @param dataLength The number of bytes in <TT>codepageData</TT>.
  2901. * @param codepage the encoding of <TT>codepageData</TT>. The special
  2902. * value 0 for <TT>codepage</TT> indicates that the text is in the
  2903. * platform's default codepage.
  2904. * If <code>codepage</code> is an empty string (<code>""</code>),
  2905. * then a simple conversion is performed on the codepage-invariant
  2906. * subset ("invariant characters") of the platform encoding. See utypes.h.
  2907. * Recommendation: For invariant-character strings use the constructor
  2908. * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  2909. * because it avoids object code dependencies of UnicodeString on
  2910. * the conversion code.
  2911. *
  2912. * @stable ICU 2.0
  2913. */
  2914. UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
  2915. /**
  2916. * char * / UConverter constructor.
  2917. * This constructor uses an existing UConverter object to
  2918. * convert the codepage string to Unicode and construct a UnicodeString
  2919. * from that.
  2920. *
  2921. * The converter is reset first.
  2922. * If the error code indicates a failure before this constructor is called,
  2923. * or if an error occurs during conversion or construction,
  2924. * then the string will be bogus.
  2925. *
  2926. * This function avoids the overhead of opening and closing a converter if
  2927. * multiple strings are constructed.
  2928. *
  2929. * @param src input codepage string
  2930. * @param srcLength length of the input string, can be -1 for NUL-terminated strings
  2931. * @param cnv converter object (ucnv_resetToUnicode() will be called),
  2932. * can be NULL for the default converter
  2933. * @param errorCode normal ICU error code
  2934. * @stable ICU 2.0
  2935. */
  2936. UnicodeString(
  2937. const char *src, int32_t srcLength,
  2938. UConverter *cnv,
  2939. UErrorCode &errorCode);
  2940. #endif
  2941. /**
  2942. * Constructs a Unicode string from an invariant-character char * string.
  2943. * About invariant characters see utypes.h.
  2944. * This constructor has no runtime dependency on conversion code and is
  2945. * therefore recommended over ones taking a charset name string
  2946. * (where the empty string "" indicates invariant-character conversion).
  2947. *
  2948. * Use the macro US_INV as the third, signature-distinguishing parameter.
  2949. *
  2950. * For example:
  2951. * \code
  2952. * void fn(const char *s) {
  2953. * UnicodeString ustr(s, -1, US_INV);
  2954. * // use ustr ...
  2955. * }
  2956. * \endcode
  2957. *
  2958. * @param src String using only invariant characters.
  2959. * @param length Length of src, or -1 if NUL-terminated.
  2960. * @param inv Signature-distinguishing parameter, use US_INV.
  2961. *
  2962. * @see US_INV
  2963. * @stable ICU 3.2
  2964. */
  2965. UnicodeString(const char *src, int32_t length, enum EInvariant inv);
  2966. /**
  2967. * Copy constructor.
  2968. *
  2969. * Starting with ICU 2.4, the assignment operator and the copy constructor
  2970. * allocate a new buffer and copy the buffer contents even for readonly aliases.
  2971. * By contrast, the fastCopyFrom() function implements the old,
  2972. * more efficient but less safe behavior
  2973. * of making this string also a readonly alias to the same buffer.
  2974. *
  2975. * If the source object has an "open" buffer from getBuffer(minCapacity),
  2976. * then the copy is an empty string.
  2977. *
  2978. * @param that The UnicodeString object to copy.
  2979. * @stable ICU 2.0
  2980. * @see fastCopyFrom
  2981. */
  2982. UnicodeString(const UnicodeString& that);
  2983. #ifndef U_HIDE_DRAFT_API
  2984. #if U_HAVE_RVALUE_REFERENCES
  2985. /**
  2986. * Move constructor; might leave src in a bogus state.
  2987. * This string will have the same contents and state that the source string had.
  2988. * @param src source string
  2989. * @draft ICU 56
  2990. */
  2991. UnicodeString(UnicodeString &&src) U_NOEXCEPT;
  2992. #endif
  2993. #endif /* U_HIDE_DRAFT_API */
  2994. /**
  2995. * 'Substring' constructor from tail of source string.
  2996. * @param src The UnicodeString object to copy.
  2997. * @param srcStart The offset into <tt>src</tt> at which to start copying; the tail of <tt>src</tt> from there is copied.
  2998. * @stable ICU 2.2
  2999. */
  3000. UnicodeString(const UnicodeString& src, int32_t srcStart);
  3001. /**
  3002. * 'Substring' constructor from subrange of source string.
  3003. * @param src The UnicodeString object to copy.
  3004. * @param srcStart The offset into <tt>src</tt> at which to start copying.
  3005. * @param srcLength The number of characters from <tt>src</tt> to copy.
  3006. * @stable ICU 2.2
  3007. */
  3008. UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  3009. /**
  3010. * Clone this object, an instance of a subclass of Replaceable.
  3011. * Clones can be used concurrently in multiple threads.
  3012. * If a subclass does not implement clone(), or if an error occurs,
  3013. * then NULL is returned.
  3014. * The clone functions in all subclasses return a pointer to a Replaceable
  3015. * because some compilers do not support covariant (same-as-this)
  3016. * return types; cast to the appropriate subclass if necessary.
  3017. * The caller must delete the clone.
  3018. *
  3019. * @return a clone of this object, or NULL if an error occurs
  3020. *
  3021. * @see Replaceable::clone
  3022. * @see getDynamicClassID
  3023. * @stable ICU 2.6
  3024. */
  3025. virtual Replaceable *clone() const;
  3026. /** Destructor (virtual).
  3027. * @stable ICU 2.0
  3028. */
  3029. virtual ~UnicodeString();
  3030. /**
  3031. * Create a UnicodeString from a UTF-8 string.
  3032. * Illegal input is replaced with U+FFFD. Other errors result in a bogus string.
  3033. * Calls u_strFromUTF8WithSub().
  3034. *
  3035. * @param utf8 UTF-8 input string.
  3036. * Note that a StringPiece can be implicitly constructed
  3037. * from a std::string or a NUL-terminated const char * string.
  3038. * @return A UnicodeString with equivalent UTF-16 contents.
  3039. * @see toUTF8
  3040. * @see toUTF8String
  3041. * @stable ICU 4.2
  3042. */
  3043. static UnicodeString fromUTF8(const StringPiece &utf8);
  3044. /**
  3045. * Create a UnicodeString from a UTF-32 string.
  3046. * Illegal input is replaced with U+FFFD. Other errors result in a bogus string.
  3047. * Calls u_strFromUTF32WithSub().
  3048. *
  3049. * @param utf32 UTF-32 input string. Must not be NULL.
  3050. * @param length Length of the input string, or -1 if NUL-terminated.
  3051. * @return A UnicodeString with equivalent UTF-16 contents.
  3052. * @see toUTF32
  3053. * @stable ICU 4.2
  3054. */
  3055. static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
  3056. /* Miscellaneous operations */
  3057. /**
  3058. * Unescape a string of characters and return a string containing
  3059. * the result. The following escape sequences are recognized:
  3060. *
  3061. * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
  3062. * \\Uhhhhhhhh 8 hex digits
  3063. * \\xhh 1-2 hex digits
  3064. * \\ooo 1-3 octal digits; o in [0-7]
  3065. * \\cX control-X; X is masked with 0x1F
  3066. *
  3067. * as well as the standard ANSI C escapes:
  3068. *
  3069. * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
  3070. * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
  3071. * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
  3072. *
  3073. * Anything else following a backslash is generically escaped. For
  3074. * example, "[a\\-z]" returns "[a-z]".
  3075. *
  3076. * If an escape sequence is ill-formed, this method returns an empty
  3077. * string. An example of an ill-formed sequence is "\\u" followed by
  3078. * fewer than 4 hex digits.
  3079. *
  3080. * This function is similar to u_unescape() but not identical to it.
  3081. * The latter takes a source char*, so it does escape recognition
  3082. * and also invariant conversion.
  3083. *
  3084. * @return a string with backslash escapes interpreted, or an
  3085. * empty string on error.
  3086. * @see UnicodeString#unescapeAt()
  3087. * @see u_unescape()
  3088. * @see u_unescapeAt()
  3089. * @stable ICU 2.0
  3090. */
  3091. UnicodeString unescape() const;
  3092. /**
  3093. * Unescape a single escape sequence and return the represented
  3094. * character. See unescape() for a listing of the recognized escape
  3095. * sequences. The character at offset-1 is assumed (without
  3096. * checking) to be a backslash. If the escape sequence is
  3097. * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
  3098. * returned.
  3099. *
  3100. * @param offset an input output parameter. On input, it is the
  3101. * offset into this string where the escape sequence is located,
  3102. * after the initial backslash. On output, it is advanced after the
  3103. * last character parsed. On error, it is not advanced at all.
  3104. * @return the character represented by the escape sequence at
  3105. * offset, or U_SENTINEL=-1 on error.
  3106. * @see UnicodeString#unescape()
  3107. * @see u_unescape()
  3108. * @see u_unescapeAt()
  3109. * @stable ICU 2.0
  3110. */
  3111. UChar32 unescapeAt(int32_t &offset) const;
  3112. /**
  3113. * ICU "poor man's RTTI", returns a UClassID for this class.
  3114. *
  3115. * @stable ICU 2.2
  3116. */
  3117. static UClassID U_EXPORT2 getStaticClassID();
  3118. /**
  3119. * ICU "poor man's RTTI", returns a UClassID for the actual class.
  3120. *
  3121. * @stable ICU 2.2
  3122. */
  3123. virtual UClassID getDynamicClassID() const;
  3124. //========================================
  3125. // Implementation methods
  3126. //========================================
  3127. protected:
  3128. /**
  3129. * Implement Replaceable::getLength() (see jitterbug 1027).
  3130. * @stable ICU 2.4
  3131. */
  3132. virtual int32_t getLength() const;
  3133. /**
  3134. * The change in Replaceable to use virtual getCharAt() allows
  3135. * UnicodeString::charAt() to be inline again (see jitterbug 709).
  3136. * @stable ICU 2.4
  3137. */
  3138. virtual UChar getCharAt(int32_t offset) const;
  3139. /**
  3140. * The change in Replaceable to use virtual getChar32At() allows
  3141. * UnicodeString::char32At() to be inline again (see jitterbug 709).
  3142. * @stable ICU 2.4
  3143. */
  3144. virtual UChar32 getChar32At(int32_t offset) const;
  3145. private:
  3146. // For char* constructors. Could be made public.
  3147. UnicodeString &setToUTF8(const StringPiece &utf8);
  3148. // For extract(char*).
  3149. // We could make a toUTF8(target, capacity, errorCode) public but not
  3150. // this version: New API will be cleaner if we make callers create substrings
  3151. // rather than having start+length on every method,
  3152. // and it should take a UErrorCode&.
  3153. int32_t
  3154. toUTF8(int32_t start, int32_t len,
  3155. char *target, int32_t capacity) const;
  3156. /**
  3157. * Internal string contents comparison, called by operator==.
  3158. * Requires: this & text not bogus and have same lengths.
  3159. */
  3160. UBool doEquals(const UnicodeString &text, int32_t len) const;
  3161. inline int8_t
  3162. doCompare(int32_t start,
  3163. int32_t length,
  3164. const UnicodeString& srcText,
  3165. int32_t srcStart,
  3166. int32_t srcLength) const;
  3167. int8_t doCompare(int32_t start,
  3168. int32_t length,
  3169. const UChar *srcChars,
  3170. int32_t srcStart,
  3171. int32_t srcLength) const;
  3172. inline int8_t
  3173. doCompareCodePointOrder(int32_t start,
  3174. int32_t length,
  3175. const UnicodeString& srcText,
  3176. int32_t srcStart,
  3177. int32_t srcLength) const;
  3178. int8_t doCompareCodePointOrder(int32_t start,
  3179. int32_t length,
  3180. const UChar *srcChars,
  3181. int32_t srcStart,
  3182. int32_t srcLength) const;
  3183. inline int8_t
  3184. doCaseCompare(int32_t start,
  3185. int32_t length,
  3186. const UnicodeString &srcText,
  3187. int32_t srcStart,
  3188. int32_t srcLength,
  3189. uint32_t options) const;
  3190. int8_t
  3191. doCaseCompare(int32_t start,
  3192. int32_t length,
  3193. const UChar *srcChars,
  3194. int32_t srcStart,
  3195. int32_t srcLength,
  3196. uint32_t options) const;
  3197. int32_t doIndexOf(UChar c,
  3198. int32_t start,
  3199. int32_t length) const;
  3200. int32_t doIndexOf(UChar32 c,
  3201. int32_t start,
  3202. int32_t length) const;
  3203. int32_t doLastIndexOf(UChar c,
  3204. int32_t start,
  3205. int32_t length) const;
  3206. int32_t doLastIndexOf(UChar32 c,
  3207. int32_t start,
  3208. int32_t length) const;
  3209. void doExtract(int32_t start,
  3210. int32_t length,
  3211. UChar *dst,
  3212. int32_t dstStart) const;
  3213. inline void doExtract(int32_t start,
  3214. int32_t length,
  3215. UnicodeString& target) const;
  3216. inline UChar doCharAt(int32_t offset) const;
  3217. UnicodeString& doReplace(int32_t start,
  3218. int32_t length,
  3219. const UnicodeString& srcText,
  3220. int32_t srcStart,
  3221. int32_t srcLength);
  3222. UnicodeString& doReplace(int32_t start,
  3223. int32_t length,
  3224. const UChar *srcChars,
  3225. int32_t srcStart,
  3226. int32_t srcLength);
  3227. UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  3228. UnicodeString& doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength);
  3229. UnicodeString& doReverse(int32_t start,
  3230. int32_t length);
  3231. // calculate hash code
  3232. int32_t doHashCode(void) const;
  3233. // get pointer to start of array
  3234. // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
  3235. inline UChar* getArrayStart(void);
  3236. inline const UChar* getArrayStart(void) const;
  3237. inline UBool hasShortLength() const;
  3238. inline int32_t getShortLength() const;
  3239. // A UnicodeString object (not necessarily its current buffer)
  3240. // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
  3241. inline UBool isWritable() const;
  3242. // Is the current buffer writable?
  3243. inline UBool isBufferWritable() const;
  3244. // None of the following does releaseArray().
  3245. inline void setZeroLength();
  3246. inline void setShortLength(int32_t len);
  3247. inline void setLength(int32_t len);
  3248. inline void setToEmpty();
  3249. inline void setArray(UChar *array, int32_t len, int32_t capacity); // sets length but not flags
  3250. // allocate the array; result may be the stack buffer
  3251. // sets refCount to 1 if appropriate
  3252. // sets fArray, fCapacity, and flags
  3253. // sets length to 0
  3254. // returns boolean for success or failure
  3255. UBool allocate(int32_t capacity);
  3256. // release the array if owned
  3257. void releaseArray(void);
  3258. // turn a bogus string into an empty one
  3259. void unBogus();
  3260. // implements assignment operator, copy constructor, and fastCopyFrom()
  3261. UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
  3262. // Copies just the fields without memory management.
  3263. void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
  3264. // Pin start and limit to acceptable values.
  3265. inline void pinIndex(int32_t& start) const;
  3266. inline void pinIndices(int32_t& start,
  3267. int32_t& length) const;
  3268. #if !UCONFIG_NO_CONVERSION
  3269. /* Internal extract() using UConverter. */
  3270. int32_t doExtract(int32_t start, int32_t length,
  3271. char *dest, int32_t destCapacity,
  3272. UConverter *cnv,
  3273. UErrorCode &errorCode) const;
  3274. /*
  3275. * Real constructor for converting from codepage data.
  3276. * It assumes that it is called with !fRefCounted.
  3277. *
  3278. * If <code>codepage==0</code>, then the default converter
  3279. * is used for the platform encoding.
  3280. * If <code>codepage</code> is an empty string (<code>""</code>),
  3281. * then a simple conversion is performed on the codepage-invariant
  3282. * subset ("invariant characters") of the platform encoding. See utypes.h.
  3283. */
  3284. void doCodepageCreate(const char *codepageData,
  3285. int32_t dataLength,
  3286. const char *codepage);
  3287. /*
  3288. * Worker function for creating a UnicodeString from
  3289. * a codepage string using a UConverter.
  3290. */
  3291. void
  3292. doCodepageCreate(const char *codepageData,
  3293. int32_t dataLength,
  3294. UConverter *converter,
  3295. UErrorCode &status);
  3296. #endif
  3297. /*
  3298. * This function is called when write access to the array
  3299. * is necessary.
  3300. *
  3301. * We need to make a copy of the array if
  3302. * the buffer is read-only, or
  3303. * the buffer is refCounted (shared), and refCount>1, or
  3304. * the buffer is too small.
  3305. *
  3306. * Return FALSE if memory could not be allocated.
  3307. */
  3308. UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
  3309. int32_t growCapacity = -1,
  3310. UBool doCopyArray = TRUE,
  3311. int32_t **pBufferToDelete = 0,
  3312. UBool forceClone = FALSE);
  3313. /**
  3314. * Common function for UnicodeString case mappings.
  3315. * The stringCaseMapper has the same type UStringCaseMapper
  3316. * as in ustr_imp.h for ustrcase_map().
  3317. */
  3318. UnicodeString &
  3319. caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
  3320. // ref counting
  3321. void addRef(void);
  3322. int32_t removeRef(void);
  3323. int32_t refCount(void) const;
  3324. // constants
  // Internal constants: stack-buffer capacity, sentinel values, and the
  // bit layout of the 16-bit fLengthAndFlags word (low kLengthShift bits are
  // storage flags, the remaining high bits hold the short length).
  3325. enum {
  3326. /**
  3327. * Size of stack buffer for short strings.
  3328. * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
  3329. * @see UNISTR_OBJECT_SIZE
  3330. */
  3331. US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
  3332. kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
  3333. kGrowSize=128, // grow size for this buffer
  3334. kInvalidHashCode=0, // invalid hash code
  3335. kEmptyHashCode=1, // hash code for empty string
  3336. // bit flag values for fLengthAndFlags
  3337. kIsBogus=1, // this string is bogus, i.e., not valid or NULL
  3338. kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
  3339. kRefCounted=4, // there is a refCount field before the characters in fArray
  3340. kBufferIsReadonly=8,// do not write to this buffer
  3341. kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
  3342. // and releaseBuffer(newLength) must be called
  3343. kAllStorageFlags=0x1f,
  3344. kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
  3345. kLength1=1<<kLengthShift,
  3346. kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
  // 0xffe0 has every bit above the storage flags set and all storage flags clear
  3347. kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
  3348. // combined values for convenience
  3349. kShortString=kUsingStackBuffer,
  3350. kLongString=kRefCounted,
  3351. kReadonlyAlias=kBufferIsReadonly,
  3352. kWritableAlias=0
  3353. };
  3354. friend class UnicodeStringAppendable;
  3355. union StackBufferOrFields; // forward declaration necessary before friend declaration
  3356. friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
  3357. /*
  3358. * The following are all the class fields that are stored
  3359. * in each UnicodeString object.
  3360. * Note that UnicodeString has virtual functions,
  3361. * therefore there is an implicit vtable pointer
  3362. * as the first real field.
  3363. * The fields should be aligned such that no padding is necessary.
  3364. * On 32-bit machines, the size should be 32 bytes,
  3365. * on 64-bit machines (8-byte pointers), it should be 40 bytes.
  3366. *
  3367. * We use a hack to achieve this.
  3368. *
  3369. * With at least some compilers, each of the following is forced to
  3370. * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
  3371. * rounded up with additional padding if the fields do not already fit that requirement:
  3372. * - sizeof(class UnicodeString)
  3373. * - offsetof(UnicodeString, fUnion)
  3374. * - sizeof(fUnion)
  3375. * - sizeof(fStackFields)
  3376. *
  3377. * We optimize for the longest possible internal buffer for short strings.
  3378. * fUnion.fStackFields begins with 2 bytes for storage flags
  3379. * and the length of relatively short strings,
  3380. * followed by the buffer for short string contents.
  3381. * There is no padding inside fStackFields.
  3382. *
  3383. * Heap-allocated and aliased strings use fUnion.fFields.
  3384. * Both fStackFields and fFields must begin with the same fields for flags and short length,
  3385. * that is, those must have the same memory offsets inside the object,
  3386. * because the flags must be inspected in order to decide which half of fUnion is being used.
  3387. * We assume that the compiler does not reorder the fields.
  3388. *
  3389. * (Padding at the end of fFields is ok:
  3390. * As long as it is no larger than fStackFields, it is not wasted space.)
  3391. *
  3392. * For some of the history of the UnicodeString class fields layout, see
  3393. * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
  3394. * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
  3395. * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
  3396. */
  3397. // (implicit) *vtable;
  // Storage for the string: either an in-object buffer (fStackFields) or a
  // pointer/length/capacity triple (fFields). Both views share fLengthAndFlags.
  3398. union StackBufferOrFields {
  3399. // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
  3400. // Each struct of the union must begin with fLengthAndFlags.
  3401. struct {
  3402. int16_t fLengthAndFlags; // bit fields: see constants above
  3403. UChar fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
  3404. } fStackFields;
  3405. struct {
  3406. int16_t fLengthAndFlags; // bit fields: see constants above
  3407. int32_t fLength; // number of characters in fArray if the length does not fit the short-length bits of fLengthAndFlags (>kMaxShortLength); else undefined
  3408. int32_t fCapacity; // capacity of fArray (in UChars)
  3409. // array pointer last to minimize padding for machines with P128 data model
  3410. // or pointer sizes that are not a power of 2
  3411. UChar *fArray; // the Unicode data
  3412. } fFields;
  3413. } fUnion;
  3414. };
  3415. /**
  3416. * Create a new UnicodeString with the concatenation of two others.
  3417. *
  3418. * @param s1 The first string to be copied to the new one.
  3419. * @param s2 The second string to be copied to the new one, after s1.
  3420. * @return UnicodeString(s1).append(s2)
  3421. * @stable ICU 2.8
  3422. */
  3423. U_COMMON_API UnicodeString U_EXPORT2
  3424. operator+ (const UnicodeString &s1, const UnicodeString &s2);
  3425. //========================================
  3426. // Inline members
  3427. //========================================
  3428. //========================================
  3429. // Privates
  3430. //========================================
  3431. inline void
  3432. UnicodeString::pinIndex(int32_t& start) const
  3433. {
  3434. // pin index
  3435. if(start < 0) {
  3436. start = 0;
  3437. } else if(start > length()) {
  3438. start = length();
  3439. }
  3440. }
  3441. inline void
  3442. UnicodeString::pinIndices(int32_t& start,
  3443. int32_t& _length) const
  3444. {
  3445. // pin indices
  3446. int32_t len = length();
  3447. if(start < 0) {
  3448. start = 0;
  3449. } else if(start > len) {
  3450. start = len;
  3451. }
  3452. if(_length < 0) {
  3453. _length = 0;
  3454. } else if(_length > (len - start)) {
  3455. _length = (len - start);
  3456. }
  3457. }
  3458. inline UChar*
  3459. UnicodeString::getArrayStart() {
  3460. return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
  3461. fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
  3462. }
  3463. inline const UChar*
  3464. UnicodeString::getArrayStart() const {
  3465. return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
  3466. fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
  3467. }
  3468. //========================================
  3469. // Default constructor
  3470. //========================================
  3471. inline
  3472. UnicodeString::UnicodeString() {
  3473. fUnion.fStackFields.fLengthAndFlags=kShortString;
  3474. }
  3475. //========================================
  3476. // Read-only implementation methods
  3477. //========================================
  3478. inline UBool
  3479. UnicodeString::hasShortLength() const {
  3480. return fUnion.fFields.fLengthAndFlags>=0;
  3481. }
  3482. inline int32_t
  3483. UnicodeString::getShortLength() const {
  3484. // fLengthAndFlags must be non-negative -> short length >= 0
  3485. // and arithmetic or logical shift does not matter.
  3486. return fUnion.fFields.fLengthAndFlags>>kLengthShift;
  3487. }
  3488. inline int32_t
  3489. UnicodeString::length() const {
  3490. return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
  3491. }
  3492. inline int32_t
  3493. UnicodeString::getCapacity() const {
  3494. return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
  3495. US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
  3496. }
  3497. inline int32_t
  3498. UnicodeString::hashCode() const
  3499. { return doHashCode(); }
  3500. inline UBool
  3501. UnicodeString::isBogus() const
  3502. { return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
  3503. inline UBool
  3504. UnicodeString::isWritable() const
  3505. { return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }
  3506. inline UBool
  3507. UnicodeString::isBufferWritable() const
  3508. {
  3509. return (UBool)(
  3510. !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
  3511. (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
  3512. }
  3513. inline const UChar *
  3514. UnicodeString::getBuffer() const {
  3515. if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
  3516. return 0;
  3517. } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
  3518. return fUnion.fStackFields.fBuffer;
  3519. } else {
  3520. return fUnion.fFields.fArray;
  3521. }
  3522. }
  3523. //========================================
  3524. // Read-only alias methods
  3525. //========================================
  3526. inline int8_t
  3527. UnicodeString::doCompare(int32_t start,
  3528. int32_t thisLength,
  3529. const UnicodeString& srcText,
  3530. int32_t srcStart,
  3531. int32_t srcLength) const
  3532. {
  3533. if(srcText.isBogus()) {
  3534. return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3535. } else {
  3536. srcText.pinIndices(srcStart, srcLength);
  3537. return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  3538. }
  3539. }
  3540. inline UBool
  3541. UnicodeString::operator== (const UnicodeString& text) const
  3542. {
  3543. if(isBogus()) {
  3544. return text.isBogus();
  3545. } else {
  3546. int32_t len = length(), textLength = text.length();
  3547. return !text.isBogus() && len == textLength && doEquals(text, len);
  3548. }
  3549. }
  3550. inline UBool
  3551. UnicodeString::operator!= (const UnicodeString& text) const
  3552. { return (! operator==(text)); }
  3553. inline UBool
  3554. UnicodeString::operator> (const UnicodeString& text) const
  3555. { return doCompare(0, length(), text, 0, text.length()) == 1; }
  3556. inline UBool
  3557. UnicodeString::operator< (const UnicodeString& text) const
  3558. { return doCompare(0, length(), text, 0, text.length()) == -1; }
  3559. inline UBool
  3560. UnicodeString::operator>= (const UnicodeString& text) const
  3561. { return doCompare(0, length(), text, 0, text.length()) != -1; }
  3562. inline UBool
  3563. UnicodeString::operator<= (const UnicodeString& text) const
  3564. { return doCompare(0, length(), text, 0, text.length()) != 1; }
  3565. inline int8_t
  3566. UnicodeString::compare(const UnicodeString& text) const
  3567. { return doCompare(0, length(), text, 0, text.length()); }
  3568. inline int8_t
  3569. UnicodeString::compare(int32_t start,
  3570. int32_t _length,
  3571. const UnicodeString& srcText) const
  3572. { return doCompare(start, _length, srcText, 0, srcText.length()); }
  3573. inline int8_t
  3574. UnicodeString::compare(const UChar *srcChars,
  3575. int32_t srcLength) const
  3576. { return doCompare(0, length(), srcChars, 0, srcLength); }
  3577. inline int8_t
  3578. UnicodeString::compare(int32_t start,
  3579. int32_t _length,
  3580. const UnicodeString& srcText,
  3581. int32_t srcStart,
  3582. int32_t srcLength) const
  3583. { return doCompare(start, _length, srcText, srcStart, srcLength); }
  3584. inline int8_t
  3585. UnicodeString::compare(int32_t start,
  3586. int32_t _length,
  3587. const UChar *srcChars) const
  3588. { return doCompare(start, _length, srcChars, 0, _length); }
  3589. inline int8_t
  3590. UnicodeString::compare(int32_t start,
  3591. int32_t _length,
  3592. const UChar *srcChars,
  3593. int32_t srcStart,
  3594. int32_t srcLength) const
  3595. { return doCompare(start, _length, srcChars, srcStart, srcLength); }
  3596. inline int8_t
  3597. UnicodeString::compareBetween(int32_t start,
  3598. int32_t limit,
  3599. const UnicodeString& srcText,
  3600. int32_t srcStart,
  3601. int32_t srcLimit) const
  3602. { return doCompare(start, limit - start,
  3603. srcText, srcStart, srcLimit - srcStart); }
  3604. inline int8_t
  3605. UnicodeString::doCompareCodePointOrder(int32_t start,
  3606. int32_t thisLength,
  3607. const UnicodeString& srcText,
  3608. int32_t srcStart,
  3609. int32_t srcLength) const
  3610. {
  3611. if(srcText.isBogus()) {
  3612. return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3613. } else {
  3614. srcText.pinIndices(srcStart, srcLength);
  3615. return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  3616. }
  3617. }
  3618. inline int8_t
  3619. UnicodeString::compareCodePointOrder(const UnicodeString& text) const
  3620. { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
  3621. inline int8_t
  3622. UnicodeString::compareCodePointOrder(int32_t start,
  3623. int32_t _length,
  3624. const UnicodeString& srcText) const
  3625. { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
  3626. inline int8_t
  3627. UnicodeString::compareCodePointOrder(const UChar *srcChars,
  3628. int32_t srcLength) const
  3629. { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
  3630. inline int8_t
  3631. UnicodeString::compareCodePointOrder(int32_t start,
  3632. int32_t _length,
  3633. const UnicodeString& srcText,
  3634. int32_t srcStart,
  3635. int32_t srcLength) const
  3636. { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
  3637. inline int8_t
  3638. UnicodeString::compareCodePointOrder(int32_t start,
  3639. int32_t _length,
  3640. const UChar *srcChars) const
  3641. { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
  3642. inline int8_t
  3643. UnicodeString::compareCodePointOrder(int32_t start,
  3644. int32_t _length,
  3645. const UChar *srcChars,
  3646. int32_t srcStart,
  3647. int32_t srcLength) const
  3648. { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
  3649. inline int8_t
  3650. UnicodeString::compareCodePointOrderBetween(int32_t start,
  3651. int32_t limit,
  3652. const UnicodeString& srcText,
  3653. int32_t srcStart,
  3654. int32_t srcLimit) const
  3655. { return doCompareCodePointOrder(start, limit - start,
  3656. srcText, srcStart, srcLimit - srcStart); }
  3657. inline int8_t
  3658. UnicodeString::doCaseCompare(int32_t start,
  3659. int32_t thisLength,
  3660. const UnicodeString &srcText,
  3661. int32_t srcStart,
  3662. int32_t srcLength,
  3663. uint32_t options) const
  3664. {
  3665. if(srcText.isBogus()) {
  3666. return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3667. } else {
  3668. srcText.pinIndices(srcStart, srcLength);
  3669. return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
  3670. }
  3671. }
  3672. inline int8_t
  3673. UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
  3674. return doCaseCompare(0, length(), text, 0, text.length(), options);
  3675. }
  3676. inline int8_t
  3677. UnicodeString::caseCompare(int32_t start,
  3678. int32_t _length,
  3679. const UnicodeString &srcText,
  3680. uint32_t options) const {
  3681. return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
  3682. }
  3683. inline int8_t
  3684. UnicodeString::caseCompare(const UChar *srcChars,
  3685. int32_t srcLength,
  3686. uint32_t options) const {
  3687. return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
  3688. }
  3689. inline int8_t
  3690. UnicodeString::caseCompare(int32_t start,
  3691. int32_t _length,
  3692. const UnicodeString &srcText,
  3693. int32_t srcStart,
  3694. int32_t srcLength,
  3695. uint32_t options) const {
  3696. return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
  3697. }
  3698. inline int8_t
  3699. UnicodeString::caseCompare(int32_t start,
  3700. int32_t _length,
  3701. const UChar *srcChars,
  3702. uint32_t options) const {
  3703. return doCaseCompare(start, _length, srcChars, 0, _length, options);
  3704. }
  3705. inline int8_t
  3706. UnicodeString::caseCompare(int32_t start,
  3707. int32_t _length,
  3708. const UChar *srcChars,
  3709. int32_t srcStart,
  3710. int32_t srcLength,
  3711. uint32_t options) const {
  3712. return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
  3713. }
  3714. inline int8_t
  3715. UnicodeString::caseCompareBetween(int32_t start,
  3716. int32_t limit,
  3717. const UnicodeString &srcText,
  3718. int32_t srcStart,
  3719. int32_t srcLimit,
  3720. uint32_t options) const {
  3721. return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
  3722. }
  3723. inline int32_t
  3724. UnicodeString::indexOf(const UnicodeString& srcText,
  3725. int32_t srcStart,
  3726. int32_t srcLength,
  3727. int32_t start,
  3728. int32_t _length) const
  3729. {
  3730. if(!srcText.isBogus()) {
  3731. srcText.pinIndices(srcStart, srcLength);
  3732. if(srcLength > 0) {
  3733. return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  3734. }
  3735. }
  3736. return -1;
  3737. }
  3738. inline int32_t
  3739. UnicodeString::indexOf(const UnicodeString& text) const
  3740. { return indexOf(text, 0, text.length(), 0, length()); }
  3741. inline int32_t
  3742. UnicodeString::indexOf(const UnicodeString& text,
  3743. int32_t start) const {
  3744. pinIndex(start);
  3745. return indexOf(text, 0, text.length(), start, length() - start);
  3746. }
  3747. inline int32_t
  3748. UnicodeString::indexOf(const UnicodeString& text,
  3749. int32_t start,
  3750. int32_t _length) const
  3751. { return indexOf(text, 0, text.length(), start, _length); }
  3752. inline int32_t
  3753. UnicodeString::indexOf(const UChar *srcChars,
  3754. int32_t srcLength,
  3755. int32_t start) const {
  3756. pinIndex(start);
  3757. return indexOf(srcChars, 0, srcLength, start, length() - start);
  3758. }
  3759. inline int32_t
  3760. UnicodeString::indexOf(const UChar *srcChars,
  3761. int32_t srcLength,
  3762. int32_t start,
  3763. int32_t _length) const
  3764. { return indexOf(srcChars, 0, srcLength, start, _length); }
  3765. inline int32_t
  3766. UnicodeString::indexOf(UChar c,
  3767. int32_t start,
  3768. int32_t _length) const
  3769. { return doIndexOf(c, start, _length); }
  3770. inline int32_t
  3771. UnicodeString::indexOf(UChar32 c,
  3772. int32_t start,
  3773. int32_t _length) const
  3774. { return doIndexOf(c, start, _length); }
  3775. inline int32_t
  3776. UnicodeString::indexOf(UChar c) const
  3777. { return doIndexOf(c, 0, length()); }
  3778. inline int32_t
  3779. UnicodeString::indexOf(UChar32 c) const
  3780. { return indexOf(c, 0, length()); }
  3781. inline int32_t
  3782. UnicodeString::indexOf(UChar c,
  3783. int32_t start) const {
  3784. pinIndex(start);
  3785. return doIndexOf(c, start, length() - start);
  3786. }
  3787. inline int32_t
  3788. UnicodeString::indexOf(UChar32 c,
  3789. int32_t start) const {
  3790. pinIndex(start);
  3791. return indexOf(c, start, length() - start);
  3792. }
  3793. inline int32_t
  3794. UnicodeString::lastIndexOf(const UChar *srcChars,
  3795. int32_t srcLength,
  3796. int32_t start,
  3797. int32_t _length) const
  3798. { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
  3799. inline int32_t
  3800. UnicodeString::lastIndexOf(const UChar *srcChars,
  3801. int32_t srcLength,
  3802. int32_t start) const {
  3803. pinIndex(start);
  3804. return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
  3805. }
  3806. inline int32_t
  3807. UnicodeString::lastIndexOf(const UnicodeString& srcText,
  3808. int32_t srcStart,
  3809. int32_t srcLength,
  3810. int32_t start,
  3811. int32_t _length) const
  3812. {
  3813. if(!srcText.isBogus()) {
  3814. srcText.pinIndices(srcStart, srcLength);
  3815. if(srcLength > 0) {
  3816. return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  3817. }
  3818. }
  3819. return -1;
  3820. }
  3821. inline int32_t
  3822. UnicodeString::lastIndexOf(const UnicodeString& text,
  3823. int32_t start,
  3824. int32_t _length) const
  3825. { return lastIndexOf(text, 0, text.length(), start, _length); }
  3826. inline int32_t
  3827. UnicodeString::lastIndexOf(const UnicodeString& text,
  3828. int32_t start) const {
  3829. pinIndex(start);
  3830. return lastIndexOf(text, 0, text.length(), start, length() - start);
  3831. }
  3832. inline int32_t
  3833. UnicodeString::lastIndexOf(const UnicodeString& text) const
  3834. { return lastIndexOf(text, 0, text.length(), 0, length()); }
  3835. inline int32_t
  3836. UnicodeString::lastIndexOf(UChar c,
  3837. int32_t start,
  3838. int32_t _length) const
  3839. { return doLastIndexOf(c, start, _length); }
  3840. inline int32_t
  3841. UnicodeString::lastIndexOf(UChar32 c,
  3842. int32_t start,
  3843. int32_t _length) const {
  3844. return doLastIndexOf(c, start, _length);
  3845. }
  3846. inline int32_t
  3847. UnicodeString::lastIndexOf(UChar c) const
  3848. { return doLastIndexOf(c, 0, length()); }
  3849. inline int32_t
  3850. UnicodeString::lastIndexOf(UChar32 c) const {
  3851. return lastIndexOf(c, 0, length());
  3852. }
  3853. inline int32_t
  3854. UnicodeString::lastIndexOf(UChar c,
  3855. int32_t start) const {
  3856. pinIndex(start);
  3857. return doLastIndexOf(c, start, length() - start);
  3858. }
  3859. inline int32_t
  3860. UnicodeString::lastIndexOf(UChar32 c,
  3861. int32_t start) const {
  3862. pinIndex(start);
  3863. return lastIndexOf(c, start, length() - start);
  3864. }
  3865. inline UBool
  3866. UnicodeString::startsWith(const UnicodeString& text) const
  3867. { return compare(0, text.length(), text, 0, text.length()) == 0; }
  3868. inline UBool
  3869. UnicodeString::startsWith(const UnicodeString& srcText,
  3870. int32_t srcStart,
  3871. int32_t srcLength) const
  3872. { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
  3873. inline UBool
  3874. UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
  3875. if(srcLength < 0) {
  3876. srcLength = u_strlen(srcChars);
  3877. }
  3878. return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
  3879. }
  3880. inline UBool
  3881. UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
  3882. if(srcLength < 0) {
  3883. srcLength = u_strlen(srcChars);
  3884. }
  3885. return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
  3886. }
  3887. inline UBool
  3888. UnicodeString::endsWith(const UnicodeString& text) const
  3889. { return doCompare(length() - text.length(), text.length(),
  3890. text, 0, text.length()) == 0; }
  3891. inline UBool
  3892. UnicodeString::endsWith(const UnicodeString& srcText,
  3893. int32_t srcStart,
  3894. int32_t srcLength) const {
  3895. srcText.pinIndices(srcStart, srcLength);
  3896. return doCompare(length() - srcLength, srcLength,
  3897. srcText, srcStart, srcLength) == 0;
  3898. }
  3899. inline UBool
  3900. UnicodeString::endsWith(const UChar *srcChars,
  3901. int32_t srcLength) const {
  3902. if(srcLength < 0) {
  3903. srcLength = u_strlen(srcChars);
  3904. }
  3905. return doCompare(length() - srcLength, srcLength,
  3906. srcChars, 0, srcLength) == 0;
  3907. }
  3908. inline UBool
  3909. UnicodeString::endsWith(const UChar *srcChars,
  3910. int32_t srcStart,
  3911. int32_t srcLength) const {
  3912. if(srcLength < 0) {
  3913. srcLength = u_strlen(srcChars + srcStart);
  3914. }
  3915. return doCompare(length() - srcLength, srcLength,
  3916. srcChars, srcStart, srcLength) == 0;
  3917. }
  3918. //========================================
  3919. // replace
  3920. //========================================
  3921. inline UnicodeString&
  3922. UnicodeString::replace(int32_t start,
  3923. int32_t _length,
  3924. const UnicodeString& srcText)
  3925. { return doReplace(start, _length, srcText, 0, srcText.length()); }
  3926. inline UnicodeString&
  3927. UnicodeString::replace(int32_t start,
  3928. int32_t _length,
  3929. const UnicodeString& srcText,
  3930. int32_t srcStart,
  3931. int32_t srcLength)
  3932. { return doReplace(start, _length, srcText, srcStart, srcLength); }
  3933. inline UnicodeString&
  3934. UnicodeString::replace(int32_t start,
  3935. int32_t _length,
  3936. const UChar *srcChars,
  3937. int32_t srcLength)
  3938. { return doReplace(start, _length, srcChars, 0, srcLength); }
  3939. inline UnicodeString&
  3940. UnicodeString::replace(int32_t start,
  3941. int32_t _length,
  3942. const UChar *srcChars,
  3943. int32_t srcStart,
  3944. int32_t srcLength)
  3945. { return doReplace(start, _length, srcChars, srcStart, srcLength); }
  3946. inline UnicodeString&
  3947. UnicodeString::replace(int32_t start,
  3948. int32_t _length,
  3949. UChar srcChar)
  3950. { return doReplace(start, _length, &srcChar, 0, 1); }
  3951. inline UnicodeString&
  3952. UnicodeString::replaceBetween(int32_t start,
  3953. int32_t limit,
  3954. const UnicodeString& srcText)
  3955. { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
  3956. inline UnicodeString&
  3957. UnicodeString::replaceBetween(int32_t start,
  3958. int32_t limit,
  3959. const UnicodeString& srcText,
  3960. int32_t srcStart,
  3961. int32_t srcLimit)
  3962. { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
  3963. inline UnicodeString&
  3964. UnicodeString::findAndReplace(const UnicodeString& oldText,
  3965. const UnicodeString& newText)
  3966. { return findAndReplace(0, length(), oldText, 0, oldText.length(),
  3967. newText, 0, newText.length()); }
  3968. inline UnicodeString&
  3969. UnicodeString::findAndReplace(int32_t start,
  3970. int32_t _length,
  3971. const UnicodeString& oldText,
  3972. const UnicodeString& newText)
  3973. { return findAndReplace(start, _length, oldText, 0, oldText.length(),
  3974. newText, 0, newText.length()); }
  3975. // ============================
  3976. // extract
  3977. // ============================
  3978. inline void
  3979. UnicodeString::doExtract(int32_t start,
  3980. int32_t _length,
  3981. UnicodeString& target) const
  3982. { target.replace(0, target.length(), *this, start, _length); }
  3983. inline void
  3984. UnicodeString::extract(int32_t start,
  3985. int32_t _length,
  3986. UChar *target,
  3987. int32_t targetStart) const
  3988. { doExtract(start, _length, target, targetStart); }
  3989. inline void
  3990. UnicodeString::extract(int32_t start,
  3991. int32_t _length,
  3992. UnicodeString& target) const
  3993. { doExtract(start, _length, target); }
  3994. #if !UCONFIG_NO_CONVERSION
  3995. inline int32_t
  3996. UnicodeString::extract(int32_t start,
  3997. int32_t _length,
  3998. char *dst,
  3999. const char *codepage) const
  4000. {
  4001. // This dstSize value will be checked explicitly
  4002. return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
  4003. }
  4004. #endif
  4005. inline void
  4006. UnicodeString::extractBetween(int32_t start,
  4007. int32_t limit,
  4008. UChar *dst,
  4009. int32_t dstStart) const {
  4010. pinIndex(start);
  4011. pinIndex(limit);
  4012. doExtract(start, limit - start, dst, dstStart);
  4013. }
  4014. inline UnicodeString
  4015. UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
  4016. return tempSubString(start, limit - start);
  4017. }
  4018. inline UChar
  4019. UnicodeString::doCharAt(int32_t offset) const
  4020. {
  4021. if((uint32_t)offset < (uint32_t)length()) {
  4022. return getArrayStart()[offset];
  4023. } else {
  4024. return kInvalidUChar;
  4025. }
  4026. }
  4027. inline UChar
  4028. UnicodeString::charAt(int32_t offset) const
  4029. { return doCharAt(offset); }
  4030. inline UChar
  4031. UnicodeString::operator[] (int32_t offset) const
  4032. { return doCharAt(offset); }
  4033. inline UBool
  4034. UnicodeString::isEmpty() const {
  4035. // Arithmetic or logical right shift does not matter: only testing for 0.
  4036. return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
  4037. }
  4038. //========================================
  4039. // Write implementation methods
  4040. //========================================
  4041. inline void
  4042. UnicodeString::setZeroLength() {
  4043. fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
  4044. }
  4045. inline void
  4046. UnicodeString::setShortLength(int32_t len) {
  4047. // requires 0 <= len <= kMaxShortLength
  4048. fUnion.fFields.fLengthAndFlags =
  4049. (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
  4050. }
  4051. inline void
  4052. UnicodeString::setLength(int32_t len) {
  4053. if(len <= kMaxShortLength) {
  4054. setShortLength(len);
  4055. } else {
  4056. fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
  4057. fUnion.fFields.fLength = len;
  4058. }
  4059. }
  4060. inline void
  4061. UnicodeString::setToEmpty() {
  4062. fUnion.fFields.fLengthAndFlags = kShortString;
  4063. }
  4064. inline void
  4065. UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
  4066. setLength(len);
  4067. fUnion.fFields.fArray = array;
  4068. fUnion.fFields.fCapacity = capacity;
  4069. }
  4070. inline UnicodeString&
  4071. UnicodeString::operator= (UChar ch)
  4072. { return doReplace(0, length(), &ch, 0, 1); }
  4073. inline UnicodeString&
  4074. UnicodeString::operator= (UChar32 ch)
  4075. { return replace(0, length(), ch); }
  4076. inline UnicodeString&
  4077. UnicodeString::setTo(const UnicodeString& srcText,
  4078. int32_t srcStart,
  4079. int32_t srcLength)
  4080. {
  4081. unBogus();
  4082. return doReplace(0, length(), srcText, srcStart, srcLength);
  4083. }
  4084. inline UnicodeString&
  4085. UnicodeString::setTo(const UnicodeString& srcText,
  4086. int32_t srcStart)
  4087. {
  4088. unBogus();
  4089. srcText.pinIndex(srcStart);
  4090. return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
  4091. }
  4092. inline UnicodeString&
  4093. UnicodeString::setTo(const UnicodeString& srcText)
  4094. {
  4095. return copyFrom(srcText);
  4096. }
  4097. inline UnicodeString&
  4098. UnicodeString::setTo(const UChar *srcChars,
  4099. int32_t srcLength)
  4100. {
  4101. unBogus();
  4102. return doReplace(0, length(), srcChars, 0, srcLength);
  4103. }
  4104. inline UnicodeString&
  4105. UnicodeString::setTo(UChar srcChar)
  4106. {
  4107. unBogus();
  4108. return doReplace(0, length(), &srcChar, 0, 1);
  4109. }
  4110. inline UnicodeString&
  4111. UnicodeString::setTo(UChar32 srcChar)
  4112. {
  4113. unBogus();
  4114. return replace(0, length(), srcChar);
  4115. }
  4116. inline UnicodeString&
  4117. UnicodeString::append(const UnicodeString& srcText,
  4118. int32_t srcStart,
  4119. int32_t srcLength)
  4120. { return doAppend(srcText, srcStart, srcLength); }
  4121. inline UnicodeString&
  4122. UnicodeString::append(const UnicodeString& srcText)
  4123. { return doAppend(srcText, 0, srcText.length()); }
  4124. inline UnicodeString&
  4125. UnicodeString::append(const UChar *srcChars,
  4126. int32_t srcStart,
  4127. int32_t srcLength)
  4128. { return doAppend(srcChars, srcStart, srcLength); }
  4129. inline UnicodeString&
  4130. UnicodeString::append(const UChar *srcChars,
  4131. int32_t srcLength)
  4132. { return doAppend(srcChars, 0, srcLength); }
  4133. inline UnicodeString&
  4134. UnicodeString::append(UChar srcChar)
  4135. { return doAppend(&srcChar, 0, 1); }
  4136. inline UnicodeString&
  4137. UnicodeString::operator+= (UChar ch)
  4138. { return doAppend(&ch, 0, 1); }
  4139. inline UnicodeString&
  4140. UnicodeString::operator+= (UChar32 ch) {
  4141. return append(ch);
  4142. }
  4143. inline UnicodeString&
  4144. UnicodeString::operator+= (const UnicodeString& srcText)
  4145. { return doAppend(srcText, 0, srcText.length()); }
  4146. inline UnicodeString&
  4147. UnicodeString::insert(int32_t start,
  4148. const UnicodeString& srcText,
  4149. int32_t srcStart,
  4150. int32_t srcLength)
  4151. { return doReplace(start, 0, srcText, srcStart, srcLength); }
  4152. inline UnicodeString&
  4153. UnicodeString::insert(int32_t start,
  4154. const UnicodeString& srcText)
  4155. { return doReplace(start, 0, srcText, 0, srcText.length()); }
  4156. inline UnicodeString&
  4157. UnicodeString::insert(int32_t start,
  4158. const UChar *srcChars,
  4159. int32_t srcStart,
  4160. int32_t srcLength)
  4161. { return doReplace(start, 0, srcChars, srcStart, srcLength); }
  4162. inline UnicodeString&
  4163. UnicodeString::insert(int32_t start,
  4164. const UChar *srcChars,
  4165. int32_t srcLength)
  4166. { return doReplace(start, 0, srcChars, 0, srcLength); }
  4167. inline UnicodeString&
  4168. UnicodeString::insert(int32_t start,
  4169. UChar srcChar)
  4170. { return doReplace(start, 0, &srcChar, 0, 1); }
  4171. inline UnicodeString&
  4172. UnicodeString::insert(int32_t start,
  4173. UChar32 srcChar)
  4174. { return replace(start, 0, srcChar); }
  4175. inline UnicodeString&
  4176. UnicodeString::remove()
  4177. {
  4178. // remove() of a bogus string makes the string empty and non-bogus
  4179. if(isBogus()) {
  4180. setToEmpty();
  4181. } else {
  4182. setZeroLength();
  4183. }
  4184. return *this;
  4185. }
  4186. inline UnicodeString&
  4187. UnicodeString::remove(int32_t start,
  4188. int32_t _length)
  4189. {
  4190. if(start <= 0 && _length == INT32_MAX) {
  4191. // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
  4192. return remove();
  4193. }
  4194. return doReplace(start, _length, NULL, 0, 0);
  4195. }
  4196. inline UnicodeString&
  4197. UnicodeString::removeBetween(int32_t start,
  4198. int32_t limit)
  4199. { return doReplace(start, limit - start, NULL, 0, 0); }
  4200. inline UnicodeString &
  4201. UnicodeString::retainBetween(int32_t start, int32_t limit) {
  4202. truncate(limit);
  4203. return doReplace(0, start, NULL, 0, 0);
  4204. }
  4205. inline UBool
  4206. UnicodeString::truncate(int32_t targetLength)
  4207. {
  4208. if(isBogus() && targetLength == 0) {
  4209. // truncate(0) of a bogus string makes the string empty and non-bogus
  4210. unBogus();
  4211. return FALSE;
  4212. } else if((uint32_t)targetLength < (uint32_t)length()) {
  4213. setLength(targetLength);
  4214. return TRUE;
  4215. } else {
  4216. return FALSE;
  4217. }
  4218. }
  4219. inline UnicodeString&
  4220. UnicodeString::reverse()
  4221. { return doReverse(0, length()); }
  4222. inline UnicodeString&
  4223. UnicodeString::reverse(int32_t start,
  4224. int32_t _length)
  4225. { return doReverse(start, _length); }
  4226. U_NAMESPACE_END
  4227. #endif