messagepattern.h 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943
  1. /*
  2. *******************************************************************************
  3. * Copyright (C) 2011-2013, International Business Machines
  4. * Corporation and others. All Rights Reserved.
  5. *******************************************************************************
  6. * file name: messagepattern.h
  7. * encoding: US-ASCII
  8. * tab size: 8 (not used)
  9. * indentation:4
  10. *
  11. * created on: 2011mar14
  12. * created by: Markus W. Scherer
  13. */
  14. #ifndef __MESSAGEPATTERN_H__
  15. #define __MESSAGEPATTERN_H__
  16. /**
  17. * \file
  18. * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
  19. */
  20. #include "unicode/utypes.h"
  21. #if !UCONFIG_NO_FORMATTING
  22. #include "unicode/parseerr.h"
  23. #include "unicode/unistr.h"
  24. /**
  25. * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
  26. * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
  27. * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
  28. * <p>
  29. * A pair of adjacent apostrophes always results in a single apostrophe in the output,
  30. * even when the pair is between two single, text-quoting apostrophes.
  31. * <p>
  32. * The following table shows examples of desired MessageFormat.format() output
  33. * with the pattern strings that yield that output.
  34. * <p>
  35. * <table>
  36. * <tr>
  37. * <th>Desired output</th>
  38. * <th>DOUBLE_OPTIONAL</th>
  39. * <th>DOUBLE_REQUIRED</th>
  40. * </tr>
  41. * <tr>
  42. * <td>I see {many}</td>
  43. * <td>I see '{many}'</td>
  44. * <td>(same)</td>
  45. * </tr>
  46. * <tr>
  47. * <td>I said {'Wow!'}</td>
  48. * <td>I said '{''Wow!''}'</td>
  49. * <td>(same)</td>
  50. * </tr>
  51. * <tr>
  52. * <td>I don't know</td>
  53. * <td>I don't know OR<br> I don''t know</td>
  54. * <td>I don''t know</td>
  55. * </tr>
  56. * </table>
  57. * @stable ICU 4.8
  58. * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
  59. */
  enum UMessagePatternApostropheMode {
      /**
       * Lenient mode: a literal apostrophe may be written in the pattern as
       * either one apostrophe or two adjacent apostrophes.
       * Inside a MessageFormat pattern, a single apostrophe opens quoted literal text
       * only when it immediately precedes a curly brace {},
       * a pipe symbol | (inside a choice format),
       * or a pound symbol # (inside a plural format).
       * <p>
       * This is the default behavior starting with ICU 4.8.
       * @stable ICU 4.8
       */
      UMSGPAT_APOS_DOUBLE_OPTIONAL = 0,
      /**
       * Strict mode: a literal apostrophe must be written as
       * two adjacent apostrophes in the pattern.
       * A single apostrophe always opens quoted literal text.
       * <p>
       * This is the behavior of ICU 4.6 and earlier, and of the JDK.
       * @stable ICU 4.8
       */
      UMSGPAT_APOS_DOUBLE_REQUIRED = 1
  };
  /**
   * Allows the enum type to be named without the "enum" keyword.
   * @stable ICU 4.8
   */
  typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
  87. /**
  88. * MessagePattern::Part type constants.
  89. * @stable ICU 4.8
  90. */
  enum UMessagePatternPartType {
      /**
       * Marks where a message pattern begins (the main message or a nested one).
       * The length is 0 for the top-level message and for a choice argument
       * sub-message; otherwise it is 1, covering the '{'.
       * The value is the nesting level, where the main message has level 0.
       * <p>
       * A MSG_LIMIT part always follows later.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_MSG_START = 0,
      /**
       * Marks where a message pattern ends (the main message or a nested one).
       * The length is 0 for the top-level message and for the last sub-message
       * of a choice argument; otherwise it is 1, covering the '}' or
       * (in a choice argument style) the '|'.
       * The value is the nesting level, where the main message has level 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_MSG_LIMIT = 1,
      /**
       * Identifies a substring of the pattern string that must be skipped when formatting.
       * An apostrophe that begins or ends quoted text, for example,
       * is reported with such a part.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_SKIP_SYNTAX = 2,
      /**
       * Signals that a syntax character has to be inserted for auto-quoting.
       * The length is 0.
       * The value is the character code of the character to insert. (U+0027=APOSTROPHE)
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_INSERT_CHAR = 3,
      /**
       * Identifies a syntactic (non-escaped) # symbol in a plural variant.
       * When formatting, this part's substring is replaced with the
       * (value-offset) for the plural argument value.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_REPLACE_NUMBER = 4,
      /**
       * Marks where an argument begins.
       * The length is 1, covering the '{'.
       * The value is the ordinal value of the ArgType. Use getArgType().
       * <p>
       * Either an ARG_NUMBER or an ARG_NAME part comes next,
       * then optional argument sub-parts (see UMessagePatternArgType constants),
       * and finally an ARG_LIMIT part.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_START = 5,
      /**
       * Marks where an argument ends.
       * The length is 1, covering the '}'.
       * The value is the ordinal value of the ArgType. Use getArgType().
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_LIMIT = 6,
      /**
       * The argument number; the number itself is the part's value.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_NUMBER = 7,
      /**
       * The argument name.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_NAME = 8,
      /**
       * The argument type.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_TYPE = 9,
      /**
       * The argument style text.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_STYLE = 10,
      /**
       * A selector substring in a "complex" argument style.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_SELECTOR = 11,
      /**
       * An integer value, such as the offset or an explicit selector value
       * in a PluralFormat style.
       * The part value is that integer.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_INT = 12,
      /**
       * A numeric value, such as the offset or an explicit selector value
       * in a PluralFormat style.
       * The part value is an index into an internal array of numeric values;
       * use getNumericValue() to retrieve the number.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_DOUBLE = 13
  };
  /**
   * Allows the enum type to be named without the "enum" keyword.
   * @stable ICU 4.8
   */
  typedef enum UMessagePatternPartType UMessagePatternPartType;
  201. /**
  202. * Argument type constants.
  203. * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
  204. *
  205. * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
  206. * with a nesting level one greater than the surrounding message.
  207. * @stable ICU 4.8
  208. */
  enum UMessagePatternArgType {
      /**
       * No type is specified for the argument.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_NONE = 0,
      /**
       * The argument has a "simple" type, which the ARG_TYPE part provides.
       * An ARG_STYLE part might come after it.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_SIMPLE = 1,
      /**
       * The argument is a ChoiceFormat consisting of one or more
       * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_CHOICE = 2,
      /**
       * The argument is a cardinal-number PluralFormat.
       * It has an optional ARG_INT or ARG_DOUBLE offset (e.g., offset:1)
       * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
       * When the selector has an explicit value (e.g., =2),
       * the ARG_INT or ARG_DOUBLE part preceding the message provides that value.
       * Otherwise the message immediately follows the ARG_SELECTOR.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_PLURAL = 3,
      /**
       * The argument is a SelectFormat consisting of one or more
       * (ARG_SELECTOR, message) pairs.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_SELECT = 4,
      /**
       * The argument is an ordinal-number PluralFormat.
       * Its style parts sequence and semantics are the same as
       * for UMSGPAT_ARG_TYPE_PLURAL.
       * @stable ICU 50
       */
      UMSGPAT_ARG_TYPE_SELECTORDINAL = 5
  };
  /**
   * Allows the enum type to be named without the "enum" keyword.
   * @stable ICU 4.8
   */
  typedef enum UMessagePatternArgType UMessagePatternArgType;
  253. /**
  254. * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
  255. * Returns TRUE if the argument type has a plural style part sequence and semantics,
  256. * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
  257. * @stable ICU 50
  258. */
  // Note: argType appears twice in the expansion, so it may be evaluated twice;
  // do not pass an expression with side effects.
  #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
      (!((argType)!=UMSGPAT_ARG_TYPE_PLURAL && (argType)!=UMSGPAT_ARG_TYPE_SELECTORDINAL))
  enum {
      /**
       * Result of MessagePattern.validateArgumentName() when the string
       * is a valid "pattern identifier" but is not a number.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_NAME_NOT_NUMBER = -1,
      /**
       * Result of MessagePattern.validateArgumentName() when the string is invalid.
       * Either it is not a valid "pattern identifier",
       * or it consists only of ASCII digits but has a leading zero
       * or the number is too large.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_NAME_NOT_VALID = -2
  };
  277. /**
  278. * Special value that is returned by getNumericValue(Part) when no
  279. * numeric value is defined for a part.
  280. * @see MessagePattern.getNumericValue()
  281. * @stable ICU 4.8
  282. */
  #define UMSGPAT_NO_NUMERIC_VALUE (-123456789.0)
  284. U_NAMESPACE_BEGIN
  285. class MessagePatternDoubleList;
  286. class MessagePatternPartsList;
  287. /**
  288. * Parses and represents ICU MessageFormat patterns.
  289. * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
  290. * Used in the implementations of those classes as well as in tools
  291. * for message validation, translation and format conversion.
  292. * <p>
  293. * The parser handles all syntax relevant for identifying message arguments.
  294. * This includes "complex" arguments whose style strings contain
  295. * nested MessageFormat pattern substrings.
  296. * For "simple" arguments (with no nested MessageFormat pattern substrings),
  297. * the argument style is not parsed any further.
  298. * <p>
  299. * The parser handles named and numbered message arguments and allows both in one message.
  300. * <p>
  301. * Once a pattern has been parsed successfully, iterate through the parsed data
  302. * with countParts(), getPart() and related methods.
  303. * <p>
  304. * The data logically represents a parse tree, but is stored and accessed
  305. * as a list of "parts" for fast and simple parsing and to minimize object allocations.
  306. * Arguments and nested messages are best handled via recursion.
  307. * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
  308. * the index of the corresponding _LIMIT "part".
  309. * <p>
  310. * List of "parts":
  311. * <pre>
  312. * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
  313. * argument = noneArg | simpleArg | complexArg
  314. * complexArg = choiceArg | pluralArg | selectArg
  315. *
  316. * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
  317. * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
  318. * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
  319. * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
  320. * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
  321. *
  322. * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
  323. * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
  324. * selectStyle = (ARG_SELECTOR message)+
  325. * </pre>
  326. * <ul>
  327. * <li>Literal output text is not represented directly by "parts" but accessed
  328. * between parts of a message, from one part's getLimit() to the next part's getIndex().
  329. * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
  330. * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
  331. * the less-than-or-equal-to sign (U+2264).
  332. * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
  333. * The optional numeric Part between each (ARG_SELECTOR, message) pair
  334. * is the value of an explicit-number selector like "=2",
  335. * otherwise the selector is a non-numeric identifier.
  336. * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
  337. * </ul>
  338. * <p>
  339. * This class is not intended for public subclassing.
  340. *
  341. * @stable ICU 4.8
  342. */
  343. class U_COMMON_API MessagePattern : public UObject {
  344. public:
  345. /**
  346. * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
  347. * @param errorCode Standard ICU error code. Its input value must
  348. * pass the U_SUCCESS() test, or else the function returns
  349. * immediately. Check for U_FAILURE() on output or use with
  350. * function chaining. (See User Guide for details.)
  351. * @stable ICU 4.8
  352. */
  353. MessagePattern(UErrorCode &errorCode);
  354. /**
  355. * Constructs an empty MessagePattern.
  356. * @param mode Explicit UMessagePatternApostropheMode.
  357. * @param errorCode Standard ICU error code. Its input value must
  358. * pass the U_SUCCESS() test, or else the function returns
  359. * immediately. Check for U_FAILURE() on output or use with
  360. * function chaining. (See User Guide for details.)
  361. * @stable ICU 4.8
  362. */
  363. MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
  364. /**
  365. * Constructs a MessagePattern with default UMessagePatternApostropheMode and
  366. * parses the MessageFormat pattern string.
  367. * @param pattern a MessageFormat pattern string
  368. * @param parseError Struct to receive information on the position
  369. * of an error within the pattern.
  370. * Can be NULL.
  371. * @param errorCode Standard ICU error code. Its input value must
  372. * pass the U_SUCCESS() test, or else the function returns
  373. * immediately. Check for U_FAILURE() on output or use with
  374. * function chaining. (See User Guide for details.)
  375. * TODO: turn @throws into UErrorCode specifics?
  376. * @throws IllegalArgumentException for syntax errors in the pattern string
  377. * @throws IndexOutOfBoundsException if certain limits are exceeded
  378. * (e.g., argument number too high, argument name too long, etc.)
  379. * @throws NumberFormatException if a number could not be parsed
  380. * @stable ICU 4.8
  381. */
  382. MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
  383. /**
  384. * Copy constructor.
  385. * @param other Object to copy.
  386. * @stable ICU 4.8
  387. */
  388. MessagePattern(const MessagePattern &other);
  389. /**
  390. * Assignment operator.
  391. * @param other Object to copy.
  392. * @return *this=other
  393. * @stable ICU 4.8
  394. */
  395. MessagePattern &operator=(const MessagePattern &other);
  396. /**
  397. * Destructor.
  398. * @stable ICU 4.8
  399. */
  400. virtual ~MessagePattern();
  401. /**
  402. * Parses a MessageFormat pattern string.
  403. * @param pattern a MessageFormat pattern string
  404. * @param parseError Struct to receive information on the position
  405. * of an error within the pattern.
  406. * Can be NULL.
  407. * @param errorCode Standard ICU error code. Its input value must
  408. * pass the U_SUCCESS() test, or else the function returns
  409. * immediately. Check for U_FAILURE() on output or use with
  410. * function chaining. (See User Guide for details.)
  411. * @return *this
  412. * @throws IllegalArgumentException for syntax errors in the pattern string
  413. * @throws IndexOutOfBoundsException if certain limits are exceeded
  414. * (e.g., argument number too high, argument name too long, etc.)
  415. * @throws NumberFormatException if a number could not be parsed
  416. * @stable ICU 4.8
  417. */
  418. MessagePattern &parse(const UnicodeString &pattern,
  419. UParseError *parseError, UErrorCode &errorCode);
  420. /**
  421. * Parses a ChoiceFormat pattern string.
  422. * @param pattern a ChoiceFormat pattern string
  423. * @param parseError Struct to receive information on the position
  424. * of an error within the pattern.
  425. * Can be NULL.
  426. * @param errorCode Standard ICU error code. Its input value must
  427. * pass the U_SUCCESS() test, or else the function returns
  428. * immediately. Check for U_FAILURE() on output or use with
  429. * function chaining. (See User Guide for details.)
  430. * @return *this
  431. * @throws IllegalArgumentException for syntax errors in the pattern string
  432. * @throws IndexOutOfBoundsException if certain limits are exceeded
  433. * (e.g., argument number too high, argument name too long, etc.)
  434. * @throws NumberFormatException if a number could not be parsed
  435. * @stable ICU 4.8
  436. */
  437. MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
  438. UParseError *parseError, UErrorCode &errorCode);
  439. /**
  440. * Parses a PluralFormat pattern string.
  441. * @param pattern a PluralFormat pattern string
  442. * @param parseError Struct to receive information on the position
  443. * of an error within the pattern.
  444. * Can be NULL.
  445. * @param errorCode Standard ICU error code. Its input value must
  446. * pass the U_SUCCESS() test, or else the function returns
  447. * immediately. Check for U_FAILURE() on output or use with
  448. * function chaining. (See User Guide for details.)
  449. * @return *this
  450. * @throws IllegalArgumentException for syntax errors in the pattern string
  451. * @throws IndexOutOfBoundsException if certain limits are exceeded
  452. * (e.g., argument number too high, argument name too long, etc.)
  453. * @throws NumberFormatException if a number could not be parsed
  454. * @stable ICU 4.8
  455. */
  456. MessagePattern &parsePluralStyle(const UnicodeString &pattern,
  457. UParseError *parseError, UErrorCode &errorCode);
  458. /**
  459. * Parses a SelectFormat pattern string.
  460. * @param pattern a SelectFormat pattern string
  461. * @param parseError Struct to receive information on the position
  462. * of an error within the pattern.
  463. * Can be NULL.
  464. * @param errorCode Standard ICU error code. Its input value must
  465. * pass the U_SUCCESS() test, or else the function returns
  466. * immediately. Check for U_FAILURE() on output or use with
  467. * function chaining. (See User Guide for details.)
  468. * @return *this
  469. * @throws IllegalArgumentException for syntax errors in the pattern string
  470. * @throws IndexOutOfBoundsException if certain limits are exceeded
  471. * (e.g., argument number too high, argument name too long, etc.)
  472. * @throws NumberFormatException if a number could not be parsed
  473. * @stable ICU 4.8
  474. */
  475. MessagePattern &parseSelectStyle(const UnicodeString &pattern,
  476. UParseError *parseError, UErrorCode &errorCode);
  477. /**
  478. * Clears this MessagePattern.
  479. * countParts() will return 0.
  480. * @stable ICU 4.8
  481. */
  482. void clear();
  483. /**
  484. * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
  485. * countParts() will return 0.
  486. * @param mode The new UMessagePatternApostropheMode.
  487. * @stable ICU 4.8
  488. */
  489. void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
  490. clear();
  491. aposMode=mode;
  492. }
  493. /**
  494. * @param other another object to compare with.
  495. * @return TRUE if this object is equivalent to the other one.
  496. * @stable ICU 4.8
  497. */
  498. UBool operator==(const MessagePattern &other) const;
  499. /**
  500. * @param other another object to compare with.
  501. * @return FALSE if this object is equivalent to the other one.
  502. * @stable ICU 4.8
  503. */
  504. inline UBool operator!=(const MessagePattern &other) const {
  505. return !operator==(other);
  506. }
  507. /**
  508. * @return A hash code for this object.
  509. * @stable ICU 4.8
  510. */
  511. int32_t hashCode() const;
  512. /**
  513. * @return this instance's UMessagePatternApostropheMode.
  514. * @stable ICU 4.8
  515. */
  516. UMessagePatternApostropheMode getApostropheMode() const {
  517. return aposMode;
  518. }
  519. // Java has package-private jdkAposMode() here.
  520. // In C++, this is declared in the MessageImpl class.
  521. /**
  522. * @return the parsed pattern string (null if none was parsed).
  523. * @stable ICU 4.8
  524. */
  525. const UnicodeString &getPatternString() const {
  526. return msg;
  527. }
  528. /**
  529. * Does the parsed pattern have named arguments like {first_name}?
  530. * @return TRUE if the parsed pattern has at least one named argument.
  531. * @stable ICU 4.8
  532. */
  533. UBool hasNamedArguments() const {
  534. return hasArgNames;
  535. }
  536. /**
  537. * Does the parsed pattern have numbered arguments like {2}?
  538. * @return TRUE if the parsed pattern has at least one numbered argument.
  539. * @stable ICU 4.8
  540. */
  541. UBool hasNumberedArguments() const {
  542. return hasArgNumbers;
  543. }
  544. /**
  545. * Validates and parses an argument name or argument number string.
  546. * An argument name must be a "pattern identifier", that is, it must contain
  547. * no Unicode Pattern_Syntax or Pattern_White_Space characters.
  548. * If it only contains ASCII digits, then it must be a small integer with no leading zero.
  549. * @param name Input string.
  550. * @return &gt;=0 if the name is a valid number,
  551. * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
  552. * ARG_NAME_NOT_VALID (-2) if it is neither.
  553. * @stable ICU 4.8
  554. */
  555. static int32_t validateArgumentName(const UnicodeString &name);
  556. /**
  557. * Returns a version of the parsed pattern string where each ASCII apostrophe
  558. * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
  559. * <p>
  560. * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
  561. * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
  562. * @return the deep-auto-quoted version of the parsed pattern string.
  563. * @see MessageFormat.autoQuoteApostrophe()
  564. * @stable ICU 4.8
  565. */
  566. UnicodeString autoQuoteApostropheDeep() const;
  567. class Part;
  568. /**
  569. * Returns the number of "parts" created by parsing the pattern string.
  570. * Returns 0 if no pattern has been parsed or clear() was called.
  571. * @return the number of pattern parts.
  572. * @stable ICU 4.8
  573. */
  574. int32_t countParts() const {
  575. return partsLength;
  576. }
  577. /**
  578. * Gets the i-th pattern "part".
  579. * @param i The index of the Part data. (0..countParts()-1)
  580. * @return the i-th pattern "part".
  581. * @stable ICU 4.8
  582. */
  583. const Part &getPart(int32_t i) const {
  584. return parts[i];
  585. }
  586. /**
  587. * Returns the UMessagePatternPartType of the i-th pattern "part".
  588. * Convenience method for getPart(i).getType().
  589. * @param i The index of the Part data. (0..countParts()-1)
  590. * @return The UMessagePatternPartType of the i-th Part.
  591. * @stable ICU 4.8
  592. */
  593. UMessagePatternPartType getPartType(int32_t i) const {
  594. return getPart(i).type;
  595. }
  596. /**
  597. * Returns the pattern index of the specified pattern "part".
  598. * Convenience method for getPart(partIndex).getIndex().
  599. * @param partIndex The index of the Part data. (0..countParts()-1)
  600. * @return The pattern index of this Part.
  601. * @stable ICU 4.8
  602. */
  603. int32_t getPatternIndex(int32_t partIndex) const {
  604. return getPart(partIndex).index;
  605. }
  606. /**
  607. * Returns the substring of the pattern string indicated by the Part.
  608. * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
  609. * @param part a part of this MessagePattern.
  610. * @return the substring associated with part.
  611. * @stable ICU 4.8
  612. */
  613. UnicodeString getSubstring(const Part &part) const {
  614. return msg.tempSubString(part.index, part.length);
  615. }
  616. /**
  617. * Compares the part's substring with the input string s.
  618. * @param part a part of this MessagePattern.
  619. * @param s a string.
  620. * @return TRUE if getSubstring(part).equals(s).
  621. * @stable ICU 4.8
  622. */
  623. UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
  624. return 0==msg.compare(part.index, part.length, s);
  625. }
  626. /**
  627. * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
  628. * @param part a part of this MessagePattern.
  629. * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
  630. * @stable ICU 4.8
  631. */
  632. double getNumericValue(const Part &part) const;
  633. /**
  634. * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
  635. * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
  636. * @return the "offset:" value.
  637. * @stable ICU 4.8
  638. */
  639. double getPluralOffset(int32_t pluralStart) const;
  640. /**
  641. * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
  642. * @param start The index of some Part data (0..countParts()-1);
  643. * this Part should be of Type ARG_START or MSG_START.
  644. * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
  645. * or start itself if getPartType(start)!=ARG|MSG_START.
  646. * @stable ICU 4.8
  647. */
  648. int32_t getLimitPartIndex(int32_t start) const {
  649. int32_t limit=getPart(start).limitPartIndex;
  650. if(limit<start) {
  651. return start;
  652. }
  653. return limit;
  654. }
  655. /**
  656. * A message pattern "part", representing a pattern parsing event.
  657. * There is a part for the start and end of a message or argument,
  658. * for quoting and escaping of and with ASCII apostrophes,
  659. * and for syntax elements of "complex" arguments.
  660. * @stable ICU 4.8
  661. */
  662. class Part : public UMemory {
  663. public:
  664. /**
  665. * Default constructor, do not use.
  666. * @internal
  667. */
  668. Part() {}
  669. /**
  670. * Returns the type of this part.
  671. * @return the part type.
  672. * @stable ICU 4.8
  673. */
  674. UMessagePatternPartType getType() const {
  675. return type;
  676. }
  677. /**
  678. * Returns the pattern string index associated with this Part.
  679. * @return this part's pattern string index.
  680. * @stable ICU 4.8
  681. */
  682. int32_t getIndex() const {
  683. return index;
  684. }
  685. /**
  686. * Returns the length of the pattern substring associated with this Part.
  687. * This is 0 for some parts.
  688. * @return this part's pattern substring length.
  689. * @stable ICU 4.8
  690. */
  691. int32_t getLength() const {
  692. return length;
  693. }
  694. /**
  695. * Returns the pattern string limit (exclusive-end) index associated with this Part.
  696. * Convenience method for getIndex()+getLength().
  697. * @return this part's pattern string limit index, same as getIndex()+getLength().
  698. * @stable ICU 4.8
  699. */
  700. int32_t getLimit() const {
  701. return index+length;
  702. }
  703. /**
  704. * Returns a value associated with this part.
  705. * See the documentation of each part type for details.
  706. * @return the part value.
  707. * @stable ICU 4.8
  708. */
  709. int32_t getValue() const {
  710. return value;
  711. }
  712. /**
  713. * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
  714. * otherwise UMSGPAT_ARG_TYPE_NONE.
  715. * @return the argument type for this part.
  716. * @stable ICU 4.8
  717. */
  718. UMessagePatternArgType getArgType() const {
  719. UMessagePatternPartType type=getType();
  720. if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
  721. return (UMessagePatternArgType)value;
  722. } else {
  723. return UMSGPAT_ARG_TYPE_NONE;
  724. }
  725. }
  726. /**
  727. * Indicates whether the Part type has a numeric value.
  728. * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
  729. * @param type The Part type to be tested.
  730. * @return TRUE if the Part type has a numeric value.
  731. * @stable ICU 4.8
  732. */
  733. static UBool hasNumericValue(UMessagePatternPartType type) {
  734. return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
  735. }
  736. /**
  737. * @param other another object to compare with.
  738. * @return TRUE if this object is equivalent to the other one.
  739. * @stable ICU 4.8
  740. */
  741. UBool operator==(const Part &other) const;
  742. /**
  743. * @param other another object to compare with.
  744. * @return FALSE if this object is equivalent to the other one.
  745. * @stable ICU 4.8
  746. */
  747. inline UBool operator!=(const Part &other) const {
  748. return !operator==(other);
  749. }
  750. /**
  751. * @return A hash code for this object.
  752. * @stable ICU 4.8
  753. */
  754. int32_t hashCode() const {
  755. return ((type*37+index)*37+length)*37+value;
  756. }
  757. private:
  758. friend class MessagePattern;
  759. static const int32_t MAX_LENGTH=0xffff;
  760. static const int32_t MAX_VALUE=0x7fff;
  761. // Some fields are not final because they are modified during pattern parsing.
  762. // After pattern parsing, the parts are effectively immutable.
  763. UMessagePatternPartType type;
  764. int32_t index;
  765. uint16_t length;
  766. int16_t value;
  767. int32_t limitPartIndex;
  768. };
  769. private:
  770. void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
  771. void postParse();
  772. int32_t parseMessage(int32_t index, int32_t msgStartLength,
  773. int32_t nestingLevel, UMessagePatternArgType parentType,
  774. UParseError *parseError, UErrorCode &errorCode);
  775. int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
  776. UParseError *parseError, UErrorCode &errorCode);
  777. int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
  778. int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
  779. UParseError *parseError, UErrorCode &errorCode);
  780. int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
  781. UParseError *parseError, UErrorCode &errorCode);
  782. /**
  783. * Validates and parses an argument name or argument number string.
  784. * This internal method assumes that the input substring is a "pattern identifier".
  785. * @return &gt;=0 if the name is a valid number,
  786. * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
  787. * ARG_NAME_NOT_VALID (-2) if it is neither.
  788. * @see #validateArgumentName(String)
  789. */
  790. static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
  791. int32_t parseArgNumber(int32_t start, int32_t limit) {
  792. return parseArgNumber(msg, start, limit);
  793. }
  794. /**
  795. * Parses a number from the specified message substring.
  796. * @param start start index into the message string
  797. * @param limit limit index into the message string, must be start<limit
  798. * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
  799. * @param parseError
  800. * @param errorCode
  801. */
  802. void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
  803. UParseError *parseError, UErrorCode &errorCode);
  804. // Java has package-private appendReducedApostrophes() here.
  805. // In C++, this is declared in the MessageImpl class.
  806. int32_t skipWhiteSpace(int32_t index);
  807. int32_t skipIdentifier(int32_t index);
  808. /**
  809. * Skips a sequence of characters that could occur in a double value.
  810. * Does not fully parse or validate the value.
  811. */
  812. int32_t skipDouble(int32_t index);
  813. static UBool isArgTypeChar(UChar32 c);
  814. UBool isChoice(int32_t index);
  815. UBool isPlural(int32_t index);
  816. UBool isSelect(int32_t index);
  817. UBool isOrdinal(int32_t index);
  818. /**
  819. * @return TRUE if we are inside a MessageFormat (sub-)pattern,
  820. * as opposed to inside a top-level choice/plural/select pattern.
  821. */
  822. UBool inMessageFormatPattern(int32_t nestingLevel);
  823. /**
  824. * @return TRUE if we are in a MessageFormat sub-pattern
  825. * of a top-level ChoiceFormat pattern.
  826. */
  827. UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
  828. void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
  829. int32_t value, UErrorCode &errorCode);
  830. void addLimitPart(int32_t start,
  831. UMessagePatternPartType type, int32_t index, int32_t length,
  832. int32_t value, UErrorCode &errorCode);
  833. void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
  834. void setParseError(UParseError *parseError, int32_t index);
  835. UBool init(UErrorCode &errorCode);
  836. UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
  837. UMessagePatternApostropheMode aposMode;
  838. UnicodeString msg;
  839. // ArrayList<Part> parts=new ArrayList<Part>();
  840. MessagePatternPartsList *partsList;
  841. Part *parts;
  842. int32_t partsLength;
  843. // ArrayList<Double> numericValues;
  844. MessagePatternDoubleList *numericValuesList;
  845. double *numericValues;
  846. int32_t numericValuesLength;
  847. UBool hasArgNames;
  848. UBool hasArgNumbers;
  849. UBool needsAutoQuoting;
  850. };
  851. U_NAMESPACE_END
  852. #endif // !UCONFIG_NO_FORMATTING
  853. #endif // __MESSAGEPATTERN_H__