parsing.hpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. // ----------------------------------------------------------------------------
  2. // parsing.hpp : implementation of the parsing member functions
  3. // ( parse, parse_printf_directive)
  4. // ----------------------------------------------------------------------------
  5. // Copyright Samuel Krempp 2003. Use, modification, and distribution are
  6. // subject to the Boost Software License, Version 1.0. (See accompanying
  7. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. // see http://www.boost.org/libs/format for library home page
  9. // ----------------------------------------------------------------------------
  10. #ifndef BOOST_FORMAT_PARSING_HPP
  11. #define BOOST_FORMAT_PARSING_HPP
#include <limits>
#include <boost/format/format_class.hpp>
#include <boost/format/exceptions.hpp>
#include <boost/throw_exception.hpp>
#include <boost/assert.hpp>
  16. namespace boost {
  17. namespace io {
  18. namespace detail {
  19. #if defined(BOOST_NO_STD_LOCALE)
  20. // streams will be used for narrow / widen. but these methods are not const
  21. template<class T>
  22. T& const_or_not(const T& x) {
  23. return const_cast<T&> (x);
  24. }
  25. #else
  26. template<class T>
  27. const T& const_or_not(const T& x) {
  28. return x;
  29. }
  30. #endif
  31. template<class Ch, class Facet> inline
  32. char wrap_narrow(const Facet& fac, Ch c, char deflt) {
  33. return const_or_not(fac).narrow(c, deflt);
  34. }
  35. template<class Ch, class Facet> inline
  36. bool wrap_isdigit(const Facet& fac, Ch c) {
  37. #if ! defined( BOOST_NO_LOCALE_ISDIGIT )
  38. return fac.is(std::ctype<Ch>::digit, c);
  39. # else
  40. (void) fac; // remove "unused parameter" warning
  41. using namespace std;
  42. return isdigit(c) != 0;
  43. #endif
  44. }
  45. template<class Iter, class Facet>
  46. Iter wrap_scan_notdigit(const Facet & fac, Iter beg, Iter end) {
  47. using namespace std;
  48. for( ; beg!=end && wrap_isdigit(fac, *beg); ++beg) ;
  49. return beg;
  50. }
  51. // Input : [start, last) iterators range and a
  52. // a Facet to use its widen/narrow member function
  53. // Effects : read sequence and convert digits into integral n, of type Res
  54. // Returns : n
  55. template<class Res, class Iter, class Facet>
  56. Iter str2int (const Iter & start, const Iter & last, Res & res,
  57. const Facet& fac)
  58. {
  59. using namespace std;
  60. Iter it;
  61. res=0;
  62. for(it=start; it != last && wrap_isdigit(fac, *it); ++it ) {
  63. char cur_ch = wrap_narrow(fac, *it, 0); // cant fail.
  64. res *= 10;
  65. res += cur_ch - '0'; // 22.2.1.1.2.13 of the C++ standard
  66. }
  67. return it;
  68. }
  69. // skip printf's "asterisk-fields" directives in the format-string buf
  70. // Input : char string, with starting index *pos_p
  71. // a Facet merely to use its widen/narrow member function
  72. // Effects : advance *pos_p by skipping printf's asterisk fields.
  73. // Returns : nothing
  74. template<class Iter, class Facet>
  75. Iter skip_asterisk(Iter start, Iter last, const Facet& fac)
  76. {
  77. using namespace std;
  78. ++ start;
  79. start = wrap_scan_notdigit(fac, start, last);
  80. if(start!=last && *start== const_or_not(fac).widen( '$') )
  81. ++start;
  82. return start;
  83. }
  84. // auxiliary func called by parse_printf_directive
  85. // for centralising error handling
  86. // it either throws if user sets the corresponding flag, or does nothing.
  87. inline void maybe_throw_exception(unsigned char exceptions,
  88. std::size_t pos, std::size_t size)
  89. {
  90. if(exceptions & io::bad_format_string_bit)
  91. boost::throw_exception(io::bad_format_string(pos, size) );
  92. }
  93. // Input: the position of a printf-directive in the format-string
  94. // a basic_ios& merely to use its widen/narrow member function
  95. // a bitset'exceptions' telling whether to throw exceptions on errors.
  96. // Returns:
  97. // true if parse succeeded (ignore some errors if exceptions disabled)
  98. // false if it failed so bad that the directive should be printed verbatim
  99. // Effects:
  100. // start is incremented so that *start is the first char after
  101. // this directive
  102. // *fpar is set with the parameters read in the directive
  103. template<class Ch, class Tr, class Alloc, class Iter, class Facet>
  104. bool parse_printf_directive(Iter & start, const Iter& last,
  105. detail::format_item<Ch, Tr, Alloc> * fpar,
  106. const Facet& fac,
  107. std::size_t offset, unsigned char exceptions)
  108. {
  109. typedef typename basic_format<Ch, Tr, Alloc>::format_item_t format_item_t;
  110. fpar->argN_ = format_item_t::argN_no_posit; // if no positional-directive
  111. bool precision_set = false;
  112. bool in_brackets=false;
  113. Iter start0 = start;
  114. std::size_t fstring_size = last-start0+offset;
  115. if(start>= last) { // empty directive : this is a trailing %
  116. maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
  117. return false;
  118. }
  119. if(*start== const_or_not(fac).widen( '|')) {
  120. in_brackets=true;
  121. if( ++start >= last ) {
  122. maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
  123. return false;
  124. }
  125. }
  126. // the flag '0' would be picked as a digit for argument order, but here it's a flag :
  127. if(*start== const_or_not(fac).widen( '0'))
  128. goto parse_flags;
  129. // handle argument order (%2$d) or possibly width specification: %2d
  130. if(wrap_isdigit(fac, *start)) {
  131. int n;
  132. start = str2int(start, last, n, fac);
  133. if( start >= last ) {
  134. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  135. return false;
  136. }
  137. // %N% case : this is already the end of the directive
  138. if( *start == const_or_not(fac).widen( '%') ) {
  139. fpar->argN_ = n-1;
  140. ++start;
  141. if( in_brackets)
  142. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  143. // but don't return. maybe "%" was used in lieu of '$', so we go on.
  144. else
  145. return true;
  146. }
  147. if ( *start== const_or_not(fac).widen( '$') ) {
  148. fpar->argN_ = n-1;
  149. ++start;
  150. }
  151. else {
  152. // non-positionnal directive
  153. fpar->fmtstate_.width_ = n;
  154. fpar->argN_ = format_item_t::argN_no_posit;
  155. goto parse_precision;
  156. }
  157. }
  158. parse_flags:
  159. // handle flags
  160. while ( start != last) { // as long as char is one of + - = _ # 0 l h or ' '
  161. // misc switches
  162. switch ( wrap_narrow(fac, *start, 0)) {
  163. case '\'' : break; // no effect yet. (painful to implement)
  164. case 'l':
  165. case 'h': // short/long modifier : for printf-comaptibility (no action needed)
  166. break;
  167. case '-':
  168. fpar->fmtstate_.flags_ |= std::ios_base::left;
  169. break;
  170. case '=':
  171. fpar->pad_scheme_ |= format_item_t::centered;
  172. break;
  173. case '_':
  174. fpar->fmtstate_.flags_ |= std::ios_base::internal;
  175. break;
  176. case ' ':
  177. fpar->pad_scheme_ |= format_item_t::spacepad;
  178. break;
  179. case '+':
  180. fpar->fmtstate_.flags_ |= std::ios_base::showpos;
  181. break;
  182. case '0':
  183. fpar->pad_scheme_ |= format_item_t::zeropad;
  184. // need to know alignment before really setting flags,
  185. // so just add 'zeropad' flag for now, it will be processed later.
  186. break;
  187. case '#':
  188. fpar->fmtstate_.flags_ |= std::ios_base::showpoint | std::ios_base::showbase;
  189. break;
  190. default:
  191. goto parse_width;
  192. }
  193. ++start;
  194. } // loop on flag.
  195. if( start>=last) {
  196. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  197. return true;
  198. }
  199. parse_width:
  200. // handle width spec
  201. // first skip 'asterisk fields' : *, or *N$
  202. if(*start == const_or_not(fac).widen( '*') )
  203. start = skip_asterisk(start, last, fac);
  204. if(start!=last && wrap_isdigit(fac, *start))
  205. start = str2int(start, last, fpar->fmtstate_.width_, fac);
  206. parse_precision:
  207. if( start>= last) {
  208. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  209. return true;
  210. }
  211. // handle precision spec
  212. if (*start== const_or_not(fac).widen( '.')) {
  213. ++start;
  214. if(start != last && *start == const_or_not(fac).widen( '*') )
  215. start = skip_asterisk(start, last, fac);
  216. if(start != last && wrap_isdigit(fac, *start)) {
  217. start = str2int(start, last, fpar->fmtstate_.precision_, fac);
  218. precision_set = true;
  219. }
  220. else
  221. fpar->fmtstate_.precision_ =0;
  222. }
  223. // handle formatting-type flags :
  224. while( start != last && ( *start== const_or_not(fac).widen( 'l')
  225. || *start== const_or_not(fac).widen( 'L')
  226. || *start== const_or_not(fac).widen( 'h')) )
  227. ++start;
  228. if( start>=last) {
  229. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  230. return true;
  231. }
  232. if( in_brackets && *start== const_or_not(fac).widen( '|') ) {
  233. ++start;
  234. return true;
  235. }
  236. switch ( wrap_narrow(fac, *start, 0) ) {
  237. case 'X':
  238. fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
  239. case 'p': // pointer => set hex.
  240. case 'x':
  241. fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
  242. fpar->fmtstate_.flags_ |= std::ios_base::hex;
  243. break;
  244. case 'o':
  245. fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
  246. fpar->fmtstate_.flags_ |= std::ios_base::oct;
  247. break;
  248. case 'E':
  249. fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
  250. case 'e':
  251. fpar->fmtstate_.flags_ &= ~std::ios_base::floatfield;
  252. fpar->fmtstate_.flags_ |= std::ios_base::scientific;
  253. fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
  254. fpar->fmtstate_.flags_ |= std::ios_base::dec;
  255. break;
  256. case 'f':
  257. fpar->fmtstate_.flags_ &= ~std::ios_base::floatfield;
  258. fpar->fmtstate_.flags_ |= std::ios_base::fixed;
  259. case 'u':
  260. case 'd':
  261. case 'i':
  262. fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
  263. fpar->fmtstate_.flags_ |= std::ios_base::dec;
  264. break;
  265. case 'T':
  266. ++start;
  267. if( start >= last)
  268. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  269. else
  270. fpar->fmtstate_.fill_ = *start;
  271. fpar->pad_scheme_ |= format_item_t::tabulation;
  272. fpar->argN_ = format_item_t::argN_tabulation;
  273. break;
  274. case 't':
  275. fpar->fmtstate_.fill_ = const_or_not(fac).widen( ' ');
  276. fpar->pad_scheme_ |= format_item_t::tabulation;
  277. fpar->argN_ = format_item_t::argN_tabulation;
  278. break;
  279. case 'G':
  280. fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
  281. break;
  282. case 'g': // 'g' conversion is default for floats.
  283. fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
  284. fpar->fmtstate_.flags_ |= std::ios_base::dec;
  285. // CLEAR all floatield flags, so stream will CHOOSE
  286. fpar->fmtstate_.flags_ &= ~std::ios_base::floatfield;
  287. break;
  288. case 'C':
  289. case 'c':
  290. fpar->truncate_ = 1;
  291. break;
  292. case 'S':
  293. case 's':
  294. if(precision_set) // handle truncation manually, with own parameter.
  295. fpar->truncate_ = fpar->fmtstate_.precision_;
  296. fpar->fmtstate_.precision_ = 6; // default stream precision.
  297. break;
  298. case 'n' :
  299. fpar->argN_ = format_item_t::argN_ignored;
  300. break;
  301. default:
  302. maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  303. }
  304. ++start;
  305. if( in_brackets ) {
  306. if( start != last && *start== const_or_not(fac).widen( '|') ) {
  307. ++start;
  308. return true;
  309. }
  310. else maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
  311. }
  312. return true;
  313. }
  314. // -end parse_printf_directive()
  315. template<class String, class Facet>
  316. int upper_bound_from_fstring(const String& buf,
  317. const typename String::value_type arg_mark,
  318. const Facet& fac,
  319. unsigned char exceptions)
  320. {
  321. // quick-parsing of the format-string to count arguments mark (arg_mark, '%')
  322. // returns : upper bound on the number of format items in the format strings
  323. using namespace boost::io;
  324. typename String::size_type i1=0;
  325. int num_items=0;
  326. while( (i1=buf.find(arg_mark,i1)) != String::npos ) {
  327. if( i1+1 >= buf.size() ) {
  328. if(exceptions & bad_format_string_bit)
  329. boost::throw_exception(bad_format_string(i1, buf.size() )); // must not end in ".. %"
  330. else {
  331. ++num_items;
  332. break;
  333. }
  334. }
  335. if(buf[i1+1] == buf[i1] ) {// escaped "%%"
  336. i1+=2; continue;
  337. }
  338. ++i1;
  339. // in case of %N% directives, dont count it double (wastes allocations..) :
  340. i1 = detail::wrap_scan_notdigit(fac, buf.begin()+i1, buf.end()) - buf.begin();
  341. if( i1 < buf.size() && buf[i1] == arg_mark )
  342. ++i1;
  343. ++num_items;
  344. }
  345. return num_items;
  346. }
  347. template<class String> inline
  348. void append_string(String& dst, const String& src,
  349. const typename String::size_type beg,
  350. const typename String::size_type end) {
  351. dst.append(src.begin()+beg, src.begin()+end);
  352. }
  353. } // detail namespace
  354. } // io namespace
  355. // -----------------------------------------------
  356. // format :: parse(..)
  357. template<class Ch, class Tr, class Alloc>
  358. basic_format<Ch, Tr, Alloc>& basic_format<Ch, Tr, Alloc>::
  359. parse (const string_type& buf) {
  360. // parse the format-string
  361. using namespace std;
  362. #if !defined(BOOST_NO_STD_LOCALE)
  363. const std::ctype<Ch> & fac = BOOST_USE_FACET( std::ctype<Ch>, getloc());
  364. #else
  365. io::basic_oaltstringstream<Ch, Tr, Alloc> fac;
  366. //has widen and narrow even on compilers without locale
  367. #endif
  368. const Ch arg_mark = io::detail::const_or_not(fac).widen( '%');
  369. bool ordered_args=true;
  370. int max_argN=-1;
  371. // A: find upper_bound on num_items and allocates arrays
  372. int num_items = io::detail::upper_bound_from_fstring(buf, arg_mark, fac, exceptions());
  373. make_or_reuse_data(num_items);
  374. // B: Now the real parsing of the format string :
  375. num_items=0;
  376. typename string_type::size_type i0=0, i1=0;
  377. typename string_type::const_iterator it;
  378. bool special_things=false;
  379. int cur_item=0;
  380. while( (i1=buf.find(arg_mark,i1)) != string_type::npos ) {
  381. string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_;
  382. if( buf[i1+1] == buf[i1] ) { // escaped mark, '%%'
  383. io::detail::append_string(piece, buf, i0, i1+1);
  384. i1+=2; i0=i1;
  385. continue;
  386. }
  387. BOOST_ASSERT( static_cast<unsigned int>(cur_item) < items_.size() || cur_item==0);
  388. if(i1!=i0) {
  389. io::detail::append_string(piece, buf, i0, i1);
  390. i0=i1;
  391. }
  392. ++i1;
  393. it = buf.begin()+i1;
  394. bool parse_ok = io::detail::parse_printf_directive(
  395. it, buf.end(), &items_[cur_item], fac, i1, exceptions());
  396. i1 = it - buf.begin();
  397. if( ! parse_ok ) // the directive will be printed verbatim
  398. continue;
  399. i0=i1;
  400. items_[cur_item].compute_states(); // process complex options, like zeropad, into params
  401. int argN=items_[cur_item].argN_;
  402. if(argN == format_item_t::argN_ignored)
  403. continue;
  404. if(argN ==format_item_t::argN_no_posit)
  405. ordered_args=false;
  406. else if(argN == format_item_t::argN_tabulation) special_things=true;
  407. else if(argN > max_argN) max_argN = argN;
  408. ++num_items;
  409. ++cur_item;
  410. } // loop on %'s
  411. BOOST_ASSERT(cur_item == num_items);
  412. // store the final piece of string
  413. {
  414. string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_;
  415. io::detail::append_string(piece, buf, i0, buf.size());
  416. }
  417. if( !ordered_args) {
  418. if(max_argN >= 0 ) { // dont mix positional with non-positionnal directives
  419. if(exceptions() & io::bad_format_string_bit)
  420. boost::throw_exception(
  421. io::bad_format_string(static_cast<std::size_t>(max_argN), 0));
  422. // else do nothing. => positionnal arguments are processed as non-positionnal
  423. }
  424. // set things like it would have been with positional directives :
  425. int non_ordered_items = 0;
  426. for(int i=0; i< num_items; ++i)
  427. if(items_[i].argN_ == format_item_t::argN_no_posit) {
  428. items_[i].argN_ = non_ordered_items;
  429. ++non_ordered_items;
  430. }
  431. max_argN = non_ordered_items-1;
  432. }
  433. // C: set some member data :
  434. items_.resize(num_items, format_item_t(io::detail::const_or_not(fac).widen( ' ')) );
  435. if(special_things) style_ |= special_needs;
  436. num_args_ = max_argN + 1;
  437. if(ordered_args) style_ |= ordered;
  438. else style_ &= ~ordered;
  439. return *this;
  440. }
  441. } // namespace boost
  442. #endif // BOOST_FORMAT_PARSING_HPP