scanf.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233
  1. /*
  2. +----------------------------------------------------------------------+
  3. | PHP Version 5 |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 1997-2016 The PHP Group |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. | Author: Clayton Collie <clcollie@mindspring.com> |
  16. +----------------------------------------------------------------------+
  17. */
  18. /* $Id$ */
  19. /*
  20. scanf.c --
  21. This file contains the base code which implements sscanf and by extension
  22. fscanf. Original code is from TCL8.3.0 and bears the following copyright:
  23. This software is copyrighted by the Regents of the University of
  24. California, Sun Microsystems, Inc., Scriptics Corporation,
  25. and other parties. The following terms apply to all files associated
  26. with the software unless explicitly disclaimed in individual files.
  27. The authors hereby grant permission to use, copy, modify, distribute,
  28. and license this software and its documentation for any purpose, provided
  29. that existing copyright notices are retained in all copies and that this
  30. notice is included verbatim in any distributions. No written agreement,
  31. license, or royalty fee is required for any of the authorized uses.
  32. Modifications to this software may be copyrighted by their authors
  33. and need not follow the licensing terms described here, provided that
  34. the new terms are clearly indicated on the first page of each file where
  35. they apply.
  36. IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
  37. FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  38. ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
  39. DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
  40. POSSIBILITY OF SUCH DAMAGE.
  41. THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
  42. INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
  43. FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
  44. IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
  45. NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
  46. MODIFICATIONS.
  47. GOVERNMENT USE: If you are acquiring this software on behalf of the
  48. U.S. government, the Government shall have only "Restricted Rights"
  49. in the software and related documentation as defined in the Federal
  50. Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
  51. are acquiring the software on behalf of the Department of Defense, the
  52. software shall be classified as "Commercial Computer Software" and the
  53. Government shall have only "Restricted Rights" as defined in Clause
  54. 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
  55. authors grant the U.S. Government and others acting in its behalf
  56. permission to use and distribute the software in accordance with the
  57. terms specified in this license.
  58. */
  59. #include <stdio.h>
  60. #include <limits.h>
  61. #include <ctype.h>
  62. #include "php.h"
  63. #include "php_variables.h"
  64. #ifdef HAVE_LOCALE_H
  65. #include <locale.h>
  66. #endif
  67. #include "zend_execute.h"
  68. #include "zend_operators.h"
  69. #include "zend_strtod.h"
  70. #include "php_globals.h"
  71. #include "basic_functions.h"
  72. #include "scanf.h"
  73. /*
  74. * Flag values used internally by [f|s]canf.
  75. */
  #define SCAN_NOSKIP   0x1   /* Don't skip blanks. */
  #define SCAN_SUPPRESS 0x2   /* Suppress assignment. */
  #define SCAN_UNSIGNED 0x4   /* Read an unsigned value. */
  #define SCAN_WIDTH    0x8   /* A width value was supplied. */
  #define SCAN_SIGNOK   0x10  /* A +/- character is allowed. */
  #define SCAN_NODIGITS 0x20  /* No digits have been scanned. */
  #define SCAN_NOZERO   0x40  /* No zero digits have been scanned. */
  #define SCAN_XOK      0x80  /* An 'x' is allowed. */
  #define SCAN_PTOK     0x100 /* Decimal point is allowed. */
  #define SCAN_EXPOK    0x200 /* An exponent is allowed. */

  /*
   * Convert a (possibly signed) char to zend_uchar, e.g. before handing it
   * to a <ctype.h> classification function. The expansion is wrapped in
   * an outer pair of parentheses so that it always acts as one operand,
   * whatever operator the caller places next to it.
   */
  #define UCHAR(x) ((zend_uchar)(x))
  87. /*
  88. * The following structure contains the information associated with
  89. * a character set.
  90. */
  /* One inclusive character range; BuildCharSet stores it with start <= end. */
  struct Range {
      char start;
      char end;
  };

  /* A parsed "[...]" scanset: individual characters plus character ranges. */
  typedef struct CharSet {
      int exclude;            /* 1 if this is an exclusion ("[^...]") set. */
      int nchars;             /* Number of entries used in chars. */
      char *chars;            /* Individually listed characters. */
      int nranges;            /* Number of entries used in ranges. */
      struct Range *ranges;   /* Range list, or NULL when the set has none. */
  } CharSet;
  101. /*
  102. * Declarations for functions used only in this file.
  103. */
  104. static char *BuildCharSet(CharSet *cset, char *format);
  105. static int CharInSet(CharSet *cset, int ch);
  106. static void ReleaseCharSet(CharSet *cset);
  107. static inline void scan_set_error_return(int numVars, zval **return_value);
  108. /* {{{ BuildCharSet
  109. *----------------------------------------------------------------------
  110. *
  111. * BuildCharSet --
  112. *
  113. * This function examines a character set format specification
  114. * and builds a CharSet containing the individual characters and
  115. * character ranges specified.
  116. *
  117. * Results:
  118. * Returns the next format position.
  119. *
  120. * Side effects:
  121. * Initializes the charset.
  122. *
  123. *----------------------------------------------------------------------
  124. */
  125. static char * BuildCharSet(CharSet *cset, char *format)
  126. {
  127. char *ch, start;
  128. int nranges;
  129. char *end;
  130. memset(cset, 0, sizeof(CharSet));
  131. ch = format;
  132. if (*ch == '^') {
  133. cset->exclude = 1;
  134. ch = ++format;
  135. }
  136. end = format + 1; /* verify this - cc */
  137. /*
  138. * Find the close bracket so we can overallocate the set.
  139. */
  140. if (*ch == ']') {
  141. ch = end++;
  142. }
  143. nranges = 0;
  144. while (*ch != ']') {
  145. if (*ch == '-') {
  146. nranges++;
  147. }
  148. ch = end++;
  149. }
  150. cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
  151. if (nranges > 0) {
  152. cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
  153. } else {
  154. cset->ranges = NULL;
  155. }
  156. /*
  157. * Now build the character set.
  158. */
  159. cset->nchars = cset->nranges = 0;
  160. ch = format++;
  161. start = *ch;
  162. if (*ch == ']' || *ch == '-') {
  163. cset->chars[cset->nchars++] = *ch;
  164. ch = format++;
  165. }
  166. while (*ch != ']') {
  167. if (*format == '-') {
  168. /*
  169. * This may be the first character of a range, so don't add
  170. * it yet.
  171. */
  172. start = *ch;
  173. } else if (*ch == '-') {
  174. /*
  175. * Check to see if this is the last character in the set, in which
  176. * case it is not a range and we should add the previous character
  177. * as well as the dash.
  178. */
  179. if (*format == ']') {
  180. cset->chars[cset->nchars++] = start;
  181. cset->chars[cset->nchars++] = *ch;
  182. } else {
  183. ch = format++;
  184. /*
  185. * Check to see if the range is in reverse order.
  186. */
  187. if (start < *ch) {
  188. cset->ranges[cset->nranges].start = start;
  189. cset->ranges[cset->nranges].end = *ch;
  190. } else {
  191. cset->ranges[cset->nranges].start = *ch;
  192. cset->ranges[cset->nranges].end = start;
  193. }
  194. cset->nranges++;
  195. }
  196. } else {
  197. cset->chars[cset->nchars++] = *ch;
  198. }
  199. ch = format++;
  200. }
  201. return format;
  202. }
  203. /* }}} */
  204. /* {{{ CharInSet
  205. *----------------------------------------------------------------------
  206. *
  207. * CharInSet --
  208. *
  209. * Check to see if a character matches the given set.
  210. *
  211. * Results:
  212. * Returns non-zero if the character matches the given set.
  213. *
  214. * Side effects:
  215. * None.
  216. *
  217. *----------------------------------------------------------------------
  218. */
  219. static int CharInSet(CharSet *cset, int c)
  220. {
  221. char ch = (char) c;
  222. int i, match = 0;
  223. for (i = 0; i < cset->nchars; i++) {
  224. if (cset->chars[i] == ch) {
  225. match = 1;
  226. break;
  227. }
  228. }
  229. if (!match) {
  230. for (i = 0; i < cset->nranges; i++) {
  231. if ((cset->ranges[i].start <= ch)
  232. && (ch <= cset->ranges[i].end)) {
  233. match = 1;
  234. break;
  235. }
  236. }
  237. }
  238. return (cset->exclude ? !match : match);
  239. }
  240. /* }}} */
  241. /* {{{ ReleaseCharSet
  242. *----------------------------------------------------------------------
  243. *
  244. * ReleaseCharSet --
  245. *
  246. * Free the storage associated with a character set.
  247. *
  248. * Results:
  249. * None.
  250. *
  251. * Side effects:
  252. * None.
  253. *
  254. *----------------------------------------------------------------------
  255. */
  256. static void ReleaseCharSet(CharSet *cset)
  257. {
  258. efree((char *)cset->chars);
  259. if (cset->ranges) {
  260. efree((char *)cset->ranges);
  261. }
  262. }
  263. /* }}} */
  264. /* {{{ ValidateFormat
  265. *----------------------------------------------------------------------
  266. *
  267. * ValidateFormat --
  268. *
  269. * Parse the format string and verify that it is properly formed
  270. * and that there are exactly enough variables on the command line.
  271. *
  272. * Results:
  273. * FAILURE or SUCCESS.
  274. *
  275. * Side effects:
  276. * May set php_error based on abnormal conditions.
  277. *
  278. * Parameters :
  279. * format The format string.
  280. * numVars The number of variables passed to the scan command.
  281. * totalSubs The number of variables that will be required.
  282. *
  283. *----------------------------------------------------------------------
  284. */
  285. PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
  286. {
  287. #define STATIC_LIST_SIZE 16
  288. int gotXpg, gotSequential, value, i, flags;
  289. char *end, *ch = NULL;
  290. int staticAssign[STATIC_LIST_SIZE];
  291. int *nassign = staticAssign;
  292. int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
  293. TSRMLS_FETCH();
  294. /*
  295. * Initialize an array that records the number of times a variable
  296. * is assigned to by the format string. We use this to detect if
  297. * a variable is multiply assigned or left unassigned.
  298. */
  299. if (numVars > nspace) {
  300. nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
  301. nspace = numVars;
  302. }
  303. for (i = 0; i < nspace; i++) {
  304. nassign[i] = 0;
  305. }
  306. xpgSize = objIndex = gotXpg = gotSequential = 0;
  307. while (*format != '\0') {
  308. ch = format++;
  309. flags = 0;
  310. if (*ch != '%') {
  311. continue;
  312. }
  313. ch = format++;
  314. if (*ch == '%') {
  315. continue;
  316. }
  317. if (*ch == '*') {
  318. flags |= SCAN_SUPPRESS;
  319. ch = format++;
  320. goto xpgCheckDone;
  321. }
  322. if ( isdigit( (int)*ch ) ) {
  323. /*
  324. * Check for an XPG3-style %n$ specification. Note: there
  325. * must not be a mixture of XPG3 specs and non-XPG3 specs
  326. * in the same format string.
  327. */
  328. value = strtoul(format-1, &end, 10);
  329. if (*end != '$') {
  330. goto notXpg;
  331. }
  332. format = end+1;
  333. ch = format++;
  334. gotXpg = 1;
  335. if (gotSequential) {
  336. goto mixedXPG;
  337. }
  338. objIndex = value - 1;
  339. if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
  340. goto badIndex;
  341. } else if (numVars == 0) {
  342. /*
  343. * In the case where no vars are specified, the user can
  344. * specify %9999$ legally, so we have to consider special
  345. * rules for growing the assign array. 'value' is
  346. * guaranteed to be > 0.
  347. */
  348. /* set a lower artificial limit on this
  349. * in the interest of security and resource friendliness
  350. * 255 arguments should be more than enough. - cc
  351. */
  352. if (value > SCAN_MAX_ARGS) {
  353. goto badIndex;
  354. }
  355. xpgSize = (xpgSize > value) ? xpgSize : value;
  356. }
  357. goto xpgCheckDone;
  358. }
  359. notXpg:
  360. gotSequential = 1;
  361. if (gotXpg) {
  362. mixedXPG:
  363. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
  364. goto error;
  365. }
  366. xpgCheckDone:
  367. /*
  368. * Parse any width specifier.
  369. */
  370. if (isdigit(UCHAR(*ch))) {
  371. value = strtoul(format-1, &format, 10);
  372. flags |= SCAN_WIDTH;
  373. ch = format++;
  374. }
  375. /*
  376. * Ignore size specifier.
  377. */
  378. if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
  379. ch = format++;
  380. }
  381. if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
  382. goto badIndex;
  383. }
  384. /*
  385. * Handle the various field types.
  386. */
  387. switch (*ch) {
  388. case 'n':
  389. case 'd':
  390. case 'D':
  391. case 'i':
  392. case 'o':
  393. case 'x':
  394. case 'X':
  395. case 'u':
  396. case 'f':
  397. case 'e':
  398. case 'E':
  399. case 'g':
  400. case 's':
  401. break;
  402. case 'c':
  403. /* we differ here with the TCL implementation in allowing for */
  404. /* a character width specification, to be more consistent with */
  405. /* ANSI. since Zend auto allocates space for vars, this is no */
  406. /* problem - cc */
  407. /*
  408. if (flags & SCAN_WIDTH) {
  409. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion");
  410. goto error;
  411. }
  412. */
  413. break;
  414. case '[':
  415. if (*format == '\0') {
  416. goto badSet;
  417. }
  418. ch = format++;
  419. if (*ch == '^') {
  420. if (*format == '\0') {
  421. goto badSet;
  422. }
  423. ch = format++;
  424. }
  425. if (*ch == ']') {
  426. if (*format == '\0') {
  427. goto badSet;
  428. }
  429. ch = format++;
  430. }
  431. while (*ch != ']') {
  432. if (*format == '\0') {
  433. goto badSet;
  434. }
  435. ch = format++;
  436. }
  437. break;
  438. badSet:
  439. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string");
  440. goto error;
  441. default: {
  442. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
  443. goto error;
  444. }
  445. }
  446. if (!(flags & SCAN_SUPPRESS)) {
  447. if (objIndex >= nspace) {
  448. /*
  449. * Expand the nassign buffer. If we are using XPG specifiers,
  450. * make sure that we grow to a large enough size. xpgSize is
  451. * guaranteed to be at least one larger than objIndex.
  452. */
  453. value = nspace;
  454. if (xpgSize) {
  455. nspace = xpgSize;
  456. } else {
  457. nspace += STATIC_LIST_SIZE;
  458. }
  459. if (nassign == staticAssign) {
  460. nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
  461. for (i = 0; i < STATIC_LIST_SIZE; ++i) {
  462. nassign[i] = staticAssign[i];
  463. }
  464. } else {
  465. nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
  466. }
  467. for (i = value; i < nspace; i++) {
  468. nassign[i] = 0;
  469. }
  470. }
  471. nassign[objIndex]++;
  472. objIndex++;
  473. }
  474. } /* while (*format != '\0') */
  475. /*
  476. * Verify that all of the variable were assigned exactly once.
  477. */
  478. if (numVars == 0) {
  479. if (xpgSize) {
  480. numVars = xpgSize;
  481. } else {
  482. numVars = objIndex;
  483. }
  484. }
  485. if (totalSubs) {
  486. *totalSubs = numVars;
  487. }
  488. for (i = 0; i < numVars; i++) {
  489. if (nassign[i] > 1) {
  490. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
  491. goto error;
  492. } else if (!xpgSize && (nassign[i] == 0)) {
  493. /*
  494. * If the space is empty, and xpgSize is 0 (means XPG wasn't
  495. * used, and/or numVars != 0), then too many vars were given
  496. */
  497. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers");
  498. goto error;
  499. }
  500. }
  501. if (nassign != staticAssign) {
  502. efree((char *)nassign);
  503. }
  504. return SCAN_SUCCESS;
  505. badIndex:
  506. if (gotXpg) {
  507. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range");
  508. } else {
  509. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers");
  510. }
  511. error:
  512. if (nassign != staticAssign) {
  513. efree((char *)nassign);
  514. }
  515. return SCAN_ERROR_INVALID_FORMAT;
  516. #undef STATIC_LIST_SIZE
  517. }
  518. /* }}} */
  519. /* {{{ php_sscanf_internal
  520. * This is the internal function which does processing on behalf of
  521. * both sscanf() and fscanf()
  522. *
  523. * parameters :
  524. * string literal string to be processed
  525. * format format string
  526. * argCount total number of elements in the args array
  527. * args arguments passed in from user function (f|s)scanf
  528. * varStart offset (in args) of 1st variable passed in to (f|s)scanf
  529. * return_value set with the results of the scan
  530. */
  531. PHPAPI int php_sscanf_internal( char *string, char *format,
  532. int argCount, zval ***args,
  533. int varStart, zval **return_value TSRMLS_DC)
  534. {
  535. int numVars, nconversions, totalVars = -1;
  536. int i, result;
  537. long value;
  538. int objIndex;
  539. char *end, *baseString;
  540. zval **current;
  541. char op = 0;
  542. int base = 0;
  543. int underflow = 0;
  544. size_t width;
  545. long (*fn)() = NULL;
  546. char *ch, sch;
  547. int flags;
  548. char buf[64]; /* Temporary buffer to hold scanned number
  549. * strings before they are passed to strtoul() */
  550. /* do some sanity checking */
  551. if ((varStart > argCount) || (varStart < 0)){
  552. varStart = SCAN_MAX_ARGS + 1;
  553. }
  554. numVars = argCount - varStart;
  555. if (numVars < 0) {
  556. numVars = 0;
  557. }
  558. #if 0
  559. zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
  560. string, format, numVars, varStart);
  561. #endif
  562. /*
  563. * Check for errors in the format string.
  564. */
  565. if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
  566. scan_set_error_return( numVars, return_value );
  567. return SCAN_ERROR_INVALID_FORMAT;
  568. }
  569. objIndex = numVars ? varStart : 0;
  570. /*
  571. * If any variables are passed, make sure they are all passed by reference
  572. */
  573. if (numVars) {
  574. for (i = varStart;i < argCount;i++){
  575. if ( ! PZVAL_IS_REF( *args[ i ] ) ) {
  576. php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i);
  577. scan_set_error_return(numVars, return_value);
  578. return SCAN_ERROR_VAR_PASSED_BYVAL;
  579. }
  580. }
  581. }
  582. /*
  583. * Allocate space for the result objects. Only happens when no variables
  584. * are specified
  585. */
  586. if (!numVars) {
  587. zval *tmp;
  588. /* allocate an array for return */
  589. array_init(*return_value);
  590. for (i = 0; i < totalVars; i++) {
  591. MAKE_STD_ZVAL(tmp);
  592. ZVAL_NULL(tmp);
  593. if (add_next_index_zval(*return_value, tmp) == FAILURE) {
  594. scan_set_error_return(0, return_value);
  595. return FAILURE;
  596. }
  597. }
  598. varStart = 0; /* Array index starts from 0 */
  599. }
  600. baseString = string;
  601. /*
  602. * Iterate over the format string filling in the result objects until
  603. * we reach the end of input, the end of the format string, or there
  604. * is a mismatch.
  605. */
  606. nconversions = 0;
  607. /* note ! - we need to limit the loop for objIndex to keep it in bounds */
  608. while (*format != '\0') {
  609. ch = format++;
  610. flags = 0;
  611. /*
  612. * If we see whitespace in the format, skip whitespace in the string.
  613. */
  614. if ( isspace( (int)*ch ) ) {
  615. sch = *string;
  616. while ( isspace( (int)sch ) ) {
  617. if (*string == '\0') {
  618. goto done;
  619. }
  620. string++;
  621. sch = *string;
  622. }
  623. continue;
  624. }
  625. if (*ch != '%') {
  626. literal:
  627. if (*string == '\0') {
  628. underflow = 1;
  629. goto done;
  630. }
  631. sch = *string;
  632. string++;
  633. if (*ch != sch) {
  634. goto done;
  635. }
  636. continue;
  637. }
  638. ch = format++;
  639. if (*ch == '%') {
  640. goto literal;
  641. }
  642. /*
  643. * Check for assignment suppression ('*') or an XPG3-style
  644. * assignment ('%n$').
  645. */
  646. if (*ch == '*') {
  647. flags |= SCAN_SUPPRESS;
  648. ch = format++;
  649. } else if ( isdigit(UCHAR(*ch))) {
  650. value = strtoul(format-1, &end, 10);
  651. if (*end == '$') {
  652. format = end+1;
  653. ch = format++;
  654. objIndex = varStart + value - 1;
  655. }
  656. }
  657. /*
  658. * Parse any width specifier.
  659. */
  660. if ( isdigit(UCHAR(*ch))) {
  661. width = strtoul(format-1, &format, 10);
  662. ch = format++;
  663. } else {
  664. width = 0;
  665. }
  666. /*
  667. * Ignore size specifier.
  668. */
  669. if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
  670. ch = format++;
  671. }
  672. /*
  673. * Handle the various field types.
  674. */
  675. switch (*ch) {
  676. case 'n':
  677. if (!(flags & SCAN_SUPPRESS)) {
  678. if (numVars && objIndex >= argCount) {
  679. break;
  680. } else if (numVars) {
  681. zend_uint refcount;
  682. current = args[objIndex++];
  683. refcount = Z_REFCOUNT_PP(current);
  684. zval_dtor( *current );
  685. ZVAL_LONG( *current, (long)(string - baseString) );
  686. Z_SET_REFCOUNT_PP(current, refcount);
  687. Z_SET_ISREF_PP(current);
  688. } else {
  689. add_index_long(*return_value, objIndex++, string - baseString);
  690. }
  691. }
  692. nconversions++;
  693. continue;
  694. case 'd':
  695. case 'D':
  696. op = 'i';
  697. base = 10;
  698. fn = (long (*)())strtol;
  699. break;
  700. case 'i':
  701. op = 'i';
  702. base = 0;
  703. fn = (long (*)())strtol;
  704. break;
  705. case 'o':
  706. op = 'i';
  707. base = 8;
  708. fn = (long (*)())strtol;
  709. break;
  710. case 'x':
  711. case 'X':
  712. op = 'i';
  713. base = 16;
  714. fn = (long (*)())strtol;
  715. break;
  716. case 'u':
  717. op = 'i';
  718. base = 10;
  719. flags |= SCAN_UNSIGNED;
  720. fn = (long (*)())strtoul;
  721. break;
  722. case 'f':
  723. case 'e':
  724. case 'E':
  725. case 'g':
  726. op = 'f';
  727. break;
  728. case 's':
  729. op = 's';
  730. break;
  731. case 'c':
  732. op = 's';
  733. flags |= SCAN_NOSKIP;
  734. /*-cc-*/
  735. if (0 == width) {
  736. width = 1;
  737. }
  738. /*-cc-*/
  739. break;
  740. case '[':
  741. op = '[';
  742. flags |= SCAN_NOSKIP;
  743. break;
  744. } /* switch */
  745. /*
  746. * At this point, we will need additional characters from the
  747. * string to proceed.
  748. */
  749. if (*string == '\0') {
  750. underflow = 1;
  751. goto done;
  752. }
  753. /*
  754. * Skip any leading whitespace at the beginning of a field unless
  755. * the format suppresses this behavior.
  756. */
  757. if (!(flags & SCAN_NOSKIP)) {
  758. while (*string != '\0') {
  759. sch = *string;
  760. if (! isspace((int)sch) ) {
  761. break;
  762. }
  763. string++;
  764. }
  765. if (*string == '\0') {
  766. underflow = 1;
  767. goto done;
  768. }
  769. }
  770. /*
  771. * Perform the requested scanning operation.
  772. */
  773. switch (op) {
  774. case 'c':
  775. case 's':
  776. /*
  777. * Scan a string up to width characters or whitespace.
  778. */
  779. if (width == 0) {
  780. width = (size_t) ~0;
  781. }
  782. end = string;
  783. while (*end != '\0') {
  784. sch = *end;
  785. if ( isspace( (int)sch ) ) {
  786. break;
  787. }
  788. end++;
  789. if (--width == 0) {
  790. break;
  791. }
  792. }
  793. if (!(flags & SCAN_SUPPRESS)) {
  794. if (numVars && objIndex >= argCount) {
  795. break;
  796. } else if (numVars) {
  797. zend_uint refcount;
  798. current = args[objIndex++];
  799. refcount = Z_REFCOUNT_PP(current);
  800. zval_dtor( *current );
  801. ZVAL_STRINGL( *current, string, end-string, 1);
  802. Z_SET_REFCOUNT_PP(current, refcount);
  803. Z_SET_ISREF_PP(current);
  804. } else {
  805. add_index_stringl( *return_value, objIndex++, string, end-string, 1);
  806. }
  807. }
  808. string = end;
  809. break;
  810. case '[': {
  811. CharSet cset;
  812. if (width == 0) {
  813. width = (size_t) ~0;
  814. }
  815. end = string;
  816. format = BuildCharSet(&cset, format);
  817. while (*end != '\0') {
  818. sch = *end;
  819. if (!CharInSet(&cset, (int)sch)) {
  820. break;
  821. }
  822. end++;
  823. if (--width == 0) {
  824. break;
  825. }
  826. }
  827. ReleaseCharSet(&cset);
  828. if (string == end) {
  829. /*
  830. * Nothing matched the range, stop processing
  831. */
  832. goto done;
  833. }
  834. if (!(flags & SCAN_SUPPRESS)) {
  835. if (numVars && objIndex >= argCount) {
  836. break;
  837. } else if (numVars) {
  838. current = args[objIndex++];
  839. zval_dtor( *current );
  840. ZVAL_STRINGL( *current, string, end-string, 1);
  841. } else {
  842. add_index_stringl(*return_value, objIndex++, string, end-string, 1);
  843. }
  844. }
  845. string = end;
  846. break;
  847. }
  848. /*
  849. case 'c':
  850. / Scan a single character./
  851. sch = *string;
  852. string++;
  853. if (!(flags & SCAN_SUPPRESS)) {
  854. if (numVars) {
  855. char __buf[2];
  856. __buf[0] = sch;
  857. __buf[1] = '\0';;
  858. current = args[objIndex++];
  859. zval_dtor(*current);
  860. ZVAL_STRINGL( *current, __buf, 1, 1);
  861. } else {
  862. add_index_stringl(*return_value, objIndex++, &sch, 1, 1);
  863. }
  864. }
  865. break;
  866. */
  867. case 'i':
  868. /*
  869. * Scan an unsigned or signed integer.
  870. */
  871. /*-cc-*/
  872. buf[0] = '\0';
  873. /*-cc-*/
  874. if ((width == 0) || (width > sizeof(buf) - 1)) {
  875. width = sizeof(buf) - 1;
  876. }
  877. flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
  878. for (end = buf; width > 0; width--) {
  879. switch (*string) {
  880. /*
  881. * The 0 digit has special meaning at the beginning of
  882. * a number. If we are unsure of the base, it
  883. * indicates that we are in base 8 or base 16 (if it is
  884. * followed by an 'x').
  885. */
  886. case '0':
  887. /*-cc-*/
  888. if (base == 16) {
  889. flags |= SCAN_XOK;
  890. }
  891. /*-cc-*/
  892. if (base == 0) {
  893. base = 8;
  894. flags |= SCAN_XOK;
  895. }
  896. if (flags & SCAN_NOZERO) {
  897. flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
  898. } else {
  899. flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
  900. }
  901. goto addToInt;
  902. case '1': case '2': case '3': case '4':
  903. case '5': case '6': case '7':
  904. if (base == 0) {
  905. base = 10;
  906. }
  907. flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
  908. goto addToInt;
  909. case '8': case '9':
  910. if (base == 0) {
  911. base = 10;
  912. }
  913. if (base <= 8) {
  914. break;
  915. }
  916. flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
  917. goto addToInt;
  918. case 'A': case 'B': case 'C':
  919. case 'D': case 'E': case 'F':
  920. case 'a': case 'b': case 'c':
  921. case 'd': case 'e': case 'f':
  922. if (base <= 10) {
  923. break;
  924. }
  925. flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
  926. goto addToInt;
  927. case '+': case '-':
  928. if (flags & SCAN_SIGNOK) {
  929. flags &= ~SCAN_SIGNOK;
  930. goto addToInt;
  931. }
  932. break;
  933. case 'x': case 'X':
  934. if ((flags & SCAN_XOK) && (end == buf+1)) {
  935. base = 16;
  936. flags &= ~SCAN_XOK;
  937. goto addToInt;
  938. }
  939. break;
  940. }
  941. /*
  942. * We got an illegal character so we are done accumulating.
  943. */
  944. break;
  945. addToInt:
  946. /*
  947. * Add the character to the temporary buffer.
  948. */
  949. *end++ = *string++;
  950. if (*string == '\0') {
  951. break;
  952. }
  953. }
  954. /*
  955. * Check to see if we need to back up because we only got a
  956. * sign or a trailing x after a 0.
  957. */
  958. if (flags & SCAN_NODIGITS) {
  959. if (*string == '\0') {
  960. underflow = 1;
  961. }
  962. goto done;
  963. } else if (end[-1] == 'x' || end[-1] == 'X') {
  964. end--;
  965. string--;
  966. }
  967. /*
  968. * Scan the value from the temporary buffer. If we are
  969. * returning a large unsigned value, we have to convert it back
  970. * to a string since PHP only supports signed values.
  971. */
  972. if (!(flags & SCAN_SUPPRESS)) {
  973. *end = '\0';
  974. value = (long) (*fn)(buf, NULL, base);
  975. if ((flags & SCAN_UNSIGNED) && (value < 0)) {
  976. snprintf(buf, sizeof(buf), "%lu", value); /* INTL: ISO digit */
  977. if (numVars && objIndex >= argCount) {
  978. break;
  979. } else if (numVars) {
  980. /* change passed value type to string */
  981. current = args[objIndex++];
  982. zval_dtor(*current);
  983. ZVAL_STRING( *current, buf, 1 );
  984. } else {
  985. add_index_string(*return_value, objIndex++, buf, 1);
  986. }
  987. } else {
  988. if (numVars && objIndex >= argCount) {
  989. break;
  990. } else if (numVars) {
  991. current = args[objIndex++];
  992. zval_dtor(*current);
  993. ZVAL_LONG(*current, value);
  994. } else {
  995. add_index_long(*return_value, objIndex++, value);
  996. }
  997. }
  998. }
  999. break;
  1000. case 'f':
  1001. /*
  1002. * Scan a floating point number
  1003. */
  1004. buf[0] = '\0'; /* call me pedantic */
  1005. if ((width == 0) || (width > sizeof(buf) - 1)) {
  1006. width = sizeof(buf) - 1;
  1007. }
  1008. flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
  1009. for (end = buf; width > 0; width--) {
  1010. switch (*string) {
  1011. case '0': case '1': case '2': case '3':
  1012. case '4': case '5': case '6': case '7':
  1013. case '8': case '9':
  1014. flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
  1015. goto addToFloat;
  1016. case '+':
  1017. case '-':
  1018. if (flags & SCAN_SIGNOK) {
  1019. flags &= ~SCAN_SIGNOK;
  1020. goto addToFloat;
  1021. }
  1022. break;
  1023. case '.':
  1024. if (flags & SCAN_PTOK) {
  1025. flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
  1026. goto addToFloat;
  1027. }
  1028. break;
  1029. case 'e':
  1030. case 'E':
  1031. /*
  1032. * An exponent is not allowed until there has
  1033. * been at least one digit.
  1034. */
  1035. if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
  1036. flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
  1037. | SCAN_SIGNOK | SCAN_NODIGITS;
  1038. goto addToFloat;
  1039. }
  1040. break;
  1041. }
  1042. /*
  1043. * We got an illegal character so we are done accumulating.
  1044. */
  1045. break;
  1046. addToFloat:
  1047. /*
  1048. * Add the character to the temporary buffer.
  1049. */
  1050. *end++ = *string++;
  1051. if (*string == '\0') {
  1052. break;
  1053. }
  1054. }
  1055. /*
  1056. * Check to see if we need to back up because we saw a
  1057. * trailing 'e' or sign.
  1058. */
  1059. if (flags & SCAN_NODIGITS) {
  1060. if (flags & SCAN_EXPOK) {
  1061. /*
  1062. * There were no digits at all so scanning has
  1063. * failed and we are done.
  1064. */
  1065. if (*string == '\0') {
  1066. underflow = 1;
  1067. }
  1068. goto done;
  1069. }
  1070. /*
  1071. * We got a bad exponent ('e' and maybe a sign).
  1072. */
  1073. end--;
  1074. string--;
  1075. if (*end != 'e' && *end != 'E') {
  1076. end--;
  1077. string--;
  1078. }
  1079. }
  1080. /*
  1081. * Scan the value from the temporary buffer.
  1082. */
  1083. if (!(flags & SCAN_SUPPRESS)) {
  1084. double dvalue;
  1085. *end = '\0';
  1086. dvalue = zend_strtod(buf, NULL);
  1087. if (numVars && objIndex >= argCount) {
  1088. break;
  1089. } else if (numVars) {
  1090. current = args[objIndex++];
  1091. zval_dtor(*current);
  1092. ZVAL_DOUBLE(*current, dvalue);
  1093. } else {
  1094. add_index_double( *return_value, objIndex++, dvalue );
  1095. }
  1096. }
  1097. break;
  1098. } /* switch (op) */
  1099. nconversions++;
  1100. } /* while (*format != '\0') */
  1101. done:
  1102. result = SCAN_SUCCESS;
  1103. if (underflow && (0==nconversions)) {
  1104. scan_set_error_return( numVars, return_value );
  1105. result = SCAN_ERROR_EOF;
  1106. } else if (numVars) {
  1107. convert_to_long( *return_value );
  1108. Z_LVAL_PP(return_value) = nconversions;
  1109. } else if (nconversions < totalVars) {
  1110. /* TODO: not all elements converted. we need to prune the list - cc */
  1111. }
  1112. return result;
  1113. }
  1114. /* }}} */
  1115. /* the compiler choked when i tried to make this a macro */
  1116. static inline void scan_set_error_return(int numVars, zval **return_value) /* {{{ */
  1117. {
  1118. if (numVars) {
  1119. Z_TYPE_PP(return_value) = IS_LONG;
  1120. Z_LVAL_PP(return_value) = SCAN_ERROR_EOF; /* EOF marker */
  1121. } else {
  1122. /* convert_to_null calls destructor */
  1123. convert_to_null( *return_value );
  1124. }
  1125. }
  1126. /* }}} */
  1127. /*
  1128. * Local variables:
  1129. * tab-width: 4
  1130. * c-basic-offset: 4
  1131. * End:
  1132. * vim600: sw=4 ts=4 fdm=marker
  1133. * vim<600: sw=4 ts=4
  1134. */