scanf.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Copyright (c) The PHP Group |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | https://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Author: Clayton Collie <clcollie@mindspring.com> |
  14. +----------------------------------------------------------------------+
  15. */
  16. /*
  17. scanf.c --
  18. This file contains the base code which implements sscanf and by extension
  19. fscanf. Original code is from TCL8.3.0 and bears the following copyright:
  20. This software is copyrighted by the Regents of the University of
  21. California, Sun Microsystems, Inc., Scriptics Corporation,
  22. and other parties. The following terms apply to all files associated
  23. with the software unless explicitly disclaimed in individual files.
  24. The authors hereby grant permission to use, copy, modify, distribute,
  25. and license this software and its documentation for any purpose, provided
  26. that existing copyright notices are retained in all copies and that this
  27. notice is included verbatim in any distributions. No written agreement,
  28. license, or royalty fee is required for any of the authorized uses.
  29. Modifications to this software may be copyrighted by their authors
  30. and need not follow the licensing terms described here, provided that
  31. the new terms are clearly indicated on the first page of each file where
  32. they apply.
  33. IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
  34. FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  35. ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
  36. DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
  37. POSSIBILITY OF SUCH DAMAGE.
  38. THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
  39. INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
  40. FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
  41. IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
  42. NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
  43. MODIFICATIONS.
  44. GOVERNMENT USE: If you are acquiring this software on behalf of the
  45. U.S. government, the Government shall have only "Restricted Rights"
  46. in the software and related documentation as defined in the Federal
  47. Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
  48. are acquiring the software on behalf of the Department of Defense, the
  49. software shall be classified as "Commercial Computer Software" and the
  50. Government shall have only "Restricted Rights" as defined in Clause
  51. 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
  52. authors grant the U.S. Government and others acting in its behalf
  53. permission to use and distribute the software in accordance with the
  54. terms specified in this license.
  55. */
  56. #include <stdio.h>
  57. #include <limits.h>
  58. #include <ctype.h>
  59. #include "php.h"
  60. #include "php_variables.h"
  61. #include <locale.h>
  62. #include "zend_execute.h"
  63. #include "zend_operators.h"
  64. #include "zend_strtod.h"
  65. #include "php_globals.h"
  66. #include "basic_functions.h"
  67. #include "scanf.h"
  68. /*
  69. * Flag values used internally by [f|s]canf.
  70. */
  71. #define SCAN_NOSKIP 0x1 /* Don't skip blanks. */
  72. #define SCAN_SUPPRESS 0x2 /* Suppress assignment. */
  73. #define SCAN_UNSIGNED 0x4 /* Read an unsigned value. */
  74. #define SCAN_WIDTH 0x8 /* A width value was supplied. */
  75. #define SCAN_SIGNOK 0x10 /* A +/- character is allowed. */
  76. #define SCAN_NODIGITS 0x20 /* No digits have been scanned. */
  77. #define SCAN_NOZERO 0x40 /* No zero digits have been scanned. */
  78. #define SCAN_XOK 0x80 /* An 'x' is allowed. */
  79. #define SCAN_PTOK 0x100 /* Decimal point is allowed. */
  80. #define SCAN_EXPOK 0x200 /* An exponent is allowed. */
  81. #define UCHAR(x) (zend_uchar)(x)
  82. /*
  83. * The following structure contains the information associated with
  84. * a character set.
  85. */
  86. typedef struct CharSet {
  87. int exclude; /* 1 if this is an exclusion set. */
  88. int nchars;
  89. char *chars;
  90. int nranges;
  91. struct Range {
  92. char start;
  93. char end;
  94. } *ranges;
  95. } CharSet;
  96. typedef zend_long (*int_string_formater)(const char*, char**, int);
  97. /*
  98. * Declarations for functions used only in this file.
  99. */
  100. static char *BuildCharSet(CharSet *cset, char *format);
  101. static int CharInSet(CharSet *cset, int ch);
  102. static void ReleaseCharSet(CharSet *cset);
  103. static inline void scan_set_error_return(int numVars, zval *return_value);
  104. /* {{{ BuildCharSet
  105. *----------------------------------------------------------------------
  106. *
  107. * BuildCharSet --
  108. *
  109. * This function examines a character set format specification
  110. * and builds a CharSet containing the individual characters and
  111. * character ranges specified.
  112. *
  113. * Results:
  114. * Returns the next format position.
  115. *
  116. * Side effects:
  117. * Initializes the charset.
  118. *
  119. *----------------------------------------------------------------------
  120. */
  121. static char * BuildCharSet(CharSet *cset, char *format)
  122. {
  123. char *ch, start;
  124. int nranges;
  125. char *end;
  126. memset(cset, 0, sizeof(CharSet));
  127. ch = format;
  128. if (*ch == '^') {
  129. cset->exclude = 1;
  130. ch = ++format;
  131. }
  132. end = format + 1; /* verify this - cc */
  133. /*
  134. * Find the close bracket so we can overallocate the set.
  135. */
  136. if (*ch == ']') {
  137. ch = end++;
  138. }
  139. nranges = 0;
  140. while (*ch != ']') {
  141. if (*ch == '-') {
  142. nranges++;
  143. }
  144. ch = end++;
  145. }
  146. cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
  147. if (nranges > 0) {
  148. cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
  149. } else {
  150. cset->ranges = NULL;
  151. }
  152. /*
  153. * Now build the character set.
  154. */
  155. cset->nchars = cset->nranges = 0;
  156. ch = format++;
  157. start = *ch;
  158. if (*ch == ']' || *ch == '-') {
  159. cset->chars[cset->nchars++] = *ch;
  160. ch = format++;
  161. }
  162. while (*ch != ']') {
  163. if (*format == '-') {
  164. /*
  165. * This may be the first character of a range, so don't add
  166. * it yet.
  167. */
  168. start = *ch;
  169. } else if (*ch == '-') {
  170. /*
  171. * Check to see if this is the last character in the set, in which
  172. * case it is not a range and we should add the previous character
  173. * as well as the dash.
  174. */
  175. if (*format == ']') {
  176. cset->chars[cset->nchars++] = start;
  177. cset->chars[cset->nchars++] = *ch;
  178. } else {
  179. ch = format++;
  180. /*
  181. * Check to see if the range is in reverse order.
  182. */
  183. if (start < *ch) {
  184. cset->ranges[cset->nranges].start = start;
  185. cset->ranges[cset->nranges].end = *ch;
  186. } else {
  187. cset->ranges[cset->nranges].start = *ch;
  188. cset->ranges[cset->nranges].end = start;
  189. }
  190. cset->nranges++;
  191. }
  192. } else {
  193. cset->chars[cset->nchars++] = *ch;
  194. }
  195. ch = format++;
  196. }
  197. return format;
  198. }
  199. /* }}} */
  200. /* {{{ CharInSet
  201. *----------------------------------------------------------------------
  202. *
  203. * CharInSet --
  204. *
  205. * Check to see if a character matches the given set.
  206. *
  207. * Results:
  208. * Returns non-zero if the character matches the given set.
  209. *
  210. * Side effects:
  211. * None.
  212. *
  213. *----------------------------------------------------------------------
  214. */
  215. static int CharInSet(CharSet *cset, int c)
  216. {
  217. char ch = (char) c;
  218. int i, match = 0;
  219. for (i = 0; i < cset->nchars; i++) {
  220. if (cset->chars[i] == ch) {
  221. match = 1;
  222. break;
  223. }
  224. }
  225. if (!match) {
  226. for (i = 0; i < cset->nranges; i++) {
  227. if ((cset->ranges[i].start <= ch)
  228. && (ch <= cset->ranges[i].end)) {
  229. match = 1;
  230. break;
  231. }
  232. }
  233. }
  234. return (cset->exclude ? !match : match);
  235. }
  236. /* }}} */
  237. /* {{{ ReleaseCharSet
  238. *----------------------------------------------------------------------
  239. *
  240. * ReleaseCharSet --
  241. *
  242. * Free the storage associated with a character set.
  243. *
  244. * Results:
  245. * None.
  246. *
  247. * Side effects:
  248. * None.
  249. *
  250. *----------------------------------------------------------------------
  251. */
  252. static void ReleaseCharSet(CharSet *cset)
  253. {
  254. efree((char *)cset->chars);
  255. if (cset->ranges) {
  256. efree((char *)cset->ranges);
  257. }
  258. }
  259. /* }}} */
  260. /* {{{ ValidateFormat
  261. *----------------------------------------------------------------------
  262. *
  263. * ValidateFormat --
  264. *
  265. * Parse the format string and verify that it is properly formed
  266. * and that there are exactly enough variables on the command line.
  267. *
  268. * Results:
  269. * FAILURE or SUCCESS.
  270. *
  271. * Side effects:
  272. * May set php_error based on abnormal conditions.
  273. *
  274. * Parameters :
  275. * format The format string.
  276. * numVars The number of variables passed to the scan command.
  277. * totalSubs The number of variables that will be required.
  278. *
  279. *----------------------------------------------------------------------
  280. */
  281. PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
  282. {
  283. #define STATIC_LIST_SIZE 16
  284. int gotXpg, gotSequential, value, i, flags;
  285. char *end, *ch = NULL;
  286. int staticAssign[STATIC_LIST_SIZE];
  287. int *nassign = staticAssign;
  288. int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
  289. /*
  290. * Initialize an array that records the number of times a variable
  291. * is assigned to by the format string. We use this to detect if
  292. * a variable is multiply assigned or left unassigned.
  293. */
  294. if (numVars > nspace) {
  295. nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
  296. nspace = numVars;
  297. }
  298. for (i = 0; i < nspace; i++) {
  299. nassign[i] = 0;
  300. }
  301. xpgSize = objIndex = gotXpg = gotSequential = 0;
  302. while (*format != '\0') {
  303. ch = format++;
  304. flags = 0;
  305. if (*ch != '%') {
  306. continue;
  307. }
  308. ch = format++;
  309. if (*ch == '%') {
  310. continue;
  311. }
  312. if (*ch == '*') {
  313. flags |= SCAN_SUPPRESS;
  314. ch = format++;
  315. goto xpgCheckDone;
  316. }
  317. if ( isdigit( (int)*ch ) ) {
  318. /*
  319. * Check for an XPG3-style %n$ specification. Note: there
  320. * must not be a mixture of XPG3 specs and non-XPG3 specs
  321. * in the same format string.
  322. */
  323. value = ZEND_STRTOUL(format-1, &end, 10);
  324. if (*end != '$') {
  325. goto notXpg;
  326. }
  327. format = end+1;
  328. ch = format++;
  329. gotXpg = 1;
  330. if (gotSequential) {
  331. goto mixedXPG;
  332. }
  333. objIndex = value - 1;
  334. if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
  335. goto badIndex;
  336. } else if (numVars == 0) {
  337. /*
  338. * In the case where no vars are specified, the user can
  339. * specify %9999$ legally, so we have to consider special
  340. * rules for growing the assign array. 'value' is
  341. * guaranteed to be > 0.
  342. */
  343. /* set a lower artificial limit on this
  344. * in the interest of security and resource friendliness
  345. * 255 arguments should be more than enough. - cc
  346. */
  347. if (value > SCAN_MAX_ARGS) {
  348. goto badIndex;
  349. }
  350. xpgSize = (xpgSize > value) ? xpgSize : value;
  351. }
  352. goto xpgCheckDone;
  353. }
  354. notXpg:
  355. gotSequential = 1;
  356. if (gotXpg) {
  357. mixedXPG:
  358. zend_value_error("%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
  359. goto error;
  360. }
  361. xpgCheckDone:
  362. /*
  363. * Parse any width specifier.
  364. */
  365. if (isdigit(UCHAR(*ch))) {
  366. value = ZEND_STRTOUL(format-1, &format, 10);
  367. flags |= SCAN_WIDTH;
  368. ch = format++;
  369. }
  370. /*
  371. * Ignore size specifier.
  372. */
  373. if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
  374. ch = format++;
  375. }
  376. if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
  377. goto badIndex;
  378. }
  379. /*
  380. * Handle the various field types.
  381. */
  382. switch (*ch) {
  383. case 'n':
  384. case 'd':
  385. case 'D':
  386. case 'i':
  387. case 'o':
  388. case 'x':
  389. case 'X':
  390. case 'u':
  391. case 'f':
  392. case 'e':
  393. case 'E':
  394. case 'g':
  395. case 's':
  396. break;
  397. case 'c':
  398. /* we differ here with the TCL implementation in allowing for */
  399. /* a character width specification, to be more consistent with */
  400. /* ANSI. since Zend auto allocates space for vars, this is no */
  401. /* problem - cc */
  402. /*
  403. if (flags & SCAN_WIDTH) {
  404. php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
  405. goto error;
  406. }
  407. */
  408. break;
  409. case '[':
  410. if (*format == '\0') {
  411. goto badSet;
  412. }
  413. ch = format++;
  414. if (*ch == '^') {
  415. if (*format == '\0') {
  416. goto badSet;
  417. }
  418. ch = format++;
  419. }
  420. if (*ch == ']') {
  421. if (*format == '\0') {
  422. goto badSet;
  423. }
  424. ch = format++;
  425. }
  426. while (*ch != ']') {
  427. if (*format == '\0') {
  428. goto badSet;
  429. }
  430. ch = format++;
  431. }
  432. break;
  433. badSet:
  434. zend_value_error("Unmatched [ in format string");
  435. goto error;
  436. default: {
  437. zend_value_error("Bad scan conversion character \"%c\"", *ch);
  438. goto error;
  439. }
  440. }
  441. if (!(flags & SCAN_SUPPRESS)) {
  442. if (objIndex >= nspace) {
  443. /*
  444. * Expand the nassign buffer. If we are using XPG specifiers,
  445. * make sure that we grow to a large enough size. xpgSize is
  446. * guaranteed to be at least one larger than objIndex.
  447. */
  448. value = nspace;
  449. if (xpgSize) {
  450. nspace = xpgSize;
  451. } else {
  452. nspace += STATIC_LIST_SIZE;
  453. }
  454. if (nassign == staticAssign) {
  455. nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
  456. for (i = 0; i < STATIC_LIST_SIZE; ++i) {
  457. nassign[i] = staticAssign[i];
  458. }
  459. } else {
  460. nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
  461. }
  462. for (i = value; i < nspace; i++) {
  463. nassign[i] = 0;
  464. }
  465. }
  466. nassign[objIndex]++;
  467. objIndex++;
  468. }
  469. } /* while (*format != '\0') */
  470. /*
  471. * Verify that all of the variable were assigned exactly once.
  472. */
  473. if (numVars == 0) {
  474. if (xpgSize) {
  475. numVars = xpgSize;
  476. } else {
  477. numVars = objIndex;
  478. }
  479. }
  480. if (totalSubs) {
  481. *totalSubs = numVars;
  482. }
  483. for (i = 0; i < numVars; i++) {
  484. if (nassign[i] > 1) {
  485. zend_value_error("%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
  486. goto error;
  487. } else if (!xpgSize && (nassign[i] == 0)) {
  488. /*
  489. * If the space is empty, and xpgSize is 0 (means XPG wasn't
  490. * used, and/or numVars != 0), then too many vars were given
  491. */
  492. zend_value_error("Variable is not assigned by any conversion specifiers");
  493. goto error;
  494. }
  495. }
  496. if (nassign != staticAssign) {
  497. efree((char *)nassign);
  498. }
  499. return SCAN_SUCCESS;
  500. badIndex:
  501. if (gotXpg) {
  502. zend_value_error("%s", "\"%n$\" argument index out of range");
  503. } else {
  504. zend_value_error("Different numbers of variable names and field specifiers");
  505. }
  506. error:
  507. if (nassign != staticAssign) {
  508. efree((char *)nassign);
  509. }
  510. return SCAN_ERROR_INVALID_FORMAT;
  511. #undef STATIC_LIST_SIZE
  512. }
  513. /* }}} */
  514. /* {{{ php_sscanf_internal
  515. * This is the internal function which does processing on behalf of
  516. * both sscanf() and fscanf()
  517. *
  518. * parameters :
  519. * string literal string to be processed
  520. * format format string
  521. * argCount total number of elements in the args array
  522. * args arguments passed in from user function (f|s)scanf
  523. * varStart offset (in args) of 1st variable passed in to (f|s)scanf
  524. * return_value set with the results of the scan
  525. */
  526. PHPAPI int php_sscanf_internal( char *string, char *format,
  527. int argCount, zval *args,
  528. int varStart, zval *return_value)
  529. {
  530. int numVars, nconversions, totalVars = -1;
  531. int i, result;
  532. zend_long value;
  533. int objIndex;
  534. char *end, *baseString;
  535. zval *current;
  536. char op = 0;
  537. int base = 0;
  538. int underflow = 0;
  539. size_t width;
  540. int_string_formater fn = NULL;
  541. char *ch, sch;
  542. int flags;
  543. char buf[64]; /* Temporary buffer to hold scanned number
  544. * strings before they are passed to strtoul() */
  545. /* do some sanity checking */
  546. if ((varStart > argCount) || (varStart < 0)){
  547. varStart = SCAN_MAX_ARGS + 1;
  548. }
  549. numVars = argCount - varStart;
  550. if (numVars < 0) {
  551. numVars = 0;
  552. }
  553. /*
  554. * Check for errors in the format string.
  555. */
  556. if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
  557. scan_set_error_return( numVars, return_value );
  558. return SCAN_ERROR_INVALID_FORMAT;
  559. }
  560. objIndex = numVars ? varStart : 0;
  561. /*
  562. * If any variables are passed, make sure they are all passed by reference
  563. */
  564. if (numVars) {
  565. for (i = varStart;i < argCount;i++){
  566. ZEND_ASSERT(Z_ISREF(args[i]) && "Parameter must be passed by reference");
  567. }
  568. }
  569. /*
  570. * Allocate space for the result objects. Only happens when no variables
  571. * are specified
  572. */
  573. if (!numVars) {
  574. zval tmp;
  575. /* allocate an array for return */
  576. array_init(return_value);
  577. for (i = 0; i < totalVars; i++) {
  578. ZVAL_NULL(&tmp);
  579. if (add_next_index_zval(return_value, &tmp) == FAILURE) {
  580. scan_set_error_return(0, return_value);
  581. return FAILURE;
  582. }
  583. }
  584. varStart = 0; /* Array index starts from 0 */
  585. }
  586. baseString = string;
  587. /*
  588. * Iterate over the format string filling in the result objects until
  589. * we reach the end of input, the end of the format string, or there
  590. * is a mismatch.
  591. */
  592. nconversions = 0;
  593. /* note ! - we need to limit the loop for objIndex to keep it in bounds */
  594. while (*format != '\0') {
  595. ch = format++;
  596. flags = 0;
  597. /*
  598. * If we see whitespace in the format, skip whitespace in the string.
  599. */
  600. if ( isspace( (int)*ch ) ) {
  601. sch = *string;
  602. while ( isspace( (int)sch ) ) {
  603. if (*string == '\0') {
  604. goto done;
  605. }
  606. string++;
  607. sch = *string;
  608. }
  609. continue;
  610. }
  611. if (*ch != '%') {
  612. literal:
  613. if (*string == '\0') {
  614. underflow = 1;
  615. goto done;
  616. }
  617. sch = *string;
  618. string++;
  619. if (*ch != sch) {
  620. goto done;
  621. }
  622. continue;
  623. }
  624. ch = format++;
  625. if (*ch == '%') {
  626. goto literal;
  627. }
  628. /*
  629. * Check for assignment suppression ('*') or an XPG3-style
  630. * assignment ('%n$').
  631. */
  632. if (*ch == '*') {
  633. flags |= SCAN_SUPPRESS;
  634. ch = format++;
  635. } else if ( isdigit(UCHAR(*ch))) {
  636. value = ZEND_STRTOUL(format-1, &end, 10);
  637. if (*end == '$') {
  638. format = end+1;
  639. ch = format++;
  640. objIndex = varStart + value - 1;
  641. }
  642. }
  643. /*
  644. * Parse any width specifier.
  645. */
  646. if ( isdigit(UCHAR(*ch))) {
  647. width = ZEND_STRTOUL(format-1, &format, 10);
  648. ch = format++;
  649. } else {
  650. width = 0;
  651. }
  652. /*
  653. * Ignore size specifier.
  654. */
  655. if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
  656. ch = format++;
  657. }
  658. /*
  659. * Handle the various field types.
  660. */
  661. switch (*ch) {
  662. case 'n':
  663. if (!(flags & SCAN_SUPPRESS)) {
  664. if (numVars && objIndex >= argCount) {
  665. break;
  666. } else if (numVars) {
  667. current = args + objIndex++;
  668. ZEND_TRY_ASSIGN_REF_LONG(current, (zend_long) (string - baseString));
  669. } else {
  670. add_index_long(return_value, objIndex++, string - baseString);
  671. }
  672. }
  673. nconversions++;
  674. continue;
  675. case 'd':
  676. case 'D':
  677. op = 'i';
  678. base = 10;
  679. fn = (int_string_formater)ZEND_STRTOL_PTR;
  680. break;
  681. case 'i':
  682. op = 'i';
  683. base = 0;
  684. fn = (int_string_formater)ZEND_STRTOL_PTR;
  685. break;
  686. case 'o':
  687. op = 'i';
  688. base = 8;
  689. fn = (int_string_formater)ZEND_STRTOL_PTR;
  690. break;
  691. case 'x':
  692. case 'X':
  693. op = 'i';
  694. base = 16;
  695. fn = (int_string_formater)ZEND_STRTOL_PTR;
  696. break;
  697. case 'u':
  698. op = 'i';
  699. base = 10;
  700. flags |= SCAN_UNSIGNED;
  701. fn = (int_string_formater)ZEND_STRTOUL_PTR;
  702. break;
  703. case 'f':
  704. case 'e':
  705. case 'E':
  706. case 'g':
  707. op = 'f';
  708. break;
  709. case 's':
  710. op = 's';
  711. break;
  712. case 'c':
  713. op = 's';
  714. flags |= SCAN_NOSKIP;
  715. /*-cc-*/
  716. if (0 == width) {
  717. width = 1;
  718. }
  719. /*-cc-*/
  720. break;
  721. case '[':
  722. op = '[';
  723. flags |= SCAN_NOSKIP;
  724. break;
  725. } /* switch */
  726. /*
  727. * At this point, we will need additional characters from the
  728. * string to proceed.
  729. */
  730. if (*string == '\0') {
  731. underflow = 1;
  732. goto done;
  733. }
  734. /*
  735. * Skip any leading whitespace at the beginning of a field unless
  736. * the format suppresses this behavior.
  737. */
  738. if (!(flags & SCAN_NOSKIP)) {
  739. while (*string != '\0') {
  740. sch = *string;
  741. if (! isspace((int)sch) ) {
  742. break;
  743. }
  744. string++;
  745. }
  746. if (*string == '\0') {
  747. underflow = 1;
  748. goto done;
  749. }
  750. }
  751. /*
  752. * Perform the requested scanning operation.
  753. */
  754. switch (op) {
  755. case 'c':
  756. case 's':
  757. /*
  758. * Scan a string up to width characters or whitespace.
  759. */
  760. if (width == 0) {
  761. width = (size_t) ~0;
  762. }
  763. end = string;
  764. while (*end != '\0') {
  765. sch = *end;
  766. if ( isspace( (int)sch ) ) {
  767. break;
  768. }
  769. end++;
  770. if (--width == 0) {
  771. break;
  772. }
  773. }
  774. if (!(flags & SCAN_SUPPRESS)) {
  775. if (numVars && objIndex >= argCount) {
  776. break;
  777. } else if (numVars) {
  778. current = args + objIndex++;
  779. ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
  780. } else {
  781. add_index_stringl(return_value, objIndex++, string, end-string);
  782. }
  783. }
  784. string = end;
  785. break;
  786. case '[': {
  787. CharSet cset;
  788. if (width == 0) {
  789. width = (size_t) ~0;
  790. }
  791. end = string;
  792. format = BuildCharSet(&cset, format);
  793. while (*end != '\0') {
  794. sch = *end;
  795. if (!CharInSet(&cset, (int)sch)) {
  796. break;
  797. }
  798. end++;
  799. if (--width == 0) {
  800. break;
  801. }
  802. }
  803. ReleaseCharSet(&cset);
  804. if (string == end) {
  805. /*
  806. * Nothing matched the range, stop processing
  807. */
  808. goto done;
  809. }
  810. if (!(flags & SCAN_SUPPRESS)) {
  811. if (numVars && objIndex >= argCount) {
  812. break;
  813. } else if (numVars) {
  814. current = args + objIndex++;
  815. ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
  816. } else {
  817. add_index_stringl(return_value, objIndex++, string, end-string);
  818. }
  819. }
  820. string = end;
  821. break;
  822. }
  823. /*
  824. case 'c':
  825. / Scan a single character./
  826. sch = *string;
  827. string++;
  828. if (!(flags & SCAN_SUPPRESS)) {
  829. if (numVars) {
  830. char __buf[2];
  831. __buf[0] = sch;
  832. __buf[1] = '\0';
  833. current = args[objIndex++];
  834. zval_ptr_dtor_nogc(*current);
  835. ZVAL_STRINGL( *current, __buf, 1);
  836. } else {
  837. add_index_stringl(return_value, objIndex++, &sch, 1);
  838. }
  839. }
  840. break;
  841. */
  842. case 'i':
  843. /*
  844. * Scan an unsigned or signed integer.
  845. */
  846. /*-cc-*/
  847. buf[0] = '\0';
  848. /*-cc-*/
  849. if ((width == 0) || (width > sizeof(buf) - 1)) {
  850. width = sizeof(buf) - 1;
  851. }
  852. flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
  853. for (end = buf; width > 0; width--) {
  854. switch (*string) {
  855. /*
  856. * The 0 digit has special meaning at the beginning of
  857. * a number. If we are unsure of the base, it
  858. * indicates that we are in base 8 or base 16 (if it is
  859. * followed by an 'x').
  860. */
  861. case '0':
  862. /*-cc-*/
  863. if (base == 16) {
  864. flags |= SCAN_XOK;
  865. }
  866. /*-cc-*/
  867. if (base == 0) {
  868. base = 8;
  869. flags |= SCAN_XOK;
  870. }
  871. if (flags & SCAN_NOZERO) {
  872. flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
  873. } else {
  874. flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
  875. }
  876. goto addToInt;
  877. case '1': case '2': case '3': case '4':
  878. case '5': case '6': case '7':
  879. if (base == 0) {
  880. base = 10;
  881. }
  882. flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
  883. goto addToInt;
  884. case '8': case '9':
  885. if (base == 0) {
  886. base = 10;
  887. }
  888. if (base <= 8) {
  889. break;
  890. }
  891. flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
  892. goto addToInt;
  893. case 'A': case 'B': case 'C':
  894. case 'D': case 'E': case 'F':
  895. case 'a': case 'b': case 'c':
  896. case 'd': case 'e': case 'f':
  897. if (base <= 10) {
  898. break;
  899. }
  900. flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
  901. goto addToInt;
  902. case '+': case '-':
  903. if (flags & SCAN_SIGNOK) {
  904. flags &= ~SCAN_SIGNOK;
  905. goto addToInt;
  906. }
  907. break;
  908. case 'x': case 'X':
  909. if ((flags & SCAN_XOK) && (end == buf+1)) {
  910. base = 16;
  911. flags &= ~SCAN_XOK;
  912. goto addToInt;
  913. }
  914. break;
  915. }
  916. /*
  917. * We got an illegal character so we are done accumulating.
  918. */
  919. break;
  920. addToInt:
  921. /*
  922. * Add the character to the temporary buffer.
  923. */
  924. *end++ = *string++;
  925. if (*string == '\0') {
  926. break;
  927. }
  928. }
  929. /*
  930. * Check to see if we need to back up because we only got a
  931. * sign or a trailing x after a 0.
  932. */
  933. if (flags & SCAN_NODIGITS) {
  934. if (*string == '\0') {
  935. underflow = 1;
  936. }
  937. goto done;
  938. } else if (end[-1] == 'x' || end[-1] == 'X') {
  939. end--;
  940. string--;
  941. }
  942. /*
  943. * Scan the value from the temporary buffer. If we are
  944. * returning a large unsigned value, we have to convert it back
  945. * to a string since PHP only supports signed values.
  946. */
  947. if (!(flags & SCAN_SUPPRESS)) {
  948. *end = '\0';
  949. value = (zend_long) (*fn)(buf, NULL, base);
  950. if ((flags & SCAN_UNSIGNED) && (value < 0)) {
  951. snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
  952. if (numVars && objIndex >= argCount) {
  953. break;
  954. } else if (numVars) {
  955. /* change passed value type to string */
  956. current = args + objIndex++;
  957. ZEND_TRY_ASSIGN_REF_STRING(current, buf);
  958. } else {
  959. add_index_string(return_value, objIndex++, buf);
  960. }
  961. } else {
  962. if (numVars && objIndex >= argCount) {
  963. break;
  964. } else if (numVars) {
  965. current = args + objIndex++;
  966. ZEND_TRY_ASSIGN_REF_LONG(current, value);
  967. } else {
  968. add_index_long(return_value, objIndex++, value);
  969. }
  970. }
  971. }
  972. break;
  973. case 'f':
  974. /*
  975. * Scan a floating point number
  976. */
  977. buf[0] = '\0'; /* call me pedantic */
  978. if ((width == 0) || (width > sizeof(buf) - 1)) {
  979. width = sizeof(buf) - 1;
  980. }
  981. flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
  982. for (end = buf; width > 0; width--) {
  983. switch (*string) {
  984. case '0': case '1': case '2': case '3':
  985. case '4': case '5': case '6': case '7':
  986. case '8': case '9':
  987. flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
  988. goto addToFloat;
  989. case '+':
  990. case '-':
  991. if (flags & SCAN_SIGNOK) {
  992. flags &= ~SCAN_SIGNOK;
  993. goto addToFloat;
  994. }
  995. break;
  996. case '.':
  997. if (flags & SCAN_PTOK) {
  998. flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
  999. goto addToFloat;
  1000. }
  1001. break;
  1002. case 'e':
  1003. case 'E':
  1004. /*
  1005. * An exponent is not allowed until there has
  1006. * been at least one digit.
  1007. */
  1008. if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
  1009. flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
  1010. | SCAN_SIGNOK | SCAN_NODIGITS;
  1011. goto addToFloat;
  1012. }
  1013. break;
  1014. }
  1015. /*
  1016. * We got an illegal character so we are done accumulating.
  1017. */
  1018. break;
  1019. addToFloat:
  1020. /*
  1021. * Add the character to the temporary buffer.
  1022. */
  1023. *end++ = *string++;
  1024. if (*string == '\0') {
  1025. break;
  1026. }
  1027. }
  1028. /*
  1029. * Check to see if we need to back up because we saw a
  1030. * trailing 'e' or sign.
  1031. */
  1032. if (flags & SCAN_NODIGITS) {
  1033. if (flags & SCAN_EXPOK) {
  1034. /*
  1035. * There were no digits at all so scanning has
  1036. * failed and we are done.
  1037. */
  1038. if (*string == '\0') {
  1039. underflow = 1;
  1040. }
  1041. goto done;
  1042. }
  1043. /*
  1044. * We got a bad exponent ('e' and maybe a sign).
  1045. */
  1046. end--;
  1047. string--;
  1048. if (*end != 'e' && *end != 'E') {
  1049. end--;
  1050. string--;
  1051. }
  1052. }
  1053. /*
  1054. * Scan the value from the temporary buffer.
  1055. */
  1056. if (!(flags & SCAN_SUPPRESS)) {
  1057. double dvalue;
  1058. *end = '\0';
  1059. dvalue = zend_strtod(buf, NULL);
  1060. if (numVars && objIndex >= argCount) {
  1061. break;
  1062. } else if (numVars) {
  1063. current = args + objIndex++;
  1064. ZEND_TRY_ASSIGN_REF_DOUBLE(current, dvalue);
  1065. } else {
  1066. add_index_double(return_value, objIndex++, dvalue );
  1067. }
  1068. }
  1069. break;
  1070. } /* switch (op) */
  1071. nconversions++;
  1072. } /* while (*format != '\0') */
  1073. done:
  1074. result = SCAN_SUCCESS;
  1075. if (underflow && (0==nconversions)) {
  1076. scan_set_error_return( numVars, return_value );
  1077. result = SCAN_ERROR_EOF;
  1078. } else if (numVars) {
  1079. zval_ptr_dtor(return_value );
  1080. ZVAL_LONG(return_value, nconversions);
  1081. } else if (nconversions < totalVars) {
  1082. /* TODO: not all elements converted. we need to prune the list - cc */
  1083. }
  1084. return result;
  1085. }
  1086. /* }}} */
  1087. /* the compiler choked when i tried to make this a macro */
  1088. static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
  1089. {
  1090. if (numVars) {
  1091. ZVAL_LONG(return_value, SCAN_ERROR_EOF); /* EOF marker */
  1092. } else {
  1093. /* convert_to_null calls destructor */
  1094. convert_to_null(return_value);
  1095. }
  1096. }
  1097. /* }}} */