pack.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317
  1. /*
  2. +----------------------------------------------------------------------+
  3. | Copyright (c) The PHP Group |
  4. +----------------------------------------------------------------------+
  5. | This source file is subject to version 3.01 of the PHP license, |
  6. | that is bundled with this package in the file LICENSE, and is |
  7. | available through the world-wide-web at the following url: |
  8. | https://www.php.net/license/3_01.txt |
  9. | If you did not receive a copy of the PHP license and are unable to |
  10. | obtain it through the world-wide-web, please send a note to |
  11. | license@php.net so we can mail you a copy immediately. |
  12. +----------------------------------------------------------------------+
  13. | Author: Chris Schneider <cschneid@relog.ch> |
  14. +----------------------------------------------------------------------+
  15. */
  16. #include "php.h"
  17. #include <stdio.h>
  18. #include <stdlib.h>
  19. #include <errno.h>
  20. #include <sys/types.h>
  21. #include <sys/stat.h>
  22. #include <fcntl.h>
  23. #ifdef PHP_WIN32
  24. #define O_RDONLY _O_RDONLY
  25. #include "win32/param.h"
  26. #else
  27. #include <sys/param.h>
  28. #endif
  29. #include "ext/standard/head.h"
  30. #include "php_string.h"
  31. #include "pack.h"
  32. #if HAVE_PWD_H
  33. #ifdef PHP_WIN32
  34. #include "win32/pwd.h"
  35. #else
  36. #include <pwd.h>
  37. #endif
  38. #endif
  39. #include "fsock.h"
  40. #if HAVE_NETINET_IN_H
  41. #include <netinet/in.h>
  42. #endif
  /*
   * INC_OUTPUTPOS(a, b): advance `outputpos` by (a) * (b) bytes.
   *
   * Expands inside PHP_FUNCTION(pack) and relies on that function's locals
   * `outputpos`, `formatcodes`, `formatargs` and `code`.  When (a) is negative
   * or the product would push `outputpos` past INT_MAX, both format buffers
   * are freed, a value error is raised and the caller is left through
   * RETURN_THROWS().
   *
   * The expansion is a single brace-enclosed block so that it behaves as one
   * statement (e.g. as the body of an unbraced `if`), and it is valid both
   * with and without a trailing semicolon at the call site.
   */
  #define INC_OUTPUTPOS(a,b) \
  { \
      if ((a) < 0 || ((INT_MAX - outputpos)/((int)(b))) < (a)) { \
          efree(formatcodes); \
          efree(formatargs); \
          zend_value_error("Type %c: integer overflow in format string", code); \
          RETURN_THROWS(); \
      } \
      outputpos += (a)*(b); \
  }
  /* MACHINE_LITTLE_ENDIAN is 0 when the build defines WORDS_BIGENDIAN and 1
   * otherwise; unpack() compares it against the byte order a format code asks for. */
  #ifdef WORDS_BIGENDIAN
  #define MACHINE_LITTLE_ENDIAN 0
  #else
  #define MACHINE_LITTLE_ENDIAN 1
  #endif
  /* Integer types declared through ZEND_SET_ALIGNED(1, ...).  unpack() casts
   * pointers into the input string (at arbitrary byte offsets) to these types
   * before dereferencing them.
   * NOTE(review): presumably ZEND_SET_ALIGNED(1, ...) requests 1-byte alignment
   * so such loads are legal - confirm against the macro's definition. */
  typedef ZEND_SET_ALIGNED(1, uint16_t unaligned_uint16_t);
  typedef ZEND_SET_ALIGNED(1, uint32_t unaligned_uint32_t);
  typedef ZEND_SET_ALIGNED(1, uint64_t unaligned_uint64_t);
  typedef ZEND_SET_ALIGNED(1, unsigned int unaligned_uint);
  typedef ZEND_SET_ALIGNED(1, int unaligned_int);
  /*
   * Byte-order maps consumed by php_pack(): output byte i is taken from byte
   * map[i] of the zend_long being packed, so every entry is a byte offset into
   * that value.  The arrays are only declared here; they are filled in
   * elsewhere (not visible in this part of the file).
   */
  /* Mapping of byte from char (8bit) to long for machine endian */
  static int byte_map[1];
  /* Mappings of bytes from int (machine dependent) to int for machine endian */
  static int int_map[sizeof(int)];
  /* Mappings of bytes from shorts (16bit) for all endian environments */
  static int machine_endian_short_map[2];
  static int big_endian_short_map[2];
  static int little_endian_short_map[2];
  /* Mappings of bytes from longs (32bit) for all endian environments */
  static int machine_endian_long_map[4];
  static int big_endian_long_map[4];
  static int little_endian_long_map[4];
  /* The 64-bit maps only exist when zend_long is wider than 4 bytes */
  #if SIZEOF_ZEND_LONG > 4
  /* Mappings of bytes from quads (64bit) for all endian environments */
  static int machine_endian_longlong_map[8];
  static int big_endian_longlong_map[8];
  static int little_endian_longlong_map[8];
  #endif
  79. /* {{{ php_pack */
  80. static void php_pack(zval *val, size_t size, int *map, char *output)
  81. {
  82. size_t i;
  83. char *v;
  84. convert_to_long(val);
  85. v = (char *) &Z_LVAL_P(val);
  86. for (i = 0; i < size; i++) {
  87. *output++ = v[map[i]];
  88. }
  89. }
  90. /* }}} */
  /* Return `arg` with its two bytes exchanged. */
  static inline uint16_t php_pack_reverse_int16(uint16_t arg)
  {
      const unsigned int low_byte = arg & 0xFF;
      const unsigned int high_byte = (arg >> 8) & 0xFF;

      return (uint16_t) ((low_byte << 8) | high_byte);
  }
  95. /* {{{ php_pack_reverse_int32 */
  /* Return `arg` with the order of its four bytes reversed. */
  static inline uint32_t php_pack_reverse_int32(uint32_t arg)
  {
      const uint32_t byte0_to_top = (arg & 0xFF) << 24;
      const uint32_t byte1_up     = (arg & 0xFF00) << 8;
      const uint32_t byte2_down   = (arg >> 8) & 0xFF00;
      const uint32_t byte3_to_low = (arg >> 24) & 0xFF;

      return byte0_to_top | byte1_up | byte2_down | byte3_to_low;
  }
  102. /* }}} */
  /* {{{ php_pack_reverse_int64 */
  /*
   * Return `arg` with the order of its eight bytes reversed: each 32-bit half
   * is byte-reversed with php_pack_reverse_int32() and the halves trade places.
   */
  static inline uint64_t php_pack_reverse_int64(uint64_t arg)
  {
      const uint32_t low_word = (uint32_t) (arg & 0xFFFFFFFFu);
      const uint32_t high_word = (uint32_t) (arg >> 32);

      return ((uint64_t) php_pack_reverse_int32(low_word) << 32)
           | (uint64_t) php_pack_reverse_int32(high_word);
  }
  115. /* }}} */
  116. /* {{{ php_pack_copy_float */
  /*
   * Copy the bytes of `f` to `dst` (sizeof(float) bytes).  The bytes are
   * reversed first when the requested order (`is_little_endian` non-zero for
   * little endian, zero for big endian) differs from the order the build was
   * configured for via WORDS_BIGENDIAN.
   */
  static void php_pack_copy_float(int is_little_endian, void * dst, float f)
  {
      union {
          float value;
          uint32_t bits;
      } pun;
      int must_swap;

  #ifdef WORDS_BIGENDIAN
      must_swap = (is_little_endian != 0);
  #else /* WORDS_BIGENDIAN */
      must_swap = (is_little_endian == 0);
  #endif /* WORDS_BIGENDIAN */

      pun.value = f;
      if (must_swap) {
          pun.bits = php_pack_reverse_int32(pun.bits);
      }
      memcpy(dst, &pun.value, sizeof(float));
  }
  135. /* }}} */
  136. /* {{{ php_pack_copy_double */
  /*
   * Copy the bytes of `d` to `dst` (sizeof(double) bytes).  The bytes are
   * reversed first when the requested order (`is_little_endian` non-zero for
   * little endian, zero for big endian) differs from the order the build was
   * configured for via WORDS_BIGENDIAN.
   */
  static void php_pack_copy_double(int is_little_endian, void * dst, double d)
  {
      union {
          double value;
          uint64_t bits;
      } pun;
      int must_swap;

  #ifdef WORDS_BIGENDIAN
      must_swap = (is_little_endian != 0);
  #else /* WORDS_BIGENDIAN */
      must_swap = (is_little_endian == 0);
  #endif /* WORDS_BIGENDIAN */

      pun.value = d;
      if (must_swap) {
          pun.bits = php_pack_reverse_int64(pun.bits);
      }
      memcpy(dst, &pun.value, sizeof(double));
  }
  155. /* }}} */
  156. /* {{{ php_pack_parse_float */
  /*
   * Read sizeof(float) bytes from `src` and return them as a float.  The
   * bytes are reversed before conversion when the stated order of the data
   * (`is_little_endian` non-zero for little endian, zero for big endian)
   * differs from the order the build was configured for via WORDS_BIGENDIAN.
   */
  static float php_pack_parse_float(int is_little_endian, void * src)
  {
      union {
          float value;
          uint32_t bits;
      } pun;
      int must_swap;

  #ifdef WORDS_BIGENDIAN
      must_swap = (is_little_endian != 0);
  #else /* WORDS_BIGENDIAN */
      must_swap = (is_little_endian == 0);
  #endif /* WORDS_BIGENDIAN */

      memcpy(&pun.bits, src, sizeof(float));
      if (must_swap) {
          pun.bits = php_pack_reverse_int32(pun.bits);
      }
      return pun.value;
  }
  175. /* }}} */
  176. /* {{{ php_pack_parse_double */
  /*
   * Read sizeof(double) bytes from `src` and return them as a double.  The
   * bytes are reversed before conversion when the stated order of the data
   * (`is_little_endian` non-zero for little endian, zero for big endian)
   * differs from the order the build was configured for via WORDS_BIGENDIAN.
   */
  static double php_pack_parse_double(int is_little_endian, void * src)
  {
      union {
          double value;
          uint64_t bits;
      } pun;
      int must_swap;

  #ifdef WORDS_BIGENDIAN
      must_swap = (is_little_endian != 0);
  #else /* WORDS_BIGENDIAN */
      must_swap = (is_little_endian == 0);
  #endif /* WORDS_BIGENDIAN */

      memcpy(&pun.bits, src, sizeof(double));
      if (must_swap) {
          pun.bits = php_pack_reverse_int64(pun.bits);
      }
      return pun.value;
  }
  195. /* }}} */
  /* pack() idea stolen from Perl (implemented formats behave the same as there except J and P)
   * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, v, V, q, Q, J, P, f, d, x, X, @.
   * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
   */
  200. /* {{{ Takes one or more arguments and packs them into a binary string according to the format argument */
  201. PHP_FUNCTION(pack)
  202. {
  203. zval *argv = NULL;
  204. int num_args = 0;
  205. size_t i;
  206. int currentarg;
  207. char *format;
  208. size_t formatlen;
  209. char *formatcodes;
  210. int *formatargs;
  211. size_t formatcount = 0;
  212. int outputpos = 0, outputsize = 0;
  213. zend_string *output;
  214. ZEND_PARSE_PARAMETERS_START(1, -1)
  215. Z_PARAM_STRING(format, formatlen)
  216. Z_PARAM_VARIADIC('*', argv, num_args)
  217. ZEND_PARSE_PARAMETERS_END();
  218. /* We have a maximum of <formatlen> format codes to deal with */
  219. formatcodes = safe_emalloc(formatlen, sizeof(*formatcodes), 0);
  220. formatargs = safe_emalloc(formatlen, sizeof(*formatargs), 0);
  221. currentarg = 0;
  222. /* Preprocess format into formatcodes and formatargs */
  223. for (i = 0; i < formatlen; formatcount++) {
  224. char code = format[i++];
  225. int arg = 1;
  226. /* Handle format arguments if any */
  227. if (i < formatlen) {
  228. char c = format[i];
  229. if (c == '*') {
  230. arg = -1;
  231. i++;
  232. }
  233. else if (c >= '0' && c <= '9') {
  234. arg = atoi(&format[i]);
  235. while (format[i] >= '0' && format[i] <= '9' && i < formatlen) {
  236. i++;
  237. }
  238. }
  239. }
  240. /* Handle special arg '*' for all codes and check argv overflows */
  241. switch ((int) code) {
  242. /* Never uses any args */
  243. case 'x':
  244. case 'X':
  245. case '@':
  246. if (arg < 0) {
  247. php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", code);
  248. arg = 1;
  249. }
  250. break;
  251. /* Always uses one arg */
  252. case 'a':
  253. case 'A':
  254. case 'Z':
  255. case 'h':
  256. case 'H':
  257. if (currentarg >= num_args) {
  258. efree(formatcodes);
  259. efree(formatargs);
  260. zend_value_error("Type %c: not enough arguments", code);
  261. RETURN_THROWS();
  262. }
  263. if (arg < 0) {
  264. if (!try_convert_to_string(&argv[currentarg])) {
  265. efree(formatcodes);
  266. efree(formatargs);
  267. RETURN_THROWS();
  268. }
  269. arg = Z_STRLEN(argv[currentarg]);
  270. if (code == 'Z') {
  271. /* add one because Z is always NUL-terminated:
  272. * pack("Z*", "aa") === "aa\0"
  273. * pack("Z2", "aa") === "a\0" */
  274. arg++;
  275. }
  276. }
  277. currentarg++;
  278. break;
  279. /* Use as many args as specified */
  280. case 'q':
  281. case 'Q':
  282. case 'J':
  283. case 'P':
  284. #if SIZEOF_ZEND_LONG < 8
  285. efree(formatcodes);
  286. efree(formatargs);
  287. zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
  288. RETURN_THROWS();
  289. #endif
  290. case 'c':
  291. case 'C':
  292. case 's':
  293. case 'S':
  294. case 'i':
  295. case 'I':
  296. case 'l':
  297. case 'L':
  298. case 'n':
  299. case 'N':
  300. case 'v':
  301. case 'V':
  302. case 'f': /* float */
  303. case 'g': /* little endian float */
  304. case 'G': /* big endian float */
  305. case 'd': /* double */
  306. case 'e': /* little endian double */
  307. case 'E': /* big endian double */
  308. if (arg < 0) {
  309. arg = num_args - currentarg;
  310. }
  311. if (currentarg > INT_MAX - arg) {
  312. goto too_few_args;
  313. }
  314. currentarg += arg;
  315. if (currentarg > num_args) {
  316. too_few_args:
  317. efree(formatcodes);
  318. efree(formatargs);
  319. zend_value_error("Type %c: too few arguments", code);
  320. RETURN_THROWS();
  321. }
  322. break;
  323. default:
  324. efree(formatcodes);
  325. efree(formatargs);
  326. zend_value_error("Type %c: unknown format code", code);
  327. RETURN_THROWS();
  328. }
  329. formatcodes[formatcount] = code;
  330. formatargs[formatcount] = arg;
  331. }
  332. if (currentarg < num_args) {
  333. php_error_docref(NULL, E_WARNING, "%d arguments unused", (num_args - currentarg));
  334. }
  335. /* Calculate output length and upper bound while processing*/
  336. for (i = 0; i < formatcount; i++) {
  337. int code = (int) formatcodes[i];
  338. int arg = formatargs[i];
  339. switch ((int) code) {
  340. case 'h':
  341. case 'H':
  342. INC_OUTPUTPOS((arg + (arg % 2)) / 2,1) /* 4 bit per arg */
  343. break;
  344. case 'a':
  345. case 'A':
  346. case 'Z':
  347. case 'c':
  348. case 'C':
  349. case 'x':
  350. INC_OUTPUTPOS(arg,1) /* 8 bit per arg */
  351. break;
  352. case 's':
  353. case 'S':
  354. case 'n':
  355. case 'v':
  356. INC_OUTPUTPOS(arg,2) /* 16 bit per arg */
  357. break;
  358. case 'i':
  359. case 'I':
  360. INC_OUTPUTPOS(arg,sizeof(int))
  361. break;
  362. case 'l':
  363. case 'L':
  364. case 'N':
  365. case 'V':
  366. INC_OUTPUTPOS(arg,4) /* 32 bit per arg */
  367. break;
  368. #if SIZEOF_ZEND_LONG > 4
  369. case 'q':
  370. case 'Q':
  371. case 'J':
  372. case 'P':
  373. INC_OUTPUTPOS(arg,8) /* 32 bit per arg */
  374. break;
  375. #endif
  376. case 'f': /* float */
  377. case 'g': /* little endian float */
  378. case 'G': /* big endian float */
  379. INC_OUTPUTPOS(arg,sizeof(float))
  380. break;
  381. case 'd': /* double */
  382. case 'e': /* little endian double */
  383. case 'E': /* big endian double */
  384. INC_OUTPUTPOS(arg,sizeof(double))
  385. break;
  386. case 'X':
  387. outputpos -= arg;
  388. if (outputpos < 0) {
  389. php_error_docref(NULL, E_WARNING, "Type %c: outside of string", code);
  390. outputpos = 0;
  391. }
  392. break;
  393. case '@':
  394. outputpos = arg;
  395. break;
  396. }
  397. if (outputsize < outputpos) {
  398. outputsize = outputpos;
  399. }
  400. }
  401. output = zend_string_alloc(outputsize, 0);
  402. outputpos = 0;
  403. currentarg = 0;
  404. /* Do actual packing */
  405. for (i = 0; i < formatcount; i++) {
  406. int code = (int) formatcodes[i];
  407. int arg = formatargs[i];
  408. switch ((int) code) {
  409. case 'a':
  410. case 'A':
  411. case 'Z': {
  412. size_t arg_cp = (code != 'Z') ? arg : MAX(0, arg - 1);
  413. zend_string *tmp_str;
  414. zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
  415. memset(&ZSTR_VAL(output)[outputpos], (code == 'a' || code == 'Z') ? '\0' : ' ', arg);
  416. memcpy(&ZSTR_VAL(output)[outputpos], ZSTR_VAL(str),
  417. (ZSTR_LEN(str) < arg_cp) ? ZSTR_LEN(str) : arg_cp);
  418. outputpos += arg;
  419. zend_tmp_string_release(tmp_str);
  420. break;
  421. }
  422. case 'h':
  423. case 'H': {
  424. int nibbleshift = (code == 'h') ? 0 : 4;
  425. int first = 1;
  426. zend_string *tmp_str;
  427. zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
  428. char *v = ZSTR_VAL(str);
  429. outputpos--;
  430. if ((size_t)arg > ZSTR_LEN(str)) {
  431. php_error_docref(NULL, E_WARNING, "Type %c: not enough characters in string", code);
  432. arg = ZSTR_LEN(str);
  433. }
  434. while (arg-- > 0) {
  435. char n = *v++;
  436. if (n >= '0' && n <= '9') {
  437. n -= '0';
  438. } else if (n >= 'A' && n <= 'F') {
  439. n -= ('A' - 10);
  440. } else if (n >= 'a' && n <= 'f') {
  441. n -= ('a' - 10);
  442. } else {
  443. php_error_docref(NULL, E_WARNING, "Type %c: illegal hex digit %c", code, n);
  444. n = 0;
  445. }
  446. if (first--) {
  447. ZSTR_VAL(output)[++outputpos] = 0;
  448. } else {
  449. first = 1;
  450. }
  451. ZSTR_VAL(output)[outputpos] |= (n << nibbleshift);
  452. nibbleshift = (nibbleshift + 4) & 7;
  453. }
  454. outputpos++;
  455. zend_tmp_string_release(tmp_str);
  456. break;
  457. }
  458. case 'c':
  459. case 'C':
  460. while (arg-- > 0) {
  461. php_pack(&argv[currentarg++], 1, byte_map, &ZSTR_VAL(output)[outputpos]);
  462. outputpos++;
  463. }
  464. break;
  465. case 's':
  466. case 'S':
  467. case 'n':
  468. case 'v': {
  469. int *map = machine_endian_short_map;
  470. if (code == 'n') {
  471. map = big_endian_short_map;
  472. } else if (code == 'v') {
  473. map = little_endian_short_map;
  474. }
  475. while (arg-- > 0) {
  476. php_pack(&argv[currentarg++], 2, map, &ZSTR_VAL(output)[outputpos]);
  477. outputpos += 2;
  478. }
  479. break;
  480. }
  481. case 'i':
  482. case 'I':
  483. while (arg-- > 0) {
  484. php_pack(&argv[currentarg++], sizeof(int), int_map, &ZSTR_VAL(output)[outputpos]);
  485. outputpos += sizeof(int);
  486. }
  487. break;
  488. case 'l':
  489. case 'L':
  490. case 'N':
  491. case 'V': {
  492. int *map = machine_endian_long_map;
  493. if (code == 'N') {
  494. map = big_endian_long_map;
  495. } else if (code == 'V') {
  496. map = little_endian_long_map;
  497. }
  498. while (arg-- > 0) {
  499. php_pack(&argv[currentarg++], 4, map, &ZSTR_VAL(output)[outputpos]);
  500. outputpos += 4;
  501. }
  502. break;
  503. }
  504. #if SIZEOF_ZEND_LONG > 4
  505. case 'q':
  506. case 'Q':
  507. case 'J':
  508. case 'P': {
  509. int *map = machine_endian_longlong_map;
  510. if (code == 'J') {
  511. map = big_endian_longlong_map;
  512. } else if (code == 'P') {
  513. map = little_endian_longlong_map;
  514. }
  515. while (arg-- > 0) {
  516. php_pack(&argv[currentarg++], 8, map, &ZSTR_VAL(output)[outputpos]);
  517. outputpos += 8;
  518. }
  519. break;
  520. }
  521. #endif
  522. case 'f': {
  523. while (arg-- > 0) {
  524. float v = (float) zval_get_double(&argv[currentarg++]);
  525. memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
  526. outputpos += sizeof(v);
  527. }
  528. break;
  529. }
  530. case 'g': {
  531. /* pack little endian float */
  532. while (arg-- > 0) {
  533. float v = (float) zval_get_double(&argv[currentarg++]);
  534. php_pack_copy_float(1, &ZSTR_VAL(output)[outputpos], v);
  535. outputpos += sizeof(v);
  536. }
  537. break;
  538. }
  539. case 'G': {
  540. /* pack big endian float */
  541. while (arg-- > 0) {
  542. float v = (float) zval_get_double(&argv[currentarg++]);
  543. php_pack_copy_float(0, &ZSTR_VAL(output)[outputpos], v);
  544. outputpos += sizeof(v);
  545. }
  546. break;
  547. }
  548. case 'd': {
  549. while (arg-- > 0) {
  550. double v = (double) zval_get_double(&argv[currentarg++]);
  551. memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
  552. outputpos += sizeof(v);
  553. }
  554. break;
  555. }
  556. case 'e': {
  557. /* pack little endian double */
  558. while (arg-- > 0) {
  559. double v = (double) zval_get_double(&argv[currentarg++]);
  560. php_pack_copy_double(1, &ZSTR_VAL(output)[outputpos], v);
  561. outputpos += sizeof(v);
  562. }
  563. break;
  564. }
  565. case 'E': {
  566. /* pack big endian double */
  567. while (arg-- > 0) {
  568. double v = (double) zval_get_double(&argv[currentarg++]);
  569. php_pack_copy_double(0, &ZSTR_VAL(output)[outputpos], v);
  570. outputpos += sizeof(v);
  571. }
  572. break;
  573. }
  574. case 'x':
  575. memset(&ZSTR_VAL(output)[outputpos], '\0', arg);
  576. outputpos += arg;
  577. break;
  578. case 'X':
  579. outputpos -= arg;
  580. if (outputpos < 0) {
  581. outputpos = 0;
  582. }
  583. break;
  584. case '@':
  585. if (arg > outputpos) {
  586. memset(&ZSTR_VAL(output)[outputpos], '\0', arg - outputpos);
  587. }
  588. outputpos = arg;
  589. break;
  590. }
  591. }
  592. efree(formatcodes);
  593. efree(formatargs);
  594. ZSTR_VAL(output)[outputpos] = '\0';
  595. ZSTR_LEN(output) = outputpos;
  596. RETURN_NEW_STR(output);
  597. }
  598. /* }}} */
  /* unpack() is based on Perl's unpack(), but is modified a bit from there.
   * Rather than depending on error-prone ordered lists or syntactically
   * unpleasant pass-by-reference, we return an array with named elements
   * (in the spirit of *_fetch_object()). Syntax is "f[repeat]name/...", where
   * "f" is the formatter char (like pack()), "[repeat]" is the optional
   * repeater argument, and "name" is the name of the array key to use.
   * Example: "c2chars/nints" will return an array with the keys
   * chars1, chars2, and ints.
   * Numeric pack types will return numbers, a and A will return strings,
   * f and d will return doubles.
   * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, v, V, q, Q, J, P, f, d, x, X, @.
   * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
   */
  612. /* {{{ Unpack binary string into named array elements according to format argument */
  613. PHP_FUNCTION(unpack)
  614. {
  615. char *format, *input;
  616. zend_string *formatarg, *inputarg;
  617. zend_long formatlen, inputpos, inputlen;
  618. int i;
  619. zend_long offset = 0;
  620. ZEND_PARSE_PARAMETERS_START(2, 3)
  621. Z_PARAM_STR(formatarg)
  622. Z_PARAM_STR(inputarg)
  623. Z_PARAM_OPTIONAL
  624. Z_PARAM_LONG(offset)
  625. ZEND_PARSE_PARAMETERS_END();
  626. format = ZSTR_VAL(formatarg);
  627. formatlen = ZSTR_LEN(formatarg);
  628. input = ZSTR_VAL(inputarg);
  629. inputlen = ZSTR_LEN(inputarg);
  630. inputpos = 0;
  631. if (offset < 0 || offset > inputlen) {
  632. zend_argument_value_error(3, "must be contained in argument #2 ($data)");
  633. RETURN_THROWS();
  634. }
  635. input += offset;
  636. inputlen -= offset;
  637. array_init(return_value);
  638. while (formatlen-- > 0) {
  639. char type = *(format++);
  640. char c;
  641. int repetitions = 1, argb;
  642. char *name;
  643. int namelen;
  644. int size = 0;
  645. /* Handle format arguments if any */
  646. if (formatlen > 0) {
  647. c = *format;
  648. if (c >= '0' && c <= '9') {
  649. repetitions = atoi(format);
  650. while (formatlen > 0 && *format >= '0' && *format <= '9') {
  651. format++;
  652. formatlen--;
  653. }
  654. } else if (c == '*') {
  655. repetitions = -1;
  656. format++;
  657. formatlen--;
  658. }
  659. }
  660. /* Get of new value in array */
  661. name = format;
  662. argb = repetitions;
  663. while (formatlen > 0 && *format != '/') {
  664. formatlen--;
  665. format++;
  666. }
  667. namelen = format - name;
  668. if (namelen > 200)
  669. namelen = 200;
  670. switch ((int) type) {
  671. /* Never use any input */
  672. case 'X':
  673. size = -1;
  674. if (repetitions < 0) {
  675. php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", type);
  676. repetitions = 1;
  677. }
  678. break;
  679. case '@':
  680. size = 0;
  681. break;
  682. case 'a':
  683. case 'A':
  684. case 'Z':
  685. size = repetitions;
  686. repetitions = 1;
  687. break;
  688. case 'h':
  689. case 'H':
  690. size = (repetitions > 0) ? (repetitions + (repetitions % 2)) / 2 : repetitions;
  691. repetitions = 1;
  692. break;
  693. /* Use 1 byte of input */
  694. case 'c':
  695. case 'C':
  696. case 'x':
  697. size = 1;
  698. break;
  699. /* Use 2 bytes of input */
  700. case 's':
  701. case 'S':
  702. case 'n':
  703. case 'v':
  704. size = 2;
  705. break;
  706. /* Use sizeof(int) bytes of input */
  707. case 'i':
  708. case 'I':
  709. size = sizeof(int);
  710. break;
  711. /* Use 4 bytes of input */
  712. case 'l':
  713. case 'L':
  714. case 'N':
  715. case 'V':
  716. size = 4;
  717. break;
  718. /* Use 8 bytes of input */
  719. case 'q':
  720. case 'Q':
  721. case 'J':
  722. case 'P':
  723. #if SIZEOF_ZEND_LONG > 4
  724. size = 8;
  725. break;
  726. #else
  727. zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
  728. RETURN_THROWS();
  729. #endif
  730. /* Use sizeof(float) bytes of input */
  731. case 'f':
  732. case 'g':
  733. case 'G':
  734. size = sizeof(float);
  735. break;
  736. /* Use sizeof(double) bytes of input */
  737. case 'd':
  738. case 'e':
  739. case 'E':
  740. size = sizeof(double);
  741. break;
  742. default:
  743. zend_value_error("Invalid format type %c", type);
  744. RETURN_THROWS();
  745. }
  746. if (size != 0 && size != -1 && size < 0) {
  747. php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
  748. zend_array_destroy(Z_ARR_P(return_value));
  749. RETURN_FALSE;
  750. }
  751. /* Do actual unpacking */
  752. for (i = 0; i != repetitions; i++ ) {
  753. if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) {
  754. php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
  755. zend_array_destroy(Z_ARR_P(return_value));
  756. RETURN_FALSE;
  757. }
  758. if ((inputpos + size) <= inputlen) {
  759. zend_string* real_name;
  760. zval val;
  761. if (repetitions == 1 && namelen > 0) {
  762. /* Use a part of the formatarg argument directly as the name. */
  763. real_name = zend_string_init_fast(name, namelen);
  764. } else {
  765. /* Need to add the 1-based element number to the name */
  766. char buf[MAX_LENGTH_OF_LONG + 1];
  767. char *res = zend_print_ulong_to_buf(buf + sizeof(buf) - 1, i+1);
  768. size_t digits = buf + sizeof(buf) - 1 - res;
  769. real_name = zend_string_concat2(name, namelen, res, digits);
  770. }
  771. switch ((int) type) {
  772. case 'a': {
  773. /* a will not strip any trailing whitespace or null padding */
  774. zend_long len = inputlen - inputpos; /* Remaining string */
  775. /* If size was given take minimum of len and size */
  776. if ((size >= 0) && (len > size)) {
  777. len = size;
  778. }
  779. size = len;
  780. ZVAL_STRINGL(&val, &input[inputpos], len);
  781. zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
  782. break;
  783. }
  784. case 'A': {
  785. /* A will strip any trailing whitespace */
  786. char padn = '\0'; char pads = ' '; char padt = '\t'; char padc = '\r'; char padl = '\n';
  787. zend_long len = inputlen - inputpos; /* Remaining string */
  788. /* If size was given take minimum of len and size */
  789. if ((size >= 0) && (len > size)) {
  790. len = size;
  791. }
  792. size = len;
  793. /* Remove trailing white space and nulls chars from unpacked data */
  794. while (--len >= 0) {
  795. if (input[inputpos + len] != padn
  796. && input[inputpos + len] != pads
  797. && input[inputpos + len] != padt
  798. && input[inputpos + len] != padc
  799. && input[inputpos + len] != padl
  800. )
  801. break;
  802. }
  803. ZVAL_STRINGL(&val, &input[inputpos], len + 1);
  804. zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
  805. break;
  806. }
  807. /* New option added for Z to remain in-line with the Perl implementation */
  808. case 'Z': {
  809. /* Z will strip everything after the first null character */
  810. char pad = '\0';
  811. zend_long s,
  812. len = inputlen - inputpos; /* Remaining string */
  813. /* If size was given take minimum of len and size */
  814. if ((size >= 0) && (len > size)) {
  815. len = size;
  816. }
  817. size = len;
  818. /* Remove everything after the first null */
  819. for (s=0 ; s < len ; s++) {
  820. if (input[inputpos + s] == pad)
  821. break;
  822. }
  823. len = s;
  824. ZVAL_STRINGL(&val, &input[inputpos], len);
  825. zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
  826. break;
  827. }
  828. case 'h':
  829. case 'H': {
  830. zend_long len = (inputlen - inputpos) * 2; /* Remaining */
  831. int nibbleshift = (type == 'h') ? 0 : 4;
  832. int first = 1;
  833. zend_string *buf;
  834. zend_long ipos, opos;
  835. /* If size was given take minimum of len and size */
  836. if (size >= 0 && len > (size * 2)) {
  837. len = size * 2;
  838. }
  839. if (len > 0 && argb > 0) {
  840. len -= argb % 2;
  841. }
  842. buf = zend_string_alloc(len, 0);
  843. for (ipos = opos = 0; opos < len; opos++) {
  844. char cc = (input[inputpos + ipos] >> nibbleshift) & 0xf;
  845. if (cc < 10) {
  846. cc += '0';
  847. } else {
  848. cc += 'a' - 10;
  849. }
  850. ZSTR_VAL(buf)[opos] = cc;
  851. nibbleshift = (nibbleshift + 4) & 7;
  852. if (first-- == 0) {
  853. ipos++;
  854. first = 1;
  855. }
  856. }
  857. ZSTR_VAL(buf)[len] = '\0';
  858. ZVAL_STR(&val, buf);
  859. zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
  860. break;
  861. }
  862. case 'c': /* signed */
  863. case 'C': { /* unsigned */
  864. uint8_t x = input[inputpos];
  865. zend_long v = (type == 'c') ? (int8_t) x : x;
  866. ZVAL_LONG(&val, v);
  867. zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
  868. break;
  869. }
  870. case 's': /* signed machine endian */
  871. case 'S': /* unsigned machine endian */
  872. case 'n': /* unsigned big endian */
  873. case 'v': { /* unsigned little endian */
  874. zend_long v = 0;
  875. uint16_t x = *((unaligned_uint16_t*) &input[inputpos]);
  876. if (type == 's') {
  877. v = (int16_t) x;
  878. } else if ((type == 'n' && MACHINE_LITTLE_ENDIAN) || (type == 'v' && !MACHINE_LITTLE_ENDIAN)) {
  879. v = php_pack_reverse_int16(x);
  880. } else {
  881. v = x;
  882. }
  883. ZVAL_LONG(&val, v);
  884. zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
  885. break;
  886. }
  887. case 'i': /* signed integer, machine size, machine endian */
  888. case 'I': { /* unsigned integer, machine size, machine endian */
  889. zend_long v;
  890. if (type == 'i') {
  891. int x = *((unaligned_int*) &input[inputpos]);
  892. v = x;
  893. } else {
  894. unsigned int x = *((unaligned_uint*) &input[inputpos]);
  895. v = x;
  896. }
  897. ZVAL_LONG(&val, v);
  898. zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
  899. break;
  900. }
  901. case 'l': /* signed machine endian */
  902. case 'L': /* unsigned machine endian */
  903. case 'N': /* unsigned big endian */
  904. case 'V': { /* unsigned little endian */
  905. zend_long v = 0;
  906. uint32_t x = *((unaligned_uint32_t*) &input[inputpos]);
  907. if (type == 'l') {
  908. v = (int32_t) x;
  909. } else if ((type == 'N' && MACHINE_LITTLE_ENDIAN) || (type == 'V' && !MACHINE_LITTLE_ENDIAN)) {
  910. v = php_pack_reverse_int32(x);
  911. } else {
  912. v = x;
  913. }
  914. ZVAL_LONG(&val, v);
  915. zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
  916. break;
  917. }
  918. #if SIZEOF_ZEND_LONG > 4
  919. case 'q': /* signed machine endian */
  920. case 'Q': /* unsigned machine endian */
  921. case 'J': /* unsigned big endian */
  922. case 'P': { /* unsigned little endian */
  923. zend_long v = 0;
  924. uint64_t x = *((unaligned_uint64_t*) &input[inputpos]);
  925. if (type == 'q') {
  926. v = (int64_t) x;
  927. } else if ((type == 'J' && MACHINE_LITTLE_ENDIAN) || (type == 'P' && !MACHINE_LITTLE_ENDIAN)) {
  928. v = php_pack_reverse_int64(x);
  929. } else {
  930. v = x;
  931. }
  932. ZVAL_LONG(&val, v);
  933. zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
  934. break;
  935. }
  936. #endif
  937. case 'f': /* float */
  938. case 'g': /* little endian float*/
  939. case 'G': /* big endian float*/
  940. {
  941. float v;
  942. if (type == 'g') {
  943. v = php_pack_parse_float(1, &input[inputpos]);
  944. } else if (type == 'G') {
  945. v = php_pack_parse_float(0, &input[inputpos]);
  946. } else {
  947. memcpy(&v, &input[inputpos], sizeof(float));
  948. }
  949. ZVAL_DOUBLE(&val, v);
  950. zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
  951. break;
  952. }
  953. case 'd': /* double */
  954. case 'e': /* little endian float */
  955. case 'E': /* big endian float */
  956. {
  957. double v;
  958. if (type == 'e') {
  959. v = php_pack_parse_double(1, &input[inputpos]);
  960. } else if (type == 'E') {
  961. v = php_pack_parse_double(0, &input[inputpos]);
  962. } else {
  963. memcpy(&v, &input[inputpos], sizeof(double));
  964. }
  965. ZVAL_DOUBLE(&val, v);
  966. zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
  967. break;
  968. }
  969. case 'x':
  970. /* Do nothing with input, just skip it */
  971. break;
  972. case 'X':
  973. if (inputpos < size) {
  974. inputpos = -size;
  975. i = repetitions - 1; /* Break out of for loop */
  976. if (repetitions >= 0) {
  977. php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
  978. }
  979. }
  980. break;
  981. case '@':
  982. if (repetitions <= inputlen) {
  983. inputpos = repetitions;
  984. } else {
  985. php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
  986. }
  987. i = repetitions - 1; /* Done, break out of for loop */
  988. break;
  989. }
  990. zend_string_release(real_name);
  991. inputpos += size;
  992. if (inputpos < 0) {
  993. if (size != -1) { /* only print warning if not working with * */
  994. php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
  995. }
  996. inputpos = 0;
  997. }
  998. } else if (repetitions < 0) {
  999. /* Reached end of input for '*' repeater */
  1000. break;
  1001. } else {
  1002. php_error_docref(NULL, E_WARNING, "Type %c: not enough input, need %d, have " ZEND_LONG_FMT, type, size, inputlen - inputpos);
  1003. zend_array_destroy(Z_ARR_P(return_value));
  1004. RETURN_FALSE;
  1005. }
  1006. }
  1007. if (formatlen > 0) {
  1008. formatlen--; /* Skip '/' separator, does no harm if inputlen == 0 */
  1009. format++;
  1010. }
  1011. }
  1012. }
  1013. /* }}} */
  1014. /* {{{ PHP_MINIT_FUNCTION */
  1015. PHP_MINIT_FUNCTION(pack)
  1016. {
  1017. int i;
  1018. if (MACHINE_LITTLE_ENDIAN) {
  1019. /* Where to get lo to hi bytes from */
  1020. byte_map[0] = 0;
  1021. for (i = 0; i < (int)sizeof(int); i++) {
  1022. int_map[i] = i;
  1023. }
  1024. machine_endian_short_map[0] = 0;
  1025. machine_endian_short_map[1] = 1;
  1026. big_endian_short_map[0] = 1;
  1027. big_endian_short_map[1] = 0;
  1028. little_endian_short_map[0] = 0;
  1029. little_endian_short_map[1] = 1;
  1030. machine_endian_long_map[0] = 0;
  1031. machine_endian_long_map[1] = 1;
  1032. machine_endian_long_map[2] = 2;
  1033. machine_endian_long_map[3] = 3;
  1034. big_endian_long_map[0] = 3;
  1035. big_endian_long_map[1] = 2;
  1036. big_endian_long_map[2] = 1;
  1037. big_endian_long_map[3] = 0;
  1038. little_endian_long_map[0] = 0;
  1039. little_endian_long_map[1] = 1;
  1040. little_endian_long_map[2] = 2;
  1041. little_endian_long_map[3] = 3;
  1042. #if SIZEOF_ZEND_LONG > 4
  1043. machine_endian_longlong_map[0] = 0;
  1044. machine_endian_longlong_map[1] = 1;
  1045. machine_endian_longlong_map[2] = 2;
  1046. machine_endian_longlong_map[3] = 3;
  1047. machine_endian_longlong_map[4] = 4;
  1048. machine_endian_longlong_map[5] = 5;
  1049. machine_endian_longlong_map[6] = 6;
  1050. machine_endian_longlong_map[7] = 7;
  1051. big_endian_longlong_map[0] = 7;
  1052. big_endian_longlong_map[1] = 6;
  1053. big_endian_longlong_map[2] = 5;
  1054. big_endian_longlong_map[3] = 4;
  1055. big_endian_longlong_map[4] = 3;
  1056. big_endian_longlong_map[5] = 2;
  1057. big_endian_longlong_map[6] = 1;
  1058. big_endian_longlong_map[7] = 0;
  1059. little_endian_longlong_map[0] = 0;
  1060. little_endian_longlong_map[1] = 1;
  1061. little_endian_longlong_map[2] = 2;
  1062. little_endian_longlong_map[3] = 3;
  1063. little_endian_longlong_map[4] = 4;
  1064. little_endian_longlong_map[5] = 5;
  1065. little_endian_longlong_map[6] = 6;
  1066. little_endian_longlong_map[7] = 7;
  1067. #endif
  1068. }
  1069. else {
  1070. zval val;
  1071. int size = sizeof(Z_LVAL(val));
  1072. Z_LVAL(val)=0; /*silence a warning*/
  1073. /* Where to get hi to lo bytes from */
  1074. byte_map[0] = size - 1;
  1075. for (i = 0; i < (int)sizeof(int); i++) {
  1076. int_map[i] = size - (sizeof(int) - i);
  1077. }
  1078. machine_endian_short_map[0] = size - 2;
  1079. machine_endian_short_map[1] = size - 1;
  1080. big_endian_short_map[0] = size - 2;
  1081. big_endian_short_map[1] = size - 1;
  1082. little_endian_short_map[0] = size - 1;
  1083. little_endian_short_map[1] = size - 2;
  1084. machine_endian_long_map[0] = size - 4;
  1085. machine_endian_long_map[1] = size - 3;
  1086. machine_endian_long_map[2] = size - 2;
  1087. machine_endian_long_map[3] = size - 1;
  1088. big_endian_long_map[0] = size - 4;
  1089. big_endian_long_map[1] = size - 3;
  1090. big_endian_long_map[2] = size - 2;
  1091. big_endian_long_map[3] = size - 1;
  1092. little_endian_long_map[0] = size - 1;
  1093. little_endian_long_map[1] = size - 2;
  1094. little_endian_long_map[2] = size - 3;
  1095. little_endian_long_map[3] = size - 4;
  1096. #if SIZEOF_ZEND_LONG > 4
  1097. machine_endian_longlong_map[0] = size - 8;
  1098. machine_endian_longlong_map[1] = size - 7;
  1099. machine_endian_longlong_map[2] = size - 6;
  1100. machine_endian_longlong_map[3] = size - 5;
  1101. machine_endian_longlong_map[4] = size - 4;
  1102. machine_endian_longlong_map[5] = size - 3;
  1103. machine_endian_longlong_map[6] = size - 2;
  1104. machine_endian_longlong_map[7] = size - 1;
  1105. big_endian_longlong_map[0] = size - 8;
  1106. big_endian_longlong_map[1] = size - 7;
  1107. big_endian_longlong_map[2] = size - 6;
  1108. big_endian_longlong_map[3] = size - 5;
  1109. big_endian_longlong_map[4] = size - 4;
  1110. big_endian_longlong_map[5] = size - 3;
  1111. big_endian_longlong_map[6] = size - 2;
  1112. big_endian_longlong_map[7] = size - 1;
  1113. little_endian_longlong_map[0] = size - 1;
  1114. little_endian_longlong_map[1] = size - 2;
  1115. little_endian_longlong_map[2] = size - 3;
  1116. little_endian_longlong_map[3] = size - 4;
  1117. little_endian_longlong_map[4] = size - 5;
  1118. little_endian_longlong_map[5] = size - 6;
  1119. little_endian_longlong_map[6] = size - 7;
  1120. little_endian_longlong_map[7] = size - 8;
  1121. #endif
  1122. }
  1123. return SUCCESS;
  1124. }
  1125. /* }}} */