compress.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992
  1. /*
  2. * Copyright (c) Ian F. Darwin 1986-1995.
  3. * Software written by Ian F. Darwin and others;
  4. * maintained 1995-present by Christos Zoulas and others.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice immediately at the beginning of the file, without modification,
  11. * this list of conditions, and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  20. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
  28. /*
  29. * compress routines:
  30. * zmagic() - returns 0 if not recognized, uncompresses and prints
  31. * information if recognized
  32. * uncompress(method, old, n, newch) - uncompress old into new,
  33. * using method, return sizeof new
  34. */
  35. #include "file.h"
  36. #ifndef lint
  37. FILE_RCSID("@(#)$File: compress.c,v 1.129 2020/12/08 21:26:00 christos Exp $")
  38. #endif
  39. #include "magic.h"
  40. #include <stdlib.h>
  41. #ifdef HAVE_UNISTD_H
  42. #include <unistd.h>
  43. #endif
  44. #include <string.h>
  45. #include <errno.h>
  46. #include <ctype.h>
  47. #include <stdarg.h>
  48. #include <signal.h>
  49. #ifndef HAVE_SIG_T
  50. typedef void (*sig_t)(int);
  51. #endif /* HAVE_SIG_T */
  52. #ifndef PHP_WIN32
  53. #include <sys/ioctl.h>
  54. #endif
  55. #ifdef HAVE_SYS_WAIT_H
  56. #include <sys/wait.h>
  57. #endif
  58. #if defined(HAVE_SYS_TIME_H)
  59. #include <sys/time.h>
  60. #endif
  61. #if defined(HAVE_ZLIB_H) && defined(PHP_FILEINFO_UNCOMPRESS)
  62. #define BUILTIN_DECOMPRESS
  63. #include <zlib.h>
  64. #endif
  65. #undef FIONREAD
  66. #if defined(PHP_FILEINFO_UNCOMPRESS)
  67. #define BUILTIN_BZLIB
  68. #include <bzlib.h>
  69. #endif
  70. #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
  71. #define BUILTIN_XZLIB
  72. #include <lzma.h>
  73. #endif
  74. #ifdef DEBUG
  75. int tty = -1;
  76. #define DPRINTF(...) do { \
  77. if (tty == -1) \
  78. tty = open("/dev/tty", O_RDWR); \
  79. if (tty == -1) \
  80. abort(); \
  81. dprintf(tty, __VA_ARGS__); \
  82. } while (/*CONSTCOND*/0)
  83. #else
  84. #define DPRINTF(...)
  85. #endif
  86. #ifdef ZLIBSUPPORT
  87. /*
  88. * The following python code is not really used because ZLIBSUPPORT is only
  89. * defined if we have a built-in zlib, and the built-in zlib handles that.
  90. * That is not true for android where we have zlib.h and not -lz.
  91. */
  92. static const char zlibcode[] =
  93. "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
  94. static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
  /*
   * Decide whether buf starts with a plausible zlib (RFC 1950) stream header.
   *
   * buf must hold at least 2 readable bytes (the compr[] entry for this
   * function declares a minimum length of 2).
   *
   * Returns 1 when the header looks like zlib, 0 otherwise.
   */
  static int
  zlibcmp(const unsigned char *buf)
  {
      unsigned int header;

      /* low nibble must be 8 and the top bit of the first byte must be clear */
      if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
          return 0;

      /*
       * RFC 1950: the two header bytes, read as a big-endian 16-bit
       * number (first byte * 256 + second byte), must be a multiple of 31.
       * This is pure arithmetic on byte values, so it must not depend on
       * the byte order of the host; the previous host-endianness test made
       * big-endian machines evaluate the bytes in the wrong order.
       */
      header = ((unsigned int)buf[0] << 8) | buf[1];
      if (header % 31 != 0)
          return 0;
      return 1;
  }
  110. #endif
  111. #ifdef PHP_FILEINFO_UNCOMPRESS
  /*
   * Heuristic check for a raw LZMA stream header.
   *
   * Accepts the buffer when byte 0 is 0x5d, bytes 1 and 2 are zero and
   * byte 12 is either 0x00 or 0xff.  buf must hold at least 13 readable
   * bytes; byte 12 is only inspected once the first three bytes matched.
   *
   * Returns 1 on a match, 0 otherwise.
   */
  static int
  lzmacmp(const unsigned char *buf)
  {
      const int head_ok = buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0;

      if (!head_ok)
          return 0;

      switch (buf[12]) {
      case 0x00:
      case 0xff:
          return 1;
      default:
          return 0;
      }
  }
  121. #define gzip_flags "-cd"
  122. #define lrzip_flags "-do"
  123. #define lzip_flags gzip_flags
  124. static const char *gzip_args[] = {
  125. "gzip", gzip_flags, NULL
  126. };
  127. static const char *uncompress_args[] = {
  128. "uncompress", "-c", NULL
  129. };
  130. static const char *bzip2_args[] = {
  131. "bzip2", "-cd", NULL
  132. };
  133. static const char *lzip_args[] = {
  134. "lzip", lzip_flags, NULL
  135. };
  136. static const char *xz_args[] = {
  137. "xz", "-cd", NULL
  138. };
  139. static const char *lrzip_args[] = {
  140. "lrzip", lrzip_flags, NULL
  141. };
  142. static const char *lz4_args[] = {
  143. "lz4", "-cd", NULL
  144. };
  145. static const char *zstd_args[] = {
  146. "zstd", "-cd", NULL
  147. };
  148. #define do_zlib NULL
  149. #define do_bzlib NULL
  150. private const struct {
  151. union {
  152. const char *magic;
  153. int (*func)(const unsigned char *);
  154. } u;
  155. int maglen;
  156. const char **argv;
  157. void *unused;
  158. } compr[] = {
  159. #define METH_FROZEN 2
  160. #define METH_BZIP 7
  161. #define METH_XZ 9
  162. #define METH_LZMA 13
  163. #define METH_ZLIB 14
  164. { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */
  165. /* Uncompress can get stuck; so use gzip first if we have it
  166. * Idea from Damien Clark, thanks! */
  167. { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */
  168. { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */
  169. { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */
  170. { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */
  171. /* the standard pack utilities do not accept standard input */
  172. { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */
  173. { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */
  174. /* ...only first file examined */
  175. { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
  176. { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */
  177. { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */
  178. { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */
  179. { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */
  180. { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
  181. { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */
  182. #ifdef ZLIBSUPPORT
  183. { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */
  184. #endif
  185. };
  186. #define OKDATA 0
  187. #define NODATA 1
  188. #define ERRDATA 2
  189. private ssize_t swrite(int, const void *, size_t);
  190. #if HAVE_FORK
  191. private size_t ncompr = __arraycount(compr);
  192. private int uncompressbuf(int, size_t, size_t, const unsigned char *,
  193. unsigned char **, size_t *);
  194. #ifdef BUILTIN_DECOMPRESS
  195. private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
  196. size_t *, int);
  197. private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
  198. size_t *);
  199. #endif
  200. #ifdef BUILTIN_BZLIB
  201. private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
  202. size_t *);
  203. #endif
  204. #ifdef BUILTIN_XZLIB
  205. private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
  206. size_t *);
  207. #endif
  208. static int makeerror(unsigned char **, size_t *, const char *, ...);
  209. private const char *methodname(size_t);
  210. private int
  211. format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
  212. {
  213. unsigned char *p;
  214. int mime = ms->flags & MAGIC_MIME;
  215. if (!mime)
  216. return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
  217. for (p = buf; *p; p++)
  218. if (!isalnum(*p))
  219. *p = '-';
  220. return file_printf(ms, "application/x-decompression-error-%s-%s",
  221. methodname(i), buf);
  222. }
  223. protected int
  224. file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
  225. {
  226. unsigned char *newbuf = NULL;
  227. size_t i, nsz;
  228. char *rbuf;
  229. file_pushbuf_t *pb;
  230. int urv, prv, rv = 0;
  231. int mime = ms->flags & MAGIC_MIME;
  232. int fd = b->fd;
  233. const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
  234. size_t nbytes = b->flen;
  235. int sa_saved = 0;
  236. struct sigaction sig_act;
  237. if ((ms->flags & MAGIC_COMPRESS) == 0)
  238. return 0;
  239. for (i = 0; i < ncompr; i++) {
  240. int zm;
  241. if (nbytes < CAST(size_t, abs(compr[i].maglen)))
  242. continue;
  243. if (compr[i].maglen < 0) {
  244. zm = (*compr[i].u.func)(buf);
  245. } else {
  246. zm = memcmp(buf, compr[i].u.magic,
  247. CAST(size_t, compr[i].maglen)) == 0;
  248. }
  249. if (!zm)
  250. continue;
  251. /* Prevent SIGPIPE death if child dies unexpectedly */
  252. if (!sa_saved) {
  253. //We can use sig_act for both new and old, but
  254. struct sigaction new_act;
  255. memset(&new_act, 0, sizeof(new_act));
  256. new_act.sa_handler = SIG_IGN;
  257. sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
  258. }
  259. nsz = nbytes;
  260. urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
  261. DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
  262. (char *)newbuf, nsz);
  263. switch (urv) {
  264. case OKDATA:
  265. case ERRDATA:
  266. ms->flags &= ~MAGIC_COMPRESS;
  267. if (urv == ERRDATA)
  268. prv = format_decompression_error(ms, i, newbuf);
  269. else
  270. prv = file_buffer(ms, NULL, NULL, name, newbuf, nsz);
  271. if (prv == -1)
  272. goto error;
  273. rv = 1;
  274. if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
  275. goto out;
  276. if (mime != MAGIC_MIME && mime != 0)
  277. goto out;
  278. if ((file_printf(ms,
  279. mime ? " compressed-encoding=" : " (")) == -1)
  280. goto error;
  281. if ((pb = file_push_buffer(ms)) == NULL)
  282. goto error;
  283. /*
  284. * XXX: If file_buffer fails here, we overwrite
  285. * the compressed text. FIXME.
  286. */
  287. if (file_buffer(ms, NULL, NULL, NULL, buf, nbytes) == -1) {
  288. if (file_pop_buffer(ms, pb) != NULL)
  289. abort();
  290. goto error;
  291. }
  292. if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
  293. if (file_printf(ms, "%s", rbuf) == -1) {
  294. efree(rbuf);
  295. goto error;
  296. }
  297. efree(rbuf);
  298. }
  299. if (!mime && file_printf(ms, ")") == -1)
  300. goto error;
  301. /*FALLTHROUGH*/
  302. case NODATA:
  303. break;
  304. default:
  305. abort();
  306. /*NOTREACHED*/
  307. error:
  308. rv = -1;
  309. break;
  310. }
  311. }
  312. out:
  313. DPRINTF("rv = %d\n", rv);
  314. if (sa_saved && sig_act.sa_handler != SIG_IGN)
  315. (void)sigaction(SIGPIPE, &sig_act, NULL);
  316. if (newbuf)
  317. efree(newbuf);
  318. ms->flags |= MAGIC_COMPRESS;
  319. DPRINTF("Zmagic returns %d\n", rv);
  320. return rv;
  321. }
  322. #endif
  323. /*
  324. * `safe' write for sockets and pipes.
  325. */
  326. private ssize_t
  327. swrite(int fd, const void *buf, size_t n)
  328. {
  329. ssize_t rv;
  330. size_t rn = n;
  331. do
  332. switch (rv = write(fd, buf, n)) {
  333. case -1:
  334. if (errno == EINTR)
  335. continue;
  336. return -1;
  337. default:
  338. n -= rv;
  339. buf = CAST(const char *, buf) + rv;
  340. break;
  341. }
  342. while (n > 0);
  343. return rn;
  344. }
  345. /*
  346. * `safe' read for sockets and pipes.
  347. */
  348. protected ssize_t
  349. sread(int fd, void *buf, size_t n, int canbepipe)
  350. {
  351. ssize_t rv;
  352. #ifdef FIONREAD
  353. int t = 0;
  354. #endif
  355. size_t rn = n;
  356. if (fd == STDIN_FILENO)
  357. goto nocheck;
  358. #ifdef FIONREAD
  359. if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
  360. #ifdef FD_ZERO
  361. ssize_t cnt;
  362. for (cnt = 0;; cnt++) {
  363. fd_set check;
  364. struct timeval tout = {0, 100 * 1000};
  365. int selrv;
  366. FD_ZERO(&check);
  367. FD_SET(fd, &check);
  368. /*
  369. * Avoid soft deadlock: do not read if there
  370. * is nothing to read from sockets and pipes.
  371. */
  372. selrv = select(fd + 1, &check, NULL, NULL, &tout);
  373. if (selrv == -1) {
  374. if (errno == EINTR || errno == EAGAIN)
  375. continue;
  376. } else if (selrv == 0 && cnt >= 5) {
  377. return 0;
  378. } else
  379. break;
  380. }
  381. #endif
  382. (void)ioctl(fd, FIONREAD, &t);
  383. }
  384. if (t > 0 && CAST(size_t, t) < n) {
  385. n = t;
  386. rn = n;
  387. }
  388. #endif
  389. nocheck:
  390. do
  391. switch ((rv = FINFO_READ_FUNC(fd, buf, n))) {
  392. case -1:
  393. if (errno == EINTR)
  394. continue;
  395. return -1;
  396. case 0:
  397. return rn - n;
  398. default:
  399. n -= rv;
  400. buf = CAST(char *, CCAST(void *, buf)) + rv;
  401. break;
  402. }
  403. while (n > 0);
  404. return rn;
  405. }
  406. protected int
  407. file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
  408. size_t nbytes)
  409. {
  410. char buf[4096];
  411. ssize_t r;
  412. int tfd;
  413. (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
  414. #ifndef HAVE_MKSTEMP
  415. {
  416. char *ptr = mktemp(buf);
  417. tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
  418. r = errno;
  419. (void)unlink(ptr);
  420. errno = r;
  421. }
  422. #else
  423. {
  424. int te;
  425. mode_t ou = umask(0);
  426. tfd = mkstemp(buf);
  427. (void)umask(ou);
  428. te = errno;
  429. (void)unlink(buf);
  430. errno = te;
  431. }
  432. #endif
  433. if (tfd == -1) {
  434. file_error(ms, errno,
  435. "cannot create temporary file for pipe copy");
  436. return -1;
  437. }
  438. if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
  439. r = 1;
  440. else {
  441. while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
  442. if (swrite(tfd, buf, CAST(size_t, r)) != r)
  443. break;
  444. }
  445. switch (r) {
  446. case -1:
  447. file_error(ms, errno, "error copying from pipe to temp file");
  448. return -1;
  449. case 0:
  450. break;
  451. default:
  452. file_error(ms, errno, "error while writing to temp file");
  453. return -1;
  454. }
  455. /*
  456. * We duplicate the file descriptor, because fclose on a
  457. * tmpfile will delete the file, but any open descriptors
  458. * can still access the phantom inode.
  459. */
  460. if ((fd = dup2(tfd, fd)) == -1) {
  461. file_error(ms, errno, "could not dup descriptor for temp file");
  462. return -1;
  463. }
  464. (void)close(tfd);
  465. if (FINFO_LSEEK_FUNC(fd, (zend_off_t)0, SEEK_SET) == (zend_off_t)-1) {
  466. file_badseek(ms);
  467. return -1;
  468. }
  469. return fd;
  470. }
  471. #ifdef PHP_FILEINFO_UNCOMPRESS
  472. #ifdef BUILTIN_DECOMPRESS
  473. #define FHCRC (1 << 1)
  474. #define FEXTRA (1 << 2)
  475. #define FNAME (1 << 3)
  476. #define FCOMMENT (1 << 4)
  477. private int
  478. uncompressgzipped(const unsigned char *old, unsigned char **newch,
  479. size_t bytes_max, size_t *n)
  480. {
  481. unsigned char flg = old[3];
  482. size_t data_start = 10;
  483. if (flg & FEXTRA) {
  484. if (data_start + 1 >= *n)
  485. goto err;
  486. data_start += 2 + old[data_start] + old[data_start + 1] * 256;
  487. }
  488. if (flg & FNAME) {
  489. while(data_start < *n && old[data_start])
  490. data_start++;
  491. data_start++;
  492. }
  493. if (flg & FCOMMENT) {
  494. while(data_start < *n && old[data_start])
  495. data_start++;
  496. data_start++;
  497. }
  498. if (flg & FHCRC)
  499. data_start += 2;
  500. if (data_start >= *n)
  501. goto err;
  502. *n -= data_start;
  503. old += data_start;
  504. return uncompresszlib(old, newch, bytes_max, n, 0);
  505. err:
  506. return makeerror(newch, n, "File too short");
  507. }
  508. private int
  509. uncompresszlib(const unsigned char *old, unsigned char **newch,
  510. size_t bytes_max, size_t *n, int zlib)
  511. {
  512. int rc;
  513. z_stream z;
  514. if ((*newch = CAST(unsigned char *, emalloc(bytes_max + 1))) == NULL)
  515. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  516. z.next_in = CCAST(Bytef *, old);
  517. z.avail_in = CAST(uint32_t, *n);
  518. z.next_out = *newch;
  519. z.avail_out = CAST(unsigned int, bytes_max);
  520. z.zalloc = Z_NULL;
  521. z.zfree = Z_NULL;
  522. z.opaque = Z_NULL;
  523. /* LINTED bug in header macro */
  524. rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
  525. if (rc != Z_OK)
  526. goto err;
  527. rc = inflate(&z, Z_SYNC_FLUSH);
  528. if (rc != Z_OK && rc != Z_STREAM_END)
  529. goto err;
  530. *n = CAST(size_t, z.total_out);
  531. rc = inflateEnd(&z);
  532. if (rc != Z_OK)
  533. goto err;
  534. /* let's keep the nul-terminate tradition */
  535. (*newch)[*n] = '\0';
  536. return OKDATA;
  537. err:
  538. strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max);
  539. *n = strlen(RCAST(char *, *newch));
  540. return ERRDATA;
  541. }
  542. #endif
  543. #ifdef BUILTIN_BZLIB
  544. private int
  545. uncompressbzlib(const unsigned char *old, unsigned char **newch,
  546. size_t bytes_max, size_t *n)
  547. {
  548. int rc;
  549. bz_stream bz;
  550. memset(&bz, 0, sizeof(bz));
  551. rc = BZ2_bzDecompressInit(&bz, 0, 0);
  552. if (rc != BZ_OK)
  553. goto err;
  554. if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
  555. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  556. bz.next_in = CCAST(char *, RCAST(const char *, old));
  557. bz.avail_in = CAST(uint32_t, *n);
  558. bz.next_out = RCAST(char *, *newch);
  559. bz.avail_out = CAST(unsigned int, bytes_max);
  560. rc = BZ2_bzDecompress(&bz);
  561. if (rc != BZ_OK && rc != BZ_STREAM_END)
  562. goto err;
  563. /* Assume byte_max is within 32bit */
  564. /* assert(bz.total_out_hi32 == 0); */
  565. *n = CAST(size_t, bz.total_out_lo32);
  566. rc = BZ2_bzDecompressEnd(&bz);
  567. if (rc != BZ_OK)
  568. goto err;
  569. /* let's keep the nul-terminate tradition */
  570. (*newch)[*n] = '\0';
  571. return OKDATA;
  572. err:
  573. snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc);
  574. *n = strlen(RCAST(char *, *newch));
  575. return ERRDATA;
  576. }
  577. #endif
  578. #ifdef BUILTIN_XZLIB
  579. private int
  580. uncompressxzlib(const unsigned char *old, unsigned char **newch,
  581. size_t bytes_max, size_t *n)
  582. {
  583. int rc;
  584. lzma_stream xz;
  585. memset(&xz, 0, sizeof(xz));
  586. rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
  587. if (rc != LZMA_OK)
  588. goto err;
  589. if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
  590. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  591. xz.next_in = CCAST(const uint8_t *, old);
  592. xz.avail_in = CAST(uint32_t, *n);
  593. xz.next_out = RCAST(uint8_t *, *newch);
  594. xz.avail_out = CAST(unsigned int, bytes_max);
  595. rc = lzma_code(&xz, LZMA_RUN);
  596. if (rc != LZMA_OK && rc != LZMA_STREAM_END)
  597. goto err;
  598. *n = CAST(size_t, xz.total_out);
  599. lzma_end(&xz);
  600. /* let's keep the nul-terminate tradition */
  601. (*newch)[*n] = '\0';
  602. return OKDATA;
  603. err:
  604. snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc);
  605. *n = strlen(RCAST(char *, *newch));
  606. return ERRDATA;
  607. }
  608. #endif
  609. static int
  610. makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
  611. {
  612. char *msg;
  613. va_list ap;
  614. int rv;
  615. va_start(ap, fmt);
  616. rv = vasprintf(&msg, fmt, ap);
  617. va_end(ap);
  618. if (rv < 0) {
  619. *buf = NULL;
  620. *len = 0;
  621. return NODATA;
  622. }
  623. *buf = RCAST(unsigned char *, msg);
  624. *len = strlen(msg);
  625. return ERRDATA;
  626. }
  /*
   * Close descriptor fd[i] unless it is already marked closed (-1), then
   * mark it closed.
   */
  static void
  closefd(int *fd, size_t i)
  {
      int *slot = &fd[i];

      if (*slot != -1) {
          (void) close(*slot);
          *slot = -1;
      }
  }
  /*
   * Close both ends of a pipe pair; ends already marked -1 are skipped.
   */
  static void
  closep(int *fd)
  {
      closefd(fd, 0);     /* read end */
      closefd(fd, 1);     /* write end */
  }
  /*
   * Make descriptor i refer to the same open file as fd.
   *
   * Returns 0 if fd already is i (nothing to do), 1 if fd was duplicated
   * onto i, in which case the caller may close the original fd.
   * Terminates the process if dup2() fails.
   *
   * This runs in the child created by vfork(), so the process is ended
   * with _exit() rather than exit(): exit() would run atexit handlers and
   * flush stdio buffers that belong to the parent.
   */
  static int
  copydesc(int i, int fd)
  {
      if (fd == i)
          return 0; /* "no dup was necessary" */
      if (dup2(fd, i) == -1) {
          DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
          _exit(1);
      }
      return 1;
  }
  653. static pid_t
  654. writechild(int fd, const void *old, size_t n)
  655. {
  656. pid_t pid;
  657. /*
  658. * fork again, to avoid blocking because both
  659. * pipes filled
  660. */
  661. pid = fork();
  662. if (pid == -1) {
  663. DPRINTF("Fork failed (%s)\n", strerror(errno));
  664. exit(1);
  665. }
  666. if (pid == 0) {
  667. /* child */
  668. if (swrite(fd, old, n) != CAST(ssize_t, n)) {
  669. DPRINTF("Write failed (%s)\n", strerror(errno));
  670. exit(1);
  671. }
  672. exit(0);
  673. }
  674. /* parent */
  675. return pid;
  676. }
  677. static ssize_t
  678. filter_error(unsigned char *ubuf, ssize_t n)
  679. {
  680. char *p;
  681. char *buf;
  682. ubuf[n] = '\0';
  683. buf = RCAST(char *, ubuf);
  684. while (isspace(CAST(unsigned char, *buf)))
  685. buf++;
  686. DPRINTF("Filter error[[[%s]]]\n", buf);
  687. if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
  688. *p = '\0';
  689. if ((p = strchr(CAST(char *, buf), ';')) != NULL)
  690. *p = '\0';
  691. if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
  692. ++p;
  693. while (isspace(CAST(unsigned char, *p)))
  694. p++;
  695. n = strlen(p);
  696. memmove(ubuf, p, CAST(size_t, n + 1));
  697. }
  698. DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
  699. if (islower(*ubuf))
  700. *ubuf = toupper(*ubuf);
  701. return n;
  702. }
  703. private const char *
  704. methodname(size_t method)
  705. {
  706. switch (method) {
  707. #ifdef BUILTIN_DECOMPRESS
  708. case METH_FROZEN:
  709. case METH_ZLIB:
  710. return "zlib";
  711. #endif
  712. #ifdef BUILTIN_BZLIB
  713. case METH_BZIP:
  714. return "bzlib";
  715. #endif
  716. #ifdef BUILTIN_XZLIB
  717. case METH_XZ:
  718. case METH_LZMA:
  719. return "xzlib";
  720. #endif
  721. default:
  722. return compr[method].argv[0];
  723. }
  724. }
  725. private int
  726. uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
  727. unsigned char **newch, size_t* n)
  728. {
  729. int fdp[3][2];
  730. int status, rv, w;
  731. pid_t pid;
  732. pid_t writepid = -1;
  733. size_t i;
  734. ssize_t r;
  735. switch (method) {
  736. #ifdef BUILTIN_DECOMPRESS
  737. case METH_FROZEN:
  738. return uncompressgzipped(old, newch, bytes_max, n);
  739. case METH_ZLIB:
  740. return uncompresszlib(old, newch, bytes_max, n, 1);
  741. #endif
  742. #ifdef BUILTIN_BZLIB
  743. case METH_BZIP:
  744. return uncompressbzlib(old, newch, bytes_max, n);
  745. #endif
  746. #ifdef BUILTIN_XZLIB
  747. case METH_XZ:
  748. case METH_LZMA:
  749. return uncompressxzlib(old, newch, bytes_max, n);
  750. #endif
  751. default:
  752. break;
  753. }
  754. (void)fflush(stdout);
  755. (void)fflush(stderr);
  756. for (i = 0; i < __arraycount(fdp); i++)
  757. fdp[i][0] = fdp[i][1] = -1;
  758. /*
  759. * There are multithreaded users who run magic_file()
  760. * from dozens of threads. If two parallel magic_file() calls
  761. * analyze two large compressed files, both will spawn
  762. * an uncompressing child here, which writes out uncompressed data.
  763. * We read some portion, then close the pipe, then waitpid() the child.
  764. * If uncompressed data is larger, child shound get EPIPE and exit.
  765. * However, with *parallel* calls OTHER child may unintentionally
  766. * inherit pipe fds, thus keeping pipe open and making writes in
  767. * our child block instead of failing with EPIPE!
  768. * (For the bug to occur, two threads must mutually inherit their pipes,
  769. * and both must have large outputs. Thus it happens not that often).
  770. * To avoid this, be sure to create pipes with O_CLOEXEC.
  771. */
  772. if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
  773. file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
  774. file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
  775. closep(fdp[STDIN_FILENO]);
  776. closep(fdp[STDOUT_FILENO]);
  777. return makeerror(newch, n, "Cannot create pipe, %s",
  778. strerror(errno));
  779. }
  780. /* For processes with large mapped virtual sizes, vfork
  781. * may be _much_ faster (10-100 times) than fork.
  782. */
  783. pid = vfork();
  784. if (pid == -1) {
  785. return makeerror(newch, n, "Cannot vfork, %s",
  786. strerror(errno));
  787. }
  788. if (pid == 0) {
  789. /* child */
  790. /* Note: we are after vfork, do not modify memory
  791. * in a way which confuses parent. In particular,
  792. * do not modify fdp[i][j].
  793. */
  794. if (fd != -1) {
  795. (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
  796. if (copydesc(STDIN_FILENO, fd))
  797. (void) close(fd);
  798. } else {
  799. if (copydesc(STDIN_FILENO, fdp[STDIN_FILENO][0]))
  800. (void) close(fdp[STDIN_FILENO][0]);
  801. if (fdp[STDIN_FILENO][1] > 2)
  802. (void) close(fdp[STDIN_FILENO][1]);
  803. }
  804. file_clear_closexec(STDIN_FILENO);
  805. ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
  806. if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1]))
  807. (void) close(fdp[STDOUT_FILENO][1]);
  808. if (fdp[STDOUT_FILENO][0] > 2)
  809. (void) close(fdp[STDOUT_FILENO][0]);
  810. file_clear_closexec(STDOUT_FILENO);
  811. if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1]))
  812. (void) close(fdp[STDERR_FILENO][1]);
  813. if (fdp[STDERR_FILENO][0] > 2)
  814. (void) close(fdp[STDERR_FILENO][0]);
  815. file_clear_closexec(STDERR_FILENO);
  816. (void)execvp(compr[method].argv[0],
  817. RCAST(char *const *, RCAST(intptr_t, compr[method].argv)));
  818. dprintf(STDERR_FILENO, "exec `%s' failed, %s",
  819. compr[method].argv[0], strerror(errno));
  820. _exit(1); /* _exit(), not exit(), because of vfork */
  821. }
  822. /* parent */
  823. /* Close write sides of child stdout/err pipes */
  824. for (i = 1; i < __arraycount(fdp); i++)
  825. closefd(fdp[i], 1);
  826. /* Write the buffer data to child stdin, if we don't have fd */
  827. if (fd == -1) {
  828. closefd(fdp[STDIN_FILENO], 0);
  829. writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
  830. closefd(fdp[STDIN_FILENO], 1);
  831. }
  832. *newch = CAST(unsigned char *, malloc(bytes_max + 1));
  833. if (*newch == NULL) {
  834. rv = makeerror(newch, n, "No buffer, %s",
  835. strerror(errno));
  836. goto err;
  837. }
  838. rv = OKDATA;
  839. r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
  840. if (r <= 0) {
  841. DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
  842. r != -1 ? strerror(errno) : "no data");
  843. rv = ERRDATA;
  844. if (r == 0 &&
  845. (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
  846. {
  847. r = filter_error(*newch, r);
  848. goto ok;
  849. }
  850. free(*newch);
  851. if (r == 0)
  852. rv = makeerror(newch, n, "Read failed, %s",
  853. strerror(errno));
  854. else
  855. rv = makeerror(newch, n, "No data");
  856. goto err;
  857. }
  858. ok:
  859. *n = r;
  860. /* NUL terminate, as every buffer is handled here. */
  861. (*newch)[*n] = '\0';
  862. err:
  863. closefd(fdp[STDIN_FILENO], 1);
  864. closefd(fdp[STDOUT_FILENO], 0);
  865. closefd(fdp[STDERR_FILENO], 0);
  866. w = waitpid(pid, &status, 0);
  867. wait_err:
  868. if (w == -1) {
  869. free(*newch);
  870. rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
  871. DPRINTF("Child wait return %#x\n", status);
  872. } else if (!WIFEXITED(status)) {
  873. DPRINTF("Child not exited (%#x)\n", status);
  874. } else if (WEXITSTATUS(status) != 0) {
  875. DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
  876. }
  877. if (writepid > 0) {
  878. /* _After_ we know decompressor has exited, our input writer
  879. * definitely will exit now (at worst, writing fails in it,
  880. * since output fd is closed now on the reading size).
  881. */
  882. w = waitpid(writepid, &status, 0);
  883. writepid = -1;
  884. goto wait_err;
  885. }
  886. closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
  887. DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
  888. return rv;
  889. }
  890. #endif
  891. #endif