compress.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505
  1. /*
  2. * Copyright (c) Ian F. Darwin 1986-1995.
  3. * Software written by Ian F. Darwin and others;
  4. * maintained 1995-present by Christos Zoulas and others.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice immediately at the beginning of the file, without modification,
  11. * this list of conditions, and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  20. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
  /*
   * compress routines:
   * file_zmagic() - returns 0 if the data is not recognized as compressed;
   *     otherwise uncompresses it and prints information about it,
   *     returning 1 on success and -1 on error
   * uncompressbuf(ms, fd, method, old, newch, n) - uncompress old into
   *     *newch using compr[method], return the size of the new data
   */
  35. #include "config.h"
  36. #include "file.h"
  37. #ifndef lint
  38. FILE_RCSID("@(#)$File: compress.c,v 1.73 2014/01/05 15:55:21 christos Exp $")
  39. #endif
  40. #include "magic.h"
  41. #include <stdlib.h>
  42. #ifdef HAVE_UNISTD_H
  43. #include <unistd.h>
  44. #endif
  45. #include <string.h>
  46. #include <errno.h>
  47. #include <sys/types.h>
  48. #ifndef PHP_WIN32
  49. #include <sys/ioctl.h>
  50. #endif
  51. #ifdef HAVE_SYS_WAIT_H
  52. #include <sys/wait.h>
  53. #endif
  54. #if defined(HAVE_SYS_TIME_H)
  55. #include <sys/time.h>
  56. #endif
  57. #if defined(HAVE_ZLIB_H) && defined(HAVE_LIBZ)
  58. #define BUILTIN_DECOMPRESS
  59. #include <zlib.h>
  60. #endif
  61. #undef FIONREAD
  62. private const struct {
  63. const char magic[8];
  64. size_t maglen;
  65. const char *argv[3];
  66. int silent;
  67. } compr[] = {
  68. { "\037\235", 2, { "gzip", "-cdq", NULL }, 1 }, /* compressed */
  69. /* Uncompress can get stuck; so use gzip first if we have it
  70. * Idea from Damien Clark, thanks! */
  71. { "\037\235", 2, { "uncompress", "-c", NULL }, 1 }, /* compressed */
  72. { "\037\213", 2, { "gzip", "-cdq", NULL }, 1 }, /* gzipped */
  73. { "\037\236", 2, { "gzip", "-cdq", NULL }, 1 }, /* frozen */
  74. { "\037\240", 2, { "gzip", "-cdq", NULL }, 1 }, /* SCO LZH */
  75. /* the standard pack utilities do not accept standard input */
  76. { "\037\036", 2, { "gzip", "-cdq", NULL }, 0 }, /* packed */
  77. { "PK\3\4", 4, { "gzip", "-cdq", NULL }, 1 }, /* pkzipped, */
  78. /* ...only first file examined */
  79. { "BZh", 3, { "bzip2", "-cd", NULL }, 1 }, /* bzip2-ed */
  80. { "LZIP", 4, { "lzip", "-cdq", NULL }, 1 },
  81. { "\3757zXZ\0",6,{ "xz", "-cd", NULL }, 1 }, /* XZ Utils */
  82. { "LRZI", 4, { "lrzip", "-dqo-", NULL }, 1 }, /* LRZIP */
  83. { "\004\"M\030", 4, { "lz4", "-cd", NULL }, 1 }, /* LZ4 */
  84. };
  85. #define NODATA ((size_t)~0)
  86. private ssize_t swrite(int, const void *, size_t);
  87. #ifdef PHP_FILEINFO_UNCOMPRESS
  88. private size_t uncompressbuf(struct magic_set *, int, size_t,
  89. const unsigned char *, unsigned char **, size_t);
  90. #ifdef BUILTIN_DECOMPRESS
  91. private size_t uncompressgzipped(struct magic_set *, const unsigned char *,
  92. unsigned char **, size_t);
  93. #endif
  94. protected int
  95. file_zmagic(struct magic_set *ms, int fd, const char *name,
  96. const unsigned char *buf, size_t nbytes)
  97. {
  98. unsigned char *newbuf = NULL;
  99. size_t i, nsz;
  100. int rv = 0;
  101. int mime = ms->flags & MAGIC_MIME;
  102. size_t ncompr;
  103. if ((ms->flags & MAGIC_COMPRESS) == 0)
  104. return 0;
  105. ncompr = sizeof(compr) / sizeof(compr[0]);
  106. for (i = 0; i < ncompr; i++) {
  107. if (nbytes < compr[i].maglen)
  108. continue;
  109. if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 &&
  110. (nsz = uncompressbuf(ms, fd, i, buf, &newbuf,
  111. nbytes)) != NODATA) {
  112. ms->flags &= ~MAGIC_COMPRESS;
  113. rv = -1;
  114. if (file_buffer(ms, -1, name, newbuf, nsz) == -1)
  115. goto error;
  116. if (mime == MAGIC_MIME || mime == 0) {
  117. if (file_printf(ms, mime ?
  118. " compressed-encoding=" : " (") == -1)
  119. goto error;
  120. if (file_buffer(ms, -1, NULL, buf, nbytes) == -1)
  121. goto error;
  122. if (!mime && file_printf(ms, ")") == -1)
  123. goto error;
  124. }
  125. rv = 1;
  126. break;
  127. }
  128. }
  129. error:
  130. if (newbuf)
  131. efree(newbuf);
  132. ms->flags |= MAGIC_COMPRESS;
  133. return rv;
  134. }
  135. #endif
  136. /*
  137. * `safe' write for sockets and pipes.
  138. */
  139. private ssize_t
  140. swrite(int fd, const void *buf, size_t n)
  141. {
  142. ssize_t rv;
  143. size_t rn = n;
  144. do
  145. switch (rv = write(fd, buf, n)) {
  146. case -1:
  147. if (errno == EINTR)
  148. continue;
  149. return -1;
  150. default:
  151. n -= rv;
  152. buf = CAST(const char *, buf) + rv;
  153. break;
  154. }
  155. while (n > 0);
  156. return rn;
  157. }
  158. /*
  159. * `safe' read for sockets and pipes.
  160. */
  161. protected ssize_t
  162. sread(int fd, void *buf, size_t n, int canbepipe)
  163. {
  164. ssize_t rv;
  165. #ifdef FIONREAD
  166. int t = 0;
  167. #endif
  168. size_t rn = n;
  169. if (fd == STDIN_FILENO)
  170. goto nocheck;
  171. #ifdef FIONREAD
  172. if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
  173. #ifdef FD_ZERO
  174. ssize_t cnt;
  175. for (cnt = 0;; cnt++) {
  176. fd_set check;
  177. struct timeval tout = {0, 100 * 1000};
  178. int selrv;
  179. FD_ZERO(&check);
  180. FD_SET(fd, &check);
  181. /*
  182. * Avoid soft deadlock: do not read if there
  183. * is nothing to read from sockets and pipes.
  184. */
  185. selrv = select(fd + 1, &check, NULL, NULL, &tout);
  186. if (selrv == -1) {
  187. if (errno == EINTR || errno == EAGAIN)
  188. continue;
  189. } else if (selrv == 0 && cnt >= 5) {
  190. return 0;
  191. } else
  192. break;
  193. }
  194. #endif
  195. (void)ioctl(fd, FIONREAD, &t);
  196. }
  197. if (t > 0 && (size_t)t < n) {
  198. n = t;
  199. rn = n;
  200. }
  201. #endif
  202. nocheck:
  203. do
  204. switch ((rv = FINFO_READ_FUNC(fd, buf, n))) {
  205. case -1:
  206. if (errno == EINTR)
  207. continue;
  208. return -1;
  209. case 0:
  210. return rn - n;
  211. default:
  212. n -= rv;
  213. buf = ((char *)buf) + rv;
  214. break;
  215. }
  216. while (n > 0);
  217. return rn;
  218. }
  219. protected int
  220. file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
  221. size_t nbytes)
  222. {
  223. char buf[4096];
  224. ssize_t r;
  225. int tfd;
  226. (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
  227. #ifndef HAVE_MKSTEMP
  228. {
  229. char *ptr = mktemp(buf);
  230. tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
  231. r = errno;
  232. (void)unlink(ptr);
  233. errno = r;
  234. }
  235. #else
  236. {
  237. int te;
  238. tfd = mkstemp(buf);
  239. te = errno;
  240. (void)unlink(buf);
  241. errno = te;
  242. }
  243. #endif
  244. if (tfd == -1) {
  245. file_error(ms, errno,
  246. "cannot create temporary file for pipe copy");
  247. return -1;
  248. }
  249. if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes)
  250. r = 1;
  251. else {
  252. while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
  253. if (swrite(tfd, buf, (size_t)r) != r)
  254. break;
  255. }
  256. switch (r) {
  257. case -1:
  258. file_error(ms, errno, "error copying from pipe to temp file");
  259. return -1;
  260. case 0:
  261. break;
  262. default:
  263. file_error(ms, errno, "error while writing to temp file");
  264. return -1;
  265. }
  266. /*
  267. * We duplicate the file descriptor, because fclose on a
  268. * tmpfile will delete the file, but any open descriptors
  269. * can still access the phantom inode.
  270. */
  271. if ((fd = dup2(tfd, fd)) == -1) {
  272. file_error(ms, errno, "could not dup descriptor for temp file");
  273. return -1;
  274. }
  275. (void)close(tfd);
  276. if (FINFO_LSEEK_FUNC(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
  277. file_badseek(ms);
  278. return -1;
  279. }
  280. return fd;
  281. }
  282. #ifdef PHP_FILEINFO_UNCOMPRESS
  283. #ifdef BUILTIN_DECOMPRESS
  284. #define FHCRC (1 << 1)
  285. #define FEXTRA (1 << 2)
  286. #define FNAME (1 << 3)
  287. #define FCOMMENT (1 << 4)
  288. private size_t
  289. uncompressgzipped(struct magic_set *ms, const unsigned char *old,
  290. unsigned char **newch, size_t n)
  291. {
  292. unsigned char flg = old[3];
  293. size_t data_start = 10;
  294. z_stream z;
  295. int rc;
  296. if (flg & FEXTRA) {
  297. if (data_start+1 >= n)
  298. return 0;
  299. data_start += 2 + old[data_start] + old[data_start + 1] * 256;
  300. }
  301. if (flg & FNAME) {
  302. while(data_start < n && old[data_start])
  303. data_start++;
  304. data_start++;
  305. }
  306. if(flg & FCOMMENT) {
  307. while(data_start < n && old[data_start])
  308. data_start++;
  309. data_start++;
  310. }
  311. if(flg & FHCRC)
  312. data_start += 2;
  313. if (data_start >= n)
  314. return 0;
  315. if ((*newch = CAST(unsigned char *, emalloc(HOWMANY + 1))) == NULL) {
  316. return 0;
  317. }
  318. /* XXX: const castaway, via strchr */
  319. z.next_in = (Bytef *)strchr((const char *)old + data_start,
  320. old[data_start]);
  321. z.avail_in = CAST(uint32_t, (n - data_start));
  322. z.next_out = *newch;
  323. z.avail_out = HOWMANY;
  324. z.zalloc = Z_NULL;
  325. z.zfree = Z_NULL;
  326. z.opaque = Z_NULL;
  327. /* LINTED bug in header macro */
  328. rc = inflateInit2(&z, -15);
  329. if (rc != Z_OK) {
  330. file_error(ms, 0, "zlib: %s", z.msg);
  331. return 0;
  332. }
  333. rc = inflate(&z, Z_SYNC_FLUSH);
  334. if (rc != Z_OK && rc != Z_STREAM_END) {
  335. file_error(ms, 0, "zlib: %s", z.msg);
  336. return 0;
  337. }
  338. n = (size_t)z.total_out;
  339. (void)inflateEnd(&z);
  340. /* let's keep the nul-terminate tradition */
  341. (*newch)[n] = '\0';
  342. return n;
  343. }
  344. #endif
  345. private size_t
  346. uncompressbuf(struct magic_set *ms, int fd, size_t method,
  347. const unsigned char *old, unsigned char **newch, size_t n)
  348. {
  349. int fdin[2], fdout[2];
  350. ssize_t r;
  351. pid_t pid;
  352. #ifdef BUILTIN_DECOMPRESS
  353. /* FIXME: This doesn't cope with bzip2 */
  354. if (method == 2)
  355. return uncompressgzipped(ms, old, newch, n);
  356. #endif
  357. (void)fflush(stdout);
  358. (void)fflush(stderr);
  359. if ((fd != -1 && pipe(fdin) == -1) || pipe(fdout) == -1) {
  360. file_error(ms, errno, "cannot create pipe");
  361. return NODATA;
  362. }
  363. switch (pid = fork()) {
  364. case 0: /* child */
  365. (void) close(0);
  366. if (fd != -1) {
  367. (void) dup(fd);
  368. (void) FINFO_LSEEK_FUNC(0, (off_t)0, SEEK_SET);
  369. } else {
  370. (void) dup(fdin[0]);
  371. (void) close(fdin[0]);
  372. (void) close(fdin[1]);
  373. }
  374. (void) close(1);
  375. (void) dup(fdout[1]);
  376. (void) close(fdout[0]);
  377. (void) close(fdout[1]);
  378. #ifndef DEBUG
  379. if (compr[method].silent)
  380. (void)close(2);
  381. #endif
  382. (void)execvp(compr[method].argv[0],
  383. (char *const *)(intptr_t)compr[method].argv);
  384. #ifdef DEBUG
  385. (void)fprintf(stderr, "exec `%s' failed (%s)\n",
  386. compr[method].argv[0], strerror(errno));
  387. #endif
  388. exit(1);
  389. /*NOTREACHED*/
  390. case -1:
  391. file_error(ms, errno, "could not fork");
  392. return NODATA;
  393. default: /* parent */
  394. (void) close(fdout[1]);
  395. if (fd == -1) {
  396. (void) close(fdin[0]);
  397. /*
  398. * fork again, to avoid blocking because both
  399. * pipes filled
  400. */
  401. switch (fork()) {
  402. case 0: /* child */
  403. (void)close(fdout[0]);
  404. if (swrite(fdin[1], old, n) != (ssize_t)n) {
  405. #ifdef DEBUG
  406. (void)fprintf(stderr,
  407. "Write failed (%s)\n",
  408. strerror(errno));
  409. #endif
  410. exit(1);
  411. }
  412. exit(0);
  413. /*NOTREACHED*/
  414. case -1:
  415. #ifdef DEBUG
  416. (void)fprintf(stderr, "Fork failed (%s)\n",
  417. strerror(errno));
  418. #endif
  419. exit(1);
  420. /*NOTREACHED*/
  421. default: /* parent */
  422. break;
  423. }
  424. (void) close(fdin[1]);
  425. fdin[1] = -1;
  426. }
  427. *newch = (unsigned char *) emalloc(HOWMANY + 1);
  428. if ((r = sread(fdout[0], *newch, HOWMANY, 0)) <= 0) {
  429. #ifdef DEBUG
  430. (void)fprintf(stderr, "Read failed (%s)\n",
  431. strerror(errno));
  432. #endif
  433. efree(*newch);
  434. n = 0;
  435. *newch = NULL;
  436. goto err;
  437. } else {
  438. n = r;
  439. }
  440. /* NUL terminate, as every buffer is handled here. */
  441. (*newch)[n] = '\0';
  442. err:
  443. if (fdin[1] != -1)
  444. (void) close(fdin[1]);
  445. (void) close(fdout[0]);
  446. #ifdef WNOHANG
  447. while (waitpid(pid, NULL, WNOHANG) != -1)
  448. continue;
  449. #else
  450. (void)wait(NULL);
  451. #endif
  452. (void) close(fdin[0]);
  453. return n;
  454. }
  455. }
  456. #endif /* if PHP_FILEINFO_UNCOMPRESS */