readcdf.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. /*-
  2. * Copyright (c) 2008 Christos Zoulas
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  15. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  16. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  18. * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  19. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  20. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  21. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  22. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  23. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  24. * POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include "file.h"
  27. #ifndef lint
  28. FILE_RCSID("@(#)$File: readcdf.c,v 1.40 2014/03/06 15:23:33 christos Exp $")
  29. #endif
  30. #include <stdlib.h>
  31. #ifdef PHP_WIN32
  32. #include "win32/unistd.h"
  33. #else
  34. #include <unistd.h>
  35. #endif
  36. #include <string.h>
  37. #include <time.h>
  38. #include <ctype.h>
  39. #if defined(HAVE_LOCALE_H)
  40. #include <locale.h>
  41. #endif
  42. #include "cdf.h"
  43. #include "magic.h"
  /*
   * Nonzero when no MAGIC_MIME bit is set in ms->flags.  The code below
   * prints a textual description in that case and an "application/..."
   * string otherwise.
   */
  #define NOTMIME(ms) (!((ms)->flags & MAGIC_MIME))
  /*
   * One entry of a substring lookup table: when `pattern' is found in the
   * searched text, `mime' is the string handed back.  Every table is
   * terminated by an entry whose two fields are NULL.
   */
  struct nv {
      const char *pattern;
      const char *mime;
  };

  /*
   * Application name fragments -> MIME subtype.  Consulted with the value
   * of the CDF_PROPERTY_NAME_OF_APPLICATION property.
   */
  static const struct nv app2mime[] = {
      { "Word",                     "msword" },
      { "Excel",                    "vnd.ms-excel" },
      { "Powerpoint",               "vnd.ms-powerpoint" },
      { "Crystal Reports",          "x-rpt" },
      { "Advanced Installer",       "vnd.ms-msi" },
      { "InstallShield",            "vnd.ms-msi" },
      { "Microsoft Patch Compiler", "vnd.ms-msi" },
      { "NAnt",                     "vnd.ms-msi" },
      { "Windows Installer",        "vnd.ms-msi" },
      { NULL,                       NULL }
  };

  /* Directory entry name fragments -> MIME subtype. */
  static const struct nv name2mime[] = {
      { "WordDocument",     "msword" },
      { "PowerPoint",       "vnd.ms-powerpoint" },
      { "DigitalSignature", "vnd.ms-msi" },
      { NULL,               NULL }
  };

  /*
   * Directory entry name fragments -> human readable description (the
   * `mime' field carries the description text in this table).
   */
  static const struct nv name2desc[] = {
      { "WordDocument",     "Microsoft Office Word" },
      { "PowerPoint",       "Microsoft PowerPoint" },
      { "DigitalSignature", "Microsoft Installer" },
      { NULL,               NULL }
  };

  /*
   * PHP_WIN32 builds map strcasestr() onto strstr(), so the pattern search
   * is case-sensitive there.
   */
  #if defined(PHP_WIN32)
  # define strcasestr strstr
  #endif
  /*
   * One entry of a class id lookup table.  The class id is held as two
   * 64-bit words; `mime' is the string returned for it.  A table ends at
   * the first entry whose `mime' is NULL.
   */
  struct cv {
      uint64_t clsid[2];
      const char *mime;
  };

  /* Class id -> MIME subtype.  (MSVC spells 64-bit literals with ui64.) */
  static const struct cv clsid2mime[] = {
      {
  #ifdef PHP_WIN32
          { 0x00000000000c1084ui64, 0x46000000000000c0ui64 },
  #else
          { 0x00000000000c1084LLU, 0x46000000000000c0LLU },
  #endif
          "x-msi"
      },
      { { 0, 0 }, NULL }
  };

  /* Class id -> human readable description. */
  static const struct cv clsid2desc[] = {
      {
  #ifdef PHP_WIN32
          { 0x00000000000c1084ui64, 0x46000000000000c0ui64 },
  #else
          { 0x00000000000c1084LLU, 0x46000000000000c0LLU },
  #endif
          "MSI Installer"
      },
      { { 0, 0 }, NULL }
  };
  101. private const char *
  102. cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
  103. {
  104. size_t i;
  105. for (i = 0; cv[i].mime != NULL; i++) {
  106. if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
  107. return cv[i].mime;
  108. }
  109. return NULL;
  110. }
  111. private const char *
  112. cdf_app_to_mime(const char *vbuf, const struct nv *nv)
  113. {
  114. size_t i;
  115. const char *rv = NULL;
  116. (void)setlocale(LC_CTYPE, "C");
  117. for (i = 0; nv[i].pattern != NULL; i++)
  118. if (strcasestr(vbuf, nv[i].pattern) != NULL) {
  119. rv = nv[i].mime;
  120. break;
  121. }
  122. (void)setlocale(LC_CTYPE, "");
  123. return rv;
  124. }
  125. private int
  126. cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
  127. size_t count, const cdf_directory_t *root_storage)
  128. {
  129. size_t i;
  130. cdf_timestamp_t tp;
  131. struct timeval ts;
  132. char buf[64];
  133. const char *str = NULL;
  134. const char *s;
  135. int len;
  136. memset(&ts, 0, sizeof(ts));
  137. if (!NOTMIME(ms) && root_storage)
  138. str = cdf_clsid_to_mime(root_storage->d_storage_uuid, clsid2mime);
  139. for (i = 0; i < count; i++) {
  140. cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
  141. switch (info[i].pi_type) {
  142. case CDF_NULL:
  143. break;
  144. case CDF_SIGNED16:
  145. if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
  146. info[i].pi_s16) == -1)
  147. return -1;
  148. break;
  149. case CDF_SIGNED32:
  150. if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
  151. info[i].pi_s32) == -1)
  152. return -1;
  153. break;
  154. case CDF_UNSIGNED32:
  155. if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
  156. info[i].pi_u32) == -1)
  157. return -1;
  158. break;
  159. case CDF_FLOAT:
  160. if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
  161. info[i].pi_f) == -1)
  162. return -1;
  163. break;
  164. case CDF_DOUBLE:
  165. if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
  166. info[i].pi_d) == -1)
  167. return -1;
  168. break;
  169. case CDF_LENGTH32_STRING:
  170. case CDF_LENGTH32_WSTRING:
  171. len = info[i].pi_str.s_len;
  172. if (len > 1) {
  173. char vbuf[1024];
  174. size_t j, k = 1;
  175. if (info[i].pi_type == CDF_LENGTH32_WSTRING)
  176. k++;
  177. s = info[i].pi_str.s_buf;
  178. for (j = 0; j < sizeof(vbuf) && len--;
  179. j++, s += k) {
  180. if (*s == '\0')
  181. break;
  182. if (isprint((unsigned char)*s))
  183. vbuf[j] = *s;
  184. }
  185. if (j == sizeof(vbuf))
  186. --j;
  187. vbuf[j] = '\0';
  188. if (NOTMIME(ms)) {
  189. if (vbuf[0]) {
  190. if (file_printf(ms, ", %s: %s",
  191. buf, vbuf) == -1)
  192. return -1;
  193. }
  194. } else if (str == NULL && info[i].pi_id ==
  195. CDF_PROPERTY_NAME_OF_APPLICATION) {
  196. str = cdf_app_to_mime(vbuf, app2mime);
  197. }
  198. }
  199. break;
  200. case CDF_FILETIME:
  201. tp = info[i].pi_tp;
  202. if (tp != 0) {
  203. char tbuf[64];
  204. #if defined(PHP_WIN32) && _MSC_VER <= 1500
  205. if (tp < 1000000000000000i64) {
  206. #else
  207. if (tp < 1000000000000000LL) {
  208. #endif
  209. cdf_print_elapsed_time(tbuf,
  210. sizeof(tbuf), tp);
  211. if (NOTMIME(ms) && file_printf(ms,
  212. ", %s: %s", buf, tbuf) == -1)
  213. return -1;
  214. } else {
  215. char *c, *ec;
  216. const time_t sec = ts.tv_sec;
  217. if (cdf_timestamp_to_timespec(&ts, tp) == -1) {
  218. return -1;
  219. }
  220. c = cdf_ctime(&sec, tbuf);
  221. if (c != NULL &&
  222. (ec = strchr(c, '\n')) != NULL)
  223. *ec = '\0';
  224. if (NOTMIME(ms) && file_printf(ms,
  225. ", %s: %s", buf, c) == -1)
  226. return -1;
  227. }
  228. }
  229. break;
  230. case CDF_CLIPBOARD:
  231. break;
  232. default:
  233. return -1;
  234. }
  235. }
  236. if (!NOTMIME(ms)) {
  237. if (str == NULL)
  238. return 0;
  239. if (file_printf(ms, "application/%s", str) == -1)
  240. return -1;
  241. }
  242. return 1;
  243. }
  244. private int
  245. cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
  246. const cdf_stream_t *sst, const cdf_directory_t *root_storage)
  247. {
  248. cdf_summary_info_header_t si;
  249. cdf_property_info_t *info;
  250. size_t count;
  251. int m;
  252. if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
  253. return -1;
  254. if (NOTMIME(ms)) {
  255. const char *str;
  256. if (file_printf(ms, "Composite Document File V2 Document")
  257. == -1)
  258. return -1;
  259. if (file_printf(ms, ", %s Endian",
  260. si.si_byte_order == 0xfffe ? "Little" : "Big") == -1)
  261. return -2;
  262. switch (si.si_os) {
  263. case 2:
  264. if (file_printf(ms, ", Os: Windows, Version %d.%d",
  265. si.si_os_version & 0xff,
  266. (uint32_t)si.si_os_version >> 8) == -1)
  267. return -2;
  268. break;
  269. case 1:
  270. if (file_printf(ms, ", Os: MacOS, Version %d.%d",
  271. (uint32_t)si.si_os_version >> 8,
  272. si.si_os_version & 0xff) == -1)
  273. return -2;
  274. break;
  275. default:
  276. if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
  277. si.si_os_version & 0xff,
  278. (uint32_t)si.si_os_version >> 8) == -1)
  279. return -2;
  280. break;
  281. }
  282. if (root_storage) {
  283. str = cdf_clsid_to_mime(root_storage->d_storage_uuid, clsid2desc);
  284. if (str)
  285. if (file_printf(ms, ", %s", str) == -1)
  286. return -2;
  287. }
  288. }
  289. m = cdf_file_property_info(ms, info, count, root_storage);
  290. free(info);
  291. return m == -1 ? -2 : m;
  292. }
  293. protected int
  294. file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
  295. size_t nbytes)
  296. {
  297. cdf_info_t info;
  298. cdf_header_t h;
  299. cdf_sat_t sat, ssat;
  300. cdf_stream_t sst, scn;
  301. cdf_dir_t dir;
  302. int i;
  303. const char *expn = "";
  304. const char *corrupt = "corrupt: ";
  305. const cdf_directory_t *root_storage;
  306. info.i_fd = fd;
  307. info.i_buf = buf;
  308. info.i_len = nbytes;
  309. if (ms->flags & MAGIC_APPLE)
  310. return 0;
  311. if (cdf_read_header(&info, &h) == -1)
  312. return 0;
  313. #ifdef CDF_DEBUG
  314. cdf_dump_header(&h);
  315. #endif
  316. if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
  317. expn = "Can't read SAT";
  318. goto out0;
  319. }
  320. #ifdef CDF_DEBUG
  321. cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
  322. #endif
  323. if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
  324. expn = "Can't read SSAT";
  325. goto out1;
  326. }
  327. #ifdef CDF_DEBUG
  328. cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
  329. #endif
  330. if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
  331. expn = "Can't read directory";
  332. goto out2;
  333. }
  334. if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
  335. &root_storage)) == -1) {
  336. expn = "Cannot read short stream";
  337. goto out3;
  338. }
  339. #ifdef CDF_DEBUG
  340. cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
  341. #endif
  342. if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
  343. &scn)) == -1) {
  344. if (errno == ESRCH) {
  345. corrupt = expn;
  346. expn = "No summary info";
  347. } else {
  348. expn = "Cannot read summary info";
  349. }
  350. goto out4;
  351. }
  352. #ifdef CDF_DEBUG
  353. cdf_dump_summary_info(&h, &scn);
  354. #endif
  355. if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
  356. expn = "Can't expand summary_info";
  357. if (i == 0) {
  358. const char *str = NULL;
  359. cdf_directory_t *d;
  360. char name[__arraycount(d->d_name)];
  361. size_t j, k;
  362. for (j = 0; str == NULL && j < dir.dir_len; j++) {
  363. d = &dir.dir_tab[j];
  364. for (k = 0; k < sizeof(name); k++)
  365. name[k] = (char)cdf_tole2(d->d_name[k]);
  366. str = cdf_app_to_mime(name,
  367. NOTMIME(ms) ? name2desc : name2mime);
  368. }
  369. if (NOTMIME(ms)) {
  370. if (str != NULL) {
  371. if (file_printf(ms, "%s", str) == -1)
  372. return -1;
  373. i = 1;
  374. }
  375. } else {
  376. if (str == NULL)
  377. str = "vnd.ms-office";
  378. if (file_printf(ms, "application/%s", str) == -1)
  379. return -1;
  380. i = 1;
  381. }
  382. }
  383. free(scn.sst_tab);
  384. out4:
  385. free(sst.sst_tab);
  386. out3:
  387. free(dir.dir_tab);
  388. out2:
  389. free(ssat.sat_tab);
  390. out1:
  391. free(sat.sat_tab);
  392. out0:
  393. if (i == -1) {
  394. if (NOTMIME(ms)) {
  395. if (file_printf(ms,
  396. "Composite Document File V2 Document") == -1)
  397. return -1;
  398. if (*expn)
  399. if (file_printf(ms, ", %s%s", corrupt, expn) == -1)
  400. return -1;
  401. } else {
  402. if (file_printf(ms, "application/CDFV2-corrupt") == -1)
  403. return -1;
  404. }
  405. i = 1;
  406. }
  407. return i;
  408. }