archive_entry_link_resolver.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. /*-
  2. * Copyright (c) 2003-2007 Tim Kientzle
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. #include "archive_platform.h"
  26. __FBSDID("$FreeBSD: head/lib/libarchive/archive_entry_link_resolver.c 201100 2009-12-28 03:05:31Z kientzle $");
  27. #ifdef HAVE_SYS_STAT_H
  28. #include <sys/stat.h>
  29. #endif
  30. #ifdef HAVE_ERRNO_H
  31. #include <errno.h>
  32. #endif
  33. #include <stdio.h>
  34. #ifdef HAVE_STDLIB_H
  35. #include <stdlib.h>
  36. #endif
  37. #ifdef HAVE_STRING_H
  38. #include <string.h>
  39. #endif
  40. #include "archive.h"
  41. #include "archive_entry.h"
  42. /*
  43. * This is mostly a pretty straightforward hash table implementation.
  44. * The only interesting bit is the different strategies used to
  45. * match up links. These strategies match those used by various
  46. * archiving formats:
  47. * tar - content stored with first link, remainder refer back to it.
  48. * This requires us to match each subsequent link up with the
  49. * first appearance.
  50. * cpio - Old cpio just stored body with each link, match-ups were
  51. * implicit. This is trivial.
  52. * new cpio - New cpio only stores body with last link, match-ups
  53. * are implicit. This is actually quite tricky; see the notes
  54. * below.
  55. */
  56. /* Users pass us a format code, we translate that into a strategy here. */
  57. #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0
  58. #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
  59. #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
  60. #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
  61. /* Initial size of link cache. */
  62. #define links_cache_initial_size 1024
  63. struct links_entry {
  64. struct links_entry *next;
  65. struct links_entry *previous;
  66. struct archive_entry *canonical;
  67. struct archive_entry *entry;
  68. size_t hash;
  69. unsigned int links; /* # links not yet seen */
  70. };
  71. struct archive_entry_linkresolver {
  72. struct links_entry **buckets;
  73. struct links_entry *spare;
  74. unsigned long number_entries;
  75. size_t number_buckets;
  76. int strategy;
  77. };
  78. #define NEXT_ENTRY_DEFERRED 1
  79. #define NEXT_ENTRY_PARTIAL 2
  80. #define NEXT_ENTRY_ALL (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL)
  81. static struct links_entry *find_entry(struct archive_entry_linkresolver *,
  82. struct archive_entry *);
  83. static void grow_hash(struct archive_entry_linkresolver *);
  84. static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
  85. struct archive_entry *);
  86. static struct links_entry *next_entry(struct archive_entry_linkresolver *,
  87. int);
  88. struct archive_entry_linkresolver *
  89. archive_entry_linkresolver_new(void)
  90. {
  91. struct archive_entry_linkresolver *res;
  92. /* Check for positive power-of-two */
  93. if (links_cache_initial_size == 0 ||
  94. (links_cache_initial_size & (links_cache_initial_size - 1)) != 0)
  95. return (NULL);
  96. res = calloc(1, sizeof(struct archive_entry_linkresolver));
  97. if (res == NULL)
  98. return (NULL);
  99. res->number_buckets = links_cache_initial_size;
  100. res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));
  101. if (res->buckets == NULL) {
  102. free(res);
  103. return (NULL);
  104. }
  105. return (res);
  106. }
  107. void
  108. archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
  109. int fmt)
  110. {
  111. int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
  112. switch (fmtbase) {
  113. case ARCHIVE_FORMAT_7ZIP:
  114. case ARCHIVE_FORMAT_AR:
  115. case ARCHIVE_FORMAT_ZIP:
  116. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
  117. break;
  118. case ARCHIVE_FORMAT_CPIO:
  119. switch (fmt) {
  120. case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
  121. case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
  122. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
  123. break;
  124. default:
  125. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
  126. break;
  127. }
  128. break;
  129. case ARCHIVE_FORMAT_MTREE:
  130. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
  131. break;
  132. case ARCHIVE_FORMAT_ISO9660:
  133. case ARCHIVE_FORMAT_SHAR:
  134. case ARCHIVE_FORMAT_TAR:
  135. case ARCHIVE_FORMAT_XAR:
  136. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
  137. break;
  138. default:
  139. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
  140. break;
  141. }
  142. }
  143. void
  144. archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
  145. {
  146. struct links_entry *le;
  147. if (res == NULL)
  148. return;
  149. while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)
  150. archive_entry_free(le->entry);
  151. free(res->buckets);
  152. free(res);
  153. }
  154. void
  155. archive_entry_linkify(struct archive_entry_linkresolver *res,
  156. struct archive_entry **e, struct archive_entry **f)
  157. {
  158. struct links_entry *le;
  159. struct archive_entry *t;
  160. *f = NULL; /* Default: Don't return a second entry. */
  161. if (*e == NULL) {
  162. le = next_entry(res, NEXT_ENTRY_DEFERRED);
  163. if (le != NULL) {
  164. *e = le->entry;
  165. le->entry = NULL;
  166. }
  167. return;
  168. }
  169. /* If it has only one link, then we're done. */
  170. if (archive_entry_nlink(*e) == 1)
  171. return;
  172. /* Directories, devices never have hardlinks. */
  173. if (archive_entry_filetype(*e) == AE_IFDIR
  174. || archive_entry_filetype(*e) == AE_IFBLK
  175. || archive_entry_filetype(*e) == AE_IFCHR)
  176. return;
  177. switch (res->strategy) {
  178. case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
  179. le = find_entry(res, *e);
  180. if (le != NULL) {
  181. archive_entry_unset_size(*e);
  182. archive_entry_copy_hardlink(*e,
  183. archive_entry_pathname(le->canonical));
  184. } else
  185. insert_entry(res, *e);
  186. return;
  187. case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
  188. le = find_entry(res, *e);
  189. if (le != NULL) {
  190. archive_entry_copy_hardlink(*e,
  191. archive_entry_pathname(le->canonical));
  192. } else
  193. insert_entry(res, *e);
  194. return;
  195. case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
  196. /* This one is trivial. */
  197. return;
  198. case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
  199. le = find_entry(res, *e);
  200. if (le != NULL) {
  201. /*
  202. * Put the new entry in le, return the
  203. * old entry from le.
  204. */
  205. t = *e;
  206. *e = le->entry;
  207. le->entry = t;
  208. /* Make the old entry into a hardlink. */
  209. archive_entry_unset_size(*e);
  210. archive_entry_copy_hardlink(*e,
  211. archive_entry_pathname(le->canonical));
  212. /* If we ran out of links, return the
  213. * final entry as well. */
  214. if (le->links == 0) {
  215. *f = le->entry;
  216. le->entry = NULL;
  217. }
  218. } else {
  219. /*
  220. * If we haven't seen it, tuck it away
  221. * for future use.
  222. */
  223. le = insert_entry(res, *e);
  224. if (le == NULL)
  225. /* XXX We should return an error code XXX */
  226. return;
  227. le->entry = *e;
  228. *e = NULL;
  229. }
  230. return;
  231. default:
  232. break;
  233. }
  234. return;
  235. }
  236. static struct links_entry *
  237. find_entry(struct archive_entry_linkresolver *res,
  238. struct archive_entry *entry)
  239. {
  240. struct links_entry *le;
  241. size_t hash, bucket;
  242. dev_t dev;
  243. int64_t ino;
  244. /* Free a held entry. */
  245. if (res->spare != NULL) {
  246. archive_entry_free(res->spare->canonical);
  247. archive_entry_free(res->spare->entry);
  248. free(res->spare);
  249. res->spare = NULL;
  250. }
  251. dev = archive_entry_dev(entry);
  252. ino = archive_entry_ino64(entry);
  253. hash = (size_t)(dev ^ ino);
  254. /* Try to locate this entry in the links cache. */
  255. bucket = hash & (res->number_buckets - 1);
  256. for (le = res->buckets[bucket]; le != NULL; le = le->next) {
  257. if (le->hash == hash
  258. && dev == archive_entry_dev(le->canonical)
  259. && ino == archive_entry_ino64(le->canonical)) {
  260. /*
  261. * Decrement link count each time and release
  262. * the entry if it hits zero. This saves
  263. * memory and is necessary for detecting
  264. * missed links.
  265. */
  266. --le->links;
  267. if (le->links > 0)
  268. return (le);
  269. /* Remove it from this hash bucket. */
  270. if (le->previous != NULL)
  271. le->previous->next = le->next;
  272. if (le->next != NULL)
  273. le->next->previous = le->previous;
  274. if (res->buckets[bucket] == le)
  275. res->buckets[bucket] = le->next;
  276. res->number_entries--;
  277. /* Defer freeing this entry. */
  278. res->spare = le;
  279. return (le);
  280. }
  281. }
  282. return (NULL);
  283. }
  284. static struct links_entry *
  285. next_entry(struct archive_entry_linkresolver *res, int mode)
  286. {
  287. struct links_entry *le;
  288. size_t bucket;
  289. /* Free a held entry. */
  290. if (res->spare != NULL) {
  291. archive_entry_free(res->spare->canonical);
  292. archive_entry_free(res->spare->entry);
  293. free(res->spare);
  294. res->spare = NULL;
  295. }
  296. /* Look for next non-empty bucket in the links cache. */
  297. for (bucket = 0; bucket < res->number_buckets; bucket++) {
  298. for (le = res->buckets[bucket]; le != NULL; le = le->next) {
  299. if (le->entry != NULL &&
  300. (mode & NEXT_ENTRY_DEFERRED) == 0)
  301. continue;
  302. if (le->entry == NULL &&
  303. (mode & NEXT_ENTRY_PARTIAL) == 0)
  304. continue;
  305. /* Remove it from this hash bucket. */
  306. if (le->next != NULL)
  307. le->next->previous = le->previous;
  308. if (le->previous != NULL)
  309. le->previous->next = le->next;
  310. else
  311. res->buckets[bucket] = le->next;
  312. res->number_entries--;
  313. /* Defer freeing this entry. */
  314. res->spare = le;
  315. return (le);
  316. }
  317. }
  318. return (NULL);
  319. }
  320. static struct links_entry *
  321. insert_entry(struct archive_entry_linkresolver *res,
  322. struct archive_entry *entry)
  323. {
  324. struct links_entry *le;
  325. size_t hash, bucket;
  326. /* Add this entry to the links cache. */
  327. le = calloc(1, sizeof(struct links_entry));
  328. if (le == NULL)
  329. return (NULL);
  330. le->canonical = archive_entry_clone(entry);
  331. /* If the links cache is getting too full, enlarge the hash table. */
  332. if (res->number_entries > res->number_buckets * 2)
  333. grow_hash(res);
  334. hash = (size_t)(archive_entry_dev(entry) ^ archive_entry_ino64(entry));
  335. bucket = hash & (res->number_buckets - 1);
  336. /* If we could allocate the entry, record it. */
  337. if (res->buckets[bucket] != NULL)
  338. res->buckets[bucket]->previous = le;
  339. res->number_entries++;
  340. le->next = res->buckets[bucket];
  341. le->previous = NULL;
  342. res->buckets[bucket] = le;
  343. le->hash = hash;
  344. le->links = archive_entry_nlink(entry) - 1;
  345. return (le);
  346. }
  347. static void
  348. grow_hash(struct archive_entry_linkresolver *res)
  349. {
  350. struct links_entry *le, **new_buckets;
  351. size_t new_size;
  352. size_t i, bucket;
  353. /* Try to enlarge the bucket list. */
  354. new_size = res->number_buckets * 2;
  355. if (new_size < res->number_buckets)
  356. return;
  357. new_buckets = calloc(new_size, sizeof(struct links_entry *));
  358. if (new_buckets == NULL)
  359. return;
  360. for (i = 0; i < res->number_buckets; i++) {
  361. while (res->buckets[i] != NULL) {
  362. /* Remove entry from old bucket. */
  363. le = res->buckets[i];
  364. res->buckets[i] = le->next;
  365. /* Add entry to new bucket. */
  366. bucket = le->hash & (new_size - 1);
  367. if (new_buckets[bucket] != NULL)
  368. new_buckets[bucket]->previous = le;
  369. le->next = new_buckets[bucket];
  370. le->previous = NULL;
  371. new_buckets[bucket] = le;
  372. }
  373. }
  374. free(res->buckets);
  375. res->buckets = new_buckets;
  376. res->number_buckets = new_size;
  377. }
  378. struct archive_entry *
  379. archive_entry_partial_links(struct archive_entry_linkresolver *res,
  380. unsigned int *links)
  381. {
  382. struct archive_entry *e;
  383. struct links_entry *le;
  384. /* Free a held entry. */
  385. if (res->spare != NULL) {
  386. archive_entry_free(res->spare->canonical);
  387. archive_entry_free(res->spare->entry);
  388. free(res->spare);
  389. res->spare = NULL;
  390. }
  391. le = next_entry(res, NEXT_ENTRY_PARTIAL);
  392. if (le != NULL) {
  393. e = le->canonical;
  394. if (links != NULL)
  395. *links = le->links;
  396. le->canonical = NULL;
  397. } else {
  398. e = NULL;
  399. if (links != NULL)
  400. *links = 0;
  401. }
  402. return (e);
  403. }