bzip2recover.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. /*-----------------------------------------------------------*/
  2. /*--- Block recoverer program for bzip2 ---*/
  3. /*--- bzip2recover.c ---*/
  4. /*-----------------------------------------------------------*/
  5. /* ------------------------------------------------------------------
  6. This file is part of bzip2/libbzip2, a program and library for
  7. lossless, block-sorting data compression.
  8. bzip2/libbzip2 version 1.0.6 of 6 September 2010
  9. Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
  10. Please read the WARNING, DISCLAIMER and PATENTS sections in the
  11. README file.
  12. This program is released under the terms of the license contained
  13. in the file LICENSE.
  14. ------------------------------------------------------------------ */
  15. /* This program is a complete hack and should be rewritten properly.
  16. It isn't very complicated. */
  17. #include <stdio.h>
  18. #include <errno.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. /* This program records bit locations in the file to be recovered.
  22. That means that if 64-bit ints are not supported, we will not
  23. be able to recover .bz2 files over 512MB (2^32 bits) long.
  24. On GNU supported platforms, we take advantage of the 64-bit
  25. int support to circumvent this problem. Ditto MSVC.
  26. This change occurred in version 1.0.2; all prior versions have
  27. the 512MB limitation.
  28. */
  /* MaybeUInt64 is the widest unsigned integer type this file knows how
     to ask the compiler for, and MaybeUInt64_FMT is the printf
     conversion that matches it.  On compilers that are neither GNU C
     nor MSVC it degrades to plain unsigned int. */
  #ifdef __GNUC__
     typedef unsigned long long int MaybeUInt64;
     /* "ll" is the length modifier for (unsigned) long long; "L" is
        only defined for long double conversions. */
  #  define MaybeUInt64_FMT "%llu"
  #else
  #ifdef _MSC_VER
     typedef unsigned __int64 MaybeUInt64;
  #  define MaybeUInt64_FMT "%I64u"
  #else
     typedef unsigned int MaybeUInt64;
  #  define MaybeUInt64_FMT "%u"
  #endif
  #endif
  /* Scalar aliases used throughout this file. */
  typedef int            Int32;
  typedef unsigned int   UInt32;
  typedef char           Char;
  typedef unsigned char  UChar;

  /* Boolean held in one unsigned char, with its two constants. */
  typedef unsigned char  Bool;
  #define False ((Bool)0)
  #define True  ((Bool)1)
  48. #define BZ_MAX_FILENAME 2000
  49. Char inFileName[BZ_MAX_FILENAME];
  50. Char outFileName[BZ_MAX_FILENAME];
  51. Char progName[BZ_MAX_FILENAME];
  52. MaybeUInt64 bytesOut = 0;
  53. MaybeUInt64 bytesIn = 0;
  54. /*---------------------------------------------------*/
  55. /*--- Header bytes ---*/
  56. /*---------------------------------------------------*/
  /* The four bytes that open a .bz2 stream are 'B' 'Z' 'h' followed by
     a level digit; BZ_HDR_0 is the base that the digit is added to. */
  #define BZ_HDR_B   0x42   /* 'B' */
  #define BZ_HDR_Z   0x5a   /* 'Z' */
  #define BZ_HDR_h   0x68   /* 'h' */
  #define BZ_HDR_0   0x30   /* '0' */
  61. /*---------------------------------------------------*/
  62. /*--- I/O errors ---*/
  63. /*---------------------------------------------------*/
  64. /*---------------------------------------------*/
  65. static void readError ( void )
  66. {
  67. fprintf ( stderr,
  68. "%s: I/O error reading `%s', possible reason follows.\n",
  69. progName, inFileName );
  70. perror ( progName );
  71. fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
  72. progName );
  73. exit ( 1 );
  74. }
  75. /*---------------------------------------------*/
  76. static void writeError ( void )
  77. {
  78. fprintf ( stderr,
  79. "%s: I/O error reading `%s', possible reason follows.\n",
  80. progName, inFileName );
  81. perror ( progName );
  82. fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
  83. progName );
  84. exit ( 1 );
  85. }
  86. /*---------------------------------------------*/
  87. static void mallocFail ( Int32 n )
  88. {
  89. fprintf ( stderr,
  90. "%s: malloc failed on request for %d bytes.\n",
  91. progName, n );
  92. fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
  93. progName );
  94. exit ( 1 );
  95. }
  96. /*---------------------------------------------*/
  97. static void tooManyBlocks ( Int32 max_handled_blocks )
  98. {
  99. fprintf ( stderr,
  100. "%s: `%s' appears to contain more than %d blocks\n",
  101. progName, inFileName, max_handled_blocks );
  102. fprintf ( stderr,
  103. "%s: and cannot be handled. To fix, increase\n",
  104. progName );
  105. fprintf ( stderr,
  106. "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
  107. progName );
  108. exit ( 1 );
  109. }
  110. /*---------------------------------------------------*/
  111. /*--- Bit stream I/O ---*/
  112. /*---------------------------------------------------*/
  113. typedef
  114. struct {
  115. FILE* handle;
  116. Int32 buffer;
  117. Int32 buffLive;
  118. Char mode;
  119. }
  120. BitStream;
  121. /*---------------------------------------------*/
  122. static BitStream* bsOpenReadStream ( FILE* stream )
  123. {
  124. BitStream *bs = malloc ( sizeof(BitStream) );
  125. if (bs == NULL) mallocFail ( sizeof(BitStream) );
  126. bs->handle = stream;
  127. bs->buffer = 0;
  128. bs->buffLive = 0;
  129. bs->mode = 'r';
  130. return bs;
  131. }
  132. /*---------------------------------------------*/
  133. static BitStream* bsOpenWriteStream ( FILE* stream )
  134. {
  135. BitStream *bs = malloc ( sizeof(BitStream) );
  136. if (bs == NULL) mallocFail ( sizeof(BitStream) );
  137. bs->handle = stream;
  138. bs->buffer = 0;
  139. bs->buffLive = 0;
  140. bs->mode = 'w';
  141. return bs;
  142. }
  143. /*---------------------------------------------*/
  144. static void bsPutBit ( BitStream* bs, Int32 bit )
  145. {
  146. if (bs->buffLive == 8) {
  147. Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
  148. if (retVal == EOF) writeError();
  149. bytesOut++;
  150. bs->buffLive = 1;
  151. bs->buffer = bit & 0x1;
  152. } else {
  153. bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
  154. bs->buffLive++;
  155. };
  156. }
  157. /*---------------------------------------------*/
  158. /*--
  159. Returns 0 or 1, or 2 to indicate EOF.
  160. --*/
  161. static Int32 bsGetBit ( BitStream* bs )
  162. {
  163. if (bs->buffLive > 0) {
  164. bs->buffLive --;
  165. return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
  166. } else {
  167. Int32 retVal = getc ( bs->handle );
  168. if ( retVal == EOF ) {
  169. if (errno != 0) readError();
  170. return 2;
  171. }
  172. bs->buffLive = 7;
  173. bs->buffer = retVal;
  174. return ( ((bs->buffer) >> 7) & 0x1 );
  175. }
  176. }
  177. /*---------------------------------------------*/
  178. static void bsClose ( BitStream* bs )
  179. {
  180. Int32 retVal;
  181. if ( bs->mode == 'w' ) {
  182. while ( bs->buffLive < 8 ) {
  183. bs->buffLive++;
  184. bs->buffer <<= 1;
  185. };
  186. retVal = putc ( (UChar) (bs->buffer), bs->handle );
  187. if (retVal == EOF) writeError();
  188. bytesOut++;
  189. retVal = fflush ( bs->handle );
  190. if (retVal == EOF) writeError();
  191. }
  192. retVal = fclose ( bs->handle );
  193. if (retVal == EOF) {
  194. if (bs->mode == 'w') writeError(); else readError();
  195. }
  196. free ( bs );
  197. }
  198. /*---------------------------------------------*/
  199. static void bsPutUChar ( BitStream* bs, UChar c )
  200. {
  201. Int32 i;
  202. for (i = 7; i >= 0; i--)
  203. bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
  204. }
  205. /*---------------------------------------------*/
  206. static void bsPutUInt32 ( BitStream* bs, UInt32 c )
  207. {
  208. Int32 i;
  209. for (i = 31; i >= 0; i--)
  210. bsPutBit ( bs, (c >> i) & 0x1 );
  211. }
  212. /*---------------------------------------------*/
  213. static Bool endsInBz2 ( Char* name )
  214. {
  215. Int32 n = strlen ( name );
  216. if (n <= 4) return False;
  217. return
  218. (name[n-4] == '.' &&
  219. name[n-3] == 'b' &&
  220. name[n-2] == 'z' &&
  221. name[n-1] == '2');
  222. }
  223. /*---------------------------------------------------*/
  224. /*--- ---*/
  225. /*---------------------------------------------------*/
  /* Directory separator used to locate the basename of the input path.
     This logic isn't really right when it comes to Cygwin. */
  #ifdef _WIN32
  #  define BZ_SPLIT_SYM '\\'   /* path splitter on Windows platform */
  #else
  #  define BZ_SPLIT_SYM '/'    /* path splitter on Unix platform */
  #endif

  /* The two 48-bit magic numbers searched for in the bit stream, each
     split into its upper 16 bits (HI) and lower 32 bits (LO):
     0x314159265359 introduces a block, 0x177245385090 is the end mark. */
  #define BLOCK_HEADER_HI   0x00003141UL
  #define BLOCK_HEADER_LO   0x59265359UL

  #define BLOCK_ENDMARK_HI  0x00001772UL
  #define BLOCK_ENDMARK_LO  0x45385090UL
  236. /* Increase if necessary. However, a .bz2 file with > 50000 blocks
  237. would have an uncompressed size of at least 40GB, so the chances
  238. are low you'll need to up this.
  239. */
  240. #define BZ_MAX_HANDLED_BLOCKS 50000
  241. MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
  242. MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS];
  243. MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
  244. MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS];
  245. Int32 main ( Int32 argc, Char** argv )
  246. {
  247. FILE* inFile;
  248. FILE* outFile;
  249. BitStream* bsIn, *bsWr;
  250. Int32 b, wrBlock, currBlock, rbCtr;
  251. MaybeUInt64 bitsRead;
  252. UInt32 buffHi, buffLo, blockCRC;
  253. Char* p;
  254. strcpy ( progName, argv[0] );
  255. inFileName[0] = outFileName[0] = 0;
  256. fprintf ( stderr,
  257. "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n" );
  258. if (argc != 2) {
  259. fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
  260. progName, progName );
  261. switch (sizeof(MaybeUInt64)) {
  262. case 8:
  263. fprintf(stderr,
  264. "\trestrictions on size of recovered file: None\n");
  265. break;
  266. case 4:
  267. fprintf(stderr,
  268. "\trestrictions on size of recovered file: 512 MB\n");
  269. fprintf(stderr,
  270. "\tto circumvent, recompile with MaybeUInt64 as an\n"
  271. "\tunsigned 64-bit int.\n");
  272. break;
  273. default:
  274. fprintf(stderr,
  275. "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
  276. "configuration error.\n");
  277. break;
  278. }
  279. exit(1);
  280. }
  281. if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
  282. fprintf ( stderr,
  283. "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
  284. progName, (int)strlen(argv[1]) );
  285. exit(1);
  286. }
  287. strcpy ( inFileName, argv[1] );
  288. inFile = fopen ( inFileName, "rb" );
  289. if (inFile == NULL) {
  290. fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
  291. exit(1);
  292. }
  293. bsIn = bsOpenReadStream ( inFile );
  294. fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
  295. bitsRead = 0;
  296. buffHi = buffLo = 0;
  297. currBlock = 0;
  298. bStart[currBlock] = 0;
  299. rbCtr = 0;
  300. while (True) {
  301. b = bsGetBit ( bsIn );
  302. bitsRead++;
  303. if (b == 2) {
  304. if (bitsRead >= bStart[currBlock] &&
  305. (bitsRead - bStart[currBlock]) >= 40) {
  306. bEnd[currBlock] = bitsRead-1;
  307. if (currBlock > 0)
  308. fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
  309. " to " MaybeUInt64_FMT " (incomplete)\n",
  310. currBlock, bStart[currBlock], bEnd[currBlock] );
  311. } else
  312. currBlock--;
  313. break;
  314. }
  315. buffHi = (buffHi << 1) | (buffLo >> 31);
  316. buffLo = (buffLo << 1) | (b & 1);
  317. if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
  318. && buffLo == BLOCK_HEADER_LO)
  319. ||
  320. ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
  321. && buffLo == BLOCK_ENDMARK_LO)
  322. ) {
  323. if (bitsRead > 49) {
  324. bEnd[currBlock] = bitsRead-49;
  325. } else {
  326. bEnd[currBlock] = 0;
  327. }
  328. if (currBlock > 0 &&
  329. (bEnd[currBlock] - bStart[currBlock]) >= 130) {
  330. fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
  331. " to " MaybeUInt64_FMT "\n",
  332. rbCtr+1, bStart[currBlock], bEnd[currBlock] );
  333. rbStart[rbCtr] = bStart[currBlock];
  334. rbEnd[rbCtr] = bEnd[currBlock];
  335. rbCtr++;
  336. }
  337. if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
  338. tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
  339. currBlock++;
  340. bStart[currBlock] = bitsRead;
  341. }
  342. }
  343. bsClose ( bsIn );
  344. /*-- identified blocks run from 1 to rbCtr inclusive. --*/
  345. if (rbCtr < 1) {
  346. fprintf ( stderr,
  347. "%s: sorry, I couldn't find any block boundaries.\n",
  348. progName );
  349. exit(1);
  350. };
  351. fprintf ( stderr, "%s: splitting into blocks\n", progName );
  352. inFile = fopen ( inFileName, "rb" );
  353. if (inFile == NULL) {
  354. fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
  355. exit(1);
  356. }
  357. bsIn = bsOpenReadStream ( inFile );
  358. /*-- placate gcc's dataflow analyser --*/
  359. blockCRC = 0; bsWr = 0;
  360. bitsRead = 0;
  361. outFile = NULL;
  362. wrBlock = 0;
  363. while (True) {
  364. b = bsGetBit(bsIn);
  365. if (b == 2) break;
  366. buffHi = (buffHi << 1) | (buffLo >> 31);
  367. buffLo = (buffLo << 1) | (b & 1);
  368. if (bitsRead == 47+rbStart[wrBlock])
  369. blockCRC = (buffHi << 16) | (buffLo >> 16);
  370. if (outFile != NULL && bitsRead >= rbStart[wrBlock]
  371. && bitsRead <= rbEnd[wrBlock]) {
  372. bsPutBit ( bsWr, b );
  373. }
  374. bitsRead++;
  375. if (bitsRead == rbEnd[wrBlock]+1) {
  376. if (outFile != NULL) {
  377. bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
  378. bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
  379. bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
  380. bsPutUInt32 ( bsWr, blockCRC );
  381. bsClose ( bsWr );
  382. }
  383. if (wrBlock >= rbCtr) break;
  384. wrBlock++;
  385. } else
  386. if (bitsRead == rbStart[wrBlock]) {
  387. /* Create the output file name, correctly handling leading paths.
  388. (31.10.2001 by Sergey E. Kusikov) */
  389. Char* split;
  390. Int32 ofs, k;
  391. for (k = 0; k < BZ_MAX_FILENAME; k++)
  392. outFileName[k] = 0;
  393. strcpy (outFileName, inFileName);
  394. split = strrchr (outFileName, BZ_SPLIT_SYM);
  395. if (split == NULL) {
  396. split = outFileName;
  397. } else {
  398. ++split;
  399. }
  400. /* Now split points to the start of the basename. */
  401. ofs = split - outFileName;
  402. sprintf (split, "rec%5d", wrBlock+1);
  403. for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
  404. strcat (outFileName, inFileName + ofs);
  405. if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
  406. fprintf ( stderr, " writing block %d to `%s' ...\n",
  407. wrBlock+1, outFileName );
  408. outFile = fopen ( outFileName, "wb" );
  409. if (outFile == NULL) {
  410. fprintf ( stderr, "%s: can't write `%s'\n",
  411. progName, outFileName );
  412. exit(1);
  413. }
  414. bsWr = bsOpenWriteStream ( outFile );
  415. bsPutUChar ( bsWr, BZ_HDR_B );
  416. bsPutUChar ( bsWr, BZ_HDR_Z );
  417. bsPutUChar ( bsWr, BZ_HDR_h );
  418. bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
  419. bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
  420. bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
  421. bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
  422. }
  423. }
  424. fprintf ( stderr, "%s: finished\n", progName );
  425. return 0;
  426. }
  427. /*-----------------------------------------------------------*/
  428. /*--- end bzip2recover.c ---*/
  429. /*-----------------------------------------------------------*/