summaryrefslogtreecommitdiffstats
path: root/bzip2.c
diff options
context:
space:
mode:
authorJulian Seward <jseward@acm.org>1997-08-07 22:13:13 +0200
committerJulian Seward <jseward@acm.org>1997-08-07 22:13:13 +0200
commit33d134030248633ffa7d60c0a35a783c46da034b (patch)
treeb760dc34185dccc7054989c1472478574223cc31 /bzip2.c
downloadbzip2-33d134030248633ffa7d60c0a35a783c46da034b.tar.gz
bzip2-33d134030248633ffa7d60c0a35a783c46da034b.tar.bz2
bzip2-33d134030248633ffa7d60c0a35a783c46da034b.tar.xz
bzip2-0.1bzip2-0.1
Diffstat (limited to 'bzip2.c')
-rw-r--r--bzip2.c4036
1 files changed, 4036 insertions, 0 deletions
diff --git a/bzip2.c b/bzip2.c
new file mode 100644
index 0000000..0fb45fb
--- /dev/null
+++ b/bzip2.c
@@ -0,0 +1,4036 @@
1
2/*-----------------------------------------------------------*/
3/*--- A block-sorting, lossless compressor bzip2.c ---*/
4/*-----------------------------------------------------------*/
5
6/*--
7 This program is bzip2, a lossless, block-sorting data compressor,
8 version 0.1pl0, dated 17-Aug-1997.
9
10 Copyright (C) 1996, 1997 by Julian Seward.
11 Guildford, Surrey, UK
12 email: jseward@acm.org
13
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 2 of the License, or
17 (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27
28 The GNU General Public License is contained in the file LICENSE.
29
30 This program is based on (at least) the work of:
31 Mike Burrows
32 David Wheeler
33 Peter Fenwick
34 Alistair Moffat
35 Radford Neal
36 Ian H. Witten
37 Robert Sedgewick
38 Jon L. Bentley
39
40 For more information on these sources, see the file ALGORITHMS.
41--*/
42
43/*----------------------------------------------------*/
44/*--- IMPORTANT ---*/
45/*----------------------------------------------------*/
46
47/*--
48 WARNING:
49 This program (attempts to) compress data by performing several
50 non-trivial transformations on it. Unless you are 100% familiar
51 with *all* the algorithms contained herein, and with the
52 consequences of modifying them, you should NOT meddle with the
53 compression or decompression machinery. Incorrect changes can
54 and very likely *will* lead to disasterous loss of data.
55
56 DISCLAIMER:
57 I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE
58 USE OF THIS PROGRAM, HOWSOEVER CAUSED.
59
60 Every compression of a file implies an assumption that the
61 compressed file can be decompressed to reproduce the original.
62 Great efforts in design, coding and testing have been made to
63 ensure that this program works correctly. However, the
64 complexity of the algorithms, and, in particular, the presence
65 of various special cases in the code which occur with very low
66 but non-zero probability make it impossible to rule out the
67 possibility of bugs remaining in the program. DO NOT COMPRESS
68 ANY DATA WITH THIS PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE
69 POSSIBILITY, HOWEVER SMALL, THAT THE DATA WILL NOT BE RECOVERABLE.
70
71 That is not to say this program is inherently unreliable.
72 Indeed, I very much hope the opposite is true. bzip2 has been
73 carefully constructed and extensively tested.
74--*/
75
76
77
78/*----------------------------------------------------*/
79/*--- and now for something much more pleasant :-) ---*/
80/*----------------------------------------------------*/
81
82/*---------------------------------------------*/
83/*--
84 Place a 1 beside your platform, and 0 elsewhere.
85--*/
86
87/*--
88 Generic 32-bit Unix.
89 Also works on 64-bit Unix boxes.
90--*/
91#define BZ_UNIX 1
92
93/*--
94 Win32, as seen by Jacob Navia's excellent
95 port of (Chris Fraser & David Hanson)'s excellent
96 lcc compiler.
97--*/
98#define BZ_LCCWIN32 0
99
100
101
102/*---------------------------------------------*/
103/*--
104 Some stuff for all platforms.
105--*/
106
107#include <stdio.h>
108#include <stdlib.h>
109#if DEBUG
110 #include <assert.h>
111#endif
112#include <string.h>
113#include <signal.h>
114#include <errno.h>
115#include <math.h>
116
117#define ERROR_IF_EOF(i) { if ((i) == EOF) ioError(); }
118#define ERROR_IF_NOT_ZERO(i) { if ((i) != 0) ioError(); }
119#define ERROR_IF_MINUS_ONE(i) { if ((i) == (-1)) ioError(); }
120
121
122/*---------------------------------------------*/
123/*--
124 Platform-specific stuff.
125--*/
126
127#if BZ_UNIX
128 #include <utime.h>
129 #include <unistd.h>
130 #include <malloc.h>
131 #include <sys/stat.h>
132 #include <sys/times.h>
133
134 #define Int32 int
135 #define UInt32 unsigned int
136 #define Char char
137 #define UChar unsigned char
138 #define Int16 short
139 #define UInt16 unsigned short
140
141 #define PATH_SEP '/'
142 #define MY_LSTAT lstat
143 #define MY_S_IFREG S_ISREG
144 #define MY_STAT stat
145
146 #define APPEND_FILESPEC(root, name) \
147 root=snocString((root), (name))
148
149 #define SET_BINARY_MODE(fd) /**/
150
151 /*--
152 You should try very hard to persuade your C compiler
153 to inline the bits marked INLINE. Otherwise bzip2 will
154 run rather slowly. gcc version 2.x is recommended.
155 --*/
156 #ifdef __GNUC__
157 #define INLINE inline
158 #define NORETURN __attribute__ ((noreturn))
159 #else
160 #define INLINE /**/
161 #define NORETURN /**/
162 #endif
163#endif
164
165
166
167#if BZ_LCCWIN32
168 #include <io.h>
169 #include <fcntl.h>
170 #include <sys\stat.h>
171
172 #define Int32 int
173 #define UInt32 unsigned int
174 #define Int16 short
175 #define UInt16 unsigned short
176 #define Char char
177 #define UChar unsigned char
178
179 #define INLINE /**/
180 #define NORETURN /**/
181 #define PATH_SEP '\\'
182 #define MY_LSTAT _stat
183 #define MY_STAT _stat
184 #define MY_S_IFREG(x) ((x) & _S_IFREG)
185
186 #if 0
187 /*-- lcc-win32 seems to expand wildcards itself --*/
188 #define APPEND_FILESPEC(root, spec) \
189 do { \
190 if ((spec)[0] == '-') { \
191 root = snocString((root), (spec)); \
192 } else { \
193 struct _finddata_t c_file; \
194 long hFile; \
195 hFile = _findfirst((spec), &c_file); \
196 if ( hFile == -1L ) { \
197 root = snocString ((root), (spec)); \
198 } else { \
199 int anInt = 0; \
200 while ( anInt == 0 ) { \
201 root = snocString((root), \
202 &c_file.name[0]); \
203 anInt = _findnext(hFile, &c_file); \
204 } \
205 } \
206 } \
207 } while ( 0 )
208 #else
209 #define APPEND_FILESPEC(root, name) \
210 root = snocString ((root), (name))
211 #endif
212
213 #define SET_BINARY_MODE(fd) \
214 do { \
215 int retVal = setmode ( fileno ( fd ), \
216 O_BINARY ); \
217 ERROR_IF_MINUS_ONE ( retVal ); \
218 } while ( 0 )
219
220#endif
221
222
223/*---------------------------------------------*/
224/*--
225 Some more stuff for all platforms :-)
226--*/
227
228#define Bool unsigned char
229#define True 1
230#define False 0
231
232/*--
233 IntNative is your platform's `native' int size.
234 Only here to avoid probs with 64-bit platforms.
235--*/
236#define IntNative int
237
238
239/*--
240 change to 1, or compile with -DDEBUG=1 to debug
241--*/
242#ifndef DEBUG
243#define DEBUG 0
244#endif
245
246
247/*---------------------------------------------------*/
248/*--- ---*/
249/*---------------------------------------------------*/
250
251/*--
252 Implementation notes, July 1997
253 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
254
255 Memory allocation
256 ~~~~~~~~~~~~~~~~~
257 All large data structures are allocated on the C heap,
258 for better or for worse. That includes the various
259 arrays of pointers, striped words, bytes, frequency
260 tables and buffers for compression and decompression.
261
262 bzip2 can operate at various block-sizes, ranging from
263 100k to 900k in 100k steps, and it allocates only as
264 much as it needs to. When compressing, we know from the
265 command-line options what the block-size is going to be,
266 so all allocation can be done at start-up; if that
267 succeeds, there can be no further allocation problems.
268
269 Decompression is more complicated. Each compressed file
270 contains, in its header, a byte indicating the block
271 size used for compression. This means bzip2 potentially
272 needs to reallocate memory for each file it deals with,
273 which in turn opens the possibility for a memory allocation
274 failure part way through a run of files, by encountering
275 a file requiring a much larger block size than all the
276 ones preceding it.
277
278 The policy is to simply give up if a memory allocation
279 failure occurs. During decompression, it would be
280 possible to move on to subsequent files in the hope that
281 some might ask for a smaller block size, but the
282 complications for doing this seem more trouble than they
283 are worth.
284
285
286 Compressed file formats
287 ~~~~~~~~~~~~~~~~~~~~~~~
288 [This is now entirely different from both 0.21, and from
289 any previous Huffman-coded variant of bzip.
290 See the associated file bzip2.txt for details.]
291
292
293 Error conditions
294 ~~~~~~~~~~~~~~~~
295 Dealing with error conditions is the least satisfactory
296 aspect of bzip2. The policy is to try and leave the
297 filesystem in a consistent state, then quit, even if it
298 means not processing some of the files mentioned in the
299 command line. `A consistent state' means that a file
300 exists either in its compressed or uncompressed form,
301 but not both. This boils down to the rule `delete the
302 output file if an error condition occurs, leaving the
303 input intact'. Input files are only deleted when we can
304 be pretty sure the output file has been written and
305 closed successfully.
306
307 Errors are a dog because there's so many things to
308 deal with. The following can happen mid-file, and
309 require cleaning up.
310
311 internal `panics' -- indicating a bug
312 corrupted or inconsistent compressed file
313 can't allocate enough memory to decompress this file
314 I/O error reading/writing/opening/closing
315 signal catches -- Control-C, SIGTERM, SIGHUP.
316
317 Other conditions, primarily pertaining to file names,
318 can be checked in-between files, which makes dealing
319 with them easier.
320--*/
321
322
323
324/*---------------------------------------------------*/
325/*--- Misc (file handling) data decls ---*/
326/*---------------------------------------------------*/
327
328UInt32 bytesIn, bytesOut;
329Int32 verbosity;
330Bool keepInputFiles, smallMode, testFailsExist;
331UInt32 globalCrc;
332Int32 numFileNames, numFilesProcessed;
333
334
335/*-- source modes; F==file, I==stdin, O==stdout --*/
336#define SM_I2O 1
337#define SM_F2O 2
338#define SM_F2F 3
339
340/*-- operation modes --*/
341#define OM_Z 1
342#define OM_UNZ 2
343#define OM_TEST 3
344
345Int32 opMode;
346Int32 srcMode;
347
348
349Int32 longestFileName;
350Char inName[1024];
351Char outName[1024];
352Char *progName;
353Char progNameReally[1024];
354FILE *outputHandleJustInCase;
355
356void panic ( Char* ) NORETURN;
357void ioError ( void ) NORETURN;
358void compressOutOfMemory ( Int32, Int32 ) NORETURN;
359void uncompressOutOfMemory ( Int32, Int32 ) NORETURN;
360void blockOverrun ( void ) NORETURN;
361void badBlockHeader ( void ) NORETURN;
362void badBGLengths ( void ) NORETURN;
363void crcError ( UInt32, UInt32 ) NORETURN;
364void bitStreamEOF ( void ) NORETURN;
365void cleanUpAndFail ( Int32 ) NORETURN;
366void compressedStreamEOF ( void ) NORETURN;
367
368void* myMalloc ( Int32 );
369
370
371
372/*---------------------------------------------------*/
373/*--- Data decls for the front end ---*/
374/*---------------------------------------------------*/
375
376/*--
377 The overshoot bytes allow us to avoid most of
378 the cost of pointer renormalisation during
379 comparison of rotations in sorting.
380 The figure of 20 is derived as follows:
381 qSort3 allows an overshoot of up to 10.
382 It then calls simpleSort, which calls
383 fullGtU, also with max overshoot 10.
384 fullGtU does up to 10 comparisons without
385 renormalising, giving 10+10 == 20.
386--*/
387#define NUM_OVERSHOOT_BYTES 20
388
389/*--
390 These are the main data structures for
391 the Burrows-Wheeler transform.
392--*/
393
394/*--
395 Pointers to compression and decompression
396 structures. Set by
397 allocateCompressStructures and
398 setDecompressStructureSizes
399
400 The structures are always set to be suitable
401 for a block of size 100000 * blockSize100k.
402--*/
403UChar *block; /*-- compress --*/
404UInt16 *quadrant; /*-- compress --*/
405Int32 *zptr; /*-- compress --*/
406UInt16 *szptr; /*-- overlays zptr ---*/
407Int32 *ftab; /*-- compress --*/
408
409UInt16 *ll16; /*-- small decompress --*/
410UChar *ll4; /*-- small decompress --*/
411
412Int32 *tt; /*-- fast decompress --*/
413UChar *ll8; /*-- fast decompress --*/
414
415
416/*--
417 freq table collected to save a pass over the data
418 during decompression.
419--*/
420Int32 unzftab[256];
421
422
423/*--
424 index of the last char in the block, so
425 the block size == last + 1.
426--*/
427Int32 last;
428
429
430/*--
431 index in zptr[] of original string after sorting.
432--*/
433Int32 origPtr;
434
435
436/*--
437 always: in the range 0 .. 9.
438 The current block size is 100000 * this number.
439--*/
440Int32 blockSize100k;
441
442
443/*--
444 Used when sorting. If too many long comparisons
445 happen, we stop sorting, randomise the block
446 slightly, and try again.
447--*/
448
449Int32 workFactor;
450Int32 workDone;
451Int32 workLimit;
452Bool blockRandomised;
453Bool firstAttempt;
454Int32 nBlocksRandomised;
455
456
457
458/*---------------------------------------------------*/
459/*--- Data decls for the back end ---*/
460/*---------------------------------------------------*/
461
462#define MAX_ALPHA_SIZE 258
463#define MAX_CODE_LEN 23
464
465#define RUNA 0
466#define RUNB 1
467
468#define N_GROUPS 6
469#define G_SIZE 50
470#define N_ITERS 4
471
472#define MAX_SELECTORS (2 + (900000 / G_SIZE))
473
474Bool inUse[256];
475Int32 nInUse;
476
477UChar seqToUnseq[256];
478UChar unseqToSeq[256];
479
480UChar selector [MAX_SELECTORS];
481UChar selectorMtf[MAX_SELECTORS];
482
483Int32 nMTF;
484
485Int32 mtfFreq[MAX_ALPHA_SIZE];
486
487UChar len [N_GROUPS][MAX_ALPHA_SIZE];
488
489/*-- decompress only --*/
490Int32 limit [N_GROUPS][MAX_ALPHA_SIZE];
491Int32 base [N_GROUPS][MAX_ALPHA_SIZE];
492Int32 perm [N_GROUPS][MAX_ALPHA_SIZE];
493Int32 minLens[N_GROUPS];
494
495/*-- compress only --*/
496Int32 code [N_GROUPS][MAX_ALPHA_SIZE];
497Int32 rfreq[N_GROUPS][MAX_ALPHA_SIZE];
498
499
500/*---------------------------------------------------*/
501/*--- 32-bit CRC grunge ---*/
502/*---------------------------------------------------*/
503
504/*--
505 I think this is an implementation of the AUTODIN-II,
506 Ethernet & FDDI 32-bit CRC standard. Vaguely derived
507 from code by Rob Warnock, in Section 51 of the
508 comp.compression FAQ.
509--*/
510
511UInt32 crc32Table[256] = {
512
513 /*-- Ugly, innit? --*/
514
515 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
516 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
517 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
518 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
519 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
520 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
521 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
522 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
523 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
524 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
525 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
526 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
527 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
528 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
529 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
530 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
531 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
532 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
533 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
534 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
535 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
536 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
537 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
538 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
539 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
540 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
541 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
542 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
543 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
544 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
545 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
546 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
547 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
548 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
549 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
550 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
551 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
552 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
553 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
554 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
555 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
556 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
557 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
558 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
559 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
560 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
561 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
562 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
563 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
564 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
565 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
566 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
567 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
568 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
569 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
570 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
571 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
572 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
573 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
574 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
575 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
576 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
577 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
578 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
579};
580
581
582/*---------------------------------------------*/
583void initialiseCRC ( void )
584{
585 globalCrc = 0xffffffffL;
586}
587
588
589/*---------------------------------------------*/
590UInt32 getFinalCRC ( void )
591{
592 return ~globalCrc;
593}
594
595
596/*---------------------------------------------*/
597UInt32 getGlobalCRC ( void )
598{
599 return globalCrc;
600}
601
602
603/*---------------------------------------------*/
604void setGlobalCRC ( UInt32 newCrc )
605{
606 globalCrc = newCrc;
607}
608
609
610/*---------------------------------------------*/
611#define UPDATE_CRC(crcVar,cha) \
612{ \
613 crcVar = (crcVar << 8) ^ \
614 crc32Table[(crcVar >> 24) ^ \
615 ((UChar)cha)]; \
616}
617
618
619/*---------------------------------------------------*/
620/*--- Bit stream I/O ---*/
621/*---------------------------------------------------*/
622
623
624UInt32 bsBuff;
625Int32 bsLive;
626FILE* bsStream;
627Bool bsWriting;
628
629
630/*---------------------------------------------*/
631void bsSetStream ( FILE* f, Bool wr )
632{
633 if (bsStream != NULL) panic ( "bsSetStream" );
634 bsStream = f;
635 bsLive = 0;
636 bsBuff = 0;
637 bytesOut = 0;
638 bytesIn = 0;
639 bsWriting = wr;
640}
641
642
643/*---------------------------------------------*/
644void bsFinishedWithStream ( void )
645{
646 if (bsWriting)
647 while (bsLive > 0) {
648 fputc ( (UChar)(bsBuff >> 24), bsStream );
649 bsBuff <<= 8;
650 bsLive -= 8;
651 bytesOut++;
652 }
653 bsStream = NULL;
654}
655
656
657/*---------------------------------------------*/
658#define bsNEEDR(nz) \
659{ \
660 while (bsLive < nz) { \
661 Int32 zzi = fgetc ( bsStream ); \
662 if (zzi == EOF) compressedStreamEOF(); \
663 bsBuff = (bsBuff << 8) | (zzi & 0xffL); \
664 bsLive += 8; \
665 } \
666}
667
668
669/*---------------------------------------------*/
670#define bsNEEDW(nz) \
671{ \
672 while (bsLive >= 8) { \
673 fputc ( (UChar)(bsBuff >> 24), \
674 bsStream ); \
675 bsBuff <<= 8; \
676 bsLive -= 8; \
677 bytesOut++; \
678 } \
679}
680
681
682/*---------------------------------------------*/
683#define bsR1(vz) \
684{ \
685 bsNEEDR(1); \
686 vz = (bsBuff >> (bsLive-1)) & 1; \
687 bsLive--; \
688}
689
690
691/*---------------------------------------------*/
692INLINE UInt32 bsR ( Int32 n )
693{
694 UInt32 v;
695 bsNEEDR ( n );
696 v = (bsBuff >> (bsLive-n)) & ((1 << n)-1);
697 bsLive -= n;
698 return v;
699}
700
701
702/*---------------------------------------------*/
703INLINE void bsW ( Int32 n, UInt32 v )
704{
705 bsNEEDW ( n );
706 bsBuff |= (v << (32 - bsLive - n));
707 bsLive += n;
708}
709
710
711/*---------------------------------------------*/
712UChar bsGetUChar ( void )
713{
714 return (UChar)bsR(8);
715}
716
717
718/*---------------------------------------------*/
719void bsPutUChar ( UChar c )
720{
721 bsW(8, (UInt32)c );
722}
723
724
725/*---------------------------------------------*/
726Int32 bsGetUInt32 ( void )
727{
728 UInt32 u;
729 u = 0;
730 u = (u << 8) | bsR(8);
731 u = (u << 8) | bsR(8);
732 u = (u << 8) | bsR(8);
733 u = (u << 8) | bsR(8);
734 return u;
735}
736
737
738/*---------------------------------------------*/
739UInt32 bsGetIntVS ( UInt32 numBits )
740{
741 return (UInt32)bsR(numBits);
742}
743
744
745/*---------------------------------------------*/
746UInt32 bsGetInt32 ( void )
747{
748 return (Int32)bsGetUInt32();
749}
750
751
752/*---------------------------------------------*/
753void bsPutUInt32 ( UInt32 u )
754{
755 bsW ( 8, (u >> 24) & 0xffL );
756 bsW ( 8, (u >> 16) & 0xffL );
757 bsW ( 8, (u >> 8) & 0xffL );
758 bsW ( 8, u & 0xffL );
759}
760
761
762/*---------------------------------------------*/
763void bsPutInt32 ( Int32 c )
764{
765 bsPutUInt32 ( (UInt32)c );
766}
767
768
769/*---------------------------------------------*/
770void bsPutIntVS ( Int32 numBits, UInt32 c )
771{
772 bsW ( numBits, c );
773}
774
775
776/*---------------------------------------------------*/
777/*--- Huffman coding low-level stuff ---*/
778/*---------------------------------------------------*/
779
780#define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
781#define DEPTHOF(zz1) ((zz1) & 0x000000ff)
782#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
783
784#define ADDWEIGHTS(zw1,zw2) \
785 (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
786 (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
787
788#define UPHEAP(z) \
789{ \
790 Int32 zz, tmp; \
791 zz = z; tmp = heap[zz]; \
792 while (weight[tmp] < weight[heap[zz >> 1]]) { \
793 heap[zz] = heap[zz >> 1]; \
794 zz >>= 1; \
795 } \
796 heap[zz] = tmp; \
797}
798
799#define DOWNHEAP(z) \
800{ \
801 Int32 zz, yy, tmp; \
802 zz = z; tmp = heap[zz]; \
803 while (True) { \
804 yy = zz << 1; \
805 if (yy > nHeap) break; \
806 if (yy < nHeap && \
807 weight[heap[yy+1]] < weight[heap[yy]]) \
808 yy++; \
809 if (weight[tmp] < weight[heap[yy]]) break; \
810 heap[zz] = heap[yy]; \
811 zz = yy; \
812 } \
813 heap[zz] = tmp; \
814}
815
816
817/*---------------------------------------------*/
818void hbMakeCodeLengths ( UChar *len,
819 Int32 *freq,
820 Int32 alphaSize,
821 Int32 maxLen )
822{
823 /*--
824 Nodes and heap entries run from 1. Entry 0
825 for both the heap and nodes is a sentinel.
826 --*/
827 Int32 nNodes, nHeap, n1, n2, i, j, k;
828 Bool tooLong;
829
830 Int32 heap [ MAX_ALPHA_SIZE + 2 ];
831 Int32 weight [ MAX_ALPHA_SIZE * 2 ];
832 Int32 parent [ MAX_ALPHA_SIZE * 2 ];
833
834 for (i = 0; i < alphaSize; i++)
835 weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
836
837 while (True) {
838
839 nNodes = alphaSize;
840 nHeap = 0;
841
842 heap[0] = 0;
843 weight[0] = 0;
844 parent[0] = -2;
845
846 for (i = 1; i <= alphaSize; i++) {
847 parent[i] = -1;
848 nHeap++;
849 heap[nHeap] = i;
850 UPHEAP(nHeap);
851 }
852 if (!(nHeap < (MAX_ALPHA_SIZE+2)))
853 panic ( "hbMakeCodeLengths(1)" );
854
855 while (nHeap > 1) {
856 n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
857 n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
858 nNodes++;
859 parent[n1] = parent[n2] = nNodes;
860 weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
861 parent[nNodes] = -1;
862 nHeap++;
863 heap[nHeap] = nNodes;
864 UPHEAP(nHeap);
865 }
866 if (!(nNodes < (MAX_ALPHA_SIZE * 2)))
867 panic ( "hbMakeCodeLengths(2)" );
868
869 tooLong = False;
870 for (i = 1; i <= alphaSize; i++) {
871 j = 0;
872 k = i;
873 while (parent[k] >= 0) { k = parent[k]; j++; }
874 len[i-1] = j;
875 if (j > maxLen) tooLong = True;
876 }
877
878 if (! tooLong) break;
879
880 for (i = 1; i < alphaSize; i++) {
881 j = weight[i] >> 8;
882 j = 1 + (j / 2);
883 weight[i] = j << 8;
884 }
885 }
886}
887
888
889/*---------------------------------------------*/
890void hbAssignCodes ( Int32 *code,
891 UChar *length,
892 Int32 minLen,
893 Int32 maxLen,
894 Int32 alphaSize )
895{
896 Int32 n, vec, i;
897
898 vec = 0;
899 for (n = minLen; n <= maxLen; n++) {
900 for (i = 0; i < alphaSize; i++)
901 if (length[i] == n) { code[i] = vec; vec++; };
902 vec <<= 1;
903 }
904}
905
906
907/*---------------------------------------------*/
908void hbCreateDecodeTables ( Int32 *limit,
909 Int32 *base,
910 Int32 *perm,
911 UChar *length,
912 Int32 minLen,
913 Int32 maxLen,
914 Int32 alphaSize )
915{
916 Int32 pp, i, j, vec;
917
918 pp = 0;
919 for (i = minLen; i <= maxLen; i++)
920 for (j = 0; j < alphaSize; j++)
921 if (length[j] == i) { perm[pp] = j; pp++; };
922
923 for (i = 0; i < MAX_CODE_LEN; i++) base[i] = 0;
924 for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
925
926 for (i = 1; i < MAX_CODE_LEN; i++) base[i] += base[i-1];
927
928 for (i = 0; i < MAX_CODE_LEN; i++) limit[i] = 0;
929 vec = 0;
930
931 for (i = minLen; i <= maxLen; i++) {
932 vec += (base[i+1] - base[i]);
933 limit[i] = vec-1;
934 vec <<= 1;
935 }
936 for (i = minLen + 1; i <= maxLen; i++)
937 base[i] = ((limit[i-1] + 1) << 1) - base[i];
938}
939
940
941
942/*---------------------------------------------------*/
943/*--- Undoing the reversible transformation ---*/
944/*---------------------------------------------------*/
945
946/*---------------------------------------------*/
947#define SET_LL4(i,n) \
948 { if (((i) & 0x1) == 0) \
949 ll4[(i) >> 1] = (ll4[(i) >> 1] & 0xf0) | (n); else \
950 ll4[(i) >> 1] = (ll4[(i) >> 1] & 0x0f) | ((n) << 4); \
951 }
952
953#define GET_LL4(i) \
954 (((UInt32)(ll4[(i) >> 1])) >> (((i) << 2) & 0x4) & 0xF)
955
956#define SET_LL(i,n) \
957 { ll16[i] = (UInt16)(n & 0x0000ffff); \
958 SET_LL4(i, n >> 16); \
959 }
960
961#define GET_LL(i) \
962 (((UInt32)ll16[i]) | (GET_LL4(i) << 16))
963
964
965/*---------------------------------------------*/
966/*--
967 Manage memory for compression/decompression.
968 When compressing, a single block size applies to
969 all files processed, and that's set when the
970 program starts. But when decompressing, each file
971 processed could have been compressed with a
972 different block size, so we may have to free
973 and reallocate on a per-file basis.
974
975 A call with argument of zero means
976 `free up everything.' And a value of zero for
977 blockSize100k means no memory is currently allocated.
978--*/
979
980
981/*---------------------------------------------*/
982void allocateCompressStructures ( void )
983{
984 Int32 n = 100000 * blockSize100k;
985 block = malloc ( (n + 1 + NUM_OVERSHOOT_BYTES) * sizeof(UChar) );
986 quadrant = malloc ( (n + NUM_OVERSHOOT_BYTES) * sizeof(Int16) );
987 zptr = malloc ( n * sizeof(Int32) );
988 ftab = malloc ( 65537 * sizeof(Int32) );
989
990 if (block == NULL || quadrant == NULL ||
991 zptr == NULL || ftab == NULL) {
992 Int32 totalDraw
993 = (n + 1 + NUM_OVERSHOOT_BYTES) * sizeof(UChar) +
994 (n + NUM_OVERSHOOT_BYTES) * sizeof(Int16) +
995 n * sizeof(Int32) +
996 65537 * sizeof(Int32);
997
998 compressOutOfMemory ( totalDraw, n );
999 }
1000
1001 /*--
1002 Since we want valid indexes for block of
1003 -1 to n + NUM_OVERSHOOT_BYTES - 1
1004 inclusive.
1005 --*/
1006 block++;
1007
1008 /*--
1009 The back end needs a place to store the MTF values
1010 whilst it calculates the coding tables. We could
1011 put them in the zptr array. However, these values
1012 will fit in a short, so we overlay szptr at the
1013 start of zptr, in the hope of reducing the number
1014 of cache misses induced by the multiple traversals
1015 of the MTF values when calculating coding tables.
1016 Seems to improve compression speed by about 1%.
1017 --*/
1018 szptr = (UInt16*)zptr;
1019}
1020
1021
1022/*---------------------------------------------*/
1023void setDecompressStructureSizes ( Int32 newSize100k )
1024{
1025 if (! (0 <= newSize100k && newSize100k <= 9 &&
1026 0 <= blockSize100k && blockSize100k <= 9))
1027 panic ( "setDecompressStructureSizes" );
1028
1029 if (newSize100k == blockSize100k) return;
1030
1031 blockSize100k = newSize100k;
1032
1033 if (ll16 != NULL) free ( ll16 );
1034 if (ll4 != NULL) free ( ll4 );
1035 if (ll8 != NULL) free ( ll8 );
1036 if (tt != NULL) free ( tt );
1037
1038 if (newSize100k == 0) return;
1039
1040 if (smallMode) {
1041
1042 Int32 n = 100000 * newSize100k;
1043 ll16 = malloc ( n * sizeof(UInt16) );
1044 ll4 = malloc ( ((n+1) >> 1) * sizeof(UChar) );
1045
1046 if (ll4 == NULL || ll16 == NULL) {
1047 Int32 totalDraw
1048 = n * sizeof(Int16) + ((n+1) >> 1) * sizeof(UChar);
1049 uncompressOutOfMemory ( totalDraw, n );
1050 }
1051
1052 } else {
1053
1054 Int32 n = 100000 * newSize100k;
1055 ll8 = malloc ( n * sizeof(UChar) );
1056 tt = malloc ( n * sizeof(Int32) );
1057
1058 if (ll8 == NULL || tt == NULL) {
1059 Int32 totalDraw
1060 = n * sizeof(UChar) + n * sizeof(UInt32);
1061 uncompressOutOfMemory ( totalDraw, n );
1062 }
1063
1064 }
1065}
1066
1067
1068
1069/*---------------------------------------------------*/
1070/*--- The new back end ---*/
1071/*---------------------------------------------------*/
1072
1073/*---------------------------------------------*/
1074void makeMaps ( void )
1075{
1076 Int32 i;
1077 nInUse = 0;
1078 for (i = 0; i < 256; i++)
1079 if (inUse[i]) {
1080 seqToUnseq[nInUse] = i;
1081 unseqToSeq[i] = nInUse;
1082 nInUse++;
1083 }
1084}
1085
1086
1087/*---------------------------------------------*/
1088void generateMTFValues ( void )
1089{
1090 UChar yy[256];
1091 Int32 i, j;
1092 UChar tmp;
1093 UChar tmp2;
1094 Int32 zPend;
1095 Int32 wr;
1096 Int32 EOB;
1097
1098 makeMaps();
1099 EOB = nInUse+1;
1100
1101 for (i = 0; i <= EOB; i++) mtfFreq[i] = 0;
1102
1103 wr = 0;
1104 zPend = 0;
1105 for (i = 0; i < nInUse; i++) yy[i] = (UChar) i;
1106
1107
1108 for (i = 0; i <= last; i++) {
1109 UChar ll_i;
1110
1111 #if DEBUG
1112 assert (wr <= i);
1113 #endif
1114
1115 ll_i = unseqToSeq[block[zptr[i] - 1]];
1116 #if DEBUG
1117 assert (ll_i < nInUse);
1118 #endif
1119
1120 j = 0;
1121 tmp = yy[j];
1122 while ( ll_i != tmp ) {
1123 j++;
1124 tmp2 = tmp;
1125 tmp = yy[j];
1126 yy[j] = tmp2;
1127 };
1128 yy[0] = tmp;
1129
1130 if (j == 0) {
1131 zPend++;
1132 } else {
1133 if (zPend > 0) {
1134 zPend--;
1135 while (True) {
1136 switch (zPend % 2) {
1137 case 0: szptr[wr] = RUNA; wr++; mtfFreq[RUNA]++; break;
1138 case 1: szptr[wr] = RUNB; wr++; mtfFreq[RUNB]++; break;
1139 };
1140 if (zPend < 2) break;
1141 zPend = (zPend - 2) / 2;
1142 };
1143 zPend = 0;
1144 }
1145 szptr[wr] = j+1; wr++; mtfFreq[j+1]++;
1146 }
1147 }
1148
1149 if (zPend > 0) {
1150 zPend--;
1151 while (True) {
1152 switch (zPend % 2) {
1153 case 0: szptr[wr] = RUNA; wr++; mtfFreq[RUNA]++; break;
1154 case 1: szptr[wr] = RUNB; wr++; mtfFreq[RUNB]++; break;
1155 };
1156 if (zPend < 2) break;
1157 zPend = (zPend - 2) / 2;
1158 };
1159 }
1160
1161 szptr[wr] = EOB; wr++; mtfFreq[EOB]++;
1162
1163 nMTF = wr;
1164}
1165
1166
1167/*---------------------------------------------*/
1168#define LESSER_ICOST 0
1169#define GREATER_ICOST 15
1170
1171void sendMTFValues ( void )
1172{
1173 Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
1174 Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
1175 Int32 nGroups, nBytes;
1176
1177 /*--
1178 UChar len [N_GROUPS][MAX_ALPHA_SIZE];
1179 is a global since the decoder also needs it.
1180
1181 Int32 code[N_GROUPS][MAX_ALPHA_SIZE];
1182 Int32 rfreq[N_GROUPS][MAX_ALPHA_SIZE];
1183 are also globals only used in this proc.
1184 Made global to keep stack frame size small.
1185 --*/
1186
1187
1188 UInt16 cost[N_GROUPS];
1189 Int32 fave[N_GROUPS];
1190
1191 if (verbosity >= 3)
1192 fprintf ( stderr,
1193 " %d in block, %d after MTF & 1-2 coding, %d+2 syms in use\n",
1194 last+1, nMTF, nInUse );
1195
1196 alphaSize = nInUse+2;
1197 for (t = 0; t < N_GROUPS; t++)
1198 for (v = 0; v < alphaSize; v++)
1199 len[t][v] = GREATER_ICOST;
1200
1201 /*--- Decide how many coding tables to use ---*/
1202 if (nMTF <= 0) panic ( "sendMTFValues(0)" );
1203 if (nMTF < 200) nGroups = 2; else
1204 if (nMTF < 800) nGroups = 4; else
1205 nGroups = 6;
1206
1207 /*--- Generate an initial set of coding tables ---*/
1208 {
1209 Int32 nPart, remF, tFreq, aFreq;
1210
1211 nPart = nGroups;
1212 remF = nMTF;
1213 gs = 0;
1214 while (nPart > 0) {
1215 tFreq = remF / nPart;
1216 ge = gs-1;
1217 aFreq = 0;
1218 while (aFreq < tFreq && ge < alphaSize-1) {
1219 ge++;
1220 aFreq += mtfFreq[ge];
1221 }
1222
1223 if (ge > gs
1224 && nPart != nGroups && nPart != 1
1225 && ((nGroups-nPart) % 2 == 1)) {
1226 aFreq -= mtfFreq[ge];
1227 ge--;
1228 }
1229
1230 if (verbosity >= 3)
1231 fprintf ( stderr,
1232 " initial group %d, [%d .. %d], has %d syms (%4.1f%%)\n",
1233 nPart, gs, ge, aFreq,
1234 (100.0 * (float)aFreq) / (float)nMTF );
1235
1236 for (v = 0; v < alphaSize; v++)
1237 if (v >= gs && v <= ge)
1238 len[nPart-1][v] = LESSER_ICOST; else
1239 len[nPart-1][v] = GREATER_ICOST;
1240
1241 nPart--;
1242 gs = ge+1;
1243 remF -= aFreq;
1244 }
1245 }
1246
1247 /*---
1248 Iterate up to N_ITERS times to improve the tables.
1249 ---*/
1250 for (iter = 0; iter < N_ITERS; iter++) {
1251
1252 for (t = 0; t < nGroups; t++) fave[t] = 0;
1253
1254 for (t = 0; t < nGroups; t++)
1255 for (v = 0; v < alphaSize; v++)
1256 rfreq[t][v] = 0;
1257
1258 nSelectors = 0;
1259 totc = 0;
1260 gs = 0;
1261 while (True) {
1262
1263 /*--- Set group start & end marks. --*/
1264 if (gs >= nMTF) break;
1265 ge = gs + G_SIZE - 1;
1266 if (ge >= nMTF) ge = nMTF-1;
1267
1268 /*--
1269 Calculate the cost of this group as coded
1270 by each of the coding tables.
1271 --*/
1272 for (t = 0; t < nGroups; t++) cost[t] = 0;
1273
1274 if (nGroups == 6) {
1275 register UInt16 cost0, cost1, cost2, cost3, cost4, cost5;
1276 cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
1277 for (i = gs; i <= ge; i++) {
1278 UInt16 icv = szptr[i];
1279 cost0 += len[0][icv];
1280 cost1 += len[1][icv];
1281 cost2 += len[2][icv];
1282 cost3 += len[3][icv];
1283 cost4 += len[4][icv];
1284 cost5 += len[5][icv];
1285 }
1286 cost[0] = cost0; cost[1] = cost1; cost[2] = cost2;
1287 cost[3] = cost3; cost[4] = cost4; cost[5] = cost5;
1288 } else {
1289 for (i = gs; i <= ge; i++) {
1290 UInt16 icv = szptr[i];
1291 for (t = 0; t < nGroups; t++) cost[t] += len[t][icv];
1292 }
1293 }
1294
1295 /*--
1296 Find the coding table which is best for this group,
1297 and record its identity in the selector table.
1298 --*/
1299 bc = 999999999; bt = -1;
1300 for (t = 0; t < nGroups; t++)
1301 if (cost[t] < bc) { bc = cost[t]; bt = t; };
1302 totc += bc;
1303 fave[bt]++;
1304 selector[nSelectors] = bt;
1305 nSelectors++;
1306
1307 /*--
1308 Increment the symbol frequencies for the selected table.
1309 --*/
1310 for (i = gs; i <= ge; i++)
1311 rfreq[bt][ szptr[i] ]++;
1312
1313 gs = ge+1;
1314 }
1315 if (verbosity >= 3) {
1316 fprintf ( stderr,
1317 " pass %d: size is %d, grp uses are ",
1318 iter+1, totc/8 );
1319 for (t = 0; t < nGroups; t++)
1320 fprintf ( stderr, "%d ", fave[t] );
1321 fprintf ( stderr, "\n" );
1322 }
1323
1324 /*--
1325 Recompute the tables based on the accumulated frequencies.
1326 --*/
1327 for (t = 0; t < nGroups; t++)
1328 hbMakeCodeLengths ( &len[t][0], &rfreq[t][0], alphaSize, 20 );
1329 }
1330
1331
1332 if (!(nGroups < 8)) panic ( "sendMTFValues(1)" );
1333 if (!(nSelectors < 32768 &&
1334 nSelectors <= (2 + (900000 / G_SIZE))))
1335 panic ( "sendMTFValues(2)" );
1336
1337
1338 /*--- Compute MTF values for the selectors. ---*/
1339 {
1340 UChar pos[N_GROUPS], ll_i, tmp2, tmp;
1341 for (i = 0; i < nGroups; i++) pos[i] = i;
1342 for (i = 0; i < nSelectors; i++) {
1343 ll_i = selector[i];
1344 j = 0;
1345 tmp = pos[j];
1346 while ( ll_i != tmp ) {
1347 j++;
1348 tmp2 = tmp;
1349 tmp = pos[j];
1350 pos[j] = tmp2;
1351 };
1352 pos[0] = tmp;
1353 selectorMtf[i] = j;
1354 }
1355 };
1356
1357 /*--- Assign actual codes for the tables. --*/
1358 for (t = 0; t < nGroups; t++) {
1359 minLen = 32;
1360 maxLen = 0;
1361 for (i = 0; i < alphaSize; i++) {
1362 if (len[t][i] > maxLen) maxLen = len[t][i];
1363 if (len[t][i] < minLen) minLen = len[t][i];
1364 }
1365 if (maxLen > 20) panic ( "sendMTFValues(3)" );
1366 if (minLen < 1) panic ( "sendMTFValues(4)" );
1367 hbAssignCodes ( &code[t][0], &len[t][0],
1368 minLen, maxLen, alphaSize );
1369 }
1370
1371 /*--- Transmit the mapping table. ---*/
1372 {
1373 Bool inUse16[16];
1374 for (i = 0; i < 16; i++) {
1375 inUse16[i] = False;
1376 for (j = 0; j < 16; j++)
1377 if (inUse[i * 16 + j]) inUse16[i] = True;
1378 }
1379
1380 nBytes = bytesOut;
1381 for (i = 0; i < 16; i++)
1382 if (inUse16[i]) bsW(1,1); else bsW(1,0);
1383
1384 for (i = 0; i < 16; i++)
1385 if (inUse16[i])
1386 for (j = 0; j < 16; j++)
1387 if (inUse[i * 16 + j]) bsW(1,1); else bsW(1,0);
1388
1389 if (verbosity >= 3)
1390 fprintf ( stderr, " bytes: mapping %d, ", bytesOut-nBytes );
1391 }
1392
1393 /*--- Now the selectors. ---*/
1394 nBytes = bytesOut;
1395 bsW ( 3, nGroups );
1396 bsW ( 15, nSelectors );
1397 for (i = 0; i < nSelectors; i++) {
1398 for (j = 0; j < selectorMtf[i]; j++) bsW(1,1);
1399 bsW(1,0);
1400 }
1401 if (verbosity >= 3)
1402 fprintf ( stderr, "selectors %d, ", bytesOut-nBytes );
1403
1404 /*--- Now the coding tables. ---*/
1405 nBytes = bytesOut;
1406
1407 for (t = 0; t < nGroups; t++) {
1408 Int32 curr = len[t][0];
1409 bsW ( 5, curr );
1410 for (i = 0; i < alphaSize; i++) {
1411 while (curr < len[t][i]) { bsW(2,2); curr++; /* 10 */ };
1412 while (curr > len[t][i]) { bsW(2,3); curr--; /* 11 */ };
1413 bsW ( 1, 0 );
1414 }
1415 }
1416
1417 if (verbosity >= 3)
1418 fprintf ( stderr, "code lengths %d, ", bytesOut-nBytes );
1419
1420 /*--- And finally, the block data proper ---*/
1421 nBytes = bytesOut;
1422 selCtr = 0;
1423 gs = 0;
1424 while (True) {
1425 if (gs >= nMTF) break;
1426 ge = gs + G_SIZE - 1;
1427 if (ge >= nMTF) ge = nMTF-1;
1428 for (i = gs; i <= ge; i++) {
1429 #if DEBUG
1430 assert (selector[selCtr] < nGroups);
1431 #endif
1432 bsW ( len [selector[selCtr]] [szptr[i]],
1433 code [selector[selCtr]] [szptr[i]] );
1434 }
1435
1436 gs = ge+1;
1437 selCtr++;
1438 }
1439 if (!(selCtr == nSelectors)) panic ( "sendMTFValues(5)" );
1440
1441 if (verbosity >= 3)
1442 fprintf ( stderr, "codes %d\n", bytesOut-nBytes );
1443}
1444
1445
1446/*---------------------------------------------*/
1447void moveToFrontCodeAndSend ( void )
1448{
1449 bsPutIntVS ( 24, origPtr );
1450 generateMTFValues();
1451 sendMTFValues();
1452}
1453
1454
1455/*---------------------------------------------*/
1456void recvDecodingTables ( void )
1457{
1458 Int32 i, j, t, nGroups, nSelectors, alphaSize;
1459 Int32 minLen, maxLen;
1460 Bool inUse16[16];
1461
1462 /*--- Receive the mapping table ---*/
1463 for (i = 0; i < 16; i++)
1464 if (bsR(1) == 1)
1465 inUse16[i] = True; else
1466 inUse16[i] = False;
1467
1468 for (i = 0; i < 256; i++) inUse[i] = False;
1469
1470 for (i = 0; i < 16; i++)
1471 if (inUse16[i])
1472 for (j = 0; j < 16; j++)
1473 if (bsR(1) == 1) inUse[i * 16 + j] = True;
1474
1475 makeMaps();
1476 alphaSize = nInUse+2;
1477
1478 /*--- Now the selectors ---*/
1479 nGroups = bsR ( 3 );
1480 nSelectors = bsR ( 15 );
1481 for (i = 0; i < nSelectors; i++) {
1482 j = 0;
1483 while (bsR(1) == 1) j++;
1484 selectorMtf[i] = j;
1485 }
1486
1487 /*--- Undo the MTF values for the selectors. ---*/
1488 {
1489 UChar pos[N_GROUPS], tmp, v;
1490 for (v = 0; v < nGroups; v++) pos[v] = v;
1491
1492 for (i = 0; i < nSelectors; i++) {
1493 v = selectorMtf[i];
1494 tmp = pos[v];
1495 while (v > 0) { pos[v] = pos[v-1]; v--; }
1496 pos[0] = tmp;
1497 selector[i] = tmp;
1498 }
1499 }
1500
1501 /*--- Now the coding tables ---*/
1502 for (t = 0; t < nGroups; t++) {
1503 Int32 curr = bsR ( 5 );
1504 for (i = 0; i < alphaSize; i++) {
1505 while (bsR(1) == 1) {
1506 if (bsR(1) == 0) curr++; else curr--;
1507 }
1508 len[t][i] = curr;
1509 }
1510 }
1511
1512 /*--- Create the Huffman decoding tables ---*/
1513 for (t = 0; t < nGroups; t++) {
1514 minLen = 32;
1515 maxLen = 0;
1516 for (i = 0; i < alphaSize; i++) {
1517 if (len[t][i] > maxLen) maxLen = len[t][i];
1518 if (len[t][i] < minLen) minLen = len[t][i];
1519 }
1520 hbCreateDecodeTables (
1521 &limit[t][0], &base[t][0], &perm[t][0], &len[t][0],
1522 minLen, maxLen, alphaSize
1523 );
1524 minLens[t] = minLen;
1525 }
1526}
1527
1528
1529/*---------------------------------------------*/
1530#define GET_MTF_VAL(lval) \
1531{ \
1532 Int32 zt, zn, zvec, zj; \
1533 if (groupPos == 0) { \
1534 groupNo++; \
1535 groupPos = G_SIZE; \
1536 } \
1537 groupPos--; \
1538 zt = selector[groupNo]; \
1539 zn = minLens[zt]; \
1540 zvec = bsR ( zn ); \
1541 while (zvec > limit[zt][zn]) { \
1542 zn++; bsR1(zj); \
1543 zvec = (zvec << 1) | zj; \
1544 }; \
1545 lval = perm[zt][zvec - base[zt][zn]]; \
1546}
1547
1548
1549/*---------------------------------------------*/
1550void getAndMoveToFrontDecode ( void )
1551{
1552 UChar yy[256];
1553 Int32 i, j, nextSym, limitLast;
1554 Int32 EOB, groupNo, groupPos;
1555
1556 limitLast = 100000 * blockSize100k;
1557 origPtr = bsGetIntVS ( 24 );
1558
1559 recvDecodingTables();
1560 EOB = nInUse+1;
1561 groupNo = -1;
1562 groupPos = 0;
1563
1564 /*--
1565 Setting up the unzftab entries here is not strictly
1566 necessary, but it does save having to do it later
1567 in a separate pass, and so saves a block's worth of
1568 cache misses.
1569 --*/
1570 for (i = 0; i <= 255; i++) unzftab[i] = 0;
1571
1572 for (i = 0; i <= 255; i++) yy[i] = (UChar) i;
1573
1574 last = -1;
1575
1576 GET_MTF_VAL(nextSym);
1577
1578 while (True) {
1579
1580 if (nextSym == EOB) break;
1581
1582 if (nextSym == RUNA || nextSym == RUNB) {
1583 UChar ch;
1584 Int32 s = -1;
1585 Int32 N = 1;
1586 do {
1587 if (nextSym == RUNA) s = s + (0+1) * N; else
1588 if (nextSym == RUNB) s = s + (1+1) * N;
1589 N = N * 2;
1590 GET_MTF_VAL(nextSym);
1591 }
1592 while (nextSym == RUNA || nextSym == RUNB);
1593
1594 s++;
1595 ch = seqToUnseq[yy[0]];
1596 unzftab[ch] += s;
1597
1598 if (smallMode)
1599 while (s > 0) {
1600 last++;
1601 ll16[last] = ch;
1602 s--;
1603 }
1604 else
1605 while (s > 0) {
1606 last++;
1607 ll8[last] = ch;
1608 s--;
1609 };
1610
1611 if (last >= limitLast) blockOverrun();
1612 continue;
1613
1614 } else {
1615
1616 UChar tmp;
1617 last++; if (last >= limitLast) blockOverrun();
1618
1619 tmp = yy[nextSym-1];
1620 unzftab[seqToUnseq[tmp]]++;
1621 if (smallMode)
1622 ll16[last] = seqToUnseq[tmp]; else
1623 ll8[last] = seqToUnseq[tmp];
1624
1625 /*--
1626 This loop is hammered during decompression,
1627 hence the unrolling.
1628
1629 for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
1630 --*/
1631
1632 j = nextSym-1;
1633 for (; j > 3; j -= 4) {
1634 yy[j] = yy[j-1];
1635 yy[j-1] = yy[j-2];
1636 yy[j-2] = yy[j-3];
1637 yy[j-3] = yy[j-4];
1638 }
1639 for (; j > 0; j--) yy[j] = yy[j-1];
1640
1641 yy[0] = tmp;
1642 GET_MTF_VAL(nextSym);
1643 continue;
1644 }
1645 }
1646}
1647
1648
1649/*---------------------------------------------------*/
1650/*--- Block-sorting machinery ---*/
1651/*---------------------------------------------------*/
1652
1653/*---------------------------------------------*/
1654/*--
1655 Compare two strings in block. We assume (see
1656 discussion above) that i1 and i2 have a max
1657 offset of 10 on entry, and that the first
1658 bytes of both block and quadrant have been
1659 copied into the "overshoot area", ie
1660 into the subscript range
1661 [last+1 .. last+NUM_OVERSHOOT_BYTES].
1662--*/
1663INLINE Bool fullGtU ( Int32 i1, Int32 i2 )
1664{
1665 Int32 k;
1666 UChar c1, c2;
1667 UInt16 s1, s2;
1668
1669 #if DEBUG
1670 /*--
1671 shellsort shouldn't ask to compare
1672 something with itself.
1673 --*/
1674 assert (i1 != i2);
1675 #endif
1676
1677 c1 = block[i1];
1678 c2 = block[i2];
1679 if (c1 != c2) return (c1 > c2);
1680 i1++; i2++;
1681
1682 c1 = block[i1];
1683 c2 = block[i2];
1684 if (c1 != c2) return (c1 > c2);
1685 i1++; i2++;
1686
1687 c1 = block[i1];
1688 c2 = block[i2];
1689 if (c1 != c2) return (c1 > c2);
1690 i1++; i2++;
1691
1692 c1 = block[i1];
1693 c2 = block[i2];
1694 if (c1 != c2) return (c1 > c2);
1695 i1++; i2++;
1696
1697 c1 = block[i1];
1698 c2 = block[i2];
1699 if (c1 != c2) return (c1 > c2);
1700 i1++; i2++;
1701
1702 c1 = block[i1];
1703 c2 = block[i2];
1704 if (c1 != c2) return (c1 > c2);
1705 i1++; i2++;
1706
1707 k = last + 1;
1708
1709 do {
1710
1711 c1 = block[i1];
1712 c2 = block[i2];
1713 if (c1 != c2) return (c1 > c2);
1714 s1 = quadrant[i1];
1715 s2 = quadrant[i2];
1716 if (s1 != s2) return (s1 > s2);
1717 i1++; i2++;
1718
1719 c1 = block[i1];
1720 c2 = block[i2];
1721 if (c1 != c2) return (c1 > c2);
1722 s1 = quadrant[i1];
1723 s2 = quadrant[i2];
1724 if (s1 != s2) return (s1 > s2);
1725 i1++; i2++;
1726
1727 c1 = block[i1];
1728 c2 = block[i2];
1729 if (c1 != c2) return (c1 > c2);
1730 s1 = quadrant[i1];
1731 s2 = quadrant[i2];
1732 if (s1 != s2) return (s1 > s2);
1733 i1++; i2++;
1734
1735 c1 = block[i1];
1736 c2 = block[i2];
1737 if (c1 != c2) return (c1 > c2);
1738 s1 = quadrant[i1];
1739 s2 = quadrant[i2];
1740 if (s1 != s2) return (s1 > s2);
1741 i1++; i2++;
1742
1743 if (i1 > last) { i1 -= last; i1--; };
1744 if (i2 > last) { i2 -= last; i2--; };
1745
1746 k -= 4;
1747 workDone++;
1748 }
1749 while (k >= 0);
1750
1751 return False;
1752}
1753
1754/*---------------------------------------------*/
1755/*--
1756 Knuth's increments seem to work better
1757 than Incerpi-Sedgewick here. Possibly
1758 because the number of elems to sort is
1759 usually small, typically <= 20.
1760--*/
1761Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
1762 9841, 29524, 88573, 265720,
1763 797161, 2391484 };
1764
1765void simpleSort ( Int32 lo, Int32 hi, Int32 d )
1766{
1767 Int32 i, j, h, bigN, hp;
1768 Int32 v;
1769
1770 bigN = hi - lo + 1;
1771 if (bigN < 2) return;
1772
1773 hp = 0;
1774 while (incs[hp] < bigN) hp++;
1775 hp--;
1776
1777 for (; hp >= 0; hp--) {
1778 h = incs[hp];
1779 if (verbosity >= 5)
1780 fprintf ( stderr, " shell increment %d\n", h );
1781
1782 i = lo + h;
1783 while (True) {
1784
1785 /*-- copy 1 --*/
1786 if (i > hi) break;
1787 v = zptr[i];
1788 j = i;
1789 while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
1790 zptr[j] = zptr[j-h];
1791 j = j - h;
1792 if (j <= (lo + h - 1)) break;
1793 }
1794 zptr[j] = v;
1795 i++;
1796
1797 /*-- copy 2 --*/
1798 if (i > hi) break;
1799 v = zptr[i];
1800 j = i;
1801 while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
1802 zptr[j] = zptr[j-h];
1803 j = j - h;
1804 if (j <= (lo + h - 1)) break;
1805 }
1806 zptr[j] = v;
1807 i++;
1808
1809 /*-- copy 3 --*/
1810 if (i > hi) break;
1811 v = zptr[i];
1812 j = i;
1813 while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
1814 zptr[j] = zptr[j-h];
1815 j = j - h;
1816 if (j <= (lo + h - 1)) break;
1817 }
1818 zptr[j] = v;
1819 i++;
1820
1821 if (workDone > workLimit && firstAttempt) return;
1822 }
1823 }
1824}
1825
1826
1827/*---------------------------------------------*/
1828/*--
1829 The following is an implementation of
1830 an elegant 3-way quicksort for strings,
1831 described in a paper "Fast Algorithms for
1832 Sorting and Searching Strings", by Robert
1833 Sedgewick and Jon L. Bentley.
1834--*/
1835
1836#define swap(lv1, lv2) \
1837 { Int32 tmp = lv1; lv1 = lv2; lv2 = tmp; }
1838
1839INLINE void vswap ( Int32 p1, Int32 p2, Int32 n )
1840{
1841 while (n > 0) {
1842 swap(zptr[p1], zptr[p2]);
1843 p1++; p2++; n--;
1844 }
1845}
1846
1847INLINE UChar med3 ( UChar a, UChar b, UChar c )
1848{
1849 UChar t;
1850 if (a > b) { t = a; a = b; b = t; };
1851 if (b > c) { t = b; b = c; c = t; };
1852 if (a > b) b = a;
1853 return b;
1854}
1855
1856
1857#define min(a,b) ((a) < (b)) ? (a) : (b)
1858
1859typedef
1860 struct { Int32 ll; Int32 hh; Int32 dd; }
1861 StackElem;
1862
1863#define push(lz,hz,dz) { stack[sp].ll = lz; \
1864 stack[sp].hh = hz; \
1865 stack[sp].dd = dz; \
1866 sp++; }
1867
1868#define pop(lz,hz,dz) { sp--; \
1869 lz = stack[sp].ll; \
1870 hz = stack[sp].hh; \
1871 dz = stack[sp].dd; }
1872
1873#define SMALL_THRESH 20
1874#define DEPTH_THRESH 10
1875
1876/*--
1877 If you are ever unlucky/improbable enough
1878 to get a stack overflow whilst sorting,
1879 increase the following constant and try
1880 again. In practice I have never seen the
1881 stack go above 27 elems, so the following
1882 limit seems very generous.
1883--*/
1884#define QSORT_STACK_SIZE 1000
1885
1886
1887void qSort3 ( Int32 loSt, Int32 hiSt, Int32 dSt )
1888{
1889 Int32 unLo, unHi, ltLo, gtHi, med, n, m;
1890 Int32 sp, lo, hi, d;
1891 StackElem stack[QSORT_STACK_SIZE];
1892
1893 sp = 0;
1894 push ( loSt, hiSt, dSt );
1895
1896 while (sp > 0) {
1897
1898 if (sp >= QSORT_STACK_SIZE) panic ( "stack overflow in qSort3" );
1899
1900 pop ( lo, hi, d );
1901
1902 if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
1903 simpleSort ( lo, hi, d );
1904 if (workDone > workLimit && firstAttempt) return;
1905 continue;
1906 }
1907
1908 med = med3 ( block[zptr[ lo ]+d],
1909 block[zptr[ hi ]+d],
1910 block[zptr[ (lo+hi)>>1 ]+d] );
1911
1912 unLo = ltLo = lo;
1913 unHi = gtHi = hi;
1914
1915 while (True) {
1916 while (True) {
1917 if (unLo > unHi) break;
1918 n = ((Int32)block[zptr[unLo]+d]) - med;
1919 if (n == 0) { swap(zptr[unLo], zptr[ltLo]); ltLo++; unLo++; continue; };
1920 if (n > 0) break;
1921 unLo++;
1922 }
1923 while (True) {
1924 if (unLo > unHi) break;
1925 n = ((Int32)block[zptr[unHi]+d]) - med;
1926 if (n == 0) { swap(zptr[unHi], zptr[gtHi]); gtHi--; unHi--; continue; };
1927 if (n < 0) break;
1928 unHi--;
1929 }
1930 if (unLo > unHi) break;
1931 swap(zptr[unLo], zptr[unHi]); unLo++; unHi--;
1932 }
1933 #if DEBUG
1934 assert (unHi == unLo-1);
1935 #endif
1936
1937 if (gtHi < ltLo) {
1938 push(lo, hi, d+1 );
1939 continue;
1940 }
1941
1942 n = min(ltLo-lo, unLo-ltLo); vswap(lo, unLo-n, n);
1943 m = min(hi-gtHi, gtHi-unHi); vswap(unLo, hi-m+1, m);
1944
1945 n = lo + unLo - ltLo - 1;
1946 m = hi - (gtHi - unHi) + 1;
1947
1948 push ( lo, n, d );
1949 push ( n+1, m-1, d+1 );
1950 push ( m, hi, d );
1951 }
1952}
1953
1954
1955/*---------------------------------------------*/
1956
1957#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
1958
1959#define SETMASK (1 << 21)
1960#define CLEARMASK (~(SETMASK))
1961
1962void sortIt ( void )
1963{
1964 Int32 i, j, ss, sb;
1965 Int32 runningOrder[256];
1966 Int32 copy[256];
1967 Bool bigDone[256];
1968 UChar c1, c2;
1969 Int32 numQSorted;
1970
1971 /*--
1972 In the various block-sized structures, live data runs
1973 from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
1974 set up the overshoot area for block.
1975 --*/
1976
1977 if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" );
1978 for (i = 0; i < NUM_OVERSHOOT_BYTES; i++)
1979 block[last+i+1] = block[i % (last+1)];
1980 for (i = 0; i <= last+NUM_OVERSHOOT_BYTES; i++)
1981 quadrant[i] = 0;
1982
1983 block[-1] = block[last];
1984
1985 if (last < 4000) {
1986
1987 /*--
1988 Use simpleSort(), since the full sorting mechanism
1989 has quite a large constant overhead.
1990 --*/
1991 if (verbosity >= 4) fprintf ( stderr, " simpleSort ...\n" );
1992 for (i = 0; i <= last; i++) zptr[i] = i;
1993 firstAttempt = False;
1994 workDone = workLimit = 0;
1995 simpleSort ( 0, last, 0 );
1996 if (verbosity >= 4) fprintf ( stderr, " simpleSort done.\n" );
1997
1998 } else {
1999
2000 numQSorted = 0;
2001 for (i = 0; i <= 255; i++) bigDone[i] = False;
2002
2003 if (verbosity >= 4) fprintf ( stderr, " bucket sorting ...\n" );
2004
2005 for (i = 0; i <= 65536; i++) ftab[i] = 0;
2006
2007 c1 = block[-1];
2008 for (i = 0; i <= last; i++) {
2009 c2 = block[i];
2010 ftab[(c1 << 8) + c2]++;
2011 c1 = c2;
2012 }
2013
2014 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
2015
2016 c1 = block[0];
2017 for (i = 0; i < last; i++) {
2018 c2 = block[i+1];
2019 j = (c1 << 8) + c2;
2020 c1 = c2;
2021 ftab[j]--;
2022 zptr[ftab[j]] = i;
2023 }
2024 j = (block[last] << 8) + block[0];
2025 ftab[j]--;
2026 zptr[ftab[j]] = last;
2027
2028 /*--
2029 Now ftab contains the first loc of every small bucket.
2030 Calculate the running order, from smallest to largest
2031 big bucket.
2032 --*/
2033
2034 for (i = 0; i <= 255; i++) runningOrder[i] = i;
2035
2036 {
2037 Int32 vv;
2038 Int32 h = 1;
2039 do h = 3 * h + 1; while (h <= 256);
2040 do {
2041 h = h / 3;
2042 for (i = h; i <= 255; i++) {
2043 vv = runningOrder[i];
2044 j = i;
2045 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
2046 runningOrder[j] = runningOrder[j-h];
2047 j = j - h;
2048 if (j <= (h - 1)) goto zero;
2049 }
2050 zero:
2051 runningOrder[j] = vv;
2052 }
2053 } while (h != 1);
2054 }
2055
2056 /*--
2057 The main sorting loop.
2058 --*/
2059
2060 for (i = 0; i <= 255; i++) {
2061
2062 /*--
2063 Process big buckets, starting with the least full.
2064 --*/
2065 ss = runningOrder[i];
2066
2067 /*--
2068 Complete the big bucket [ss] by quicksorting
2069 any unsorted small buckets [ss, j]. Hopefully
2070 previous pointer-scanning phases have already
2071 completed many of the small buckets [ss, j], so
2072 we don't have to sort them at all.
2073 --*/
2074 for (j = 0; j <= 255; j++) {
2075 sb = (ss << 8) + j;
2076 if ( ! (ftab[sb] & SETMASK) ) {
2077 Int32 lo = ftab[sb] & CLEARMASK;
2078 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
2079 if (hi > lo) {
2080 if (verbosity >= 4)
2081 fprintf ( stderr,
2082 " qsort [0x%x, 0x%x] done %d this %d\n",
2083 ss, j, numQSorted, hi - lo + 1 );
2084 qSort3 ( lo, hi, 2 );
2085 numQSorted += ( hi - lo + 1 );
2086 if (workDone > workLimit && firstAttempt) return;
2087 }
2088 ftab[sb] |= SETMASK;
2089 }
2090 }
2091
2092 /*--
2093 The ss big bucket is now done. Record this fact,
2094 and update the quadrant descriptors. Remember to
2095 update quadrants in the overshoot area too, if
2096 necessary. The "if (i < 255)" test merely skips
2097 this updating for the last bucket processed, since
2098 updating for the last bucket is pointless.
2099 --*/
2100 bigDone[ss] = True;
2101
2102 if (i < 255) {
2103 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
2104 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
2105 Int32 shifts = 0;
2106
2107 while ((bbSize >> shifts) > 65534) shifts++;
2108
2109 for (j = 0; j < bbSize; j++) {
2110 Int32 a2update = zptr[bbStart + j];
2111 UInt16 qVal = (UInt16)(j >> shifts);
2112 quadrant[a2update] = qVal;
2113 if (a2update < NUM_OVERSHOOT_BYTES)
2114 quadrant[a2update + last + 1] = qVal;
2115 }
2116
2117 if (! ( ((bbSize-1) >> shifts) <= 65535 )) panic ( "sortIt" );
2118 }
2119
2120 /*--
2121 Now scan this big bucket so as to synthesise the
2122 sorted order for small buckets [t, ss] for all t != ss.
2123 --*/
2124 for (j = 0; j <= 255; j++)
2125 copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
2126
2127 for (j = ftab[ss << 8] & CLEARMASK;
2128 j < (ftab[(ss+1) << 8] & CLEARMASK);
2129 j++) {
2130 c1 = block[zptr[j]-1];
2131 if ( ! bigDone[c1] ) {
2132 zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
2133 copy[c1] ++;
2134 }
2135 }
2136
2137 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
2138 }
2139 if (verbosity >= 4)
2140 fprintf ( stderr, " %d pointers, %d sorted, %d scanned\n",
2141 last+1, numQSorted, (last+1) - numQSorted );
2142 }
2143}
2144
2145
2146/*---------------------------------------------------*/
2147/*--- Stuff for randomising repetitive blocks ---*/
2148/*---------------------------------------------------*/
2149
2150/*---------------------------------------------*/
2151Int32 rNums[512] = {
2152 619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
2153 985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
2154 733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
2155 419, 436, 278, 496, 867, 210, 399, 680, 480, 51,
2156 878, 465, 811, 169, 869, 675, 611, 697, 867, 561,
2157 862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
2158 150, 238, 59, 379, 684, 877, 625, 169, 643, 105,
2159 170, 607, 520, 932, 727, 476, 693, 425, 174, 647,
2160 73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
2161 909, 545, 703, 919, 874, 474, 882, 500, 594, 612,
2162 641, 801, 220, 162, 819, 984, 589, 513, 495, 799,
2163 161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
2164 382, 596, 414, 171, 516, 375, 682, 485, 911, 276,
2165 98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
2166 227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
2167 469, 68, 770, 919, 190, 373, 294, 822, 808, 206,
2168 184, 943, 795, 384, 383, 461, 404, 758, 839, 887,
2169 715, 67, 618, 276, 204, 918, 873, 777, 604, 560,
2170 951, 160, 578, 722, 79, 804, 96, 409, 713, 940,
2171 652, 934, 970, 447, 318, 353, 859, 672, 112, 785,
2172 645, 863, 803, 350, 139, 93, 354, 99, 820, 908,
2173 609, 772, 154, 274, 580, 184, 79, 626, 630, 742,
2174 653, 282, 762, 623, 680, 81, 927, 626, 789, 125,
2175 411, 521, 938, 300, 821, 78, 343, 175, 128, 250,
2176 170, 774, 972, 275, 999, 639, 495, 78, 352, 126,
2177 857, 956, 358, 619, 580, 124, 737, 594, 701, 612,
2178 669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
2179 944, 375, 748, 52, 600, 747, 642, 182, 862, 81,
2180 344, 805, 988, 739, 511, 655, 814, 334, 249, 515,
2181 897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
2182 433, 837, 553, 268, 926, 240, 102, 654, 459, 51,
2183 686, 754, 806, 760, 493, 403, 415, 394, 687, 700,
2184 946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
2185 978, 321, 576, 617, 626, 502, 894, 679, 243, 440,
2186 680, 879, 194, 572, 640, 724, 926, 56, 204, 700,
2187 707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
2188 297, 59, 87, 824, 713, 663, 412, 693, 342, 606,
2189 134, 108, 571, 364, 631, 212, 174, 643, 304, 329,
2190 343, 97, 430, 751, 497, 314, 983, 374, 822, 928,
2191 140, 206, 73, 263, 980, 736, 876, 478, 430, 305,
2192 170, 514, 364, 692, 829, 82, 855, 953, 676, 246,
2193 369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
2194 804, 378, 215, 828, 592, 281, 565, 555, 710, 82,
2195 896, 831, 547, 261, 524, 462, 293, 465, 502, 56,
2196 661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
2197 768, 550, 608, 933, 378, 286, 215, 979, 792, 961,
2198 61, 688, 793, 644, 986, 403, 106, 366, 905, 644,
2199 372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
2200 780, 773, 635, 389, 707, 100, 626, 958, 165, 504,
2201 920, 176, 193, 713, 857, 265, 203, 50, 668, 108,
2202 645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
2203 936, 638
2204};
2205
2206
2207#define RAND_DECLS \
2208 Int32 rNToGo = 0; \
2209 Int32 rTPos = 0; \
2210
2211#define RAND_MASK ((rNToGo == 1) ? 1 : 0)
2212
2213#define RAND_UPD_MASK \
2214 if (rNToGo == 0) { \
2215 rNToGo = rNums[rTPos]; \
2216 rTPos++; if (rTPos == 512) rTPos = 0; \
2217 } \
2218 rNToGo--;
2219
2220
2221
2222/*---------------------------------------------------*/
2223/*--- The Reversible Transformation (tm) ---*/
2224/*---------------------------------------------------*/
2225
2226/*---------------------------------------------*/
2227void randomiseBlock ( void )
2228{
2229 Int32 i;
2230 RAND_DECLS;
2231 for (i = 0; i < 256; i++) inUse[i] = False;
2232
2233 for (i = 0; i <= last; i++) {
2234 RAND_UPD_MASK;
2235 block[i] ^= RAND_MASK;
2236 inUse[block[i]] = True;
2237 }
2238}
2239
2240
2241/*---------------------------------------------*/
2242void doReversibleTransformation ( void )
2243{
2244 Int32 i;
2245
2246 if (verbosity >= 2) fprintf ( stderr, "\n" );
2247
2248 workLimit = workFactor * last;
2249 workDone = 0;
2250 blockRandomised = False;
2251 firstAttempt = True;
2252
2253 sortIt ();
2254
2255 if (verbosity >= 3)
2256 fprintf ( stderr, " %d work, %d block, ratio %5.2f\n",
2257 workDone, last, (float)workDone / (float)(last) );
2258
2259 if (workDone > workLimit && firstAttempt) {
2260 if (verbosity >= 2)
2261 fprintf ( stderr, " sorting aborted; randomising block\n" );
2262 randomiseBlock ();
2263 workLimit = workDone = 0;
2264 blockRandomised = True;
2265 firstAttempt = False;
2266 sortIt();
2267 if (verbosity >= 3)
2268 fprintf ( stderr, " %d work, %d block, ratio %f\n",
2269 workDone, last, (float)workDone / (float)(last) );
2270 }
2271
2272 origPtr = -1;
2273 for (i = 0; i <= last; i++)
2274 if (zptr[i] == 0)
2275 { origPtr = i; break; };
2276
2277 if (origPtr == -1) panic ( "doReversibleTransformation" );
2278}
2279
2280
2281/*---------------------------------------------*/
2282
2283INLINE Int32 indexIntoF ( Int32 indx, Int32 *cftab )
2284{
2285 Int32 nb, na, mid;
2286 nb = 0;
2287 na = 256;
2288 do {
2289 mid = (nb + na) >> 1;
2290 if (indx >= cftab[mid]) nb = mid; else na = mid;
2291 }
2292 while (na - nb != 1);
2293 return nb;
2294}
2295
2296
2297#define GET_SMALL(cccc) \
2298 \
2299 cccc = indexIntoF ( tPos, cftab ); \
2300 tPos = GET_LL(tPos);
2301
2302
2303void undoReversibleTransformation_small ( FILE* dst )
2304{
2305 Int32 cftab[257], cftabAlso[257];
2306 Int32 i, j, tmp, tPos;
2307 UChar ch;
2308
2309 /*--
2310 We assume here that the global array unzftab will
2311 already be holding the frequency counts for
2312 ll8[0 .. last].
2313 --*/
2314
2315 /*-- Set up cftab to facilitate generation of indexIntoF --*/
2316 cftab[0] = 0;
2317 for (i = 1; i <= 256; i++) cftab[i] = unzftab[i-1];
2318 for (i = 1; i <= 256; i++) cftab[i] += cftab[i-1];
2319
2320 /*-- Make a copy of it, used in generation of T --*/
2321 for (i = 0; i <= 256; i++) cftabAlso[i] = cftab[i];
2322
2323 /*-- compute the T vector --*/
2324 for (i = 0; i <= last; i++) {
2325 ch = (UChar)ll16[i];
2326 SET_LL(i, cftabAlso[ch]);
2327 cftabAlso[ch]++;
2328 }
2329
2330 /*--
2331 Compute T^(-1) by pointer reversal on T. This is rather
2332 subtle, in that, if the original block was two or more
2333 (in general, N) concatenated copies of the same thing,
2334 the T vector will consist of N cycles, each of length
2335 blocksize / N, and decoding will involve traversing one
2336 of these cycles N times. Which particular cycle doesn't
2337 matter -- they are all equivalent. The tricky part is to
2338 make sure that the pointer reversal creates a correct
2339 reversed cycle for us to traverse. So, the code below
2340 simply reverses whatever cycle origPtr happens to fall into,
2341 without regard to the cycle length. That gives one reversed
2342 cycle, which for normal blocks, is the entire block-size long.
2343 For repeated blocks, it will be interspersed with the other
2344 N-1 non-reversed cycles. Providing that the F-subscripting
2345 phase which follows starts at origPtr, all then works ok.
2346 --*/
2347 i = origPtr;
2348 j = GET_LL(i);
2349 do {
2350 tmp = GET_LL(j);
2351 SET_LL(j, i);
2352 i = j;
2353 j = tmp;
2354 }
2355 while (i != origPtr);
2356
2357 /*--
2358 We recreate the original by subscripting F through T^(-1).
2359 The run-length-decoder below requires characters incrementally,
2360 so tPos is set to a starting value, and is updated by
2361 the GET_SMALL macro.
2362 --*/
2363 tPos = origPtr;
2364
2365 /*-------------------------------------------------*/
2366 /*--
2367 This is pretty much a verbatim copy of the
2368 run-length decoder present in the distribution
2369 bzip-0.21; it has to be here to avoid creating
2370 block[] as an intermediary structure. As in 0.21,
2371 this code derives from some sent to me by
2372 Christian von Roques.
2373
2374 It allows dst==NULL, so as to support the test (-t)
2375 option without slowing down the fast decompression
2376 code.
2377 --*/
2378 {
2379 IntNative retVal;
2380 Int32 i2, count, chPrev, ch2;
2381 UInt32 localCrc;
2382
2383 count = 0;
2384 i2 = 0;
2385 ch2 = 256; /*-- not a char and not EOF --*/
2386 localCrc = getGlobalCRC();
2387
2388 {
2389 RAND_DECLS;
2390 while ( i2 <= last ) {
2391 chPrev = ch2;
2392 GET_SMALL(ch2);
2393 if (blockRandomised) {
2394 RAND_UPD_MASK;
2395 ch2 ^= (UInt32)RAND_MASK;
2396 }
2397 i2++;
2398
2399 if (dst)
2400 retVal = putc ( ch2, dst );
2401
2402 UPDATE_CRC ( localCrc, (UChar)ch2 );
2403
2404 if (ch2 != chPrev) {
2405 count = 1;
2406 } else {
2407 count++;
2408 if (count >= 4) {
2409 Int32 j2;
2410 UChar z;
2411 GET_SMALL(z);
2412 if (blockRandomised) {
2413 RAND_UPD_MASK;
2414 z ^= RAND_MASK;
2415 }
2416 for (j2 = 0; j2 < (Int32)z; j2++) {
2417 if (dst) retVal = putc (ch2, dst);
2418 UPDATE_CRC ( localCrc, (UChar)ch2 );
2419 }
2420 i2++;
2421 count = 0;
2422 }
2423 }
2424 }
2425 }
2426
2427 setGlobalCRC ( localCrc );
2428 }
2429 /*-- end of the in-line run-length-decoder. --*/
2430}
2431#undef GET_SMALL
2432
2433
2434/*---------------------------------------------*/
2435
2436#define GET_FAST(cccc) \
2437 \
2438 cccc = ll8[tPos]; \
2439 tPos = tt[tPos];
2440
2441
2442void undoReversibleTransformation_fast ( FILE* dst )
2443{
2444 Int32 cftab[257];
2445 Int32 i, tPos;
2446 UChar ch;
2447
2448 /*--
2449 We assume here that the global array unzftab will
2450 already be holding the frequency counts for
2451 ll8[0 .. last].
2452 --*/
2453
2454 /*-- Set up cftab to facilitate generation of T^(-1) --*/
2455 cftab[0] = 0;
2456 for (i = 1; i <= 256; i++) cftab[i] = unzftab[i-1];
2457 for (i = 1; i <= 256; i++) cftab[i] += cftab[i-1];
2458
2459 /*-- compute the T^(-1) vector --*/
2460 for (i = 0; i <= last; i++) {
2461 ch = (UChar)ll8[i];
2462 tt[cftab[ch]] = i;
2463 cftab[ch]++;
2464 }
2465
2466 /*--
2467 We recreate the original by subscripting L through T^(-1).
2468 The run-length-decoder below requires characters incrementally,
2469 so tPos is set to a starting value, and is updated by
2470 the GET_FAST macro.
2471 --*/
2472 tPos = tt[origPtr];
2473
2474 /*-------------------------------------------------*/
2475 /*--
2476 This is pretty much a verbatim copy of the
2477 run-length decoder present in the distribution
2478 bzip-0.21; it has to be here to avoid creating
2479 block[] as an intermediary structure. As in 0.21,
2480 this code derives from some sent to me by
2481 Christian von Roques.
2482 --*/
2483 {
2484 IntNative retVal;
2485 Int32 i2, count, chPrev, ch2;
2486 UInt32 localCrc;
2487
2488 count = 0;
2489 i2 = 0;
2490 ch2 = 256; /*-- not a char and not EOF --*/
2491 localCrc = getGlobalCRC();
2492
2493 if (blockRandomised) {
2494 RAND_DECLS;
2495 while ( i2 <= last ) {
2496 chPrev = ch2;
2497 GET_FAST(ch2);
2498 RAND_UPD_MASK;
2499 ch2 ^= (UInt32)RAND_MASK;
2500 i2++;
2501
2502 retVal = putc ( ch2, dst );
2503 UPDATE_CRC ( localCrc, (UChar)ch2 );
2504
2505 if (ch2 != chPrev) {
2506 count = 1;
2507 } else {
2508 count++;
2509 if (count >= 4) {
2510 Int32 j2;
2511 UChar z;
2512 GET_FAST(z);
2513 RAND_UPD_MASK;
2514 z ^= RAND_MASK;
2515 for (j2 = 0; j2 < (Int32)z; j2++) {
2516 retVal = putc (ch2, dst);
2517 UPDATE_CRC ( localCrc, (UChar)ch2 );
2518 }
2519 i2++;
2520 count = 0;
2521 }
2522 }
2523 }
2524
2525 } else {
2526
2527 while ( i2 <= last ) {
2528 chPrev = ch2;
2529 GET_FAST(ch2);
2530 i2++;
2531
2532 retVal = putc ( ch2, dst );
2533 UPDATE_CRC ( localCrc, (UChar)ch2 );
2534
2535 if (ch2 != chPrev) {
2536 count = 1;
2537 } else {
2538 count++;
2539 if (count >= 4) {
2540 Int32 j2;
2541 UChar z;
2542 GET_FAST(z);
2543 for (j2 = 0; j2 < (Int32)z; j2++) {
2544 retVal = putc (ch2, dst);
2545 UPDATE_CRC ( localCrc, (UChar)ch2 );
2546 }
2547 i2++;
2548 count = 0;
2549 }
2550 }
2551 }
2552
2553 } /*-- if (blockRandomised) --*/
2554
2555 setGlobalCRC ( localCrc );
2556 }
2557 /*-- end of the in-line run-length-decoder. --*/
2558}
2559#undef GET_FAST
2560
2561
2562/*---------------------------------------------------*/
2563/*--- The block loader and RLEr ---*/
2564/*---------------------------------------------------*/
2565
2566/*---------------------------------------------*/
2567/* Top 16: run length, 1 to 255.
2568* Lower 16: the char, or MY_EOF for EOF.
2569*/
2570
2571#define MY_EOF 257
2572
2573INLINE Int32 getRLEpair ( FILE* src )
2574{
2575 Int32 runLength;
2576 IntNative ch, chLatest;
2577
2578 ch = getc ( src );
2579
2580 /*--- Because I have no idea what kind of a value EOF is. ---*/
2581 if (ch == EOF) {
2582 ERROR_IF_NOT_ZERO ( errno );
2583 return (1 << 16) | MY_EOF;
2584 }
2585
2586 runLength = 0;
2587 do {
2588 chLatest = getc ( src );
2589 runLength++;
2590 bytesIn++;
2591 }
2592 while (ch == chLatest && runLength < 255);
2593
2594 if ( chLatest != EOF ) {
2595 if ( ungetc ( chLatest, src ) == EOF )
2596 panic ( "getRLEpair: ungetc failed" );
2597 } else {
2598 ERROR_IF_NOT_ZERO ( errno );
2599 }
2600
2601 /*--- Conditional is just a speedup hack. ---*/
2602 if (runLength == 1) {
2603 UPDATE_CRC ( globalCrc, (UChar)ch );
2604 return (1 << 16) | ch;
2605 } else {
2606 Int32 i;
2607 for (i = 1; i <= runLength; i++)
2608 UPDATE_CRC ( globalCrc, (UChar)ch );
2609 return (runLength << 16) | ch;
2610 }
2611}
2612
2613
2614/*---------------------------------------------*/
2615void loadAndRLEsource ( FILE* src )
2616{
2617 Int32 ch, allowableBlockSize, i;
2618
2619 last = -1;
2620 ch = 0;
2621
2622 for (i = 0; i < 256; i++) inUse[i] = False;
2623
2624 /*--- 20 is just a paranoia constant ---*/
2625 allowableBlockSize = 100000 * blockSize100k - 20;
2626
2627 while (last < allowableBlockSize && ch != MY_EOF) {
2628 Int32 rlePair, runLen;
2629 rlePair = getRLEpair ( src );
2630 ch = rlePair & 0xFFFF;
2631 runLen = (UInt32)rlePair >> 16;
2632
2633 #if DEBUG
2634 assert (runLen >= 1 && runLen <= 255);
2635 #endif
2636
2637 if (ch != MY_EOF) {
2638 inUse[ch] = True;
2639 switch (runLen) {
2640 case 1:
2641 last++; block[last] = (UChar)ch; break;
2642 case 2:
2643 last++; block[last] = (UChar)ch;
2644 last++; block[last] = (UChar)ch; break;
2645 case 3:
2646 last++; block[last] = (UChar)ch;
2647 last++; block[last] = (UChar)ch;
2648 last++; block[last] = (UChar)ch; break;
2649 default:
2650 inUse[runLen-4] = True;
2651 last++; block[last] = (UChar)ch;
2652 last++; block[last] = (UChar)ch;
2653 last++; block[last] = (UChar)ch;
2654 last++; block[last] = (UChar)ch;
2655 last++; block[last] = (UChar)(runLen-4); break;
2656 }
2657 }
2658 }
2659}
2660
2661
2662/*---------------------------------------------------*/
2663/*--- Processing of complete files and streams ---*/
2664/*---------------------------------------------------*/
2665
2666/*---------------------------------------------*/
2667void compressStream ( FILE *stream, FILE *zStream )
2668{
2669 IntNative retVal;
2670 UInt32 blockCRC, combinedCRC;
2671 Int32 blockNo;
2672
2673 blockNo = 0;
2674 bytesIn = 0;
2675 bytesOut = 0;
2676 nBlocksRandomised = 0;
2677
2678 SET_BINARY_MODE(stream);
2679 SET_BINARY_MODE(zStream);
2680
2681 ERROR_IF_NOT_ZERO ( ferror(stream) );
2682 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2683
2684 bsSetStream ( zStream, True );
2685
2686 /*--- Write `magic' bytes B and Z,
2687 then h indicating file-format == huffmanised,
2688 followed by a digit indicating blockSize100k.
2689 ---*/
2690 bsPutUChar ( 'B' );
2691 bsPutUChar ( 'Z' );
2692 bsPutUChar ( 'h' );
2693 bsPutUChar ( '0' + blockSize100k );
2694
2695 combinedCRC = 0;
2696
2697 if (verbosity >= 2) fprintf ( stderr, "\n" );
2698
2699 while (True) {
2700
2701 blockNo++;
2702 initialiseCRC ();
2703 loadAndRLEsource ( stream );
2704 ERROR_IF_NOT_ZERO ( ferror(stream) );
2705 if (last == -1) break;
2706
2707 blockCRC = getFinalCRC ();
2708 combinedCRC = (combinedCRC << 1) | (combinedCRC >> 31);
2709 combinedCRC ^= blockCRC;
2710
2711 if (verbosity >= 2)
2712 fprintf ( stderr, " block %d: crc = 0x%8x, combined CRC = 0x%8x, size = %d",
2713 blockNo, blockCRC, combinedCRC, last+1 );
2714
2715 /*-- sort the block and establish posn of original string --*/
2716 doReversibleTransformation ();
2717
2718 /*--
2719 A 6-byte block header, the value chosen arbitrarily
2720 as 0x314159265359 :-). A 32 bit value does not really
2721 give a strong enough guarantee that the value will not
2722 appear by chance in the compressed datastream. Worst-case
2723 probability of this event, for a 900k block, is about
2724 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
2725 For a compressed file of size 100Gb -- about 100000 blocks --
2726 only a 48-bit marker will do. NB: normal compression/
2727 decompression do *not* rely on these statistical properties.
2728 They are only important when trying to recover blocks from
2729 damaged files.
2730 --*/
2731 bsPutUChar ( 0x31 ); bsPutUChar ( 0x41 );
2732 bsPutUChar ( 0x59 ); bsPutUChar ( 0x26 );
2733 bsPutUChar ( 0x53 ); bsPutUChar ( 0x59 );
2734
2735 /*-- Now the block's CRC, so it is in a known place. --*/
2736 bsPutUInt32 ( blockCRC );
2737
2738 /*-- Now a single bit indicating randomisation. --*/
2739 if (blockRandomised) {
2740 bsW(1,1); nBlocksRandomised++;
2741 } else
2742 bsW(1,0);
2743
2744 /*-- Finally, block's contents proper. --*/
2745 moveToFrontCodeAndSend ();
2746
2747 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2748 }
2749
2750 if (verbosity >= 2 && nBlocksRandomised > 0)
2751 fprintf ( stderr, " %d block%s needed randomisation\n",
2752 nBlocksRandomised,
2753 nBlocksRandomised == 1 ? "" : "s" );
2754
2755 /*--
2756 Now another magic 48-bit number, 0x177245385090, to
2757 indicate the end of the last block. (sqrt(pi), if
2758 you want to know. I did want to use e, but it contains
2759 too much repetition -- 27 18 28 18 28 46 -- for me
2760 to feel statistically comfortable. Call me paranoid.)
2761 --*/
2762
2763 bsPutUChar ( 0x17 ); bsPutUChar ( 0x72 );
2764 bsPutUChar ( 0x45 ); bsPutUChar ( 0x38 );
2765 bsPutUChar ( 0x50 ); bsPutUChar ( 0x90 );
2766
2767 bsPutUInt32 ( combinedCRC );
2768 if (verbosity >= 2)
2769 fprintf ( stderr, " final combined CRC = 0x%x\n ", combinedCRC );
2770
2771 /*-- Close the files in an utterly paranoid way. --*/
2772 bsFinishedWithStream ();
2773
2774 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2775 retVal = fflush ( zStream );
2776 ERROR_IF_EOF ( retVal );
2777 retVal = fclose ( zStream );
2778 ERROR_IF_EOF ( retVal );
2779
2780 ERROR_IF_NOT_ZERO ( ferror(stream) );
2781 retVal = fclose ( stream );
2782 ERROR_IF_EOF ( retVal );
2783
2784 if (bytesIn == 0) bytesIn = 1;
2785 if (bytesOut == 0) bytesOut = 1;
2786
2787 if (verbosity >= 1)
2788 fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, "
2789 "%5.2f%% saved, %d in, %d out.\n",
2790 (float)bytesIn / (float)bytesOut,
2791 (8.0 * (float)bytesOut) / (float)bytesIn,
2792 100.0 * (1.0 - (float)bytesOut / (float)bytesIn),
2793 bytesIn,
2794 bytesOut
2795 );
2796}
2797
2798
2799/*---------------------------------------------*/
2800Bool uncompressStream ( FILE *zStream, FILE *stream )
2801{
2802 UChar magic1, magic2, magic3, magic4;
2803 UChar magic5, magic6;
2804 UInt32 storedBlockCRC, storedCombinedCRC;
2805 UInt32 computedBlockCRC, computedCombinedCRC;
2806 Int32 currBlockNo;
2807 IntNative retVal;
2808
2809 SET_BINARY_MODE(stream);
2810 SET_BINARY_MODE(zStream);
2811
2812 ERROR_IF_NOT_ZERO ( ferror(stream) );
2813 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2814
2815 bsSetStream ( zStream, False );
2816
2817 /*--
2818 A bad magic number is `recoverable from';
2819 return with False so the caller skips the file.
2820 --*/
2821 magic1 = bsGetUChar ();
2822 magic2 = bsGetUChar ();
2823 magic3 = bsGetUChar ();
2824 magic4 = bsGetUChar ();
2825 if (magic1 != 'B' ||
2826 magic2 != 'Z' ||
2827 magic3 != 'h' ||
2828 magic4 < '1' ||
2829 magic4 > '9') {
2830 bsFinishedWithStream();
2831 retVal = fclose ( stream );
2832 ERROR_IF_EOF ( retVal );
2833 return False;
2834 }
2835
2836 setDecompressStructureSizes ( magic4 - '0' );
2837 computedCombinedCRC = 0;
2838
2839 if (verbosity >= 2) fprintf ( stderr, "\n " );
2840 currBlockNo = 0;
2841
2842 while (True) {
2843 magic1 = bsGetUChar ();
2844 magic2 = bsGetUChar ();
2845 magic3 = bsGetUChar ();
2846 magic4 = bsGetUChar ();
2847 magic5 = bsGetUChar ();
2848 magic6 = bsGetUChar ();
2849 if (magic1 == 0x17 && magic2 == 0x72 &&
2850 magic3 == 0x45 && magic4 == 0x38 &&
2851 magic5 == 0x50 && magic6 == 0x90) break;
2852
2853 if (magic1 != 0x31 || magic2 != 0x41 ||
2854 magic3 != 0x59 || magic4 != 0x26 ||
2855 magic5 != 0x53 || magic6 != 0x59) badBlockHeader();
2856
2857 storedBlockCRC = bsGetUInt32 ();
2858
2859 if (bsR(1) == 1)
2860 blockRandomised = True; else
2861 blockRandomised = False;
2862
2863 currBlockNo++;
2864 if (verbosity >= 2)
2865 fprintf ( stderr, "[%d: huff+mtf ", currBlockNo );
2866 getAndMoveToFrontDecode ();
2867 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2868
2869 initialiseCRC();
2870 if (verbosity >= 2) fprintf ( stderr, "rt+rld" );
2871 if (smallMode)
2872 undoReversibleTransformation_small ( stream );
2873 else
2874 undoReversibleTransformation_fast ( stream );
2875
2876 ERROR_IF_NOT_ZERO ( ferror(stream) );
2877
2878 computedBlockCRC = getFinalCRC();
2879 if (verbosity >= 3)
2880 fprintf ( stderr, " {0x%x, 0x%x}", storedBlockCRC, computedBlockCRC );
2881 if (verbosity >= 2) fprintf ( stderr, "] " );
2882
2883 /*-- A bad CRC is considered a fatal error. --*/
2884 if (storedBlockCRC != computedBlockCRC)
2885 crcError ( storedBlockCRC, computedBlockCRC );
2886
2887 computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31);
2888 computedCombinedCRC ^= computedBlockCRC;
2889 };
2890
2891 if (verbosity >= 2) fprintf ( stderr, "\n " );
2892
2893 storedCombinedCRC = bsGetUInt32 ();
2894 if (verbosity >= 2)
2895 fprintf ( stderr,
2896 "combined CRCs: stored = 0x%x, computed = 0x%x\n ",
2897 storedCombinedCRC, computedCombinedCRC );
2898 if (storedCombinedCRC != computedCombinedCRC)
2899 crcError ( storedCombinedCRC, computedCombinedCRC );
2900
2901
2902 bsFinishedWithStream ();
2903 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2904 retVal = fclose ( zStream );
2905 ERROR_IF_EOF ( retVal );
2906
2907 ERROR_IF_NOT_ZERO ( ferror(stream) );
2908 retVal = fflush ( stream );
2909 ERROR_IF_NOT_ZERO ( retVal );
2910 if (stream != stdout) {
2911 retVal = fclose ( stream );
2912 ERROR_IF_EOF ( retVal );
2913 }
2914 return True;
2915}
2916
2917
2918/*---------------------------------------------*/
2919Bool testStream ( FILE *zStream )
2920{
2921 UChar magic1, magic2, magic3, magic4;
2922 UChar magic5, magic6;
2923 UInt32 storedBlockCRC, storedCombinedCRC;
2924 UInt32 computedBlockCRC, computedCombinedCRC;
2925 Int32 currBlockNo;
2926 IntNative retVal;
2927
2928 SET_BINARY_MODE(zStream);
2929 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2930
2931 bsSetStream ( zStream, False );
2932
2933 magic1 = bsGetUChar ();
2934 magic2 = bsGetUChar ();
2935 magic3 = bsGetUChar ();
2936 magic4 = bsGetUChar ();
2937 if (magic1 != 'B' ||
2938 magic2 != 'Z' ||
2939 magic3 != 'h' ||
2940 magic4 < '1' ||
2941 magic4 > '9') {
2942 bsFinishedWithStream();
2943 fclose ( zStream );
2944 fprintf ( stderr, "\n%s: bad magic number (ie, not created by bzip2)\n",
2945 inName );
2946 return False;
2947 }
2948
2949 smallMode = True;
2950 setDecompressStructureSizes ( magic4 - '0' );
2951 computedCombinedCRC = 0;
2952
2953 if (verbosity >= 2) fprintf ( stderr, "\n" );
2954 currBlockNo = 0;
2955
2956 while (True) {
2957 magic1 = bsGetUChar ();
2958 magic2 = bsGetUChar ();
2959 magic3 = bsGetUChar ();
2960 magic4 = bsGetUChar ();
2961 magic5 = bsGetUChar ();
2962 magic6 = bsGetUChar ();
2963 if (magic1 == 0x17 && magic2 == 0x72 &&
2964 magic3 == 0x45 && magic4 == 0x38 &&
2965 magic5 == 0x50 && magic6 == 0x90) break;
2966
2967 currBlockNo++;
2968 if (magic1 != 0x31 || magic2 != 0x41 ||
2969 magic3 != 0x59 || magic4 != 0x26 ||
2970 magic5 != 0x53 || magic6 != 0x59) {
2971 bsFinishedWithStream();
2972 fclose ( zStream );
2973 fprintf ( stderr,
2974 "\n%s, block %d: bad header (not == 0x314159265359)\n",
2975 inName, currBlockNo );
2976 return False;
2977 }
2978 storedBlockCRC = bsGetUInt32 ();
2979
2980 if (bsR(1) == 1)
2981 blockRandomised = True; else
2982 blockRandomised = False;
2983
2984 if (verbosity >= 2)
2985 fprintf ( stderr, " block [%d: huff+mtf ", currBlockNo );
2986 getAndMoveToFrontDecode ();
2987 ERROR_IF_NOT_ZERO ( ferror(zStream) );
2988
2989 initialiseCRC();
2990 if (verbosity >= 2) fprintf ( stderr, "rt+rld" );
2991 undoReversibleTransformation_small ( NULL );
2992
2993 computedBlockCRC = getFinalCRC();
2994 if (verbosity >= 3)
2995 fprintf ( stderr, " {0x%x, 0x%x}", storedBlockCRC, computedBlockCRC );
2996 if (verbosity >= 2) fprintf ( stderr, "] " );
2997
2998 if (storedBlockCRC != computedBlockCRC) {
2999 bsFinishedWithStream();
3000 fclose ( zStream );
3001 fprintf ( stderr, "\n%s, block %d: computed CRC does not match stored one\n",
3002 inName, currBlockNo );
3003 return False;
3004 }
3005
3006 if (verbosity >= 2) fprintf ( stderr, "ok\n" );
3007 computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31);
3008 computedCombinedCRC ^= computedBlockCRC;
3009 };
3010
3011 storedCombinedCRC = bsGetUInt32 ();
3012 if (verbosity >= 2)
3013 fprintf ( stderr,
3014 " combined CRCs: stored = 0x%x, computed = 0x%x\n ",
3015 storedCombinedCRC, computedCombinedCRC );
3016 if (storedCombinedCRC != computedCombinedCRC) {
3017 bsFinishedWithStream();
3018 fclose ( zStream );
3019 fprintf ( stderr, "\n%s: computed CRC does not match stored one\n",
3020 inName );
3021 return False;
3022 }
3023
3024 bsFinishedWithStream ();
3025 ERROR_IF_NOT_ZERO ( ferror(zStream) );
3026 retVal = fclose ( zStream );
3027 ERROR_IF_EOF ( retVal );
3028 return True;
3029}
3030
3031
3032
3033/*---------------------------------------------------*/
3034/*--- Error [non-] handling grunge ---*/
3035/*---------------------------------------------------*/
3036
3037/*---------------------------------------------*/
3038void cadvise ( void )
3039{
3040 fprintf (
3041 stderr,
3042 "\nIt is possible that the compressed file(s) have become corrupted.\n"
3043 "You can use the -tvv option to test integrity of such files.\n\n"
3044 "You can use the `bzip2recover' program to *attempt* to recover\n"
3045 "data from undamaged sections of corrupted files.\n\n"
3046 );
3047}
3048
3049
3050/*---------------------------------------------*/
3051void showFileNames ( void )
3052{
3053 fprintf (
3054 stderr,
3055 "\tInput file = %s, output file = %s\n",
3056 inName==NULL ? "(null)" : inName,
3057 outName==NULL ? "(null)" : outName
3058 );
3059}
3060
3061
3062/*---------------------------------------------*/
3063void cleanUpAndFail ( Int32 ec )
3064{
3065 IntNative retVal;
3066
3067 if ( srcMode == SM_F2F && opMode != OM_TEST ) {
3068 fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n",
3069 progName,
3070 outName==NULL ? "(null)" : outName );
3071 if (outputHandleJustInCase != NULL)
3072 fclose ( outputHandleJustInCase );
3073 retVal = remove ( outName );
3074 if (retVal != 0)
3075 fprintf ( stderr,
3076 "%s: WARNING: deletion of output file (apparently) failed.\n",
3077 progName );
3078 }
3079 if (numFileNames > 0 && numFilesProcessed < numFileNames) {
3080 fprintf ( stderr,
3081 "%s: WARNING: some files have not been processed:\n"
3082 "\t%d specified on command line, %d not processed yet.\n\n",
3083 progName, numFileNames,
3084 numFileNames - numFilesProcessed );
3085 }
3086 exit ( ec );
3087}
3088
3089
3090/*---------------------------------------------*/
3091void panic ( Char* s )
3092{
3093 fprintf ( stderr,
3094 "\n%s: PANIC -- internal consistency error:\n"
3095 "\t%s\n"
3096 "\tThis is a BUG. Please report it to me at:\n"
3097 "\tjseward@acm.org\n",
3098 progName, s );
3099 showFileNames();
3100 cleanUpAndFail( 3 );
3101}
3102
3103
3104/*---------------------------------------------*/
3105void badBGLengths ( void )
3106{
3107 fprintf ( stderr,
3108 "\n%s: error when reading background model code lengths,\n"
3109 "\twhich probably means the compressed file is corrupted.\n",
3110 progName );
3111 showFileNames();
3112 cadvise();
3113 cleanUpAndFail( 2 );
3114}
3115
3116
3117/*---------------------------------------------*/
3118void crcError ( UInt32 crcStored, UInt32 crcComputed )
3119{
3120 fprintf ( stderr,
3121 "\n%s: Data integrity error when decompressing.\n"
3122 "\tStored CRC = 0x%x, computed CRC = 0x%x\n",
3123 progName, crcStored, crcComputed );
3124 showFileNames();
3125 cadvise();
3126 cleanUpAndFail( 2 );
3127}
3128
3129
3130/*---------------------------------------------*/
3131void compressedStreamEOF ( void )
3132{
3133 fprintf ( stderr,
3134 "\n%s: Compressed file ends unexpectedly;\n\t"
3135 "perhaps it is corrupted? *Possible* reason follows.\n",
3136 progName );
3137 perror ( progName );
3138 showFileNames();
3139 cadvise();
3140 cleanUpAndFail( 2 );
3141}
3142
3143
3144/*---------------------------------------------*/
3145void ioError ( )
3146{
3147 fprintf ( stderr,
3148 "\n%s: I/O or other error, bailing out. Possible reason follows.\n",
3149 progName );
3150 perror ( progName );
3151 showFileNames();
3152 cleanUpAndFail( 1 );
3153}
3154
3155
3156/*---------------------------------------------*/
3157void blockOverrun ()
3158{
3159 fprintf ( stderr,
3160 "\n%s: block overrun during decompression,\n"
3161 "\twhich probably means the compressed file\n"
3162 "\tis corrupted.\n",
3163 progName );
3164 showFileNames();
3165 cadvise();
3166 cleanUpAndFail( 2 );
3167}
3168
3169
3170/*---------------------------------------------*/
3171void badBlockHeader ()
3172{
3173 fprintf ( stderr,
3174 "\n%s: bad block header in the compressed file,\n"
3175 "\twhich probably means it is corrupted.\n",
3176 progName );
3177 showFileNames();
3178 cadvise();
3179 cleanUpAndFail( 2 );
3180}
3181
3182
3183/*---------------------------------------------*/
3184void bitStreamEOF ()
3185{
3186 fprintf ( stderr,
3187 "\n%s: read past the end of compressed data,\n"
3188 "\twhich probably means it is corrupted.\n",
3189 progName );
3190 showFileNames();
3191 cadvise();
3192 cleanUpAndFail( 2 );
3193}
3194
3195
3196/*---------------------------------------------*/
3197void mySignalCatcher ( IntNative n )
3198{
3199 fprintf ( stderr,
3200 "\n%s: Control-C (or similar) caught, quitting.\n",
3201 progName );
3202 cleanUpAndFail(1);
3203}
3204
3205
3206/*---------------------------------------------*/
3207void mySIGSEGVorSIGBUScatcher ( IntNative n )
3208{
3209 if (opMode == OM_Z)
3210 fprintf ( stderr,
3211 "\n%s: Caught a SIGSEGV or SIGBUS whilst compressing,\n"
3212 "\twhich probably indicates a bug in bzip2. Please\n"
3213 "\treport it to me at: jseward@acm.org\n",
3214 progName );
3215 else
3216 fprintf ( stderr,
3217 "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing,\n"
3218 "\twhich probably indicates that the compressed data\n"
3219 "\tis corrupted.\n",
3220 progName );
3221
3222 showFileNames();
3223 if (opMode == OM_Z)
3224 cleanUpAndFail( 3 ); else
3225 { cadvise(); cleanUpAndFail( 2 ); }
3226}
3227
3228
3229/*---------------------------------------------*/
3230void uncompressOutOfMemory ( Int32 draw, Int32 blockSize )
3231{
3232 fprintf ( stderr,
3233 "\n%s: Can't allocate enough memory for decompression.\n"
3234 "\tRequested %d bytes for a block size of %d.\n"
3235 "\tTry selecting space-economic decompress (with flag -s)\n"
3236 "\tand failing that, find a machine with more memory.\n",
3237 progName, draw, blockSize );
3238 showFileNames();
3239 cleanUpAndFail(1);
3240}
3241
3242
3243/*---------------------------------------------*/
3244void compressOutOfMemory ( Int32 draw, Int32 blockSize )
3245{
3246 fprintf ( stderr,
3247 "\n%s: Can't allocate enough memory for compression.\n"
3248 "\tRequested %d bytes for a block size of %d.\n"
3249 "\tTry selecting a small block size (with flag -s).\n",
3250 progName, draw, blockSize );
3251 showFileNames();
3252 cleanUpAndFail(1);
3253}
3254
3255
3256/*---------------------------------------------------*/
3257/*--- The main driver machinery ---*/
3258/*---------------------------------------------------*/
3259
3260/*---------------------------------------------*/
3261void pad ( Char *s )
3262{
3263 Int32 i;
3264 if ( (Int32)strlen(s) >= longestFileName ) return;
3265 for (i = 1; i <= longestFileName - (Int32)strlen(s); i++)
3266 fprintf ( stderr, " " );
3267}
3268
3269
3270/*---------------------------------------------*/
3271Bool fileExists ( Char* name )
3272{
3273 FILE *tmp = fopen ( name, "rb" );
3274 Bool exists = (tmp != NULL);
3275 if (tmp != NULL) fclose ( tmp );
3276 return exists;
3277}
3278
3279
3280/*---------------------------------------------*/
3281/*--
3282 if in doubt, return True
3283--*/
3284Bool notABogStandardFile ( Char* name )
3285{
3286 IntNative i;
3287 struct MY_STAT statBuf;
3288
3289 i = MY_LSTAT ( name, &statBuf );
3290 if (i != 0) return True;
3291 if (MY_S_IFREG(statBuf.st_mode)) return False;
3292 return True;
3293}
3294
3295
3296/*---------------------------------------------*/
3297void copyDateAndPermissions ( Char *srcName, Char *dstName )
3298{
3299 #if BZ_UNIX
3300 IntNative retVal;
3301 struct MY_STAT statBuf;
3302 struct utimbuf uTimBuf;
3303
3304 retVal = MY_LSTAT ( srcName, &statBuf );
3305 ERROR_IF_NOT_ZERO ( retVal );
3306 uTimBuf.actime = statBuf.st_atime;
3307 uTimBuf.modtime = statBuf.st_mtime;
3308
3309 retVal = chmod ( dstName, statBuf.st_mode );
3310 ERROR_IF_NOT_ZERO ( retVal );
3311 retVal = utime ( dstName, &uTimBuf );
3312 ERROR_IF_NOT_ZERO ( retVal );
3313 #endif
3314}
3315
3316
3317/*---------------------------------------------*/
3318Bool endsInBz2 ( Char* name )
3319{
3320 Int32 n = strlen ( name );
3321 if (n <= 4) return False;
3322 return
3323 (name[n-4] == '.' &&
3324 name[n-3] == 'b' &&
3325 name[n-2] == 'z' &&
3326 name[n-1] == '2');
3327}
3328
3329
3330/*---------------------------------------------*/
3331Bool containsDubiousChars ( Char* name )
3332{
3333 Bool cdc = False;
3334 for (; *name != '\0'; name++)
3335 if (*name == '?' || *name == '*') cdc = True;
3336 return cdc;
3337}
3338
3339
3340/*---------------------------------------------*/
3341void compress ( Char *name )
3342{
3343 FILE *inStr;
3344 FILE *outStr;
3345
3346 if (name == NULL && srcMode != SM_I2O)
3347 panic ( "compress: bad modes\n" );
3348
3349 switch (srcMode) {
3350 case SM_I2O: strcpy ( inName, "(stdin)" );
3351 strcpy ( outName, "(stdout)" ); break;
3352 case SM_F2F: strcpy ( inName, name );
3353 strcpy ( outName, name );
3354 strcat ( outName, ".bz2" ); break;
3355 case SM_F2O: strcpy ( inName, name );
3356 strcpy ( outName, "(stdout)" ); break;
3357 }
3358
3359 if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
3360 fprintf ( stderr, "%s: There are no files matching `%s'.\n",
3361 progName, inName );
3362 return;
3363 }
3364 if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
3365 fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
3366 progName, inName );
3367 return;
3368 }
3369 if ( srcMode != SM_I2O && endsInBz2 ( inName )) {
3370 fprintf ( stderr, "%s: Input file name %s ends in `.bz2', skipping.\n",
3371 progName, inName );
3372 return;
3373 }
3374 if ( srcMode != SM_I2O && notABogStandardFile ( inName )) {
3375 fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
3376 progName, inName );
3377 return;
3378 }
3379 if ( srcMode == SM_F2F && fileExists ( outName ) ) {
3380 fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
3381 progName, outName );
3382 return;
3383 }
3384
3385 switch ( srcMode ) {
3386
3387 case SM_I2O:
3388 inStr = stdin;
3389 outStr = stdout;
3390 if ( isatty ( fileno ( stdout ) ) ) {
3391 fprintf ( stderr,
3392 "%s: I won't write compressed data to a terminal.\n",
3393 progName );
3394 fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
3395 progName, progName );
3396 return;
3397 };
3398 break;
3399
3400 case SM_F2O:
3401 inStr = fopen ( inName, "rb" );
3402 outStr = stdout;
3403 if ( isatty ( fileno ( stdout ) ) ) {
3404 fprintf ( stderr,
3405 "%s: I won't write compressed data to a terminal.\n",
3406 progName );
3407 fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
3408 progName, progName );
3409 return;
3410 };
3411 if ( inStr == NULL ) {
3412 fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
3413 progName, inName );
3414 return;
3415 };
3416 break;
3417
3418 case SM_F2F:
3419 inStr = fopen ( inName, "rb" );
3420 outStr = fopen ( outName, "wb" );
3421 if ( outStr == NULL) {
3422 fprintf ( stderr, "%s: Can't create output file %s, skipping.\n",
3423 progName, outName );
3424 return;
3425 }
3426 if ( inStr == NULL ) {
3427 fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
3428 progName, inName );
3429 return;
3430 };
3431 break;
3432
3433 default:
3434 panic ( "compress: bad srcMode" );
3435 break;
3436 }
3437
3438 if (verbosity >= 1) {
3439 fprintf ( stderr, " %s: ", inName );
3440 pad ( inName );
3441 fflush ( stderr );
3442 }
3443
3444 /*--- Now the input and output handles are sane. Do the Biz. ---*/
3445 errno = 0;
3446 outputHandleJustInCase = outStr;
3447 compressStream ( inStr, outStr );
3448 outputHandleJustInCase = NULL;
3449
3450 /*--- If there was an I/O error, we won't get here. ---*/
3451 if ( srcMode == SM_F2F ) {
3452 copyDateAndPermissions ( inName, outName );
3453 if ( !keepInputFiles ) {
3454 IntNative retVal = remove ( inName );
3455 ERROR_IF_NOT_ZERO ( retVal );
3456 }
3457 }
3458}
3459
3460
3461/*---------------------------------------------*/
3462void uncompress ( Char *name )
3463{
3464 FILE *inStr;
3465 FILE *outStr;
3466 Bool magicNumberOK;
3467
3468 if (name == NULL && srcMode != SM_I2O)
3469 panic ( "uncompress: bad modes\n" );
3470
3471 switch (srcMode) {
3472 case SM_I2O: strcpy ( inName, "(stdin)" );
3473 strcpy ( outName, "(stdout)" ); break;
3474 case SM_F2F: strcpy ( inName, name );
3475 strcpy ( outName, name );
3476 if (endsInBz2 ( outName ))
3477 outName [ strlen ( outName ) - 4 ] = '\0';
3478 break;
3479 case SM_F2O: strcpy ( inName, name );
3480 strcpy ( outName, "(stdout)" ); break;
3481 }
3482
3483 if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
3484 fprintf ( stderr, "%s: There are no files matching `%s'.\n",
3485 progName, inName );
3486 return;
3487 }
3488 if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
3489 fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
3490 progName, inName );
3491 return;
3492 }
3493 if ( srcMode != SM_I2O && !endsInBz2 ( inName )) {
3494 fprintf ( stderr,
3495 "%s: Input file name %s doesn't end in `.bz2', skipping.\n",
3496 progName, inName );
3497 return;
3498 }
3499 if ( srcMode != SM_I2O && notABogStandardFile ( inName )) {
3500 fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
3501 progName, inName );
3502 return;
3503 }
3504 if ( srcMode == SM_F2F && fileExists ( outName ) ) {
3505 fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
3506 progName, outName );
3507 return;
3508 }
3509
3510 switch ( srcMode ) {
3511
3512 case SM_I2O:
3513 inStr = stdin;
3514 outStr = stdout;
3515 if ( isatty ( fileno ( stdin ) ) ) {
3516 fprintf ( stderr,
3517 "%s: I won't read compressed data from a terminal.\n",
3518 progName );
3519 fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
3520 progName, progName );
3521 return;
3522 };
3523 break;
3524
3525 case SM_F2O:
3526 inStr = fopen ( inName, "rb" );
3527 outStr = stdout;
3528 if ( inStr == NULL ) {
3529 fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
3530 progName, inName );
3531 return;
3532 };
3533 break;
3534
3535 case SM_F2F:
3536 inStr = fopen ( inName, "rb" );
3537 outStr = fopen ( outName, "wb" );
3538 if ( outStr == NULL) {
3539 fprintf ( stderr, "%s: Can't create output file %s, skipping.\n",
3540 progName, outName );
3541 return;
3542 }
3543 if ( inStr == NULL ) {
3544 fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
3545 progName, inName );
3546 return;
3547 };
3548 break;
3549
3550 default:
3551 panic ( "uncompress: bad srcMode" );
3552 break;
3553 }
3554
3555 if (verbosity >= 1) {
3556 fprintf ( stderr, " %s: ", inName );
3557 pad ( inName );
3558 fflush ( stderr );
3559 }
3560
3561 /*--- Now the input and output handles are sane. Do the Biz. ---*/
3562 errno = 0;
3563 outputHandleJustInCase = outStr;
3564 magicNumberOK = uncompressStream ( inStr, outStr );
3565 outputHandleJustInCase = NULL;
3566
3567 /*--- If there was an I/O error, we won't get here. ---*/
3568 if ( magicNumberOK ) {
3569 if ( srcMode == SM_F2F ) {
3570 copyDateAndPermissions ( inName, outName );
3571 if ( !keepInputFiles ) {
3572 IntNative retVal = remove ( inName );
3573 ERROR_IF_NOT_ZERO ( retVal );
3574 }
3575 }
3576 } else {
3577 if ( srcMode == SM_F2F ) {
3578 IntNative retVal = remove ( outName );
3579 ERROR_IF_NOT_ZERO ( retVal );
3580 }
3581 }
3582
3583 if ( magicNumberOK ) {
3584 if (verbosity >= 1)
3585 fprintf ( stderr, "done\n" );
3586 } else {
3587 if (verbosity >= 1)
3588 fprintf ( stderr, "not a bzip2 file, skipping.\n" ); else
3589 fprintf ( stderr,
3590 "%s: %s is not a bzip2 file, skipping.\n",
3591 progName, inName );
3592 }
3593
3594}
3595
3596
3597/*---------------------------------------------*/
3598void testf ( Char *name )
3599{
3600 FILE *inStr;
3601 Bool allOK;
3602
3603 if (name == NULL && srcMode != SM_I2O)
3604 panic ( "testf: bad modes\n" );
3605
3606 strcpy ( outName, "(none)" );
3607 switch (srcMode) {
3608 case SM_I2O: strcpy ( inName, "(stdin)" ); break;
3609 case SM_F2F: strcpy ( inName, name ); break;
3610 case SM_F2O: strcpy ( inName, name ); break;
3611 }
3612
3613 if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
3614 fprintf ( stderr, "%s: There are no files matching `%s'.\n",
3615 progName, inName );
3616 return;
3617 }
3618 if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
3619 fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
3620 progName, inName );
3621 return;
3622 }
3623 if ( srcMode != SM_I2O && !endsInBz2 ( inName )) {
3624 fprintf ( stderr,
3625 "%s: Input file name %s doesn't end in `.bz2', skipping.\n",
3626 progName, inName );
3627 return;
3628 }
3629 if ( srcMode != SM_I2O && notABogStandardFile ( inName )) {
3630 fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
3631 progName, inName );
3632 return;
3633 }
3634
3635 switch ( srcMode ) {
3636
3637 case SM_I2O:
3638 if ( isatty ( fileno ( stdin ) ) ) {
3639 fprintf ( stderr,
3640 "%s: I won't read compressed data from a terminal.\n",
3641 progName );
3642 fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
3643 progName, progName );
3644 return;
3645 };
3646 inStr = stdin;
3647 break;
3648
3649 case SM_F2O: case SM_F2F:
3650 inStr = fopen ( inName, "rb" );
3651 if ( inStr == NULL ) {
3652 fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
3653 progName, inName );
3654 return;
3655 };
3656 break;
3657
3658 default:
3659 panic ( "testf: bad srcMode" );
3660 break;
3661 }
3662
3663 if (verbosity >= 1) {
3664 fprintf ( stderr, " %s: ", inName );
3665 pad ( inName );
3666 fflush ( stderr );
3667 }
3668
3669 /*--- Now the input handle is sane. Do the Biz. ---*/
3670 errno = 0;
3671 allOK = testStream ( inStr );
3672
3673 if (allOK && verbosity >= 1) fprintf ( stderr, "ok\n" );
3674 if (!allOK) testFailsExist = True;
3675}
3676
3677
3678/*---------------------------------------------*/
3679void license ( void )
3680{
3681 fprintf ( stderr,
3682
3683 "bzip2, a block-sorting file compressor. "
3684 "Version 0.1pl0, 17-Aug-97.\n"
3685 " \n"
3686 " Copyright (C) 1996, 1997 by Julian Seward.\n"
3687 " \n"
3688 " This program is free software; you can redistribute it and/or modify\n"
3689 " it under the terms of the GNU General Public License as published by\n"
3690 " the Free Software Foundation; either version 2 of the License, or\n"
3691 " (at your option) any later version.\n"
3692 " \n"
3693 " This program is distributed in the hope that it will be useful,\n"
3694 " but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
3695 " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
3696 " GNU General Public License for more details.\n"
3697 " \n"
3698 " You should have received a copy of the GNU General Public License\n"
3699 " along with this program; if not, write to the Free Software\n"
3700 " Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n"
3701 " \n"
3702 " The GNU General Public License is contained in the file LICENSE.\n"
3703 " \n"
3704 );
3705}
3706
3707
3708/*---------------------------------------------*/
3709void usage ( Char *fullProgName )
3710{
3711 fprintf (
3712 stderr,
3713 "bzip2, a block-sorting file compressor. "
3714 "Version 0.1pl0, 17-Aug-97.\n"
3715 "\n usage: %s [flags and input files in any order]\n"
3716 "\n"
3717 " -h --help print this message\n"
3718 " -d --decompress force decompression\n"
3719 " -f --compress force compression\n"
3720 " -t --test test compressed file integrity\n"
3721 " -c --stdout output to standard out\n"
3722 " -v --verbose be verbose (a 2nd -v gives more)\n"
3723 " -k --keep keep (don't delete) input files\n"
3724 " -L --license display software version & license\n"
3725 " -V --version display software version & license\n"
3726 " -s --small use less memory (at most 2500k)\n"
3727 " -1 .. -9 set block size to 100k .. 900k\n"
3728 " --repetitive-fast compress repetitive blocks faster\n"
3729 " --repetitive-best compress repetitive blocks better\n"
3730 "\n"
3731 " If invoked as `bzip2', the default action is to compress.\n"
3732 " as `bunzip2', the default action is to decompress.\n"
3733 "\n"
3734 " If no file names are given, bzip2 compresses or decompresses\n"
3735 " from standard input to standard output. You can combine\n"
3736 " flags, so `-v -4' means the same as -v4 or -4v, &c.\n"
3737 #if BZ_UNIX
3738 "\n"
3739 #endif
3740 ,
3741
3742 fullProgName
3743 );
3744}
3745
3746
3747/*---------------------------------------------*/
3748/*--
3749 All the garbage from here to main() is purely to
3750 implement a linked list of command-line arguments,
3751 into which main() copies argv[1 .. argc-1].
3752
3753 The purpose of this ridiculous exercise is to
3754 facilitate the expansion of wildcard characters
3755 * and ? in filenames for halfwitted OSs like
3756 MSDOS, Windows 95 and NT.
3757
3758 The actual Dirty Work is done by the platform-specific
3759 macro APPEND_FILESPEC.
3760--*/
3761
3762typedef
3763 struct zzzz {
3764 Char *name;
3765 struct zzzz *link;
3766 }
3767 Cell;
3768
3769
3770/*---------------------------------------------*/
3771void *myMalloc ( Int32 n )
3772{
3773 void* p;
3774
3775 p = malloc ( (size_t)n );
3776 if (p == NULL) {
3777 fprintf (
3778 stderr,
3779 "%s: `malloc' failed on request for %d bytes.\n",
3780 progName, n
3781 );
3782 exit ( 1 );
3783 }
3784 return p;
3785}
3786
3787
3788/*---------------------------------------------*/
3789Cell *mkCell ( void )
3790{
3791 Cell *c;
3792
3793 c = (Cell*) myMalloc ( sizeof ( Cell ) );
3794 c->name = NULL;
3795 c->link = NULL;
3796 return c;
3797}
3798
3799
3800/*---------------------------------------------*/
3801Cell *snocString ( Cell *root, Char *name )
3802{
3803 if (root == NULL) {
3804 Cell *tmp = mkCell();
3805 tmp->name = (Char*) myMalloc ( 5 + strlen(name) );
3806 strcpy ( tmp->name, name );
3807 return tmp;
3808 } else {
3809 Cell *tmp = root;
3810 while (tmp->link != NULL) tmp = tmp->link;
3811 tmp->link = snocString ( tmp->link, name );
3812 return root;
3813 }
3814}
3815
3816
3817
3818/*---------------------------------------------*/
3819#define ISFLAG(s) (strcmp(aa->name, (s))==0)
3820
3821
3822IntNative main ( IntNative argc, Char *argv[] )
3823{
3824 Int32 i, j;
3825 Char *tmp;
3826 Cell *argList;
3827 Cell *aa;
3828
3829
3830 #if DEBUG
3831 fprintf ( stderr, "bzip2: *** compiled with debugging ON ***\n" );
3832 #endif
3833
3834 /*-- Be really really really paranoid :-) --*/
3835 if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 ||
3836 sizeof(Int16) != 2 || sizeof(UInt16) != 2 ||
3837 sizeof(Char) != 1 || sizeof(UChar) != 1) {
3838 fprintf ( stderr,
3839 "bzip2: I'm not configured correctly for this platform!\n"
3840 "\tI require Int32, Int16 and Char to have sizes\n"
3841 "\tof 4, 2 and 1 bytes to run properly, and they don't.\n"
3842 "\tProbably you can fix this by defining them correctly,\n"
3843 "\tand recompiling. Bye!\n" );
3844 exit(1);
3845 }
3846
3847
3848 /*-- Set up signal handlers --*/
3849 signal (SIGINT, mySignalCatcher);
3850 signal (SIGTERM, mySignalCatcher);
3851 signal (SIGSEGV, mySIGSEGVorSIGBUScatcher);
3852 #if BZ_UNIX
3853 signal (SIGHUP, mySignalCatcher);
3854 signal (SIGBUS, mySIGSEGVorSIGBUScatcher);
3855 #endif
3856
3857
3858 /*-- Initialise --*/
3859 outputHandleJustInCase = NULL;
3860 ftab = NULL;
3861 ll4 = NULL;
3862 ll16 = NULL;
3863 ll8 = NULL;
3864 tt = NULL;
3865 block = NULL;
3866 zptr = NULL;
3867 errno = 0;
3868 smallMode = False;
3869 keepInputFiles = False;
3870 verbosity = 0;
3871 blockSize100k = 9;
3872 testFailsExist = False;
3873 bsStream = NULL;
3874 numFileNames = 0;
3875 numFilesProcessed = 0;
3876 workFactor = 30;
3877
3878 strcpy ( inName, "(none)" );
3879 strcpy ( outName, "(none)" );
3880
3881 strcpy ( progNameReally, argv[0] );
3882 progName = &progNameReally[0];
3883 for (tmp = &progNameReally[0]; *tmp != '\0'; tmp++)
3884 if (*tmp == PATH_SEP) progName = tmp + 1;
3885
3886
3887 /*-- Expand filename wildcards in arg list --*/
3888 argList = NULL;
3889 for (i = 1; i <= argc-1; i++)
3890 APPEND_FILESPEC(argList, argv[i]);
3891
3892
3893 /*-- Find the length of the longest filename --*/
3894 longestFileName = 7;
3895 numFileNames = 0;
3896 for (aa = argList; aa != NULL; aa = aa->link)
3897 if (aa->name[0] != '-') {
3898 numFileNames++;
3899 if (longestFileName < (Int32)strlen(aa->name) )
3900 longestFileName = (Int32)strlen(aa->name);
3901 }
3902
3903
3904 /*-- Determine what to do (compress/uncompress/test). --*/
3905 /*-- Note that subsequent flag handling may change this. --*/
3906 opMode = OM_Z;
3907
3908 if ( (strcmp ( "bunzip2", progName ) == 0) ||
3909 (strcmp ( "BUNZIP2", progName ) == 0) ||
3910 (strcmp ( "bunzip2.exe", progName ) == 0) ||
3911 (strcmp ( "BUNZIP2.EXE", progName ) == 0) )
3912 opMode = OM_UNZ;
3913
3914
3915 /*-- Determine source modes; flag handling may change this too. --*/
3916 if (numFileNames == 0)
3917 srcMode = SM_I2O; else srcMode = SM_F2F;
3918
3919
3920 /*-- Look at the flags. --*/
3921 for (aa = argList; aa != NULL; aa = aa->link)
3922 if (aa->name[0] == '-' && aa->name[1] != '-')
3923 for (j = 1; aa->name[j] != '\0'; j++)
3924 switch (aa->name[j]) {
3925 case 'c': srcMode = SM_F2O; break;
3926 case 'd': opMode = OM_UNZ; break;
3927 case 'f': opMode = OM_Z; break;
3928 case 't': opMode = OM_TEST; break;
3929 case 'k': keepInputFiles = True; break;
3930 case 's': smallMode = True; break;
3931 case '1': blockSize100k = 1; break;
3932 case '2': blockSize100k = 2; break;
3933 case '3': blockSize100k = 3; break;
3934 case '4': blockSize100k = 4; break;
3935 case '5': blockSize100k = 5; break;
3936 case '6': blockSize100k = 6; break;
3937 case '7': blockSize100k = 7; break;
3938 case '8': blockSize100k = 8; break;
3939 case '9': blockSize100k = 9; break;
3940 case 'V':
3941 case 'L': license(); break;
3942 case 'v': verbosity++; break;
3943 case 'h': usage ( progName );
3944 exit ( 1 );
3945 break;
3946 default: fprintf ( stderr, "%s: Bad flag `%s'\n",
3947 progName, aa->name );
3948 usage ( progName );
3949 exit ( 1 );
3950 break;
3951 }
3952
3953 /*-- And again ... --*/
3954 for (aa = argList; aa != NULL; aa = aa->link) {
3955 if (ISFLAG("--stdout")) srcMode = SM_F2O; else
3956 if (ISFLAG("--decompress")) opMode = OM_UNZ; else
3957 if (ISFLAG("--compress")) opMode = OM_Z; else
3958 if (ISFLAG("--test")) opMode = OM_TEST; else
3959 if (ISFLAG("--keep")) keepInputFiles = True; else
3960 if (ISFLAG("--small")) smallMode = True; else
3961 if (ISFLAG("--version")) license(); else
3962 if (ISFLAG("--license")) license(); else
3963 if (ISFLAG("--repetitive-fast")) workFactor = 5; else
3964 if (ISFLAG("--repetitive-best")) workFactor = 150; else
3965 if (ISFLAG("--verbose")) verbosity++; else
3966 if (ISFLAG("--help")) { usage ( progName ); exit ( 1 ); }
3967 else
3968 if (strncmp ( aa->name, "--", 2) == 0) {
3969 fprintf ( stderr, "%s: Bad flag `%s'\n", progName, aa->name );
3970 usage ( progName );
3971 exit ( 1 );
3972 }
3973 }
3974
3975 if (opMode == OM_Z && smallMode) blockSize100k = 2;
3976
3977 if (opMode == OM_Z && srcMode == SM_F2O && numFileNames > 1) {
3978 fprintf ( stderr, "%s: I won't compress multiple files to stdout.\n",
3979 progName );
3980 exit ( 1 );
3981 }
3982
3983 if (opMode == OM_TEST && srcMode == SM_F2O) {
3984 fprintf ( stderr, "%s: -c and -t cannot be used together.\n",
3985 progName );
3986 exit ( 1 );
3987 }
3988
3989 if (opMode != OM_Z) blockSize100k = 0;
3990
3991 if (opMode == OM_Z) {
3992 allocateCompressStructures();
3993 if (srcMode == SM_I2O)
3994 compress ( NULL );
3995 else
3996 for (aa = argList; aa != NULL; aa = aa->link)
3997 if (aa->name[0] != '-') {
3998 numFilesProcessed++;
3999 compress ( aa->name );
4000 }
4001 } else
4002 if (opMode == OM_UNZ) {
4003 if (srcMode == SM_I2O)
4004 uncompress ( NULL );
4005 else
4006 for (aa = argList; aa != NULL; aa = aa->link)
4007 if (aa->name[0] != '-') {
4008 numFilesProcessed++;
4009 uncompress ( aa->name );
4010 }
4011 } else {
4012 testFailsExist = False;
4013 if (srcMode == SM_I2O)
4014 testf ( NULL );
4015 else
4016 for (aa = argList; aa != NULL; aa = aa->link)
4017 if (aa->name[0] != '-') {
4018 numFilesProcessed++;
4019 testf ( aa->name );
4020 }
4021 if (testFailsExist) {
4022 fprintf ( stderr,
4023 "\n"
4024 "You can use the `bzip2recover' program to *attempt* to recover\n"
4025 "data from undamaged sections of corrupted files.\n\n"
4026 );
4027 exit(2);
4028 }
4029 }
4030 return 0;
4031}
4032
4033
4034/*-----------------------------------------------------------*/
4035/*--- end bzip2.c ---*/
4036/*-----------------------------------------------------------*/