|
DecompressArithmetic |
|
import java.io.*;
/**
* DecompressArithmetic.java<P>
*
* Mark Nelson<BR>
* March 8, 1996<BR>
* http://web2.airmail.net/markn<BR>
* **** moded by David Scott in May of 2001 to make bijective<BR>
* Ported to Java by Tim Tyler, July 2001 - http://mandala.co.uk/biac/<P>
*
* DESCRIPTION<BR>
* -----------<P>
*
* This program performs an order-0 adaptive arithmetic decoding
* function on an input file/stream, and sends the result to an
* output file or stream.<P>
*
* This program contains the source code from the 1987 CACM
* article by Witten, Neal, and Cleary. I have taken the
* source modules and combined them into this single file for
* ease of distribution and compilation. Other than that,
* the code is essentially unchanged.<P>
*
* This program takes two arguments: an input file and an output
* file. You can leave off one argument and send your output to
* stdout. Leave off two arguments and read your input from stdin
* as well.<P>
*
* This program accompanies my article "Data Compression with the
* Burrows-Wheeler Transform."<P>
*
* Build Instructions<BR>
* ------------------<P>
*
* Define the constant unix for UNIX or UNIX-like systems. The
* use of this constant turns off the code used to force the MS-DOS
* file system into binary mode. g++ does this already, your UNIX
* C++ compiler might also.<P>
*
* Borland C++ 4.5 16 bit : bcc -w unari.cpp<BR>
* Borland C++ 4.5 32 bit : bcc32 -w unari.cpp<BR>
* Microsoft Visual C++ 1.52 : cl /W4 unari.cpp<BR>
* Microsoft Visual C++ 2.1 : cl /W4 unari.cpp<BR>
* g++ : g++ -o unari unari.cpp<P>
*
* Typical Use<BR>
* -----------<P>
*
* unari < compressed-file | unrle | unmtf | unbwt | unrle > raw-file<P>
*
*/
public class DecompressArithmetic {
final static int No_of_chars = 256; /* Number of character symbols */
static FileOutputStream out = null;
static FileInputStream in = null;
static byte[] array = new byte[1];
/* THE SET OF SYMBOLS THAT MAY BE ENCODED. */
final static int No_of_symbols = No_of_chars; /* Total number of symbols */
// The number of symbols is 256 not 257 EOF is not needed
/* TRANSLATION TABLES BETWEEN CHARACTERS AND SYMBOL INDEXES. */
static int char_to_index[] = new int[No_of_chars]; /* To index from character */
static char index_to_char[] = new char[No_of_symbols+1]; /* To character from index */
static long freq[] = new long[No_of_symbols+1]; /* Symbol frequencies */
static long cum_freq[] = new long[No_of_symbols+1]; /* Cumulative symbol frequencies */
// only 1 to 256 used for the symbols
/* CUMULATIVE FREQUENCY TABLE. */
/* DECLARATIONS USED FOR ARITHMETIC ENCODING AND DECODING */
/* SIZE OF ARITHMETIC CODE VALUES. */
final static int Code_value_bits = 62; /* Number of bits in a code value */
/* BIT INPUT ROUTINES. */
/* THE BIT BUFFER. */
static int buffer; /* Bits waiting to be input */
static int bufferx; /* Bits waiting to be input */
static int bits_to_go; /* Number of bits still in buffer */
static int garbage_bits; /* Number of bits past end-of-file */
static int zbuffer; /* nimber of zero blocks */
static int lobuffer; /* specail handle of input flag */
static int last_byte; /* one when last byte of input read */
static int ZEND; /* next 2 flags to tell when last one bit in converted to FOF */
static int ZATE; /* has just been processed. ***NOT always last one bin file** */
static boolean zerf; /* Next 2 flags for eof of file handling */
static boolean onef;
/* INITIALIZE BIT INPUT. */
static void start_inputing_bits() throws IOException {
bits_to_go = 0; /* Buffer starts out with */
garbage_bits = 0; /* no bits in it. */
ZEND = 0;
ZATE = 1;
zerf = false;
onef = false;
zbuffer = 0;
lobuffer = 0;
bufferx = getc();
if (bufferx == -1) {
fprintf(" **zero input file fatal** \n"); // :-(
System.exit(1);
}
}
/* INPUT A BIT. */
static int input_bit() throws IOException {
int t;
if (bits_to_go==0) { /* Read the next byte if no */
buffer = bufferx;
if (buffer != 0x40) lobuffer = 0;
if (buffer == 0x40 && zbuffer == 1) lobuffer = 1;
if (buffer == 0) zbuffer = 1;
else zbuffer = 0;
bufferx = getc(); /* bits are left in buffer. */
if (bufferx == -1) {
if (zbuffer == 1 || lobuffer == 1) { /* add in last one bit */
bufferx = 0x40;
zbuffer = 0;
lobuffer = 0;
}
else last_byte = 1;
}
if (buffer == -1) {
buffer = 0;
garbage_bits += 1; /* Return arbitrary bits*/
if (garbage_bits>Code_value_bits-2) { /* after eof, but check */
fprintf("**Bad should not occurr** \n");
System.exit(-1);
}
}
bits_to_go = 8;
}
t = buffer&1; /* Return the next bit from */
buffer >>= 1; /* the bottom of the byte. */
bits_to_go -= 1;
if (buffer == 0 && last_byte == 1) ZEND = 1; /* last one sent */
return t;
}
/* ARITHMETIC DECODING ALGORITHM. */
/* CURRENT STATE OF THE DECODING. */
static long value; /* Currently-seen code value */
static long low, high, swape; /* Ends of current code region */
/* START DECODING A STREAM OF SYMBOLS. */
static void start_decoding() throws IOException {
int i;
value = 0; /* Input bits to fill the */
zbuffer = 0;
lobuffer = 0;
last_byte = 0;
for (i = 1; i<=Code_value_bits; i++) { /* code value. */
value = 2*value + input_bit();
if ((ZEND != 0) && (ZATE != 0)) {
if (ZATE++ > Code_value_bits-0) ZATE = 0;
}
}
low = 0; /* Full code range. */
high = (((long)1<<Code_value_bits)-1);
}
static long dss(long r,long top,long bot){
long t1,t2; /* r could be 64 bits top and bot 32 bits */
/* top < bot */
if (bot == 0) fprintf(" not possible \n");
t1 = r/bot;
t2 = r - (t1*bot);
t1 = t1*top;
t2 = t2*top;
t2 = t2/bot;
return (t1 + t2);
/* return r*top/bot; */
}
/* DECODE THE NEXT SYMBOL. */
static int decode_symbol(long cum_freq[]) throws IOException {
long range; /* Size of current code region */
int symbol; /* Symbol decoded */
/**/
low ^= (((long)1<<Code_value_bits)-1);
high ^= (((long)1<<Code_value_bits)-1);
value ^= (((long)1<<Code_value_bits)-1);
swape = low;
low = high;
high = swape;
/**/
range = (long)(high-low)+1;
high = low + range;
symbol = 0;
while(value < high)high = low + dss(range,cum_freq[++symbol],cum_freq[0]);
swape = high;
high = low + dss(range,cum_freq[symbol-1],cum_freq[0]) -1;
low = swape;
low ^= (((long)1<<Code_value_bits)-1);
high ^= (((long)1<<Code_value_bits)-1);
swape = low;
value ^= (((long)1<<Code_value_bits)-1);
low = high;
high = swape;
for (;;) { /* Loop to get rid of bits. */
if (high<(2*((((long)1<<Code_value_bits)-1)/4+1))) {
/* nothing */ /* Expand low half. */
}
else if (low>=(2*((((long)1<<Code_value_bits)-1)/4+1))) { /* Expand high half. */
value -= (2*((((long)1<<Code_value_bits)-1)/4+1));
low -= (2*((((long)1<<Code_value_bits)-1)/4+1)); /* Subtract offset to top. */
high -= (2*((((long)1<<Code_value_bits)-1)/4+1));
}
else if (low>=((((long)1<<Code_value_bits)-1)/4+1) /* Expand middle half. */
&& high<(3*((((long)1<<Code_value_bits)-1)/4+1))) {
value -= ((((long)1<<Code_value_bits)-1)/4+1);
low -= ((((long)1<<Code_value_bits)-1)/4+1); /* Subtract offset to middle*/
high -= ((((long)1<<Code_value_bits)-1)/4+1);
}
else
break; /* Otherwise exit loop. */
low = 2*low;
high = 2*high+1; /* Scale up code range. */
value = 2*value+input_bit(); /* Move in next input bit. */
if ((ZEND != 0) && (ZATE != 0)) {
if (ZATE++ > Code_value_bits-0) ZATE = 0;
}
if ((ZEND != 0) && (ZATE != 0) && value == 0) ZATE = 0;
if ((ZEND != 0) && (ZATE != 0) && value == (2*((((long)1<<Code_value_bits)-1)/4+1))) ZATE = 0;
}
return symbol;
}
/* THE ADAPTIVE SOURCE MODEL */
/* INITIALIZE THE MODEL. */
static void start_model()
{ int i;
for (i = 0; i<No_of_chars; i++) { /* Set up tables that */
char_to_index[i] = i+1; /* translate between symbol */
index_to_char[i+1] = (char) i; /* indexes and characters. */
}
for (i = 0; i<=No_of_symbols; i++) { /* Set up initial frequency */
freq[i] = 1; /* counts to be one for all */
cum_freq[i] = No_of_symbols-i; /* symbols. */
}
freq[0] = 0; /* Freq[0] must not be the */
} /* same as freq[1]. */
/* UPDATE THE MODEL TO ACCOUNT FOR A NEW SYMBOL. */
static void update_model(int symbol) {
int i; /* New index for symbol */
for (i = symbol; freq[i]==freq[i-1]; i--) ; /* Find symbol's new index. */
if (i<symbol) {
int ch_i, ch_symbol;
ch_i = index_to_char[i]; /* Update the translation */
ch_symbol = index_to_char[symbol]; /* tables if the symbol has */
index_to_char[i] = (char) ch_symbol; /* moved. */
index_to_char[symbol] = (char) ch_i;
char_to_index[ch_i] = symbol;
char_to_index[ch_symbol] = i;
}
freq[i] += 1; /* Increment the frequency */
while (i>0) { /* count for the symbol and */
i -= 1; /* update the cumulative */
cum_freq[i] += 1; /* frequencies. */
}
}
static void fprintf(String s) {
System.out.println(s);
}
static void writeOut(int i) throws IOException {
out.write((byte)i);
}
static int getc() throws IOException {
int rb;
if ((rb = in.read(array, 0, 1)) == -1) {/* Read the next character. */
return -1; // EOF flag...
}
return array[0] & 0xff;
}
/**
* Decompresses input_file and stores the results in output_file
*/
public static void decompress(String input_file, String output_file) throws IOException {
int symbol = 0;
try {
in = new FileInputStream(input_file);
out = new FileOutputStream(output_file);
byte[] _array = new byte[1];
start_model(); /* Set up other modules. */
start_inputing_bits();
start_decoding();
for (;;) { /* Loop through characters. */
int ch;
symbol = decode_symbol(cum_freq); /* Decode next symbol. */
if (symbol == 1) zerf = true;
if (zerf && symbol == 2) onef = true;
if (symbol > 1) zerf = false;
if (symbol > 2) onef = false;
// if (symbol==EOF_symbol) break; /* Exit loop if EOF symbol. */
ch = index_to_char[symbol]; /* Translate to a character.*/
if ((ZATE == 0) && (symbol != 1)) {
break;
}
// no ticker
writeOut(ch); /* Write that character. */
update_model(symbol); /* Update the model. */
}
if (!(zerf || onef)) {
writeOut(index_to_char[symbol]);
}
in.close();
out.close();
}
catch(Exception e) {
fprintf("\nError while processing file:");
e.printStackTrace();
}
}
/**
* Handles the Commmand-line interface to the compressor
*/
public static void main(String[] args) throws IOException {
String input_file = null;
String output_file = null;
fprintf("Bijective Arithmetic decoder version May 30, 2001\n");
fprintf("Arithmetic decoding from ");
if (args.length > 0) {
input_file = args[0];
fprintf(args[0]);
}
else {
fprintf("\nError: No input file specified");
System.exit(0);
}
fprintf(" to ");
if (args.length > 1) {
output_file = args[1];
fprintf(args[1]);
}
else {
fprintf("\nError: No output file specified");
System.exit(0);
}
fprintf("\n");
decompress(input_file, output_file);
}
}
|
DecompressArithmetic |
|