**EDIT:**Got it working, see post below instead.

Okay, trying to understand arithmetic coding, so that I can start learning QM-coding and all that. Having almost no luck.

I was able to write an encoder, but it seems to crash whenever I try scaling the probability table up.

For instance, the below code allows me to compress the Chrono Trigger ROM from 4096kb to 3978kb, and then decompress it successfully.

However, when I set a range > 192, it starts failing. The higher the value, the faster it fails. At 16384, compression ratio is great, getting me down to 3500kb (well, figuratively great for pure entropy coding on a mostly already compressed ROM). But I can only decompress the first four bytes correctly before errors occur.

Can anyone shed any light on what I'm doing wrong? Many thanks in advance ...

Code: Select all

```
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned long uint32_t;
typedef unsigned long long uint64_t;
FILE *fp;
uint8_t *data;
void aricode(const char *infn, const char *outfn) {
fp = fopen(infn, "rb");
if(!fp) return;
fseek(fp, 0, SEEK_END);
unsigned size = ftell(fp);
rewind(fp);
data = new uint8_t[size];
fread(data, 1, size, fp);
fclose(fp);
fp = fopen(outfn, "wb");
//========================================
//generate probability tables
//========================================
unsigned otable[256];
memset(&otable, 0, sizeof otable);
for(unsigned i = 0; i < size; i++) otable[data[i]]++;
uint16_t ptable_lo[256], ptable_hi[256];
memset(&ptable_lo, 0, sizeof ptable_lo);
memset(&ptable_hi, 0, sizeof ptable_hi);
unsigned scale = 0;
for(unsigned i = 0; i < 256; i++) {
//the higher the scalar, the better the compression.
//optimal value would be 16384, yet this crashes with > 196 ...
unsigned probability = double(otable[i]) / size * 196.0;
if(probability == 0) probability = 1;
unsigned prev = !i ? 0 : i - 1;
ptable_lo[i] = ptable_hi[prev];
ptable_hi[i] = ptable_lo[i] + probability;
scale += probability;
}
for(unsigned i = 0; i < 256; i++) {
ptable_hi[i]--;
//printf("%3d range = %3d-%3d\n", i, ptable_lo[i], ptable_hi[i]);
}
printf("scale = %d\n", scale);
fputc(size, fp);
fputc(size >> 8, fp);
fputc(size >> 16, fp);
fputc(size >> 24, fp);
fwrite(ptable_lo, 1, sizeof ptable_lo, fp);
fwrite(ptable_hi, 1, sizeof ptable_hi, fp);
//========================================
//encode
//========================================
uint16_t lo = 0x0000;
uint16_t hi = 0xffff;
unsigned bitpos = 0, bitval = 0;
for(unsigned i = 0; i < size; i++) {
unsigned range = (hi - lo) + 1;
unsigned symbol = data[i];
unsigned problo = ptable_lo[symbol];
unsigned probhi = ptable_hi[symbol];
//printf("range = %0.5x, lo = %0.4x, hi = %0.4x, sym = %0.2x, pl = %3d, ph = %3d\n", range, lo, hi, symbol, problo, probhi);
hi = lo + range * (double(probhi) / scale);
lo = lo + range * (double(problo) / scale);
//printf("adjust: range = %0.5x, lo = %0.4x, hi = %0.4x\n", (hi - lo) + 1, lo, hi);
while(((hi - lo) + 1) < scale) {
bitval = (bitval << 1) | (lo >> 15);
if(++bitpos == 8) { fputc(bitval, fp); bitval = bitpos = 0; }
lo = (lo << 1);
hi = (hi << 1) + 1;
//printf("renormalize: range = %0.5x, lo = %0.4x, hi = %0.4x\n", (hi - lo) + 1, lo, hi);
}
}
if(bitpos > 0) fputc(bitval, fp);
fputc(lo >> 8, fp);
fputc(lo, fp);
//========================================
//finish
//========================================
delete[] data;
fclose(fp);
}
void aridecode(const char *infn, const char *outfn) {
fp = fopen(infn, "rb");
if(!fp) return;
fseek(fp, 0, SEEK_END);
unsigned size = ftell(fp);
rewind(fp);
unsigned decompsize = fgetc(fp);
decompsize |= fgetc(fp) << 8;
decompsize |= fgetc(fp) << 16;
decompsize |= fgetc(fp) << 24;
//========================================
//read probability tables
//========================================
uint16_t ptable_lo[256], ptable_hi[256];
memset(&ptable_lo, 0, sizeof ptable_lo);
memset(&ptable_hi, 0, sizeof ptable_hi);
fread(ptable_lo, 1, sizeof ptable_lo, fp);
fread(ptable_hi, 1, sizeof ptable_hi, fp);
unsigned scale = 0;
for(unsigned i = 0; i < 256; i++) {
scale += ptable_hi[i] - ptable_lo[i] + 1;
}
printf("scale = %3d\n", scale);
size -= 4;
size -= sizeof ptable_lo;
size -= sizeof ptable_hi;
data = new uint8_t[size + 64];
memset(data, 0, size + 64);
fread(data, 1, size, fp);
fclose(fp);
unsigned p = 0;
fp = fopen(outfn, "wb");
//========================================
//decode
//========================================
uint16_t lo = 0x0000;
uint16_t hi = 0xffff;
uint16_t code = 0;
code = data[p++] << 8;
code |= data[p++];
unsigned bitval = data[p++], bitpos = 0;
unsigned outsize = 0;
while(outsize < decompsize) {
unsigned range = (hi - lo) + 1;
uint16_t pos = (code - lo) + 1;
pos = pos * scale / range;
unsigned symbol;
for(unsigned i = 0; i < 256; i++) {
if(pos >= ptable_lo[i] && pos <= ptable_hi[i]) {
//found symbol
symbol = i;
fputc(symbol, fp);
outsize++;
break;
}
}
unsigned problo = ptable_lo[symbol];
unsigned probhi = ptable_hi[symbol];
hi = lo + range * double(probhi) / scale;
lo = lo + range * double(problo) / scale;
while(((hi - lo) + 1) < scale) {
code = (code << 1) + ((bitval >> 7) & 1);
bitval <<= 1;
if(++bitpos >= 8) { bitval = data[p++]; bitpos = 0; }
lo = (lo << 1);
hi = (hi << 1) + 1;
}
}
//========================================
//finish
//========================================
delete[] data;
fclose(fp);
}
int main() {
aricode("test.bin", "test.ari");
aridecode("test.ari", "output.bin");
printf("done\n");
getch();
return 0;
}
```