**EDIT:** Got it working, see post below instead.

Okay, trying to understand arithmetic coding, so that I can start learning QM-coding and all that. Having almost no luck.

I was able to write an encoder, but it seems to crash whenever I try scaling the probability table up.

For instance, the below code allows me to compress the Chrono Trigger ROM from 4096kb to 3978kb, and then decompress it successfully.

However, when I set a range > 192, it starts failing. The higher the value, the faster it fails. At 16384, compression ratio is great, getting me down to 3500kb (well, figuratively great for pure entropy coding on a mostly already compressed ROM). But I can only decompress the first four bytes correctly before errors occur.

Can anyone shed any light on what I'm doing wrong? Many thanks in advance ...

**Code:**

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <conio.h>

typedef unsigned char uint8_t;

typedef unsigned short uint16_t;

typedef unsigned long uint32_t;

typedef unsigned long long uint64_t;

FILE *fp;

uint8_t *data;

void aricode(const char *infn, const char *outfn) {

fp = fopen(infn, "rb");

if(!fp) return;

fseek(fp, 0, SEEK_END);

unsigned size = ftell(fp);

rewind(fp);

data = new uint8_t[size];

fread(data, 1, size, fp);

fclose(fp);

fp = fopen(outfn, "wb");

//========================================

//generate probability tables

//========================================

unsigned otable[256];

memset(&otable, 0, sizeof otable);

for(unsigned i = 0; i < size; i++) otable[data[i]]++;

uint16_t ptable_lo[256], ptable_hi[256];

memset(&ptable_lo, 0, sizeof ptable_lo);

memset(&ptable_hi, 0, sizeof ptable_hi);

unsigned scale = 0;

for(unsigned i = 0; i < 256; i++) {

//the higher the scalar, the better the compression.

//optimal value would be 16384, yet this crashes with > 196 ...

unsigned probability = double(otable[i]) / size * 196.0;

if(probability == 0) probability = 1;

unsigned prev = !i ? 0 : i - 1;

ptable_lo[i] = ptable_hi[prev];

ptable_hi[i] = ptable_lo[i] + probability;

scale += probability;

}

for(unsigned i = 0; i < 256; i++) {

ptable_hi[i]--;

//printf("%3d range = %3d-%3d\n", i, ptable_lo[i], ptable_hi[i]);

}

printf("scale = %d\n", scale);

fputc(size, fp);

fputc(size >> 8, fp);

fputc(size >> 16, fp);

fputc(size >> 24, fp);

fwrite(ptable_lo, 1, sizeof ptable_lo, fp);

fwrite(ptable_hi, 1, sizeof ptable_hi, fp);

//========================================

//encode

//========================================

uint16_t lo = 0x0000;

uint16_t hi = 0xffff;

unsigned bitpos = 0, bitval = 0;

for(unsigned i = 0; i < size; i++) {

unsigned range = (hi - lo) + 1;

unsigned symbol = data[i];

unsigned problo = ptable_lo[symbol];

unsigned probhi = ptable_hi[symbol];

//printf("range = %0.5x, lo = %0.4x, hi = %0.4x, sym = %0.2x, pl = %3d, ph = %3d\n", range, lo, hi, symbol, problo, probhi);

hi = lo + range * (double(probhi) / scale);

lo = lo + range * (double(problo) / scale);

//printf("adjust: range = %0.5x, lo = %0.4x, hi = %0.4x\n", (hi - lo) + 1, lo, hi);

while(((hi - lo) + 1) < scale) {

bitval = (bitval << 1) | (lo >> 15);

if(++bitpos == 8) { fputc(bitval, fp); bitval = bitpos = 0; }

lo = (lo << 1);

hi = (hi << 1) + 1;

//printf("renormalize: range = %0.5x, lo = %0.4x, hi = %0.4x\n", (hi - lo) + 1, lo, hi);

}

}

if(bitpos > 0) fputc(bitval, fp);

fputc(lo >> 8, fp);

fputc(lo, fp);

//========================================

//finish

//========================================

delete[] data;

fclose(fp);

}

void aridecode(const char *infn, const char *outfn) {

fp = fopen(infn, "rb");

if(!fp) return;

fseek(fp, 0, SEEK_END);

unsigned size = ftell(fp);

rewind(fp);

unsigned decompsize = fgetc(fp);

decompsize |= fgetc(fp) << 8;

decompsize |= fgetc(fp) << 16;

decompsize |= fgetc(fp) << 24;

//========================================

//read probability tables

//========================================

uint16_t ptable_lo[256], ptable_hi[256];

memset(&ptable_lo, 0, sizeof ptable_lo);

memset(&ptable_hi, 0, sizeof ptable_hi);

fread(ptable_lo, 1, sizeof ptable_lo, fp);

fread(ptable_hi, 1, sizeof ptable_hi, fp);

unsigned scale = 0;

for(unsigned i = 0; i < 256; i++) {

scale += ptable_hi[i] - ptable_lo[i] + 1;

}

printf("scale = %3d\n", scale);

size -= 4;

size -= sizeof ptable_lo;

size -= sizeof ptable_hi;

data = new uint8_t[size + 64];

memset(data, 0, size + 64);

fread(data, 1, size, fp);

fclose(fp);

unsigned p = 0;

fp = fopen(outfn, "wb");

//========================================

//decode

//========================================

uint16_t lo = 0x0000;

uint16_t hi = 0xffff;

uint16_t code = 0;

code = data[p++] << 8;

code |= data[p++];

unsigned bitval = data[p++], bitpos = 0;

unsigned outsize = 0;

while(outsize < decompsize) {

unsigned range = (hi - lo) + 1;

uint16_t pos = (code - lo) + 1;

pos = pos * scale / range;

unsigned symbol;

for(unsigned i = 0; i < 256; i++) {

if(pos >= ptable_lo[i] && pos <= ptable_hi[i]) {

//found symbol

symbol = i;

fputc(symbol, fp);

outsize++;

break;

}

}

unsigned problo = ptable_lo[symbol];

unsigned probhi = ptable_hi[symbol];

hi = lo + range * double(probhi) / scale;

lo = lo + range * double(problo) / scale;

while(((hi - lo) + 1) < scale) {

code = (code << 1) + ((bitval >> 7) & 1);

bitval <<= 1;

if(++bitpos >= 8) { bitval = data[p++]; bitpos = 0; }

lo = (lo << 1);

hi = (hi << 1) + 1;

}

}

//========================================

//finish

//========================================

delete[] data;

fclose(fp);

}

int main() {

aricode("test.bin", "test.ari");

aridecode("test.ari", "output.bin");

printf("done\n");

getch();

return 0;

}