I wonder if anyone could help solve this mystery.

Bear in mind I am not extremely familiar with the hardware and I may say something stupid - feel free to call me out.

I am using a simple LZSS algorithm for a ROM hack, and it works very well; up to the point when I put it on a cartridge using the above mentioned AX5904 chip.

This shows weird behaviour: the first output byte from decompression is always $FF - or at least that is the value that ends up in RAM. It is hard to debug on the hardware, and I could not replicate it in any emulator.

So I went through the trouble of carefully putting the same flash chip on an actual Nintendo MMC1B2 board, and it worked perfectly, producing the expected output.

This is the code I am on about:

Code: Select all

```
; LZSSUnpack: unpack the LZSS stream at ($41) into $7800-$7fff.
;
; Zero page
;   $41,$42 = source pointer (must be set by the caller; advanced by getbyte)
;   $29,$2a = destination pointer (initialised here to $7800)
;   $2b,$2c = scratch pointer used by getbyte2
;   $b4     = current flag byte, shifted right once per token
;   $b5     = number of flag bits already consumed from $b4
;
; Stream layout as decoded below: one flag byte followed by eight tokens,
; taken from the least significant flag bit upwards.
;   flag bit 1 -> one literal byte
;   flag bit 0 -> offset byte, count byte: copy count+3 bytes, starting at
;                 destination - 1 - offset
;
; Returns as soon as a token ends with the destination at or beyond $8000.
; NOTE(review): a malformed stream can still write up to one match length
; past $7fff before that test runs; on an MMC1 board those writes would land
; in mapper register space - confirm streams always end exactly at $8000.
LZSSUnpack:
    lda #$78
    sta $2a
    lda #$00
    sta $29                 ; destination = $7800
dounpack:
    lda #$00
    sta $b4                 ; no flag bits loaded yet
    lda #$07
    sta $b5                 ; 7 -> the first token bumps this to 8 and fetches flags
unpack:
    lsr $b4                 ; move on to the next flag bit
    lda $2a
    cmp #$80
    bcc notend              ; destination still below $8000
    rts
notend:
    inc $b5                 ; one more flag bit consumed
    lda $b5
    cmp #$08
    bne nonewflags
    jsr getbyte             ; all eight used: fetch a fresh flag byte
    sta $b4
    lda #$00
    sta $b5
nonewflags:
    lda $b4
    lsr                     ; carry = bit 0 of the flag byte
    bcc unpackdata
    jsr getbyte             ; literal: copy one byte straight through
    jsr putbyte
    jmp unpack
unpackdata:
    jsr getbyte             ; offset byte
    eor #$ff                ; getbyte2 indexes from (destination - $100)
    tay                     ; getbyte, getbyte2 and putbyte all leave Y alone
    jsr getbyte             ; count byte
    pha                     ; putbyte clobbers A and X, so park it on the stack
    jsr getbyte2            ; every match copies at least three bytes
    jsr putbyte
    jsr getbyte2
    jsr putbyte
    jsr getbyte2
    jsr putbyte
    pla                     ; PLA sets Z, no compare needed
    beq unpack              ; count = 0 -> the three bytes were all of it
unpackloop:
    pha                     ; save counter
    jsr getbyte2
    jsr putbyte
    pla
    sec
    sbc #$01
    bne unpackloop
    jmp unpack
; getbyte: fetch the next byte of the compressed stream.
; In:    $41,$42 = source pointer
; Out:   A = byte read; $41,$42 advanced by one
; Clobb: X (left at 0) and the N/Z flags.
;        Y and carry are untouched; nothing is pushed on the stack.
getbyte:
    ldx #$00
    lda ($41,x)             ; X = 0, so this reads through $41/$42
    inc $41
    bne getbyteend          ; low byte did not wrap: high byte stays
    inc $42
getbyteend:
    rts
; getbyte2: fetch an already-unpacked byte for a back-reference.
; In:    $29,$2a = destination pointer
;        Y       = index (the caller passes the stream's offset byte XOR $ff)
; Out:   A       = byte at (destination - $100) + Y
;        $2b,$2c = destination - $100 (scratch pointer)
; Keeps: X and Y
getbyte2:
    sec
    lda $2a
    sbc #$01                ; high byte: one page below the destination
    sta $2c
    lda $29
    sta $2b                 ; low byte is copied unchanged
    lda ($2b),y
    rts
; putbyte: append one byte to the unpacked output.
; In:    A = byte to store; $29,$2a = destination pointer
; Out:   byte stored at the destination; $29,$2a advanced by one
; Clobb: X (left at 0) and the N/Z flags.
;        A, Y and carry are preserved.
putbyte:
    ldx #$00
    sta ($29,x)             ; X = 0, so this writes through $29/$2a
    inc $29
    bne putbyteend          ; low byte did not wrap: high byte stays
    inc $2a
putbyteend:
    rts
```

Code: Select all

```
; LZSSUnpack (inlined variant): unpack the LZSS stream at ($41) into
; $7800-$7fff without any subroutine calls or stack use.
;
; Zero page
;   $41,$42 = source pointer (must be set by the caller; advanced as read)
;   $29,$2a = destination pointer (initialised here to $7800)
;   $2b,$2c = scratch pointer = destination - $100, for back-references
;   $4f     = scratch: bytes still to copy for the current back-reference
;   $b4     = current flag byte, shifted right once per token
;   $b5     = number of flag bits already consumed from $b4
;
; Stream layout as decoded below: one flag byte followed by eight tokens,
; taken from the least significant flag bit upwards.
;   flag bit 1 -> one literal byte
;   flag bit 0 -> offset byte, count byte: copy count+3 bytes, starting at
;                 destination - 1 - offset
;
; Only documented opcodes are used: the flag-bit counter is advanced with
; INC / LDA / CMP, so the result does not depend on the carry left behind by
; an earlier compare.
;
; Returns as soon as a token ends with the destination at or beyond $8000.
; NOTE(review): a malformed stream can still write up to one match length
; past $7fff before that test runs; on an MMC1 board those writes would land
; in mapper register space - confirm streams always end exactly at $8000.
LZSSUnpack:
    lda #$78
    sta $2a
    lda #$00
    sta $29                 ; destination = $7800
dounpack:
    lda #$00
    sta $b4                 ; no flag bits loaded yet
    lda #$07
    sta $b5                 ; 7 -> the first token bumps this to 8 and fetches flags
unpack:
    lsr $b4                 ; move on to the next flag bit
    lda $2a
    cmp #$80
    bcc notend              ; destination still below $8000
    rts
notend:
    ldx #$00                ; X stays 0 for every (zp,x) access in this token
    inc $b5                 ; one more flag bit consumed
    lda $b5
    cmp #$08
    bne nonewflags
    lda ($41,x)             ; all eight used: fetch a fresh flag byte
    sta $b4
    stx $b5                 ; restart the bit counter at 0
    inc $41
    bne nonewflags
    inc $42
nonewflags:
    ; get flags and test bit 0
    lda $b4
    lsr
    bcc unpackdata
    lda ($41,x)             ; literal: copy one byte straight through
    sta ($29,x)
    inc $41
    bne literal_src_ok
    inc $42
literal_src_ok:
    inc $29
    bne literal_done
    inc $2a
literal_done:
    jmp unpack
unpackdata:
    ; read the offset
    lda ($41,x)
    eor #$ff                ; index relative to (destination - $100)
    tay
    inc $41
    bne offset_ok
    inc $42
offset_ok:
    ; get the count
    lda ($41,x)
    sta $4f
    inc $41
    bne count_ok
    inc $42
count_ok:
    ; every match copies at least three bytes: first
    lda $29
    sta $2b
    sec
    lda $2a
    sbc #$01
    sta $2c                 ; $2b/$2c = destination - $100
    lda ($2b),y
    sta ($29,x)
    inc $29
    bne copy1_ok
    inc $2a
copy1_ok:
    ; second
    lda $29
    sta $2b
    sec
    lda $2a
    sbc #$01
    sta $2c
    lda ($2b),y
    sta ($29,x)
    inc $29
    bne copy2_ok
    inc $2a
copy2_ok:
    ; third
    lda $29
    sta $2b
    sec
    lda $2a
    sbc #$01
    sta $2c
    lda ($2b),y
    sta ($29,x)
    inc $29
    bne copy3_ok
    inc $2a
copy3_ok:
    lda $4f
    beq _unpack             ; count = 0 -> the three bytes were all of it
unpackloop:
    lda $29
    sta $2b
    sec
    lda $2a
    sbc #$01
    sta $2c
    lda ($2b),y
    sta ($29,x)
    inc $29
    bne copyn_ok
    inc $2a
copyn_ok:
    dec $4f
    bne unpackloop
_unpack:
    jmp unpack
```

The cart itself is an SNROM - no WRAM banking, so maybe that is not the culprit?

The only other difference is the RAM chip itself: the one I am using has a 55ns access time, while the working board uses a much slower chip - I believe 120-150ns. Could that be the issue?