Revamped Nemesis Decompression.asm with documentation from Sega Retro

author: GabrielRavier <gabravier@gmail.com> 2018-08-14 17:55:25 +0200
committer: flamewing <flamewing.sonic@gmail.com> 2018-09-21 16:38:48 +0200
commit: fa41aff02ffadc9715be22f6789eee4dc9c0b9f9 (patch)
tree: d17cdb70409b3a7051234fb87420ee800e8ea7f6 /_inc
parent: 65d519f7ca5c54029cc912e722c6e97a83636eba (diff)
1 files changed, 115 insertions, 97 deletions
diff --git a/_inc/Nemesis Decompression.asm b/_inc/Nemesis Decompression.asm
index 68f3440..10fa2cb 100644
--- a/_inc/Nemesis Decompression.asm
+++ b/_inc/Nemesis Decompression.asm
@@ -1,5 +1,7 @@
 ; ---------------------------------------------------------------------------
-; Nemesis decompression	algorithm
+; Nemesis decompression	subroutine, decompresses art directly to VRAM
+; Inputs:
+; a0 = art address
 
 ; For format explanation see http://info.sonicretro.org/Nemesis_compression
 ; ---------------------------------------------------------------------------
@@ -9,95 +11,100 @@
 ; Nemesis decompression to VRAM
 NemDec:
 		movem.l	d0-a1/a3-a5,-(sp)
-		lea	(NemDec_WriteAndStay).l,a3	; write all data to the same location
+		lea	(NemPCD_WriteRowToVDP).l,a3	; write all data to the same location
 		lea	(vdp_data_port).l,a4	; specifically, to the VDP data port
 		bra.s	NemDecMain
 
 ; ||||||||||||||| S U B R O U T I N E |||||||||||||||||||||||||||||||||||||||
 
-; Nemesis decompression to RAM
-; input: a4 = starting address of destination
+; Nemesis decompression subroutine, decompresses art to RAM
+; Inputs:
+; a0 = art address
+; a4 = destination RAM address
 NemDecToRAM:
 		movem.l	d0-a1/a3-a5,-(sp)
-		lea	(NemDec_WriteAndAdvance).l,a3
+		lea	(NemPCD_WriteRowToRAM).l,a3
 
 NemDecMain:
 		lea	(v_ngfx_buffer).w,a1
-		move.w	(a0)+,d2
+		move.w	(a0)+,d2	; get number of patterns
 		lsl.w	#1,d2
-		bcc.s	loc_146A
-		adda.w	#$A,a3
+		bcc.s	loc_146A	; branch if the sign bit isn't set
+		adda.w	#NemPCD_WriteRowToVDP_XOR-NemPCD_WriteRowToVDP,a3	; otherwise the file uses XOR mode
 
 loc_146A:
-		lsl.w	#2,d2
-		movea.w	d2,a5
-		moveq	#8,d3
+		lsl.w	#2,d2	; get number of 8-pixel rows in the uncompressed data
+		movea.w	d2,a5	; and store it in a5 because there aren't any spare data registers
+		moveq	#8,d3	; 8 pixels in a pattern row
 		moveq	#0,d2
 		moveq	#0,d4
-		bsr.w	NemDecPrepare
-		move.b	(a0)+,d5
-		asl.w	#8,d5
-		move.b	(a0)+,d5
-		move.w	#$10,d6
-		bsr.s	NemDecRun
+		bsr.w	NemDec_BuildCodeTable
+		move.b	(a0)+,d5	; get first byte of compressed data
+		asl.w	#8,d5	; shift up by a byte
+		move.b	(a0)+,d5	; get second byte of compressed data
+		move.w	#$10,d6	; set initial shift value
+		bsr.s	NemDec_ProcessCompressedData
 		movem.l	(sp)+,d0-a1/a3-a5
 		rts	
 ; End of function NemDec
 
+; ---------------------------------------------------------------------------
+; Part of the Nemesis decompressor, processes the actual compressed data
+; ---------------------------------------------------------------------------
 
 ; ||||||||||||||| S U B	R O U T	I N E |||||||||||||||||||||||||||||||||||||||
 
 
-NemDecRun:
+NemDec_ProcessCompressedData:
 		move.w	d6,d7
-		subq.w	#8,d7
+		subq.w	#8,d7	; get shift value
 		move.w	d5,d1
-		lsr.w	d7,d1
-		cmpi.b	#-4,d1
-		bcc.s	loc_14D6
+		lsr.w	d7,d1	; shift so that high bit of the code is in bit position 7
+		cmpi.b	#%11111100,d1	; are the high 6 bits set?
+		bcc.s	NemPCD_InlineData	; if they are, it signifies inline data
 		andi.w	#$FF,d1
 		add.w	d1,d1
-		move.b	(a1,d1.w),d0
+		move.b	(a1,d1.w),d0	; get the length of the code in bits
 		ext.w	d0
-		sub.w	d0,d6
-		cmpi.w	#9,d6
-		bcc.s	loc_14B2
+		sub.w	d0,d6	; subtract from shift value so that the next code is read next time around
+		cmpi.w	#9,d6	; does a new byte need to be read?
+		bcc.s	loc_14B2	; if not, branch
 		addq.w	#8,d6
 		asl.w	#8,d5
-		move.b	(a0)+,d5
+		move.b	(a0)+,d5	; read next byte
 
 loc_14B2:
 		move.b	1(a1,d1.w),d1
 		move.w	d1,d0
-		andi.w	#$F,d1
+		andi.w	#$F,d1	; get palette index for pixel
 		andi.w	#$F0,d0
 
-loc_14C0:
-		lsr.w	#4,d0
+NemPCD_ProcessCompressedData:
+		lsr.w	#4,d0	; get repeat count
 
-loc_14C2:
-		lsl.l	#4,d4
-		or.b	d1,d4
-		subq.w	#1,d3
-		bne.s	NemDec_WriteIter_Part2
-		jmp	(a3) ; dynamic jump! to NemDec_WriteAndStay, NemDec_WriteAndAdvance, NemDec_WriteAndStay_XOR, or NemDec_WriteAndAdvance_XOR
-; End of function NemDecRun
+NemPCD_WritePixel:
+		lsl.l	#4,d4	; shift up by a nybble
+		or.b	d1,d4	; write pixel
+		subq.w	#1,d3	; has an entire 8-pixel row been written?
+		bne.s	NemPCD_WritePixel_Loop	; if not, loop
+		jmp	(a3)	; otherwise, write the row to its destination, by doing a dynamic jump to NemPCD_WriteRowToVDP, NemDec_WriteAndAdvance, NemPCD_WriteRowToVDP_XOR, or NemDec_WriteAndAdvance_XOR
+; End of function NemDec_ProcessCompressedData
 
 
 ; ||||||||||||||| S U B	R O U T	I N E |||||||||||||||||||||||||||||||||||||||
 
 
-NemDec_WriteIter:
-		moveq	#0,d4
-		moveq	#8,d3
+NemPCD_NewRow:
+		moveq	#0,d4	; reset row
+		moveq	#8,d3	; reset nybble counter
 
-NemDec_WriteIter_Part2:
-		dbf	d0,loc_14C2
-		bra.s	NemDecRun
+NemPCD_WritePixel_Loop:
+		dbf	d0,NemPCD_WritePixel
+		bra.s	NemDec_ProcessCompressedData
 ; ===========================================================================
 
-loc_14D6:
-		subq.w	#6,d6
+NemPCD_InlineData:
+		subq.w	#6,d6	; 6 bits needed to signal inline data
 		cmpi.w	#9,d6
 		bcc.s	loc_14E4
 		addq.w	#8,d6
@@ -105,98 +112,109 @@ loc_14D6:
 		move.b	(a0)+,d5
 
 loc_14E4:
-		subq.w	#7,d6
+		subq.w	#7,d6	; and 7 bits needed for the inline data itself
 		move.w	d5,d1
-		lsr.w	d6,d1
+		lsr.w	d6,d1	; shift so that low bit of the code is in bit position 0
 		move.w	d1,d0
-		andi.w	#$F,d1
-		andi.w	#$70,d0
+		andi.w	#$F,d1	; get palette index for pixel
+		andi.w	#$70,d0	; high nybble is repeat count for pixel
 		cmpi.w	#9,d6
-		bcc.s	loc_14C0
+		bcc.s	NemPCD_ProcessCompressedData
 		addq.w	#8,d6
 		asl.w	#8,d5
 		move.b	(a0)+,d5
-		bra.s	loc_14C0
-; End of function NemDec_WriteIter
+		bra.s	NemPCD_ProcessCompressedData
+; End of function NemPCD_NewRow
 
 ; ===========================================================================
 
-NemDec_WriteAndStay:
-		move.l	d4,(a4)
+NemPCD_WriteRowToVDP:
+		move.l	d4,(a4)	; write 8-pixel row
 		subq.w	#1,a5
-		move.w	a5,d4
-		bne.s	NemDec_WriteIter
-		rts	
+		move.w	a5,d4	; have all the 8-pixel rows been written?
+		bne.s	NemPCD_NewRow	; if not, branch
+		rts		; otherwise the decompression is finished
 ; ===========================================================================
-		eor.l	d4,d2
-		move.l	d2,(a4)
+NemPCD_WriteRowToVDP_XOR
+		eor.l	d4,d2	; XOR the previous row by the current row
+		move.l	d2,(a4)	; and write the result
 		subq.w	#1,a5
 		move.w	a5,d4
-		bne.s	NemDec_WriteIter
+		bne.s	NemPCD_NewRow
 		rts	
 ; ===========================================================================
 
-NemDec_WriteAndAdvance:
+NemPCD_WriteRowToRAM:
 		move.l	d4,(a4)+
 		subq.w	#1,a5
 		move.w	a5,d4
-		bne.s	NemDec_WriteIter
+		bne.s	NemPCD_NewRow
 		rts	
 ; ===========================================================================
+NemPCD_WriteRowToRAM_XOR:
 		eor.l	d4,d2
 		move.l	d2,(a4)+
 		subq.w	#1,a5
 		move.w	a5,d4
-		bne.s	NemDec_WriteIter
+		bne.s	NemPCD_NewRow
 		rts	
 
 ; ||||||||||||||| S U B	R O U T	I N E |||||||||||||||||||||||||||||||||||||||
+; ---------------------------------------------------------------------------
+; Part of the Nemesis decompressor, builds the code table (in RAM)
+; ---------------------------------------------------------------------------
 
 
-NemDecPrepare:
-		move.b	(a0)+,d0
+NemDec_BuildCodeTable:
+		move.b	(a0)+,d0	; read first byte
 
-loc_1530:
-		cmpi.b	#-1,d0
-		bne.s	loc_1538
-		rts	
+NemBCT_ChkEnd:
+		cmpi.b	#$FF,d0	; has the end of the code table description been reached?
+		bne.s	NemBCT_NewPALIndex	; if not, branch
+		rts	; otherwise, this subroutine's work is done
 ; ===========================================================================
 
-loc_1538:
+NemBCT_NewPALIndex:
 		move.w	d0,d7
 
-loc_153A:
-		move.b	(a0)+,d0
-		cmpi.b	#$80,d0
-		bcc.s	loc_1530
+NemBCT_Loop:
+		move.b	(a0)+,d0	; read next byte
+		cmpi.b	#$80,d0	; sign bit being set signifies a new palette index
+		bcc.s	NemBCT_ChkEnd	; a bmi could have been used instead of a compare and bcc
 		move.b	d0,d1
-		andi.w	#$F,d7
-		andi.w	#$70,d1
-		or.w	d1,d7
-		andi.w	#$F,d0
+		andi.w	#$F,d7	; get palette index
+		andi.w	#$70,d1	; get repeat count for palette index
+		or.w	d1,d7	; combine the two
+		andi.w	#$F,d0	; get the length of the code in bits
 		move.b	d0,d1
 		lsl.w	#8,d1
-		or.w	d1,d7
+		or.w	d1,d7	; combine with palette index and repeat count to form code table entry
 		moveq	#8,d1
-		sub.w	d0,d1
-		bne.s	loc_1568
-		move.b	(a0)+,d0
-		add.w	d0,d0
-		move.w	d7,(a1,d0.w)
-		bra.s	loc_153A
+		sub.w	d0,d1	; is the code 8 bits long?
+		bne.s	NemBCT_ShortCode	; if not, a bit of extra processing is needed
+		move.b	(a0)+,d0	; get code
+		add.w	d0,d0	; each code gets a word-sized entry in the table
+		move.w	d7,(a1,d0.w)	; store the entry for the code
+		bra.s	NemBCT_Loop	; repeat
 ; ===========================================================================
 
-loc_1568:
-		move.b	(a0)+,d0
-		lsl.w	d1,d0
-		add.w	d0,d0
+; the Nemesis decompressor uses prefix-free codes (no valid code is a prefix of a longer code)
+; e.g. if 10 is a valid 2-bit code, 110 is a valid 3-bit code but 100 isn't
+; also, when the actual compressed data is processed the high bit of each code is in bit position 7
+; so the code needs to be bit-shifted appropriately over here before being used as a code table index
+; additionally, the code needs multiple entries in the table because no masking is done during compressed data processing
+; so if 11000 is a valid code then all indices of the form 11000XXX need to have the same entry
+NemBCT_ShortCode:
+		move.b	(a0)+,d0	; get code
+		add.w	d0,d0	; shift so that high bit is in bit position 7
+		lsl.w	d1,d0	; get index into code table
 		moveq	#1,d5
 		lsl.w	d1,d5
-		subq.w	#1,d5
-
-loc_1574:
-		move.w	d7,(a1,d0.w)
-		addq.w	#2,d0
-		dbf	d5,loc_1574
-		bra.s	loc_153A
-; End of function NemDecPrepare
+		subq.w	#1,d5	; d5 = 2^d1 - 1
+
+NemBCT_ShortCode_Loop:
+		move.w	d7,(a1,d0.w)	; store entry
+		addq.w	#2,d0	; increment index
+		dbf	d5,NemBCT_ShortCode_Loop	; repeat for required number of entries
+		bra.s	NemBCT_Loop
+; End of function NemDec_BuildCodeTable
author	GabrielRavier <gabravier@gmail.com>	2018-08-14 17:55:25 +0200
committer	flamewing <flamewing.sonic@gmail.com>	2018-09-21 16:38:48 +0200
commit	fa41aff02ffadc9715be22f6789eee4dc9c0b9f9 (patch)
tree	d17cdb70409b3a7051234fb87420ee800e8ea7f6 /_inc
parent	65d519f7ca5c54029cc912e722c6e97a83636eba (diff)