Adding compress/decompress script for use with gfx

2025-11-21 09:20:59 -06:00
parent d21daccf4c
commit 00fb9e6c5d
2 changed files with 247 additions and 0 deletions
--- a/menu/compress.py
+++ b/menu/compress.py
@@ -0,0 +1,135 @@
+import sys
+import os
+
+# Compression function reverse-engineered from ALTTP's decompression routine at $00E7DE
+def compress(data):
+    """Compress ``data`` into the command stream read by decompress.py.
+
+    Every command starts with a header byte whose top three bits select the
+    command and whose low five bits hold ``length - 1`` (runs of 1-32 bytes).
+    A header of the form ``111CCCLL`` plus one extra length byte is the
+    extended form: ``CCC`` selects the command and the 10-bit length field
+    allows runs of up to 1024 bytes.  The stream ends with ``0xFF``.
+
+    Commands emitted:
+        0x00-0x1F  raw copy of the bytes that follow
+        0x20-0x3F  one byte repeated
+        0x40-0x5F  two alternating bytes
+        0x60-0x7F  byte value increasing by one per step
+        0x80-0x9F  copy from an absolute 16-bit offset in the output
+        0xF0-0xF3  extended copy from an absolute 16-bit offset
+
+    Args:
+        data: bytes-like object to compress.
+
+    Returns:
+        A bytearray holding the compressed stream, end marker included.
+    """
+    short_max = 32          # longest run a 5-bit length field can encode
+    long_max = 1024         # longest run a 10-bit length field can encode
+    offset_limit = 0x10000  # copy sources must fit a 16-bit absolute offset
+
+    size = len(data)
+    out = bytearray()
+    i = 0
+    while i < size:
+        first = data[i]
+        # Repeating byte: 0x20-0x3F
+        if i + 1 < size and data[i + 1] == first:
+            length = 2
+            while length < short_max and i + length < size and data[i + length] == first:
+                length += 1
+            out.append(0x20 | (length - 1))
+            out.append(first)
+            i += length
+            continue
+
+        # Incremental: 0x60-0x7F
+        if i + 2 < size and data[i + 1] == first + 1 and data[i + 2] == first + 2:
+            length = 3
+            while length < short_max and i + length < size and data[i + length] == first + length:
+                length += 1
+            out.append(0x60 | (length - 1))
+            out.append(first)
+            i += length
+            continue
+
+        # Repeating word (A B A B ...): 0x40-0x5F
+        if i + 3 < size:
+            pair = (first, data[i + 1])
+            length = 2
+            while length < short_max and i + length < size and data[i + length] == pair[length % 2]:
+                length += 1
+            if length >= 4:  # shorter runs do not pay for the 3-byte command
+                out.append(0x40 | (length - 1))
+                out.extend(pair)
+                i += length
+                continue
+
+        # Copy from past.  The offset is an ABSOLUTE position in the output,
+        # so a source at or beyond 0x10000 cannot be encoded and is not searched.
+        best_len = 0
+        best_off = 0
+        for j in range(min(i, offset_limit)):
+            length = 0
+            while length < long_max and i + length < size and data[j + length] == data[i + length]:
+                length += 1
+            if length >= 2 and length > best_len:
+                best_len = length
+                best_off = j
+        if best_len >= 2:
+            if best_len <= short_max:
+                out.append(0x80 | (best_len - 1))
+            else:
+                # Extended header 111CCCLL with CCC = 100 (copy).  Using
+                # 0xE0 | high_bits would give CCC = 000, the extended raw copy.
+                out.append(0xF0 | ((best_len - 1) >> 8))
+                out.append((best_len - 1) & 0xFF)
+            out.append(best_off & 0xFF)
+            out.append(best_off >> 8)
+            i += best_len
+            continue
+
+        # Raw copy: 0x00-0x1F.  The run stops early where one of the
+        # commands above could start.
+        run = 1
+        while run < short_max and i + run < size:
+            p = i + run
+            if p + 1 < size:
+                nxt = data[p + 1]
+                if data[p] == nxt:
+                    break  # repeating bytes start at p
+                if p + 2 < size and nxt == data[p] + 1 and data[p + 2] == data[p] + 2:
+                    break  # a three-byte incremental run starts at p
+                window = range(max(0, p - 2048), min(p, offset_limit))
+                if any(data[j] == data[p] and data[j + 1] == nxt for j in window):
+                    break  # this byte pair occurred earlier and can be copied
+            run += 1
+        out.append(run - 1)
+        out.extend(data[i:i + run])
+        i += run
+
+    out.append(0xFF)  # end marker
+    return out
+
+if __name__ == '__main__':
+    # Script mode: compress <input_file> and write the result to <output_file>.
+    if len(sys.argv) != 3:
+        print("Usage: python compress.py <input_file> <output_file>")
+        sys.exit(1)
+
+    input_file_path, output_file_path = sys.argv[1:]
+
+    if not os.path.exists(input_file_path):
+        print(f"Error: Input file not found at {input_file_path}")
+        sys.exit(1)
+
+    with open(input_file_path, 'rb') as source:
+        raw = source.read()
+
+    packed = compress(raw)
+
+    with open(output_file_path, 'wb') as sink:
+        sink.write(packed)
+
+    print(f"Successfully compressed '{input_file_path}' to '{output_file_path}'")
--- a/menu/decompress.py
+++ b/menu/decompress.py
@@ -0,0 +1,112 @@
+import sys
+
+# Command selectors, normalised to the top three bits of a header byte.
+_RAW_COPY = 0x00
+_BYTE_FILL = 0x20
+_WORD_FILL = 0x40
+_INCREMENT = 0x60
+
+
+def _decode_header(stream, pos):
+    """Decode the command header located at ``stream[pos]``.
+
+    Args:
+        stream: the compressed command stream.
+        pos: index of a header byte other than the 0xFF end marker.
+
+    Returns:
+        A ``(kind, count, pos)`` tuple: the command selector moved into the
+        top three bits (0x00, 0x20, ... 0xE0), the number of bytes the
+        command produces, and the index of the command's first operand.
+
+    Raises:
+        IndexError: if an extended header's second byte is missing.
+    """
+    header = stream[pos]
+    pos += 1
+    if header < 0xE0:
+        # Standard header CCCLLLLL: selector in bits 5-7, length in bits 0-4.
+        return header & 0xE0, (header & 0x1F) + 1, pos
+    # Extended header 111CCCLL plus one byte: the shift moves the selector
+    # into bits 5-7, and the two L bits extend the next byte to ten bits.
+    count = (((header & 0x03) << 8) | stream[pos]) + 1
+    return (header << 3) & 0xE0, count, pos + 1
+
+
+def decompress(compressed_data):
+    """Expand a compressed command stream back into the original bytes.
+
+    Decoding stops at a 0xFF header byte or at the end of the input.  Every
+    other header selects one of these commands and a byte count:
+
+        0x00  raw copy of the next ``count`` input bytes
+        0x20  repeat the next input byte ``count`` times
+        0x40  alternate between the next two input bytes
+        0x60  start at the next input byte and add one per step (mod 256)
+        0x80+ copy ``count`` bytes from an absolute, little-endian 16-bit
+              offset into the output produced so far
+
+    Headers below 0xE0 carry counts of 1-32; headers 0xE0-0xFE use a second
+    byte for counts of 1-1024.
+
+    Args:
+        compressed_data: bytes-like object holding the command stream.
+
+    Returns:
+        A bytearray with the decompressed data.
+
+    Raises:
+        IndexError: if the input ends inside a header or inside the operands
+            of a non-raw command, or if a copy reads past the end of the
+            output produced so far.
+    """
+    src = compressed_data
+    end = len(src)
+    out = bytearray()
+    pos = 0
+
+    while pos < end:
+        if src[pos] == 0xFF:
+            break  # end marker
+        kind, count, pos = _decode_header(src, pos)
+
+        if kind == _RAW_COPY:
+            # Slicing never raises: a cut-short raw copy emits what is left.
+            out.extend(src[pos:pos + count])
+            pos += count
+        elif kind == _BYTE_FILL:
+            out.extend([src[pos]] * count)
+            pos += 1
+        elif kind == _WORD_FILL:
+            pair = (src[pos], src[pos + 1])
+            out.extend(pair[k % 2] for k in range(count))
+            pos += 2
+        elif kind == _INCREMENT:
+            first = src[pos]
+            out.extend((first + k) & 0xFF for k in range(count))
+            pos += 1
+        else:
+            # Copy from past (absolute offset).  Done one byte at a time so
+            # a source range that reaches into the bytes being written
+            # picks up the freshly appended values.
+            origin = src[pos] | (src[pos + 1] << 8)
+            for k in range(count):
+                out.append(out[origin + k])
+            pos += 2
+
+    return out
+
+if __name__ == '__main__':
+    # Script mode: decompress <input_file> and write the result to <output_file>.
+    args = sys.argv[1:]
+    if len(args) != 2:
+        print("Usage: python decompress.py <input_file> <output_file>")
+        sys.exit(1)
+    source_path, target_path = args
+
+    with open(source_path, 'rb') as source:
+        compressed = source.read()
+    decompressed = decompress(compressed)
+    with open(target_path, 'wb') as sink:
+        sink.write(decompressed)
+    print(f"Decompressed {len(compressed)} bytes to {len(decompressed)} bytes")