From 79450e8f1612ed63fa0da611f8d04ccf4ef444dc Mon Sep 17 00:00:00 2001
From: tmpz23 <28760271+tmpz23@users.noreply.github.com>
Date: Mon, 11 Apr 2022 20:47:11 +0200
Subject: [PATCH] Create pzztool.py

---
 pzztool/pzztool.py | 503 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 503 insertions(+)
 create mode 100644 pzztool/pzztool.py

diff --git a/pzztool/pzztool.py b/pzztool/pzztool.py
new file mode 100644
index 0000000..3699ebb
--- /dev/null
+++ b/pzztool/pzztool.py
@@ -0,0 +1,503 @@
+#!/usr/bin/env python3
+from math import ceil
+from pathlib import Path
+import shutil
+import logging
+
+
+__version__ = "0.14.8"
+__author__ = "rigodron, algoflash, GGLinnk"
+__OriginalAuthor__ = "infval"
+__license__ = "MIT"
+__status__ = "development"
+
+
+# For more information on the PZZ file format:
+# http://virtualre.rf.gd/index.php/PZZ_(Gotcha_Force)
+
+
+BIT_COMPRESSION_FLAG = 0x40000000
+FILE_LENGTH_MASK = 0x3FFFFFFF
+BLOCK_SIZE = 0x800
+TPL_MAGIC_NUMBER = b"\x00\x20\xAF\x30"  # http://virtualre.rf.gd/index.php/TPL_(Format_de_fichier)
+CHD_MAGIC_NUMBER = b"Head"
+BINHITS_MAGIC_NUMBER = b"STIH"
+TSB_MAGIC_NUMBER = b"TSBD"
+ICON_MAGIC_NUMBER = b"GOTCHA FORCE"
+
+
+def get_file_path(file_data: bytes, path: Path):
+    # If it's a plxxxx or a dpxxxx.pzz
+    # 001 is always absent for dpxxxx
+    if path.name[5:7] == "pl" or path.name[5:7] == "dp":
+        if path.name[0:3] == "000":
+            if path.stem[-4:] != "data":
+                return path.with_name(path.name + "data").with_suffix(".bin")
+            return path.with_suffix(".bin")
+        if path.name[0:3] == "002":
+            if path.stem[-3:] != "hit":
+                return path.with_name(path.name + "hit").with_suffix(".bin")
+            return path.with_suffix(".bin")
+        if path.name[0:3] == "003":
+            if path.stem[-3:] != "mot":
+                return path.with_name(path.name + "mot").with_suffix(".bin")
+            return path.with_suffix(".bin")
+        if path.name[0:3] == "004":
+            if path.stem[-4:] != "_mdl":
+                return path.with_name(path.name + "_mdl").with_suffix(".arc")
+            return path.with_suffix(".arc")
+        if path.name[0:3] == "005":
+            if path.stem[-5:] != "b_mdl":
+                return path.with_name(path.name + "b_mdl").with_suffix(".arc")
+            return path.with_suffix(".arc")
+        if path.name[0:3] == "006":
+            if path.stem[-5:] != "g_mdl":
+                return path.with_name(path.name + "g_mdl").with_suffix(".arc")
+            return path.with_suffix(".arc")
+        if path.name[0:3] == "007":
+            if path.stem[-5:] != "s_mdl":
+                return path.with_name(path.name + "s_mdl").with_suffix(".arc")
+            return path.with_suffix(".arc")
+        if path.name[0:3] == "008":
+            if path.stem[-5:] != "c_mdl":
+                return path.with_name(path.name + "c_mdl").with_suffix(".arc")
+            return path.with_suffix(".arc")
+        if path.name[0:3] == "009":
+            if path.stem[-5:] != "k_mdl":
+                return path.with_name(path.name + "k_mdl").with_suffix(".arc")
+            return path.with_suffix(".arc")
+    elif path.name[5:9] == "efct":
+        if path.name[0:3] == "001":
+            if path.stem[-6:] != "00_mdl":
+                return path.with_name(path.name + "00_mdl").with_suffix(".arc")
+            return path.with_suffix(".arc")
+        if path.name[0:3] == "002":
+            if path.stem[-6:] != "01_mdl":
+                return path.with_name(path.name + "01_mdl").with_suffix(".arc")
+            return path.with_suffix(".arc")
+    elif file_data.startswith(ICON_MAGIC_NUMBER):
+        if path.stem[-4:] != "icon":
+            return path.with_name(path.name + "icon").with_suffix(".bin")
+        return path.with_suffix(".bin")
+    if file_data.startswith(TPL_MAGIC_NUMBER):
+        return path.with_suffix(".tpl")
+    if file_data.startswith(CHD_MAGIC_NUMBER):
+        return path.with_suffix(".chd")
+    if file_data.startswith(TSB_MAGIC_NUMBER):
+        return path.with_suffix(".tsb")
+    if file_data.startswith(BINHITS_MAGIC_NUMBER):
+        return path.with_suffix(".bin")
+    # Default value
+    return path.with_suffix(".dat")
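+
+
+# Illustrative name mappings (hypothetical extracted file names, chosen to
+# exercise the rules above - not taken from a real dump):
+#   get_file_path(any_data, Path("004U_pl0200"))  -> Path("004U_pl0200_mdl.arc")
+#   get_file_path(tpl_data, Path("010U_title"))   -> Path("010U_title.tpl")
+#   get_file_path(b"unknown", Path("012U_foo"))   -> Path("012U_foo.dat")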
+
+
+# Not implemented: removing the padding at the end of unpacked files.
+# The problem is that we can't know how many of the trailing null bytes
+# belonged to the original file, so we can't safely strip the padding.
+def remove_padding(file_data: bytearray):
+    return file_data
+    # return file_data.rstrip(b'\x00')
+
+
+def block_align(bout: bytes):
+    # As demonstrated by pl080d/006C_pl080d.pzzp, we add BLOCK_SIZE if the data is already aligned on a multiple of BLOCK_SIZE
+    if len(bout) % BLOCK_SIZE == 0:
+        return bout.ljust(BLOCK_SIZE * int(len(bout) / BLOCK_SIZE + 1), b"\x00")
+    return bout.ljust(BLOCK_SIZE * ceil(len(bout) / BLOCK_SIZE), b"\x00")
+
+
+def fix_pad_decompress(bout: bytes, path: Path):
+    # We adjust the file length according to the file format after decompression
+    if path.name[5:7] == "pl" and path.suffix == ".arc" or \
+       path.name[5:7] == "dp" and path.suffix == ".arc" or \
+       path.name[5:9] == "efct" and path.suffix == ".arc":
+        return bout[:-1]
+    return bout
+
+
+def pzz_decompress(compressed_bytes: bytes):
+    uncompressed_bytes = bytearray()
+    compressed_bytes_size = len(compressed_bytes) // 2 * 2
+
+    cb = 0  # Control bytes
+    cb_bit = -1  # We rotate from 15 to 0 for the compress flag
+    i = 0
+    while i < compressed_bytes_size:
+        if cb_bit < 0:
+            cb = compressed_bytes[i + 1]
+            cb |= compressed_bytes[i + 0] << 8
+            cb_bit = 15
+            i += 2
+            continue
+
+        compress_flag = cb & (1 << cb_bit)
+        cb_bit -= 1
+
+        if compress_flag:
+            c = compressed_bytes[i + 1]
+            c |= compressed_bytes[i + 0] << 8
+
+            offset = (c & 0x7FF) * 2
+            if offset == 0:
+                break  # End of the compressed data
+            count = (c >> 11) * 2
+            if count == 0:
+                i += 2
+                c = compressed_bytes[i + 1]
+                c |= compressed_bytes[i + 0] << 8
+                count = c * 2
+
+            index = len(uncompressed_bytes) - offset
+            for j in range(count):
+                uncompressed_bytes.append(uncompressed_bytes[index + j])  # to improve: copy with a slice
+        else:
+            uncompressed_bytes += compressed_bytes[i: i+2]
+        i += 2
+
+    return uncompressed_bytes
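+
+
+# A minimal sketch of the compressed stream (hand-built example, not from a
+# real archive). A 16-bit big-endian control word is consumed from bit 15 down
+# to bit 0: flag 0 copies 2 literal bytes; flag 1 reads a 16-bit word c with
+# offset = (c & 0x7FF) * 2 and count = (c >> 11) * 2 in bytes (count == 0
+# means the real count follows in the next word; offset == 0 ends the stream).
+# Here 0x3000 encodes two literal flags then two compressed flags, and
+# 0x1002 encodes offset 4 / count 4 (re-copy the 4 bytes starting 4 bytes back):
+#   >>> pzz_decompress(bytes.fromhex("30004142434410020000"))
+#   bytearray(b'ABCDABCD')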
+
+
+def pzz_compress(uncompressed_bytes: bytes):
+    uncompressed_bytes += b"\x00"  # Adding pad doesn't change the result of compress
+    compressed_bytes = bytearray(2)
+    uncompressed_bytes_len = len(uncompressed_bytes) // 2 * 2
+
+    cb = 0  # Control bytes
+    cb_bit = 15  # We rotate from 15 to 0 for the compress flag
+    cb_pos = 0
+
+    i = 0
+    while i < uncompressed_bytes_len:
+        start = max(i - 4094, 0)  # start = 2 if i = 4096 (BLOCK_SIZE*2)
+        count_r = 0
+        max_i = -1
+
+        #######################################################
+        # start: contains the index of the best match so far ...
+        # (analysis of the algorithm is not written up yet)
+        #######################################################
+        while True:
+            # start = index of the first occurrence of uncompressed_bytes[i:i+2] between start and i+1
+            # We look in the last 4094 bytes
+            start = uncompressed_bytes.find(uncompressed_bytes[i: i+2], start, i+1)
+
+            # If the current 2 bytes aren't in the last 4094 bytes
+            if start == -1:
+                break
+
+            # If the first occurrence isn't at an index multiple of 2, we ignore it
+            if start % 2 != 0:
+                start += 1
+                continue
+            count = 2
+            while i < uncompressed_bytes_len - count and \
+                  count < 0xFFFF * 2 and \
+                  uncompressed_bytes[start+count] == uncompressed_bytes[i+count] and \
+                  uncompressed_bytes[start+count+1] == uncompressed_bytes[i+count+1]:
+                count += 2
+            if count_r < count:
+                count_r = count
+                max_i = start
+            start += 2
+        start = max_i
+
+        compress_flag = 0
+        if count_r >= 4:
+            compress_flag = 1
+            offset = (i - start) // 2
+            count_r //= 2
+            c = offset
+            if count_r <= 0x1F:
+                c |= count_r << 11
+                compressed_bytes += c.to_bytes(2, "big")
+            else:
+                compressed_bytes += c.to_bytes(2, "big") + count_r.to_bytes(2, "big")
+            i += count_r * 2
+        else:
+            compressed_bytes += uncompressed_bytes[i: i+2]
+            i += 2
+        cb |= (compress_flag << cb_bit)
+        cb_bit -= 1
+        if cb_bit < 0:
+            compressed_bytes[cb_pos:cb_pos + 2] = cb.to_bytes(2, "big")
+            cb = 0
+            cb_bit = 15
+            cb_pos = len(compressed_bytes)
+            compressed_bytes += b"\x00\x00"
+
+    cb |= (1 << cb_bit)
+    compressed_bytes[cb_pos:cb_pos + 2] = cb.to_bytes(2, "big")
+    compressed_bytes += b"\x00\x00"
+
+    return block_align(compressed_bytes)
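+
+
+# Round-trip sketch (illustrative): for even-length input the decoder should
+# recover the input exactly; the null byte appended above and the block
+# alignment only affect the stored stream, not the decoded data:
+#   >>> pzz_decompress(pzz_compress(b"ABCDABCD" * 512)) == b"ABCDABCD" * 512
+#   True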
+
+
+def pzz_unpack(pzz_path: Path, folder_path: Path, auto_decompress: bool = False):
+    if pzz_path.suffix != ".pzz" and pzz_path.suffix != ".mdt":
+        logging.warning(f"Invalid file format '{pzz_path.suffix}'; it should be .pzz or .mdt file format")
+
+    if folder_path != Path('.'):
+        unpacked_pzz_path = folder_path
+    else:
+        unpacked_pzz_path = pzz_path.parent / pzz_path.stem
+
+    if auto_decompress:
+        logging.info(f" unpzz({pzz_path}) in folder {unpacked_pzz_path}")
+    else:
+        logging.info(f" unpacking {pzz_path} in folder {unpacked_pzz_path}")
+    unpacked_pzz_path.mkdir(exist_ok=True)
+
+    with pzz_path.open("rb") as pzz_file:
+        file_count = int.from_bytes(pzz_file.read(4), "big")
+        logging.debug(f" -> File count: {file_count}")
+
+        # Get a list with the header file descriptors
+        files_descriptors_data = pzz_file.read(file_count * 4)
+        files_descriptors = [int.from_bytes(files_descriptors_data[i:i+4], "big") for i in range(0, file_count*4, 4)]
+
+        file_offset = BLOCK_SIZE
+        for index, file_descriptor in enumerate(files_descriptors):
+            # Bit 30 is the compression flag (bits from 0 to 31)
+            if file_descriptor & BIT_COMPRESSION_FLAG == 0:
+                compression_status = 'U'  # For the extracted filename: initially not compressed
+            else:
+                compression_status = 'C'  # For the extracted filename: initially compressed (file will have the ".pzzp" extension)
+
+            # We keep the low 30 bits of file_descriptor (file_len / BLOCK_SIZE)
+            file_descriptor &= FILE_LENGTH_MASK
+
+            # file_len is padded according to BLOCK_SIZE
+            file_len = file_descriptor * BLOCK_SIZE
+
+            # We generate the file name
+            filename = f"{index:03}{compression_status}_{pzz_path.stem}"
+            file_path = unpacked_pzz_path / filename
+
+            logging.debug(f" -> Offset: {file_offset:010} - {file_path}")
+
+            # If file_len is zero we create an empty file and move on to the next file_descriptor
+            if file_len == 0:
+                file_path.with_suffix(".dat").touch()
+                continue
+
+            # We seek to the file_offset
+            pzz_file.seek(file_offset)
+
+            # We extract the file; if auto_decompress is set we decompress all compressed files
+            file_data = pzz_file.read(file_len)
+            if auto_decompress and compression_status == 'C':
+                file_data = pzz_decompress(file_data)
+            file_data = remove_padding(bytearray(file_data))
+
+            if not auto_decompress and compression_status == 'C':
+                file_path = file_path.with_suffix(".pzzp")
+            else:
+                file_path = get_file_path(file_data, file_path)
+
+            file_path.write_bytes(fix_pad_decompress(file_data, file_path))
+
+            # Next file_offset = file_offset + file_len
+            # file_len is aligned to BLOCK_SIZE with null bytes
+            file_offset += file_len
+
+
+def pzz_pack(folder_path: Path, pzz_path: Path, auto_compress: bool = False):
+    if pzz_path == Path('.'):
+        pzz_path = folder_path.with_suffix(".pzz")
+    if pzz_path.suffix != ".pzz" and pzz_path.suffix != ".mdt":
+        logging.warning(f"Invalid file format '{pzz_path.suffix}': dest must be a pzz or mdt")
+
+    # We get all filenames from the folder to pack
+    files_path = list(folder_path.glob("*"))
+
+    if auto_compress:
+        logging.info(f" pzz({folder_path}) in pzz {pzz_path}")
+    else:
+        logging.info(f" packing({folder_path}) in pzz {pzz_path}")
+    logging.debug(f" -> {len(files_path)} files to pack")
+
+    with pzz_path.open("wb") as pzz_file:
+        # We seek to the end of the header
+        pzz_file.seek(BLOCK_SIZE)
+
+        # We put the total file count at the beginning of the pzz header
+        header_bytes = len(files_path).to_bytes(4, byteorder='big')
+
+        # We write every file at the end of the pzz_file
+        for file_path in files_path:
+            is_compressed = file_path.suffix == ".pzzp"
+            compression_status = file_path.name[3:4]
+
+            file_data = file_path.read_bytes()
+
+            # The file has to be compressed before packing
+            if compression_status == 'C' and not is_compressed and auto_compress:
+                file_data = pzz_compress(file_data)
+            # The file has to be decompressed before packing
+            elif compression_status == 'U' and is_compressed and auto_compress:
+                file_data = pzz_decompress(file_data)  # pad is not handled yet
+
+            """
+            # We add pad to align to BLOCK_SIZE
+            if compression_status == 'U':
+                if (len(file_data) % BLOCK_SIZE) > 0:
+                    file_data.extend(b"\x00" * (BLOCK_SIZE - (len(file_data) % BLOCK_SIZE)))
+            """
+
+            # file_descriptor = ceil of the file length / BLOCK_SIZE
+            file_descriptor = ceil(len(file_data) / BLOCK_SIZE)
+
+            # We add the compression flag bit to the file_descriptor
+            if compression_status == 'C':
+                file_descriptor |= BIT_COMPRESSION_FLAG
+
+            header_bytes += file_descriptor.to_bytes(4, byteorder='big')
+            pzz_file.write(file_data)
+
+        pzz_file.seek(0)
+        # We write the header
+        pzz_file.write(header_bytes)
+
+
+def unpzz(pzz_path: Path, folder_path: Path):
+    pzz_unpack(pzz_path, folder_path, auto_decompress=True)
+
+
+def pzz(folder_path: Path, pzz_file: Path):
+    pzz_pack(folder_path, pzz_file, auto_compress=True)
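+
+
+# Header layout sketch, as produced by pzz_pack() above: 4 bytes of big-endian
+# file count, then one 4-byte descriptor per file (bit 30 = compression flag,
+# low 30 bits = length in BLOCK_SIZE units); file data starts at offset 0x800.
+# Hypothetical descriptor of a compressed 0x1200-byte file:
+#   >>> hex(ceil(0x1200 / BLOCK_SIZE) | BIT_COMPRESSION_FLAG)
+#   '0x40000003'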
+
+
+def get_argparser():
+    import argparse
+    parser = argparse.ArgumentParser(description='PZZ / MDT (de)compressor & unpacker - [GameCube] Gotcha Force v' + __version__)
+    parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)
+    parser.add_argument('-v', '--verbose', action='store_true', help='verbose mode')
+    parser.add_argument('-di', '--disable-ignore', action='store_true', help="Disable .pzzp or .pzz file extension verification.")
+    parser.add_argument('input_path', metavar='INPUT', help='')
+    parser.add_argument('output_path', metavar='OUTPUT', help='', nargs='?', default="")
+
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('-pzz', '--pzz', action='store_true', help="-pzz source_folder (dest_file.pzz): pzz source_folder in a new file source_folder.pzz, or dest_file if specified")
+    group.add_argument('-unpzz', '--unpzz', action='store_true', help="-unpzz source_folder.pzz (dest_folder): unpzz the pzz in a new folder source_folder, or dest_folder if specified")
+    group.add_argument('-bpzz', '--batch-pzz', action='store_true', help='-bpzz source_folder (dest_folder): batch pzz (auto compress) all pzz_folder from source_folder into source_folder, or dest_folder if specified')
+    group.add_argument('-bunpzz', '--batch-unpzz', action='store_true', help='-bunpzz source_folder (dest_folder): batch unpzz (auto decompress) all pzz from source_folder into source_folder, or dest_folder if specified')
+    group.add_argument('-p', '--pack', action='store_true', help="-p source_folder (dest_file.pzz): pack source_folder in a new file source_folder.pzz, or dest_file if specified")
+    group.add_argument('-u', '--unpack', action='store_true', help='-u source_folder.pzz (dest_folder): unpack the pzz in a new folder source_folder, or dest_folder if specified')
+    group.add_argument('-bp', '--batch-pack', action='store_true', help='-bp source_folder (dest_folder): batch pack all pzz_folder from source_folder into source_folder, or dest_folder if specified')
+    group.add_argument('-bu', '--batch-unpack', action='store_true', help='-bu source_folder (dest_folder): batch unpack all pzz from source_folder into source_folder, or dest_folder if specified')
+    group.add_argument('-c', '--compress', action='store_true', help='-c source_file (dest_file): compress source_file in source_file.pzzp, or dest_file if specified')
+    group.add_argument('-d', '--decompress', action='store_true', help='-d source_file.pzzp (dest_file): decompress source_file.pzzp in source_file, or dest_file if specified')
+    group.add_argument('-bc', '--batch-compress', action='store_true', help='-bc source_folder dest_folder: compress all files from source_folder into dest_folder')
+    group.add_argument('-bd', '--batch-decompress', action='store_true', help='-bd source_folder dest_folder: decompress all files from source_folder into dest_folder')
+    return parser
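+
+
+# Example invocations (file and folder names are illustrative):
+#   python pzztool.py -unpzz pl0200.pzz        # unpack + decompress into ./pl0200/
+#   python pzztool.py -pzz pl0200 pl0200.pzz   # recompress + repack the folder
+#   python pzztool.py -c 000U_pl0200data.bin   # compress one file to 000U_pl0200data.pzzp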
+
+
+if __name__ == '__main__':
+    logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
+    args = get_argparser().parse_args()
+
+    p_input = Path(args.input_path)
+    p_output = Path(args.output_path)
+
+    if args.verbose:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    if args.compress:
+        logging.info("### Compress")
+        if p_output == Path("."):
+            p_output = Path(p_input.with_suffix(".pzzp"))
+
+        # Extension check
+        if not args.disable_ignore and p_input.suffix == ".pzzp":
+            logging.warning(f"Ignored - {p_input} - bad extension - must not be a pzzp")
+        elif not args.disable_ignore and p_output.suffix != ".pzzp":
+            logging.warning(f"Ignored - {p_output} - bad extension - must be a pzzp")
+        else:
+            logging.info(f"Compressing {p_input} in {p_output}")
+            p_output.write_bytes(pzz_compress(p_input.read_bytes()))
+    elif args.decompress:
+        logging.info("### Decompress")
+
+        # Extension check
+        if not args.disable_ignore and p_input.suffix != ".pzzp":
+            logging.warning(f"Ignored - {p_input} - bad extension - must be a pzzp")
+        else:
+            output_file_data = pzz_decompress(p_input.read_bytes())
+            if p_output == Path("."):
+                p_output = get_file_path(output_file_data, p_input.parent / p_input.stem)
+            logging.info(f"Decompressing {p_input} in {p_output}")
+            p_output.write_bytes(fix_pad_decompress(output_file_data, p_output))
+    elif args.batch_compress:
+        logging.info("### Batch Compress")
+        if p_output == Path("."):
+            p_output = p_input
+        p_output.mkdir(exist_ok=True)
+
+        for pzzfile_path in p_input.glob("*"):
+            # Extension check
+            if not args.disable_ignore and pzzfile_path.suffix == ".pzzp":
+                logging.warning(f"Ignored - {pzzfile_path} - bad extension - must not be a pzzp")
+                if p_input != p_output:
+                    shutil.copy(pzzfile_path, p_output / pzzfile_path.name)
+                continue
+            logging.info(f"Compressing {pzzfile_path} in {p_output / pzzfile_path.stem}.pzzp")
+            (p_output / pzzfile_path.stem).with_suffix(".pzzp").write_bytes(pzz_compress(pzzfile_path.read_bytes()))
+    elif args.batch_decompress:
+        logging.info("### Batch Decompress")
+        if p_output == Path("."):
+            p_output = Path(p_input)
+        p_output.mkdir(exist_ok=True)
+
+        for file_path in p_input.glob("*"):
+            if not args.disable_ignore and file_path.suffix != ".pzzp":
+                logging.warning(f"Ignored - {file_path} - bad extension - must be a pzzp")
+                if p_input != p_output:
+                    shutil.copy(file_path, p_output / file_path.name)
+                continue
+            logging.info(f"Decompressing {file_path} in {p_output / file_path.stem}?.?")
+            uncompressed_content = pzz_decompress(file_path.read_bytes())
+            uncompressed_path = get_file_path(uncompressed_content, p_output / file_path.name)
+            uncompressed_path.write_bytes(fix_pad_decompress(uncompressed_content, uncompressed_path))
+    elif args.pack:
+        logging.info("### Pack")
+        pzz_pack(p_input, p_output)
+    elif args.unpack:
+        logging.info("### Unpack")
+        pzz_unpack(p_input, p_output)
+    elif args.pzz:
+        logging.info("### PZZ")
+        pzz(p_input, p_output)
+    elif args.unpzz:
+        logging.info("### UNPZZ")
+        unpzz(p_input, p_output)
+    elif args.batch_pack:
+        logging.info("### Batch Pack")
+        p_output.mkdir(exist_ok=True)
+
+        if p_output == Path('.'):
+            p_output = p_input
+        for folder_path in p_input.glob("*"):
+            pzz_pack(folder_path, p_output / Path(folder_path.name).with_suffix(".pzz"))
+    elif args.batch_unpack:
+        logging.info("### Batch Unpack")
+        p_output.mkdir(exist_ok=True)
+
+        if p_output == Path('.'):
+            p_output = p_input
+        for file_path in p_input.glob("*"):
+            pzz_unpack(file_path, p_output / file_path.stem)
+    elif args.batch_pzz:
+        logging.info("### Batch PZZ")
+        p_output.mkdir(exist_ok=True)
+
+        if p_output == Path('.'):
+            p_output = p_input
+        for folder_path in p_input.glob("*"):
+            pzz(folder_path, p_output / Path(folder_path.name).with_suffix(".pzz"))
+    elif args.batch_unpzz:
+        logging.info("### Batch UNPZZ")
+        p_output.mkdir(exist_ok=True)
+
+        if p_output == Path('.'):
+            p_output = p_input
+        for file_path in p_input.glob("*"):
+            unpzz(file_path, p_output / file_path.stem)