From 9a6b751b5aa27dd4467f632a387bdd9a37ed5479 Mon Sep 17 00:00:00 2001
From: tmpz23 <28760271+tmpz23@users.noreply.github.com>
Date: Sun, 16 Jan 2022 17:39:17 +0100
Subject: [PATCH] Update pzztool.py

---
 pzztool.py | 294 ++++++++++++++++++++++++++---------------------------
 1 file changed, 142 insertions(+), 152 deletions(-)

diff --git a/pzztool.py b/pzztool.py
index febf63a..7efbda4 100644
--- a/pzztool.py
+++ b/pzztool.py
@@ -2,33 +2,33 @@
 from math import ceil
 from pathlib import Path
 import shutil
-from struct import unpack
-from os import listdir
 import logging
 
-__version__ = "0.14.6"
+__version__ = "0.14.7"
 __author__ = "rigodron, algoflash, GGLinnk"
 __OriginalAutor__ = "infval"
 __license__ = "MIT"
 __status__ = "developpement"
 
-# Pour plus d'informations sur le format PZZ :
+# For more information on the PZZ file format:
 # http://virtualre.rf.gd/index.php/PZZ_(Gotcha_Force)
 BIT_COMPRESSION_FLAG = 0x40000000
 FILE_LENGTH_MASK = 0x3FFFFFFF
-CHUNK_SIZE = 0x800
+BLOCK_SIZE = 0x800
 TPL_MAGIC_NUMBER = b"\x00\x20\xAF\x30" # http://virtualre.rf.gd/index.php/TPL_(Format_de_fichier)
 CHD_MAGIC_NUMBER = b"Head"
-BIN_HITS_MAGICNUMBER = b"STIH"
+BINHITS_MAGIC_NUMBER = b"STIH"
 TSB_MAGIC_NUMBER = b"TSBD"
 ICON_MAGIC_NUMBER = b"GOTCHA FORCE"
 
-def get_file_path(file_content: bytes, path: Path):
-    # Attention à l'implémentation de 001 pour les dpxxxx
-    if path.name[5:7] == "pl" or path.name[5:7] == "dp": # si c'est un plxxxx ou un dpxxxx.pzz
+
+def get_file_path(file_data: bytes, path: Path):
+    # If it's a plxxxx or a dpxxxx.pzz
+    # (001 is always absent for dpxxxx)
+    if path.name[5:7] == "pl" or path.name[5:7] == "dp":
         if path.name[0:3] == "000":
             return path.with_name(path.name + "data").with_suffix(".bin")
         if path.name[0:3] == "002":
@@ -52,36 +52,37 @@ def get_file_path(file_content: bytes, path: Path):
         return path.with_name(path.name + "00_mdl").with_suffix(".arc")
     if path.name[0:3] == "002":
         return path.with_name(path.name + "01_mdl").with_suffix(".arc")
-    elif file_content.startswith(ICON_MAGIC_NUMBER):
+    elif file_data.startswith(ICON_MAGIC_NUMBER):
         return path.with_name(path.name + "icon").with_suffix(".bin")
-    if file_content.startswith(TPL_MAGIC_NUMBER):
+    if file_data.startswith(TPL_MAGIC_NUMBER):
         return path.with_suffix(".tpl")
-    if file_content.startswith(CHD_MAGIC_NUMBER):
+    if file_data.startswith(CHD_MAGIC_NUMBER):
         return path.with_suffix(".chd")
-    if file_content.startswith(TSB_MAGIC_NUMBER):
+    if file_data.startswith(TSB_MAGIC_NUMBER):
         return path.with_suffix(".tsb")
-    if file_content.startswith(BIN_HITS_MAGICNUMBER):
+    if file_data.startswith(BINHITS_MAGIC_NUMBER):
        return path.with_suffix(".bin")
-    # Par défaut
+    # Default value
     return path.with_suffix(".dat")
 
 
-# Non implémenté : pour supprimer le pad à la fin des fichiers unpack
-# Les fichiers sans pad se terminent éventuellement par des b"\x00"
-# ce qui impose de connaître le format de fichier pour implémenter cette fonction
-def remove_padding(file_content: bytearray):
-    return file_content
-    # return file_content.rstrip(b'\x00')
+
+# Not implemented: remove the padding at the end of unpacked files.
+# The problem is that we can't know the exact original null-byte padding of the file,
+# -> so we can't remove the trailing pad.
+def remove_padding(file_data: bytearray):
+    return file_data
+    # return file_data.rstrip(b'\x00')
 
 
-def bytes_align_compress(bout: bytes):
-    # Comme le montre le fichier pl080d/006C_pl080d.pzzp, on ajoute 0x800 si c'est aligné sur un multiple
-    if len(bout) % CHUNK_SIZE == 0:
-        return bout.ljust(CHUNK_SIZE * int(len(bout) / CHUNK_SIZE + 1), b"\x00")
-    return bout.ljust(CHUNK_SIZE * ceil(len(bout) / CHUNK_SIZE), b"\x00")
+def block_align(bout: bytes):
+    # As demonstrated by pl080d/006C_pl080d.pzzp, we add one full BLOCK_SIZE block when the length is already a multiple of BLOCK_SIZE
+    if len(bout) % BLOCK_SIZE == 0:
+        return bout.ljust(BLOCK_SIZE * int(len(bout) / BLOCK_SIZE + 1), b"\x00")
+    return bout.ljust(BLOCK_SIZE * ceil(len(bout) / BLOCK_SIZE), b"\x00")
 
 
-def bytes_align_decompress(bout: bytes, path: Path):
-    # Suite à la décompression, on réajuste la taille en fonction du format du fichier
+def fix_pad_decompress(bout: bytes, path: Path):
+    # We adjust the file length according to the file format after decompression
     if path.name[5:7] == "pl" and path.suffix == ".arc" or \
        path.name[5:7] == "dp" and path.suffix == ".arc" or \
        path.name[5:9] == "efct" and path.suffix == ".arc":
@@ -94,10 +95,10 @@ def pzz_decompress(compressed_bytes: bytes):
     compressed_bytes_size = len(compressed_bytes) // 2 * 2
 
     cb = 0  # Control bytes
-    cb_bit = -1  # rotations de 15 à 0 pour le flag de compression
+    cb_bit = -1  # We rotate from 15 down to 0 for the compression flag
     i = 0
     while i < compressed_bytes_size:
-        if cb_bit < 0:  # tous les
+        if cb_bit < 0:
             cb = compressed_bytes[i + 1]
             cb |= compressed_bytes[i + 0] << 8
             cb_bit = 15
@@ -123,7 +124,7 @@ def pzz_decompress(compressed_bytes: bytes):
             index = len(uncompressed_bytes) - offset
 
             for j in range(count):
-                uncompressed_bytes.append(uncompressed_bytes[index + j])
+                uncompressed_bytes.append(uncompressed_bytes[index + j])  # TODO: could be improved with a slice
         else:
             uncompressed_bytes += compressed_bytes[i: i+2]
             i += 2
@@ -132,38 +133,38 @@ def pzz_compress(uncompressed_bytes: bytes):
-    uncompressed_bytes += b"\x00"  # l'ajout de padding ne change pas le resultat de la compression
+    uncompressed_bytes += b"\x00"  # Adding padding doesn't change the compression result
     compressed_bytes = bytearray(2)
-    size_uncompressed_bytes = len(uncompressed_bytes) // 2 * 2
+    uncompressed_bytes_len = len(uncompressed_bytes) // 2 * 2
 
     cb = 0  # Control bytes
-    cb_bit = 15  # rotations de 15 à 0 pour le flag de compression
+    cb_bit = 15  # We rotate from 15 down to 0 for the compression flag
     cb_pos = 0
 
     i = 0
-    while i < size_uncompressed_bytes:
-        start = max(i - 4094, 0)  # start = 2 si i = 4096 (0x800*2)
+    while i < uncompressed_bytes_len:
+        start = max(i - 4094, 0)  # start = 2 if i = 4096 (BLOCK_SIZE*2)
         count_r = 0
         max_i = -1
 
         #######################################################
-        # start : contient l'index .. (en cours de rédaction)
+        # start: contains the index .. (analysis of the algorithm is not written up yet)
         #######################################################
         while True:
-            # start = index première occurence de uncompressed_bytes[i:i+2] entre start et i+1
-            # on regarde maxi dans les 4094 derniers octets
+            # start = index of the first occurrence of uncompressed_bytes[i:i+2] between start and i+1
+            # We look within the last 4094 bytes
             start = uncompressed_bytes.find(uncompressed_bytes[i: i+2], start, i+1)
 
-            # si les 2 octets étudiés n'apparaissent pas dans les 4094 derniers octets
+            # if the current 2 bytes don't appear in the last 4094 bytes
            if start == -1:
                 break
 
-            # si la première occurence n'est pas à un index multiple de 2, on l'ignore
+            # If the first occurrence isn't at an index that is a multiple of 2, we ignore it
             if start % 2 != 0:
                 start += 1
                 continue
 
             count = 2
-            while i < size_uncompressed_bytes - count and \
+            while i < uncompressed_bytes_len - count and \
                   count < 0xFFFF * 2 and \
                   uncompressed_bytes[start+count] == uncompressed_bytes[i+count] and \
                   uncompressed_bytes[start+count+1] == uncompressed_bytes[i+count+1]:
@@ -174,9 +175,6 @@ def pzz_compress(uncompressed_bytes: bytes):
                 start += 2
         start = max_i
 
-        #######################################################
-        #
-        #######################################################
         compress_flag = 0
         if count_r >= 4:
             compress_flag = 1
@@ -205,150 +203,142 @@
         compressed_bytes[cb_pos:cb_pos + 2] = cb.to_bytes(2, "big")
 
     compressed_bytes += b"\x00\x00"
 
-    return bytes_align_compress(compressed_bytes)
+    return block_align(compressed_bytes)
 
 
-def pzz_unpack(pzz_path: Path, dest_folder: Path, auto_decompress: bool = False):
+def pzz_unpack(pzz_path: Path, folder_path: Path, auto_decompress: bool = False):
     if pzz_path.suffix != ".pzz" and pzz_path.suffix != ".mdt":
         logging.warning(f"Invalid file format '{pzz_path.suffix}'; it should be .pzz or .mdt file format")
 
-    if dest_folder != Path('.'):
-        unpacked_pzz_path = dest_folder
+    if folder_path != Path('.'):
+        unpacked_pzz_path = folder_path
     else:
         unpacked_pzz_path = pzz_path.parent / pzz_path.stem
 
-    if(auto_decompress):
+    if auto_decompress:
         logging.info(f"    unpzz({pzz_path}) in folder {unpacked_pzz_path}")
     else:
         logging.info(f"    unpacking {pzz_path} in folder {unpacked_pzz_path}")
     unpacked_pzz_path.mkdir(exist_ok=True)
 
     with pzz_path.open("rb") as pzz_file:
-        # file_count reçoit le nombre de fichiers présent dans le PZZ :
-        # On lit les 4 premiers octets (uint32 big-endian)
-        file_count, = unpack(">I", pzz_file.read(4))
-
-        # files_descriptors reçoit un tuple avec l'ensemble des descripteurs de fichiers (groupes d'uint32 big-endian)
-        files_descriptors = unpack(f">{file_count}I", pzz_file.read(file_count * 4))
-
+        file_count = int.from_bytes(pzz_file.read(4), "big")
         logging.debug(f"    -> File count : {file_count}")
 
-        offset = CHUNK_SIZE
-        # on parcours le tuple de descripteurs de fichiers
+        # Get the list of file descriptors from the header
+        files_descriptors_data = pzz_file.read(file_count * 4)
+        files_descriptors = [int.from_bytes(files_descriptors_data[i:i+4], "big") for i in range(0, file_count*4, 4)]
+
+        file_offset = BLOCK_SIZE
         for index, file_descriptor in enumerate(files_descriptors):
+            # Bit 30 is the compression flag (bits are numbered 0 to 31)
+            if file_descriptor & BIT_COMPRESSION_FLAG == 0:
+                compression_status = 'U'  # For the extracted filename: initially not compressed
+            else:
+                compression_status = 'C'  # For the extracted filename: initially compressed (the file will get the ".pzzp" extension)
 
-            # Le bit 30 correspond au flag de compression (bits numérotés de 0 à 31)
-            is_compressed = (file_descriptor & BIT_COMPRESSION_FLAG) != 0
-            if not is_compressed:  # Si le fichier n'est pas compressé, on ajoute 'U' derrière l'index
-                compression_status = 'U'
-            else:  # Si le fichier est compressé on ajoute 'C' derrière l'index et l'extension ".pzzp"
-                compression_status = 'C'
-
-            # file_descriptor reçoit maintenant les 30 premiers bits : (la taille / CHUNK_SIZE)
+            # We keep the first 30 bits of file_descriptor (file_len / BLOCK_SIZE)
             file_descriptor &= FILE_LENGTH_MASK
 
-            # file_len reçoit la taille du fichier
-            # la taille du fichier est un multiple de CHUNK_SIZE, on paddera avec des 0 jusqu'au fichier suivant
-            # file_len contient alors la taille du fichier en octets
-            file_len = file_descriptor * CHUNK_SIZE
+            # file_len is padded according to BLOCK_SIZE
+            file_len = file_descriptor * BLOCK_SIZE
 
-            # On forme le nom du nouveau fichier que l'on va extraire
+            # We generate the name of the file to extract
             filename = f"{index:03}{compression_status}_{pzz_path.stem}"
             file_path = unpacked_pzz_path / filename
 
-            logging.debug(f"    -> Offset: {offset:010} - {file_path}")
+            logging.debug(f"    -> Offset: {file_offset:010} - {file_path}")
 
-            # Si la taille est nulle, on créé un fichier vide et on passe au descripteur de fichier suivant
+            # If file_len is zero, we create an empty file and move on to the next file descriptor
             if file_len == 0:
                 file_path.with_suffix(".dat").touch()
                 continue
 
-            # On se positionne au début du fichier dans l'archive
-            pzz_file.seek(offset)
-            # On extrait notre fichier et on le décompresse
-            if compression_status == 'C' and auto_decompress:
-                file_content = pzz_decompress(pzz_file.read(file_len))
-            else:
-                file_content = pzz_file.read(file_len)
+            # We seek to file_offset
+            pzz_file.seek(file_offset)
 
-            file_content = remove_padding(bytearray(file_content))
+            # We extract the file; if auto_decompress is set, we decompress compressed files
+            file_data = pzz_file.read(file_len)
+            if auto_decompress and compression_status == 'C':
+                file_data = pzz_decompress(file_data)
+            file_data = remove_padding(bytearray(file_data))
 
-            if not auto_decompress and compression_status != 'U':
+            if not auto_decompress and compression_status == 'C':
                 file_path = file_path.with_suffix(".pzzp")
             else:
-                file_path = get_file_path(file_content, file_path)
+                file_path = get_file_path(file_data, file_path)
 
-            file_path.write_bytes(bytes_align_decompress(file_content, file_path))
+            file_path.write_bytes(fix_pad_decompress(file_data, file_path))
 
-            # Enfin, on ajoute la taille du fichier afin de pointer sur le fichier suivant
-            # La taille du fichier étant un multiple de CHUNK_SIZE, on aura complété les 2048 octets finaux avec des 0x00
-            offset += file_len
+            # next file_offset = file_offset + file_len
+            # file_len is aligned to BLOCK_SIZE with null bytes
+            file_offset += file_len
 
 
-def pzz_pack(src_path: Path, dest_file: Path, auto_compress: bool = False):
-    if dest_file == Path('.'):
-        dest_file = src_path.with_suffix(".pzz")
-    if dest_file.suffix != ".pzz" and dest_file.suffix != ".mdt":
-        logging.warning(f"Invalid file format '{dest_file.suffix}' : dest must be a pzz or mdt")
+def pzz_pack(folder_path: Path, pzz_path: Path, auto_compress: bool = False):
+    if pzz_path == Path('.'):
+        pzz_path = folder_path.with_suffix(".pzz")
+    if pzz_path.suffix != ".pzz" and pzz_path.suffix != ".mdt":
+        logging.warning(f"Invalid file format '{pzz_path.suffix}': dest must be a .pzz or .mdt")
 
-    # On récupère les fichiers du dossier à compresser
-    src_files = listdir(src_path)
+    # We list every file of the folder to pack
+    files_path = list(folder_path.glob("*"))
 
     if auto_compress:
-        logging.info(f"    pzz({src_path}) in pzz {dest_file}")
+        logging.info(f"    pzz({folder_path}) in pzz {pzz_path}")
     else:
-        logging.info(f"    packing {src_path} in pzz {dest_file}")
-    logging.debug(f"    -> {len(src_files)} files to pack")
+        logging.info(f"    packing({folder_path}) in pzz {pzz_path}")
+    logging.debug(f"    -> {len(files_path)} files to pack")
 
-    with dest_file.open("wb") as pzz_file:
-        # On se place à la fin du header PZZ
-        pzz_file.seek(CHUNK_SIZE)
+    with pzz_path.open("wb") as pzz_file:
+        # We seek to the end of the header
+        pzz_file.seek(BLOCK_SIZE)
 
-        # On récupère le nombre total de fichiers pour le mettre au début du header
-        header_bytes = len(src_files).to_bytes(4, byteorder='big')
+        # We get the total file count to put at the beginning of the pzz header
+        header_bytes = len(files_path).to_bytes(4, byteorder='big')
 
-        # On écrit tous les fichiers à la suite du header
-        for src_file_name in src_files:
-            is_compressed = Path(src_file_name).suffix == ".pzzp"
-            compression_status = src_file_name[3:4]
+        # We write every file after the header
+        for file_path in files_path:
+            is_compressed = file_path.suffix == ".pzzp"
+            compression_status = file_path.name[3:4]
 
-            src_file = (src_path / src_file_name).read_bytes()
+            file_data = file_path.read_bytes()
 
-            # Le fichier doit être compressé avant d'être pack
+            # The file has to be compressed before packing
             if compression_status == 'C' and not is_compressed and auto_compress:
-                src_file = pzz_compress(src_file)
-            # Le fichier doit être décompressé avant d'être pack
+                file_data = pzz_compress(file_data)
+            # The file has to be decompressed before packing
             elif compression_status == 'U' and is_compressed and auto_compress:
-                src_file = pzz_decompress(src_file)  # padding à gérer
+                file_data = pzz_decompress(file_data)  # padding is not handled yet
 
             """
-            # on ajoute le padding pour correspondre à un multiple de CHUNK_SIZE
+            # we pad to a multiple of BLOCK_SIZE
             if compression_status == 'U':
-                if (len(src_file) % CHUNK_SIZE) > 0:
-                    src_file.extend(b"\x00" * (CHUNK_SIZE - (len(src_file) % CHUNK_SIZE)))
+                if (len(file_data) % BLOCK_SIZE) > 0:
+                    file_data.extend(b"\x00" * (BLOCK_SIZE - (len(file_data) % BLOCK_SIZE)))
             """
 
-            # file_descriptor = arrondi supérieur de la taille / CHUNK_SIZE
-            file_descriptor = ceil(len(src_file) / CHUNK_SIZE)
+            # file_descriptor = ceil(file length / BLOCK_SIZE)
+            file_descriptor = ceil(len(file_data) / BLOCK_SIZE)
 
-            # On ajoute le flag de compression au file_descriptor
+            # We add the compression flag bit to the file_descriptor
             if compression_status == 'C':
                 file_descriptor |= BIT_COMPRESSION_FLAG
 
             header_bytes += file_descriptor.to_bytes(4, byteorder='big')
-            pzz_file.write(src_file)
+            pzz_file.write(file_data)
 
         pzz_file.seek(0)
-        # On écrit le header
+        # We write the header
         pzz_file.write(header_bytes)
 
 
-def unpzz(src_path: Path, dest_file: Path):
-    pzz_unpack(src_path, dest_file, auto_decompress = True)
+def unpzz(pzz_path: Path, folder_path: Path):
+    pzz_unpack(pzz_path, folder_path, auto_decompress=True)
 
 
-def pzz(src_path: Path, dest_file: Path):
-    pzz_pack(src_path, dest_file, auto_compress = True)
+def pzz(folder_path: Path, pzz_file: Path):
+    pzz_pack(folder_path, pzz_file, auto_compress=True)
 
 
 def get_argparser():
@@ -391,7 +381,7 @@ if __name__ == '__main__':
         if(p_output == Path(".")):
             p_output = Path(p_input.with_suffix(".pzzp"))
 
-        # Si on a pas la bonne extension on ne compresse pas le fichier
+        # Extension check
         if not args.disable_ignore and p_input.suffix == ".pzzp":
             logging.warning(f"Ignored - {p_input} - bad extension - must not be a pzzp")
         elif not args.disable_ignore and p_output.suffix != ".pzzp":
@@ -404,45 +394,45 @@ if __name__ == '__main__':
         if p_output == Path("."):
             p_output = p_input.parent / p_input.stem
 
-        # Si on a pas la bonne extension on ne decompresse pas le fichier
+        # Extension check
         if not args.disable_ignore and p_input.suffix != ".pzzp":
             logging.warning(f"Ignored - {p_input} - bad extension - must be a pzzp")
         else:
-            output_file_content = pzz_decompress(p_input.read_bytes())
-            p_output = get_file_path(output_file_content, p_output)
+            output_file_data = pzz_decompress(p_input.read_bytes())
+            p_output = get_file_path(output_file_data, p_output)
             logging.info(f"Decompressing {p_input} in {p_output}")
-            p_output.write_bytes(bytes_align_decompress(output_file_content, p_output))
+            p_output.write_bytes(fix_pad_decompress(output_file_data, p_output))
     elif args.batch_compress:
         logging.info("### Batch Compress")
         if(p_output == Path(".")):
             p_output = Path(p_input)
         p_output.mkdir(exist_ok=True)
 
-        for filename in listdir(p_input):
-            # Si on a pas la bonne extension on ne compresse pas le fichier
-            if not args.disable_ignore and Path(filename).suffix == ".pzzp":
-                logging.warning(f"Ignored - {filename} - bad extension - musn't be a pzzp")
+        for file_path in p_input.glob("*"):
+            # Extension check
+            if not args.disable_ignore and file_path.suffix == ".pzzp":
+                logging.warning(f"Ignored - {file_path} - bad extension - mustn't be a pzzp")
                 if p_input != p_output:
-                    shutil.copy(p_input / filename, p_output / filename)
+                    shutil.copy(file_path, p_output / file_path.name)
                 continue
 
-            logging.info(f"Compressing {filename}")
-            (p_output / (Path(filename).stem + ".pzzp")).write_bytes(pzz_compress((p_input / filename).read_bytes()))
+            logging.info(f"Compressing {file_path}")
+            (p_output / file_path.stem).with_suffix(".pzzp").write_bytes(pzz_compress(file_path.read_bytes()))
     elif args.batch_decompress:
         logging.info("### Batch Decompress")
         if(p_output == Path(".")):
             p_output = Path(p_input)
         p_output.mkdir(exist_ok=True)
 
-        for filename in listdir(p_input):
-            if not args.disable_ignore and Path(filename).suffix != ".pzzp":
-                logging.warning(f"Ignored - {filename} - bad extension - must be a pzzp")
+        for file_path in p_input.glob("*"):
+            if not args.disable_ignore and file_path.suffix != ".pzzp":
+                logging.warning(f"Ignored - {file_path} - bad extension - must be a pzzp")
                 if p_input != p_output:
-                    shutil.copy(p_input / filename, p_output / filename)
+                    shutil.copy(file_path, p_output / file_path.name)
                 continue
 
-            logging.info(f"Decompressing {filename}")
-            uncompressed_content = pzz_decompress((p_input / filename).read_bytes())
-            uncompressed_path = get_file_path(uncompressed_content, p_output / Path(filename))
-            uncompressed_path.write_bytes(bytes_align_decompress(uncompressed_content, uncompressed_path))
+            logging.info(f"Decompressing {file_path}")
+            uncompressed_content = pzz_decompress(file_path.read_bytes())
+            uncompressed_path = get_file_path(uncompressed_content, p_output / file_path.name)
+            uncompressed_path.write_bytes(fix_pad_decompress(uncompressed_content, uncompressed_path))
     elif args.pack:
         logging.info("### Pack")
         pzz_pack(p_input, p_output)
@@ -461,29 +451,29 @@ if __name__ == '__main__':
         if(p_output == Path('.')):
             p_output = p_input
 
-        for folder in listdir(p_input):
-            pzz_pack(p_input / folder, p_output / Path(folder).with_suffix(".pzz"))
+        for folder_path in p_input.glob("*"):
+            pzz_pack(folder_path, p_output / Path(folder_path.name).with_suffix(".pzz"))
     elif args.batch_unpack:
         logging.info("### Batch Unpack")
         p_output.mkdir(exist_ok=True)
         if(p_output == Path('.')):
             p_output = p_input
 
-        for filename in listdir(p_input):
-            pzz_unpack(p_input / filename, p_output / Path(filename).stem)
+        for file_path in p_input.glob("*"):
+            pzz_unpack(file_path, p_output / file_path.stem)
     elif args.batch_pzz:
         logging.info("### Batch PZZ")
         p_output.mkdir(exist_ok=True)
         if(p_output == Path('.')):
             p_output = p_input
 
-        for folder in listdir(p_input):
-            pzz(p_input / folder, p_output / Path(folder).with_suffix(".pzz"))
+        for folder_path in p_input.glob("*"):
+            pzz(folder_path, p_output / Path(folder_path.name).with_suffix(".pzz"))
     elif args.batch_unpzz:
         logging.info("### Batch UNPZZ")
         p_output.mkdir(exist_ok=True)
         if(p_output == Path('.')):
             p_output = p_input
 
-        for filename in listdir(p_input):
-            unpzz(p_input / filename, p_output / Path(filename).stem)
+        for file_path in p_input.glob("*"):
+            unpzz(file_path, p_output / file_path.stem)
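
Notes (sketches appended after the patch; these are not part of the diff):

PZZ header layout, as handled by pzz_unpack above: a big-endian uint32 file count, then one big-endian uint32 descriptor per file; bit 30 of each descriptor is the compression flag and its low 30 bits give the file length in BLOCK_SIZE (0x800-byte) blocks, with file data starting at offset 0x800. A minimal read-only sketch of that layout, using only the constants from this patch; list_pzz_entries is a hypothetical helper name, not part of pzztool.py:

    from pathlib import Path

    BLOCK_SIZE = 0x800
    BIT_COMPRESSION_FLAG = 0x40000000
    FILE_LENGTH_MASK = 0x3FFFFFFF

    def list_pzz_entries(pzz_path: Path):
        # Hypothetical helper: returns (offset, length, compressed) for each archived file.
        data = pzz_path.read_bytes()
        file_count = int.from_bytes(data[0:4], "big")
        file_offset = BLOCK_SIZE  # file data starts right after the header block
        entries = []
        for i in range(file_count):
            descriptor = int.from_bytes(data[4 + i * 4:8 + i * 4], "big")
            compressed = bool(descriptor & BIT_COMPRESSION_FLAG)     # bit 30
            file_len = (descriptor & FILE_LENGTH_MASK) * BLOCK_SIZE  # low 30 bits, in blocks
            entries.append((file_offset, file_len, compressed))
            file_offset += file_len
        return entries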
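
The alignment rule in block_align is easy to misread: input whose length is already a multiple of BLOCK_SIZE still gains one extra zero-filled block (cf. the pl080d/006C_pl080d.pzzp comment), while unaligned input is only padded up to the next block boundary. A quick check of both branches, reusing the function exactly as patched:

    from math import ceil

    BLOCK_SIZE = 0x800

    def block_align(bout: bytes) -> bytes:
        # Copy of the patched function, reproduced here for the checks below.
        if len(bout) % BLOCK_SIZE == 0:
            return bout.ljust(BLOCK_SIZE * int(len(bout) / BLOCK_SIZE + 1), b"\x00")
        return bout.ljust(BLOCK_SIZE * ceil(len(bout) / BLOCK_SIZE), b"\x00")

    assert len(block_align(b"\xFF" * 0x800)) == 0x1000  # aligned: one extra empty block
    assert len(block_align(b"\xFF" * 0x7FF)) == 0x800   # unaligned: padded to next boundary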
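
About the byte-by-byte copy in pzz_decompress (the loop now marked "TODO: could be improved with a slice"): a back-reference may overlap the output it is producing, so the window has to be re-read as it grows; a one-shot slice copy is only equivalent when count <= offset. A self-contained illustration of the overlapping case, mirroring the patched loop's index arithmetic:

    out = bytearray(b"AB")
    offset, count = 2, 8            # reference reaches back 2 bytes but copies 8
    index = len(out) - offset
    for j in range(count):
        out.append(out[index + j])  # re-reads bytes appended by earlier iterations
    assert out == bytearray(b"ABABABABAB")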
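
Worked example of the descriptor arithmetic in pzz_pack: a file of 0x1234 bytes occupies ceil(0x1234 / 0x800) = 3 blocks, and packing it as compressed sets bit 30. The length is an arbitrary example value; the constants come from this patch:

    from math import ceil

    BLOCK_SIZE = 0x800
    BIT_COMPRESSION_FLAG = 0x40000000

    file_len = 0x1234  # example length only
    descriptor = ceil(file_len / BLOCK_SIZE) | BIT_COMPRESSION_FLAG
    assert descriptor == 0x40000003
    assert descriptor.to_bytes(4, byteorder="big") == b"\x40\x00\x00\x03"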