Update pzztool.py

This commit is contained in:
tmpz23 2022-01-16 17:39:17 +01:00 committed by GitHub
parent 4b5b5b6026
commit 9a6b751b5a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -2,33 +2,33 @@
from math import ceil
from pathlib import Path
import shutil
from struct import unpack
from os import listdir
import logging
__version__ = "0.14.6"
__version__ = "0.14.7"
__author__ = "rigodron, algoflash, GGLinnk"
__OriginalAutor__ = "infval"
__license__ = "MIT"
__status__ = "developpement"
# Pour plus d'informations sur le format PZZ :
# For more information on the PZZ file format :
# http://virtualre.rf.gd/index.php/PZZ_(Gotcha_Force)
BIT_COMPRESSION_FLAG = 0x40000000
FILE_LENGTH_MASK = 0x3FFFFFFF
CHUNK_SIZE = 0x800
BLOCK_SIZE = 0x800
TPL_MAGIC_NUMBER = b"\x00\x20\xAF\x30" # http://virtualre.rf.gd/index.php/TPL_(Format_de_fichier)
CHD_MAGIC_NUMBER = b"Head"
BIN_HITS_MAGICNUMBER = b"STIH"
BINHITS_MAGIC_NUMBER = b"STIH"
TSB_MAGIC_NUMBER = b"TSBD"
ICON_MAGIC_NUMBER = b"GOTCHA FORCE"
def get_file_path(file_content: bytes, path: Path):
# Attention à l'implémentation de 001 pour les dpxxxx
if path.name[5:7] == "pl" or path.name[5:7] == "dp": # si c'est un plxxxx ou un dpxxxx.pzz
def get_file_path(file_data: bytes, path: Path):
# If it's a plxxxx or a dpxxxx.pzz
# 001 is always absent for dpxxxx
if path.name[5:7] == "pl" or path.name[5:7] == "dp":
if path.name[0:3] == "000":
return path.with_name(path.name + "data").with_suffix(".bin")
if path.name[0:3] == "002":
@ -52,36 +52,37 @@ def get_file_path(file_content: bytes, path: Path):
return path.with_name(path.name + "00_mdl").with_suffix(".arc")
if path.name[0:3] == "002":
return path.with_name(path.name + "01_mdl").with_suffix(".arc")
elif file_content.startswith(ICON_MAGIC_NUMBER):
elif file_data.startswith(ICON_MAGIC_NUMBER):
return path.with_name(path.name + "icon").with_suffix(".bin")
if file_content.startswith(TPL_MAGIC_NUMBER):
if file_data.startswith(TPL_MAGIC_NUMBER):
return path.with_suffix(".tpl")
if file_content.startswith(CHD_MAGIC_NUMBER):
if file_data.startswith(CHD_MAGIC_NUMBER):
return path.with_suffix(".chd")
if file_content.startswith(TSB_MAGIC_NUMBER):
if file_data.startswith(TSB_MAGIC_NUMBER):
return path.with_suffix(".tsb")
if file_content.startswith(BIN_HITS_MAGICNUMBER):
if file_data.startswith(BINHITS_MAGIC_NUMBER):
return path.with_suffix(".bin")
# Par défaut
# Default value
return path.with_suffix(".dat")
def remove_padding(file_data: bytearray):
    """Return ``file_data`` unchanged (padding removal is not implemented).

    Unpacked files end with a null-byte pad, but a file without pad may
    itself end with null bytes.  Without knowing the file format, the pad
    cannot be told apart from real data, so nothing is stripped.

    :param file_data: content of an unpacked file.
    :return: the very same object, untouched.
    """
    # Stripping every trailing null byte (file_data.rstrip(b"\x00")) is NOT
    # done on purpose: it could remove bytes that belong to the file.
    return file_data
def block_align(bout: bytes, block_size=None):
    """Pad ``bout`` with null bytes up to the next block boundary.

    As demonstrated by pl080d/006C_pl080d.pzzp, data whose length is already
    a multiple of the block size still receives one extra full block of
    padding.  Both cases therefore reduce to "number of complete blocks,
    plus one".

    :param bout: data to pad (``bytes`` or ``bytearray``).
    :param block_size: alignment in bytes; defaults to ``BLOCK_SIZE``.
    :return: ``bout`` right-padded with null bytes.
    """
    if block_size is None:
        block_size = BLOCK_SIZE
    # Integer arithmetic only: no float division, so no rounding issue
    # whatever the size of the data.
    aligned_len = (len(bout) // block_size + 1) * block_size
    return bout.ljust(aligned_len, b"\x00")


# Backward-compatible alias: name of this function before it was renamed.
bytes_align_compress = block_align
def bytes_align_decompress(bout: bytes, path: Path):
# Suite à la décompression, on réajuste la taille en fonction du format du fichier
def fix_pad_decompress(bout: bytes, path: Path):
# We adjust the file length according to the file format after decompression
if path.name[5:7] == "pl" and path.suffix == ".arc" or \
path.name[5:7] == "dp" and path.suffix == ".arc" or \
path.name[5:9] == "efct" and path.suffix == ".arc":
@ -94,10 +95,10 @@ def pzz_decompress(compressed_bytes: bytes):
compressed_bytes_size = len(compressed_bytes) // 2 * 2
cb = 0 # Control bytes
cb_bit = -1 # rotations de 15 à 0 pour le flag de compression
cb_bit = -1 # We rotate from 15 to 0 for compress flag
i = 0
while i < compressed_bytes_size:
if cb_bit < 0: # tous les
if cb_bit < 0:
cb = compressed_bytes[i + 1]
cb |= compressed_bytes[i + 0] << 8
cb_bit = 15
@ -123,7 +124,7 @@ def pzz_decompress(compressed_bytes: bytes):
index = len(uncompressed_bytes) - offset
for j in range(count):
uncompressed_bytes.append(uncompressed_bytes[index + j])
uncompressed_bytes.append(uncompressed_bytes[index + j]) # aaa a améliorer avec un slice
else:
uncompressed_bytes += compressed_bytes[i: i+2]
i += 2
@ -132,38 +133,38 @@ def pzz_decompress(compressed_bytes: bytes):
def pzz_compress(uncompressed_bytes: bytes):
uncompressed_bytes += b"\x00" # l'ajout de padding ne change pas le resultat de la compression
uncompressed_bytes += b"\x00" # # Adding pad doesn't change the result of compress
compressed_bytes = bytearray(2)
size_uncompressed_bytes = len(uncompressed_bytes) // 2 * 2
uncompressed_bytes_len = len(uncompressed_bytes) // 2 * 2
cb = 0 # Control bytes
cb_bit = 15 # rotations de 15 à 0 pour le flag de compression
cb_bit = 15 # We rotate from 15 to 0 for compress flag
cb_pos = 0
i = 0
while i < size_uncompressed_bytes:
start = max(i - 4094, 0) # start = 2 si i = 4096 (0x800*2)
while i < uncompressed_bytes_len:
start = max(i - 4094, 0) # start = 2 if i = 4096 (BLOCK_SIZE*2)
count_r = 0
max_i = -1
#######################################################
# start : contient l'index .. (en cours de rédaction)
# start : contains index .. (the analysis of the algorithm is not written up yet)
#######################################################
while True:
# start = index première occurence de uncompressed_bytes[i:i+2] entre start et i+1
# on regarde maxi dans les 4094 derniers octets
# start = index of the first occurrence of uncompressed_bytes[i:i+2] between start and i+1
# We look in the 4094 last bytes
start = uncompressed_bytes.find(uncompressed_bytes[i: i+2], start, i+1)
# si les 2 octets étudiés n'apparaissent pas dans les 4094 derniers octets
# if the current 2 bytes aren't in the 4094 last bytes
if start == -1:
break
# si la première occurence n'est pas à un index multiple de 2, on l'ignore
# If the first occurrence isn't at an index that is a multiple of 2, we ignore it
if start % 2 != 0:
start += 1
continue
count = 2
while i < size_uncompressed_bytes - count and \
while i < uncompressed_bytes_len - count and \
count < 0xFFFF * 2 and \
uncompressed_bytes[start+count] == uncompressed_bytes[i+count] and \
uncompressed_bytes[start+count+1] == uncompressed_bytes[i+count+1]:
@ -174,9 +175,6 @@ def pzz_compress(uncompressed_bytes: bytes):
start += 2
start = max_i
#######################################################
#
#######################################################
compress_flag = 0
if count_r >= 4:
compress_flag = 1
@ -205,150 +203,142 @@ def pzz_compress(uncompressed_bytes: bytes):
compressed_bytes[cb_pos:cb_pos + 2] = cb.to_bytes(2, "big")
compressed_bytes += b"\x00\x00"
return bytes_align_compress(compressed_bytes)
return block_align(compressed_bytes)
def pzz_unpack(pzz_path: Path, folder_path: Path, auto_decompress: bool = False):
    """Extract every file stored in a PZZ archive into a folder.

    Layout read by this function: a 4-byte big-endian file count, followed by
    one 4-byte big-endian descriptor per file.  In a descriptor, bit 30
    (``BIT_COMPRESSION_FLAG``) tells whether the file is compressed and the
    bits kept by ``FILE_LENGTH_MASK`` give its length in blocks of
    ``BLOCK_SIZE`` bytes.  File data starts at offset ``BLOCK_SIZE`` and the
    files follow each other, each one block-aligned.

    Extracted files are named ``<index:03><U|C>_<archive stem>``; the
    extension is ``.pzzp`` for files left compressed, ``.dat`` for empty
    files, and otherwise whatever ``get_file_path`` chooses.

    :param pzz_path: archive to unpack (a warning is logged if its suffix is
        neither ``.pzz`` nor ``.mdt``).
    :param folder_path: destination folder; ``Path('.')`` means "a folder
        named after the archive, next to the archive".
    :param auto_decompress: when true, compressed entries are decompressed
        while they are extracted.
    """
    if pzz_path.suffix != ".pzz" and pzz_path.suffix != ".mdt":
        logging.warning(f"Invalid file format '{pzz_path.suffix}'; it should be .pzz or .mdt file format")

    if folder_path != Path('.'):
        unpacked_pzz_path = folder_path
    else:
        unpacked_pzz_path = pzz_path.parent / pzz_path.stem

    if auto_decompress:
        logging.info(f" unpzz({pzz_path}) in folder {unpacked_pzz_path}")
    else:
        logging.info(f" unpacking {pzz_path} in folder {unpacked_pzz_path}")

    unpacked_pzz_path.mkdir(exist_ok=True)

    with pzz_path.open("rb") as pzz_file:
        file_count = int.from_bytes(pzz_file.read(4), "big")
        logging.debug(f" -> File count : {file_count}")

        # Get a list with the header file descriptors
        files_descriptors_data = pzz_file.read(file_count * 4)
        files_descriptors = [
            int.from_bytes(files_descriptors_data[i:i + 4], "big")
            for i in range(0, file_count * 4, 4)
        ]

        file_offset = BLOCK_SIZE
        for index, file_descriptor in enumerate(files_descriptors):
            # Bit 30 is the compression flag (bits numbered from 0 to 31).
            # The letter ends up in the extracted filename:
            # 'U' = initially not compressed, 'C' = initially compressed.
            if (file_descriptor & BIT_COMPRESSION_FLAG) == 0:
                compression_status = 'U'
            else:
                compression_status = 'C'

            # The remaining 30 bits hold the file length in blocks, so
            # file_len is always a multiple of BLOCK_SIZE
            file_len = (file_descriptor & FILE_LENGTH_MASK) * BLOCK_SIZE

            # We generate the file name
            filename = f"{index:03}{compression_status}_{pzz_path.stem}"
            file_path = unpacked_pzz_path / filename

            logging.debug(f" -> Offset: {file_offset:010} - {file_path}")

            # If file_len is null we create an empty file and go on with the
            # next file descriptor
            if file_len == 0:
                file_path.with_suffix(".dat").touch()
                continue

            # We seek to the beginning of the file inside the archive
            pzz_file.seek(file_offset)

            # We extract the file; compressed files are decompressed only
            # when auto_decompress is set
            file_data = pzz_file.read(file_len)
            if auto_decompress and compression_status == 'C':
                file_data = pzz_decompress(file_data)
            file_data = remove_padding(bytearray(file_data))

            if not auto_decompress and compression_status == 'C':
                # Still compressed on disk: flag it with the ".pzzp" extension
                file_path = file_path.with_suffix(".pzzp")
            else:
                file_path = get_file_path(file_data, file_path)

            file_path.write_bytes(fix_pad_decompress(file_data, file_path))

            # Next file_offset = file_offset + file_len
            # (file_len is aligned to BLOCK_SIZE with null bytes)
            file_offset += file_len
def pzz_pack(folder_path: Path, pzz_path: Path, auto_compress: bool = False):
    """Build a PZZ archive from the files of a folder.

    The archive starts with a header (4-byte big-endian file count followed
    by one 4-byte big-endian descriptor per file) and the file data starts at
    offset ``BLOCK_SIZE``.  A descriptor holds the file length in blocks of
    ``BLOCK_SIZE`` bytes, with ``BIT_COMPRESSION_FLAG`` set when the fourth
    character of the file name is ``'C'``.

    Files are packed in file-name order, because the name prefix carries the
    index of the file inside the archive.

    :param folder_path: folder whose entries are packed.
    :param pzz_path: archive to create; ``Path('.')`` means
        ``folder_path`` with the ``.pzz`` suffix.  A warning is logged if the
        suffix is neither ``.pzz`` nor ``.mdt``.
    :param auto_compress: when true, a file flagged ``'C'`` that is not a
        ``.pzzp`` is compressed, and a file flagged ``'U'`` that is a
        ``.pzzp`` is decompressed, before being packed.
    """
    if pzz_path == Path('.'):
        pzz_path = folder_path.with_suffix(".pzz")
    if pzz_path.suffix != ".pzz" and pzz_path.suffix != ".mdt":
        logging.warning(f"Invalid file format '{pzz_path.suffix}' : dest must be a pzz or mdt")

    # We get all the paths from the folder to pack.  glob() gives no ordering
    # guarantee, so we sort: header order must match the index in the names.
    files_path = sorted(folder_path.glob("*"))

    if auto_compress:
        logging.info(f" pzz({folder_path}) in pzz {pzz_path}")
    else:
        logging.info(f" packing({folder_path}) in pzz {pzz_path}")
    logging.debug(f" -> {len(files_path)} files to pack")

    with pzz_path.open("wb") as pzz_file:
        # The header starts with the total file count
        header_bytes = bytearray(len(files_path).to_bytes(4, byteorder='big'))

        # File data starts right after the block reserved for the header
        file_offset = BLOCK_SIZE

        for file_path in files_path:
            is_compressed = file_path.suffix == ".pzzp"
            compression_status = file_path.name[3:4]

            file_data = file_path.read_bytes()

            if auto_compress:
                if compression_status == 'C' and not is_compressed:
                    # The file has to be compressed before packing
                    file_data = pzz_compress(file_data)
                elif compression_status == 'U' and is_compressed:
                    # The file has to be decompressed before packing
                    # (its pad is not handled yet)
                    file_data = pzz_decompress(file_data)

            # file_descriptor = ceil of (file length / BLOCK_SIZE),
            # computed with integers only
            file_descriptor = (len(file_data) + BLOCK_SIZE - 1) // BLOCK_SIZE
            file_blocks = file_descriptor

            # We add the compression flag bit to the file_descriptor
            if compression_status == 'C':
                file_descriptor |= BIT_COMPRESSION_FLAG

            header_bytes += file_descriptor.to_bytes(4, byteorder='big')

            # Each file is written at its block-aligned offset, so that a
            # file whose length is not a multiple of BLOCK_SIZE does not
            # shift the files that follow it (the gap reads back as zeros)
            pzz_file.seek(file_offset)
            pzz_file.write(file_data)
            file_offset += file_blocks * BLOCK_SIZE

        # We write the header at the beginning of the archive
        pzz_file.seek(0)
        pzz_file.write(header_bytes)
def unpzz(pzz_path: Path, folder_path: Path):
    """Unpack ``pzz_path`` into ``folder_path`` and decompress compressed files.

    Thin wrapper around ``pzz_unpack`` with ``auto_decompress`` enabled.
    """
    pzz_unpack(pzz_path, folder_path, auto_decompress=True)
def pzz(folder_path: Path, pzz_file: Path):
    """Pack ``folder_path`` into the archive ``pzz_file``, compressing as needed.

    Thin wrapper around ``pzz_pack`` with ``auto_compress`` enabled.
    """
    pzz_pack(folder_path, pzz_file, auto_compress=True)
def get_argparser():
@ -391,7 +381,7 @@ if __name__ == '__main__':
if(p_output == Path(".")):
p_output = Path(p_input.with_suffix(".pzzp"))
# Si on a pas la bonne extension on ne compresse pas le fichier
# Extension check
if not args.disable_ignore and p_input.suffix == ".pzzp":
logging.warning(f"Ignored - {p_input} - bad extension - must not be a pzzp")
elif not args.disable_ignore and p_output.suffix != ".pzzp":
@ -404,45 +394,45 @@ if __name__ == '__main__':
if p_output == Path("."):
p_output = p_input.parent / p_input.stem
# Si on a pas la bonne extension on ne decompresse pas le fichier
# Extension check
if not args.disable_ignore and p_input.suffix != ".pzzp":
logging.warning(f"Ignored - {p_input} - bad extension - must be a pzzp")
else:
output_file_content = pzz_decompress(p_input.read_bytes())
p_output = get_file_path(output_file_content, p_output)
output_file_data = pzz_decompress(p_input.read_bytes())
p_output = get_file_path(output_file_data, p_output)
logging.info(f"Decompressing {p_input} in {p_output}")
p_output.write_bytes(bytes_align_decompress(output_file_content, p_output))
p_output.write_bytes(fix_pad_decompress(output_file_data, p_output))
elif args.batch_compress:
logging.info("### Batch Compress")
if(p_output == Path(".")):
p_output = Path(p_input)
p_output.mkdir(exist_ok=True)
for filename in listdir(p_input):
# Si on a pas la bonne extension on ne compresse pas le fichier
if not args.disable_ignore and Path(filename).suffix == ".pzzp":
logging.warning(f"Ignored - {filename} - bad extension - musn't be a pzzp")
for file_path in p_input.glob("*"):
# Extension check
if not args.disable_ignore and file_path.suffix == ".pzzp":
logging.warning(f"Ignored - {file_path} - bad extension - musn't be a pzzp")
if p_input != p_output:
shutil.copy(p_input / filename, p_output / filename)
shutil.copy(file_path, p_output/file_path.name)
continue
logging.info(f"Compressing {filename}")
(p_output / (Path(filename).stem + ".pzzp")).write_bytes(pzz_compress((p_input / filename).read_bytes()))
logging.info(f"Compressing {file_path}")
(p_output/file_path.stem).with_suffix(".pzzp").write_bytes(pzz_compress(file_path.read_bytes()))
elif args.batch_decompress:
logging.info("### Batch Decompress")
if(p_output == Path(".")):
p_output = Path(p_input)
p_output.mkdir(exist_ok=True)
for filename in listdir(p_input):
if not args.disable_ignore and Path(filename).suffix != ".pzzp":
logging.warning(f"Ignored - {filename} - bad extension - must be a pzzp")
for file_path in p_input.glob("*"):
if not args.disable_ignore and file_path.suffix != ".pzzp":
logging.warning(f"Ignored - {file_path} - bad extension - must be a pzzp")
if p_input != p_output:
shutil.copy(p_input / filename, p_output / filename)
shutil.copy(file_path, p_output / file_path.name)
continue
logging.info(f"Decompressing {filename}")
uncompressed_content = pzz_decompress((p_input / filename).read_bytes())
uncompressed_path = get_file_path(uncompressed_content, p_output / Path(filename))
uncompressed_path.write_bytes(bytes_align_decompress(uncompressed_content, uncompressed_path))
logging.info(f"Decompressing {file_path}")
uncompressed_content = pzz_decompress(file_path.read_bytes())
uncompressed_path = get_file_path(uncompressed_content, p_output / file_path.name)
uncompressed_path.write_bytes(fix_pad_decompress(uncompressed_content, uncompressed_path))
elif args.pack:
logging.info("### Pack")
pzz_pack(p_input, p_output)
@ -461,29 +451,29 @@ if __name__ == '__main__':
if(p_output == Path('.')):
p_output = p_input
for folder in listdir(p_input):
pzz_pack(p_input / folder, p_output / Path(folder).with_suffix(".pzz"))
for folder_path in p_input.glob("*"):
pzz_pack(folder_path, p_output / Path(folder_path.name).with_suffix(".pzz"))
elif args.batch_unpack:
logging.info("### Batch Unpack")
p_output.mkdir(exist_ok=True)
if(p_output == Path('.')):
p_output = p_input
for filename in listdir(p_input):
pzz_unpack(p_input / filename, p_output / Path(filename).stem)
for file_path in p_input.glob("*"):
pzz_unpack(file_path, p_output / file_path.stem)
elif args.batch_pzz:
logging.info("### Batch PZZ")
p_output.mkdir(exist_ok=True)
if(p_output == Path('.')):
p_output = p_input
for folder in listdir(p_input):
pzz(p_input / folder, p_output / Path(folder).with_suffix(".pzz"))
for folder_path in p_input.glob("*"):
pzz(folder_path, p_output / Path(folder_path.name).with_suffix(".pzz"))
elif args.batch_unpzz:
logging.info("### Batch UNPZZ")
p_output.mkdir(exist_ok=True)
if(p_output == Path('.')):
p_output = p_input
for filename in listdir(p_input):
unpzz(p_input / filename, p_output / Path(filename).stem)
for file_path in p_input.glob("*"):
unpzz(file_path, p_output / file_path.stem)