# Helsinki-NLP / check_required_files.py
# TroyHow's picture
# Upload 82 files
# 77307ef verified
import os
import zipfile
# Running tallies; check_zip_contents() updates them through ``global``.
total_files_count = contains_all_files_count = missing_files_count = 0

# One human-readable message per archive that failed the check.
missing_files_info = []
def check_zip_contents(zip_file_path):
    """Check that a ZIP archive holds its expected folder and required files.

    An archive named ``<name>.zip`` is expected to contain a folder called
    ``<name>`` with every required file directly inside it. The outcome is
    recorded in the module-level tallies:

    * ``total_files_count`` is incremented for every call.
    * ``contains_all_files_count`` is incremented when nothing is missing.
    * ``missing_files_count`` is incremented, and a message is appended to
      ``missing_files_info``, when the folder or any required file is absent.

    An unreadable archive (``zipfile.BadZipFile``) is reported on stdout and
    counted only in ``total_files_count``.

    Args:
        zip_file_path: Path to the ZIP archive to inspect.

    Returns:
        None. Results are reported through the module-level tallies.
    """
    # The counters are rebound with ``+=`` and therefore need ``global``;
    # ``missing_files_info`` is only mutated in place.
    global total_files_count
    global contains_all_files_count
    global missing_files_count

    total_files_count += 1

    # Only opening the archive can raise BadZipFile, so keep the try narrow.
    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zf:
            # normpath strips the trailing slash of directory entries and
            # switches to the platform separator, matching os.path.join below.
            entries = {os.path.normpath(item) for item in zf.namelist()}
    except zipfile.BadZipFile as e:
        print(f"Error: {e}")
        # Additional information about the exception, like file path
        print(f"File path: {zip_file_path}")
        return

    # The expected top-level folder is the archive name without extension.
    zip_file_name = os.path.splitext(os.path.basename(zip_file_path))[0]
    expected_folder = os.path.normpath(zip_file_name)
    expected_files = (
        'config.json',
        'generation_config.json',
        'pytorch_model.bin',
        'source.spm',
        'target.spm',
        'tokenizer_config.json',
        'vocab.json',
    )

    # Not every archive stores an explicit directory entry; any member
    # located under the folder proves that the folder exists as well.
    folder_prefix = expected_folder + os.sep
    folder_present = expected_folder in entries or any(
        entry.startswith(folder_prefix) for entry in entries
    )
    if not folder_present:
        missing_files_count += 1
        missing_files_info.append(f"{zip_file_name} does not contain the expected folder.\n")
        return

    missing_files = [
        expected_file
        for expected_file in expected_files
        if os.path.join(expected_folder, expected_file) not in entries
    ]

    if not missing_files:
        contains_all_files_count += 1
    else:
        missing_files_count += 1
        missing_files_info.append(f"{zip_file_name} is missing the following files: {', '.join(missing_files)}\n")
# Scan the current working directory and check every ZIP archive found there.
folder_path = os.getcwd()
zip_files = list(filter(lambda name: name.endswith('.zip'), os.listdir(folder_path)))

for zip_file in zip_files:
    zip_file_path = os.path.join(folder_path, zip_file)
    check_zip_contents(zip_file_path)

# Report: complete archives first, then each problem, then the totals.
print(f"\nNumber of ZIP files containing all files and folders: {contains_all_files_count}")

for info in missing_files_info:
    print(info)

print(f"\nProcessed {total_files_count} ZIP files")
print(f"Number of files missing: {missing_files_count}")