File size: 2,994 Bytes
77307ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import zipfile

# Running tallies updated by check_zip_contents through ``global`` statements:
# archives inspected, archives found complete, archives found incomplete.
total_files_count = contains_all_files_count = missing_files_count = 0

# One human-readable report line per incomplete archive.
missing_files_info = list()

def check_zip_contents(zip_file_path):
    """Check one ZIP archive for the expected folder and model files.

    The archive is expected to hold a folder named after the ZIP file
    (base name without extension) that contains every file listed in
    ``expected_files``. Results are accumulated in the module-level
    counters ``total_files_count``, ``contains_all_files_count`` and
    ``missing_files_count``; one report line per incomplete archive is
    appended to ``missing_files_info``.

    A file that is not a valid ZIP archive is reported on stdout and is
    counted only in ``total_files_count``.

    Args:
        zip_file_path: Path of the ZIP archive to inspect.

    Returns:
        None. All results are recorded in the module-level counters.
    """
    global total_files_count
    global contains_all_files_count
    global missing_files_count
    global missing_files_info

    # Every archive handed to this function counts as processed.
    total_files_count += 1

    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zf:
            # Normalised entry names; a set gives O(1) membership tests.
            # normpath also strips the trailing "/" of directory entries.
            entries = {os.path.normpath(item) for item in zf.namelist()}
    except zipfile.BadZipFile as e:
        print(f"Error: {e}")
        # Additional information about the exception, like file path
        print(f"File path: {zip_file_path}")
        return

    # The expected folder is the ZIP file name without its extension.
    zip_file_name = os.path.splitext(os.path.basename(zip_file_path))[0]
    expected_folder = os.path.normpath(zip_file_name)
    expected_files = [
        'config.json',
        'generation_config.json',
        'pytorch_model.bin',
        'source.spm',
        'target.spm',
        'tokenizer_config.json',
        'vocab.json',
    ]

    # Many ZIP writers store no explicit directory entry, so the folder is
    # also considered present when any entry lives underneath it.
    folder_prefix = expected_folder + os.sep
    folder_present = expected_folder in entries or any(
        entry.startswith(folder_prefix) for entry in entries
    )
    if not folder_present:
        missing_files_count += 1
        missing_files_info.append(f"{zip_file_name} does not contain the expected folder.\n")
        return

    missing_files = [
        expected_file
        for expected_file in expected_files
        if os.path.join(expected_folder, expected_file) not in entries
    ]

    if not missing_files:
        # Accumulate across calls; the tally must not be reset per archive.
        contains_all_files_count += 1
    else:
        missing_files_count += 1
        missing_files_info.append(f"{zip_file_name} is missing the following files: {', '.join(missing_files)}\n")

# Script body: scan the current working directory and report the results

# Scan the directory the script was launched from
folder_path = os.getcwd()

# Keep only the directory entries whose name ends in ".zip"
zip_files = list(filter(lambda name: name.endswith('.zip'), os.listdir(folder_path)))

# Inspect each archive; results accumulate in the module-level counters
for zip_file in zip_files:
    check_zip_contents(os.path.join(folder_path, zip_file))

# Report how many archives were counted as complete
print(f"\nNumber of ZIP files containing all files and folders: {contains_all_files_count}")

# One report line per incomplete archive (nothing is printed when there are none)
if missing_files_info:
    print("\n".join(missing_files_info))

# Overall totals
print(f"\nProcessed {total_files_count} ZIP files")
print(f"Number of files missing: {missing_files_count}")