Spaces:
Sleeping
Sleeping
File size: 3,543 Bytes
908e980 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import os
import shutil
import zipfile
import pandas as pd
from tqdm import tqdm
# Project root: the parent of the directory that contains this file.
# abspath() guards against a relative ``__file__`` (possible when the file is
# run as a script on Python < 3.9); without it the double dirname() can
# collapse to "" and every derived path would silently depend on the current
# working directory.
PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
def extract_zipfile(zip_path, tmp_folder):
    """Unpack every member of the archive at ``zip_path`` into ``tmp_folder``.

    The archive is opened read-only and closed again when extraction ends,
    whether it succeeded or raised.
    """
    with zipfile.ZipFile(zip_path, mode="r") as archive:
        archive.extractall(path=tmp_folder)
def copy_files(folder_path, new_folder_path, file_extension, name_prefix, counter):
    """Copy matching files into another folder under sequentially numbered names.

    Every regular file directly inside ``folder_path`` whose name ends with
    ``file_extension`` is copied to ``new_folder_path`` and renamed to
    ``{name_prefix}_{counter:06d}{ext}``; ``counter`` grows by one per file.

    Args:
        folder_path: Directory to scan (not recursive).
        new_folder_path: Existing directory that receives the copies.
        file_extension: Case-sensitive suffix a file name must end with.
        name_prefix: Prefix of every generated file name.
        counter: Number assigned to the first copied file.

    Returns:
        A ``(next_counter, num_files)`` tuple: the first unused number and the
        count of files that were copied.
    """
    # One filtered listing drives the count, the progress bar and the copy
    # loop, so they cannot disagree (sub-directories with a matching name are
    # skipped instead of crashing shutil.copy).  Sorting makes the numbering
    # reproducible; os.listdir() returns entries in arbitrary order.
    matching = sorted(
        name
        for name in os.listdir(folder_path)
        if name.endswith(file_extension)
        and os.path.isfile(os.path.join(folder_path, name))
    )
    num_files = len(matching)

    with tqdm(total=num_files) as pbar:
        for filename in matching:
            # Keep the extension exactly as spelled on the source file.
            ext = os.path.splitext(filename)[1]
            new_filename = f"{name_prefix}_{counter:06d}{ext}"
            counter += 1

            src_path = os.path.join(folder_path, filename)
            dst_path = os.path.join(new_folder_path, new_filename)
            shutil.copy(src_path, dst_path)
            # Advance only for files actually copied so the bar ends at total.
            pbar.update(1)

    return counter, num_files
def process_zipfiles(zip_path, images_path=None, labels_path=None):
    """Collect images and labels from every ``.zip`` archive in a directory.

    Each archive is extracted to a scratch folder, then the ``.PNG`` and
    ``.txt`` files found in its ``obj_train_data`` sub-folder are copied to
    ``images_path`` and ``labels_path`` under running ``frame_NNNNNN`` names.
    An archive that fails is reported on stdout and skipped; the remaining
    archives are still processed.

    Args:
        zip_path: Directory containing the ``.zip`` archives.
        images_path: Output directory for image files (created if missing).
        labels_path: Output directory for label files (created if missing).

    Returns:
        A list with one dict per successfully processed archive, holding the
        keys ``"Zip File"``, ``"Num Images"`` and ``"Num Labels"``.

    Raises:
        TypeError: If ``images_path`` or ``labels_path`` is ``None``.
    """
    # The None defaults are kept for signature compatibility, but both output
    # folders are required; fail with a clear message instead of deep inside
    # os.makedirs.
    if images_path is None or labels_path is None:
        raise TypeError("images_path and labels_path must both be provided")

    os.makedirs(images_path, exist_ok=True)
    os.makedirs(labels_path, exist_ok=True)

    # Running numbers used to build the output file names.
    img_counter = 0
    label_counter = 0
    report_data = []
    tmp_folder = "tmp_extracted"

    # Sorted so the numbering is reproducible between runs.
    for zip_file in sorted(os.listdir(zip_path)):
        if not zip_file.endswith(".zip"):
            continue
        print(f"Processing {zip_file}")

        # Start from an empty scratch folder so leftovers from an interrupted
        # run can never be mixed into this archive's files.
        shutil.rmtree(tmp_folder, ignore_errors=True)
        try:
            extract_zipfile(os.path.join(zip_path, zip_file), tmp_folder)
            data_folder = os.path.join(tmp_folder, "obj_train_data")
            img_counter, num_images = copy_files(
                data_folder, images_path, ".PNG", "frame", img_counter)
            label_counter, num_labels = copy_files(
                data_folder, labels_path, ".txt", "frame", label_counter)
        except Exception as exc:
            # Best effort: report the reason and continue with the next
            # archive.  KeyboardInterrupt/SystemExit are not swallowed.
            print(f"Error: {zip_file}: {exc!r}")
        else:
            if num_images != num_labels:
                print(f"Warning: {zip_file} has {num_images} images "
                      f"but {num_labels} labels")
            report_data.append({"Zip File": zip_file,
                                "Num Images": num_images,
                                "Num Labels": num_labels})
        finally:
            # Always remove the scratch folder, including after a failure.
            shutil.rmtree(tmp_folder, ignore_errors=True)
            # Images and labels share the "frame" prefix, so both sequences
            # must restart from the same number for the next archive even if
            # this one failed half-way or had unequal counts.
            img_counter = label_counter = max(img_counter, label_counter)

    return report_data
def save_report(report_data, report_file):
    """Write the collected statistics to a CSV file.

    Args:
        report_data: Records to tabulate; anything ``pandas.DataFrame``
            accepts, typically a list of dicts sharing the same keys.
        report_file: Destination path of the CSV file.  Missing parent
            directories are created.
    """
    # to_csv() does not create directories, so make sure the target exists.
    # A bare file name has an empty dirname and needs no directory.
    report_dir = os.path.dirname(report_file)
    if report_dir:
        os.makedirs(report_dir, exist_ok=True)

    report_df = pd.DataFrame(report_data)
    # index=False keeps the DataFrame's row index out of the CSV.
    report_df.to_csv(report_file, index=False)
    print("Report file has been saved")
if __name__ == "__main__":
    # All locations are resolved relative to the project root.
    data_root = os.path.join(PROJECT_DIR, 'data')
    yolo_root = os.path.join(data_root, 'yolo_format')

    # Input: directory holding the zip archives to process.
    zip_path = os.path.join(data_root, 'cvat_targetlangan')
    # Output: separate folders for the renamed images and labels.
    images_path = os.path.join(yolo_root, 'images')
    labels_path = os.path.join(yolo_root, 'labels')
    # Output: CSV summary with one row per processed archive.
    report_file = os.path.join(PROJECT_DIR, 'report', 'file.csv')

    report_data = process_zipfiles(
        zip_path,
        images_path=images_path,
        labels_path=labels_path,
    )
    save_report(report_data, report_file)
|