# Spaces status banner captured with this file (page residue): Runtime error
import base64
import os
import shutil
import zipfile

import cv2
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.model_selection import train_test_split
# Page header: title plus two inline-code notes describing the app's purpose.
st.title("Automation for Data Preparation")
st.write("`- This app will automate data preparation to make it easier to handled by different ML libraries.`")
st.write(" ")
st.write("`- Online datasets are usually in the common format. However, different ML libraries require different formats. This app will help you to convert the common format to the converted format.`")

# Horizontal rule between the description and the format overview.
st.write("---")

# Side-by-side Markdown trees: the expected input layout (left) and the
# layout produced by the app (right). Child items are indented two spaces
# per level so the Markdown renderer nests them instead of drawing one
# flat list.
col1, col2 = st.columns(2)

with col1:
    st.write("**Common Format:**")
    st.write("- root\n"
             "  - class1\n"
             "    - image1.jpg\n"
             "    - image2.jpg\n"
             "    - image3.jpg\n"
             "    - ...\n"
             "  - class2\n"
             "    - image1.jpg\n"
             "    - image2.jpg\n"
             "    - ...\n"
             "  - class3\n"
             "    - image1.jpg\n"
             "    - image2.jpg\n"
             "    - ...\n")

with col2:
    st.write("**Converted Format:**")
    st.write("- root\n"
             "  - train\n"
             "    - class1\n"
             "      - image1.jpg\n"
             "      - image2.jpg\n"
             "      - ...\n"
             "    - class2\n"
             "  - test\n"
             "    - class1\n"
             "      - image1.jpg\n"
             "      - image2.jpg\n"
             "      - ...\n"
             "    - class2\n"
             "      - image1.jpg\n")

st.write("---")
# Upload widget for the zipped dataset. The prompt is passed as the widget
# label (instead of a separate st.write plus an empty label) so the uploader
# is never rendered without a label.
input_zip_file = st.file_uploader(
    "Please upload a folder containing images in the default format as a zip file.",
    type=["zip"],
)

# Directory the uploaded archive is extracted into.
default_folder = 'input_folder'

# Module-level holders for the loaded images (X) and their class labels (y);
# the transform step fills them.
X = []
y = []

# Vertical spacing before the "Transform" button.
st.write(" ")
st.write(" ")
def _collect_labelled_images(dataset_root, size=(224, 224)):
    """Load the JPEG images stored as ``dataset_root/<root>/<class>/<image>``.

    Args:
        dataset_root: Directory the uploaded archive was extracted into.
        size: ``(width, height)`` every image is resized to.

    Returns:
        A ``(images, labels)`` pair of parallel lists: the resized arrays
        returned by ``cv2.imread`` and the name of the class folder each
        image was found in.
    """
    images = []
    labels = []
    # Sorted listings make the sample order (and therefore the seeded
    # train/test split) reproducible across file systems.
    for root_name in sorted(os.listdir(dataset_root)):
        root_path = os.path.join(dataset_root, root_name)
        # Skip the macOS resource-fork folder and stray files (.DS_Store, ...).
        if root_name == '__MACOSX' or not os.path.isdir(root_path):
            continue
        for class_name in sorted(os.listdir(root_path)):
            class_path = os.path.join(root_path, class_name)
            if not os.path.isdir(class_path):
                continue
            for file_name in sorted(os.listdir(class_path)):
                if not file_name.lower().endswith(('.jpg', '.jpeg')):
                    continue
                img = cv2.imread(os.path.join(class_path, file_name))
                # cv2.imread returns None for unreadable/corrupt files;
                # passing that to cv2.resize would raise.
                if img is None:
                    continue
                images.append(cv2.resize(img, size))
                labels.append(class_name)
    return images, labels


def _write_split(split_dir, images, labels):
    """Write each image to ``split_dir/<label>/<index>.jpg``."""
    for index, (img, label) in enumerate(zip(images, labels)):
        cv2.imwrite(os.path.join(split_dir, str(label), f"{index}.jpg"), img)


# Convert the uploaded dataset when the user clicks the button:
#   output/
#     train/<class>/<index>.jpg
#     test/<class>/<index>.jpg
# and pack the result into output_folder.zip.
if st.button("Transform"):
    if input_zip_file is None:
        st.warning("Please upload a zip file before transforming.")
    else:
        output_folder = 'output'

        # Start from a clean slate so files from an earlier upload cannot
        # leak into this conversion or its archive.
        shutil.rmtree(default_folder, ignore_errors=True)
        shutil.rmtree(output_folder, ignore_errors=True)
        if os.path.exists('output_folder.zip'):
            os.remove('output_folder.zip')

        with zipfile.ZipFile(input_zip_file, 'r') as zip_ref:
            zip_ref.extractall(default_folder)

        images, labels = _collect_labelled_images(default_folder)

        if len(images) < 2:
            # train_test_split cannot produce two non-empty parts from
            # fewer than two samples.
            st.error(
                "Could not find at least two readable .jpg images. "
                "The zip file must contain root/<class>/<image>.jpg."
            )
        else:
            X = np.array(images)
            y = np.array(labels)
            print(len(X))
            print(len(y))

            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, train_size=0.8, random_state=42
            )

            # Create every class folder in both splits, even if a split ends
            # up with no image of that class.
            for class_name in np.unique(y):
                for split in ('train', 'test'):
                    os.makedirs(
                        os.path.join(output_folder, split, str(class_name)),
                        exist_ok=True,
                    )

            _write_split(os.path.join(output_folder, 'train'), X_train, y_train)
            _write_split(os.path.join(output_folder, 'test'), X_test, y_test)

            # zip the folder
            shutil.make_archive('output_folder', 'zip', output_folder)
            st.write("Transform the folder successfully.")
def get_binary_file_downloader_html(bin_file, file_label='File'):
    """Build an HTML download link that embeds a file as a base64 data URI.

    The whole file is read into memory and inlined into the returned
    markup, so the link grows with the file size.

    Args:
        bin_file: Path of the file to embed.
        file_label: Text shown for the link.

    Returns:
        An ``<a>`` element whose ``href`` is a base64 ``data:`` URI holding
        the file content and whose ``download`` attribute is the file's
        base name.
    """
    with open(bin_file, 'rb') as f:
        data = f.read()
    # The URI declares ";base64", so the payload must really be base64 text;
    # interpolating the raw bytes would embed their Python repr (b'...').
    bin_str = base64.b64encode(data).decode('ascii')
    file_name = os.path.basename(bin_file)
    href = (
        f'<a href="data:application/octet-stream;base64,{bin_str}" '
        f'download="{file_name}">{file_label}</a>'
    )
    return href
# Offer the converted dataset for download. The link helper only returns
# markup, so it has to be rendered explicitly; raw HTML needs
# unsafe_allow_html=True to be shown as a link instead of escaped text.
# The archive check keeps the script from failing on runs where no
# conversion has produced output_folder.zip yet.
if os.path.isfile('output_folder.zip'):
    st.markdown(
        get_binary_file_downloader_html('output_folder.zip', 'Zip File'),
        unsafe_allow_html=True,
    )
    st.write("Click the link above to download the zip file.")