import streamlit as st

st.title('Welcome to the Labelbox custom classifier training application!')
st.header("In this module you will be able to add data to your instance if you don't already have it, and then use bulk classification to train a custom image classification model.")
st.subheader("If you don't have data in your org, enter your API key and click the button below! Otherwise, skip to Section 2.")
st.video("https://storage.googleapis.com/app-videos/How%20to%20Make%20an%20API%20Key%20on%20labelbox.com.mp4")
st.write("This video shows you how to create an API key in Labelbox.")
# Original notebook:
# https://colab.research.google.com/drive/1nOSff67KXhNgX_XSfnv3xnddobRoaK0d
import labelbox
import labelpandas as lp
import os
import pandas as pd
from tensorflow.python.lib.io import file_io
import io
from pandas import read_csv

api_key = st.text_input("Enter your API key:", type="password")

# Read a CSV file from Google Cloud Storage (helper; not used in the flow below).
def read_data(gcs_path):
    file_stream = file_io.FileIO(gcs_path, mode='r')
    csv_data = read_csv(io.StringIO(file_stream.read()))
    return csv_data
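# Hypothetical usage sketch: file_io can also open gs:// bucket paths when
# TensorFlow is built with GCS support and credentials are configured, e.g.:
# df = read_data('gs://example-bucket/images_styles.csv')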
def freedatatolb(amount_of_data):
    """Load a public sample dataset and upload `amount_of_data` rows to Labelbox."""
    client = lp.Client(api_key)
    gcs_path = 'https://storage.googleapis.com/solution_accelerator_datasets/images_styles.csv'
    df = pd.read_csv(gcs_path)
    df = df.drop(['id', 'season', 'usage', 'year', 'gender', 'masterCategory',
                  'subCategory', 'articleType', 'baseColour'], axis=1)
    fields = {
        "row_data": ["link"],                   # Column containing the URL to the asset (single)
        "global_key": ['filename'],             # Column containing the global key value (single, unique)
        "external_id": ["productDisplayName"],  # Column containing the external ID value (single)
        "metadata_string": [],                  # Columns containing string metadata values (multiple)
        "metadata_number": [],                  # Columns containing number metadata values (multiple)
        "metadata_datetime": []                 # Columns containing datetime metadata values (multiple, must be ISO 8601)
    }
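    # The loop below renames columns into the format labelpandas parses:
    # metadata columns become "metadata///<type>///<column name>", while the
    # remaining columns are mapped directly to their field name (e.g. "row_data").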
    columns = {}
    for field in fields.keys():
        for name in fields[field]:
            if field.startswith('metadata'):
                columns[name] = f"{field.split('_')[0]}///{field.split('_')[1]}///{name}"
            else:
                columns[name] = field
    new_df = df.rename(columns=columns)
    testdf = new_df.head(amount_of_data)
    dataset_id = client.lb_client.create_dataset(name=str(gcs_path.split('/')[-1])).uid
    results = client.create_data_rows_from_table(
        table=testdf,
        dataset_id=dataset_id,
        skip_duplicates=True,  # If True, skip data rows whose global key is already in use
        verbose=True,          # If True, print information about code execution
    )
    return results
data_amount = st.slider("Choose the amount of data to add to Labelbox", 500, 1000)
if st.button("Add data to your Labelbox"):
    st.write(f"Adding {data_amount} data rows to your Labelbox instance")
    results = freedatatolb(data_amount)
st.title("SECTION 2") | |
st.header("Create project and bulk classify images") | |
st.video("https://storage.googleapis.com/app-videos/Setting%20up%20Platform%20for%20Training%20a%20Model.mp4") | |
st.write("this video will help you set up a project for storing bulk classifications") | |
st.video("https://storage.googleapis.com/app-videos/Bulk%20Classification%20and%20Training%20Our%20Model.mp4") | |
st.write("this video teaches how to bulk classify the images and set up our model for training") | |
st.title("SECTION 3") | |
st.header("Auto Image classifier training and inference: Imagnet Weights") | |
# Original notebook:
# https://colab.research.google.com/drive/1CSyAE9DhwGTl7bLaSoo7QSyMuoEqJpCj
def train_and_inference(api_key, ontology_id, model_run_id):
    """Train a classifier on a model run's labels and upload predictions back to it.

    api_key      -- Labelbox API key
    ontology_id  -- from the Settings tab at the top left of your model run
    model_run_id -- from the settings gear icon on the right side of your model run
    """
    import numpy as np
    import tensorflow as tf
    from tensorflow.keras import layers
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    import os
    import labelbox
    from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option
    from labelbox import Client, LabelingFrontend, LabelImport, MALPredictionImport
    from labelbox.data.annotation_types import (
        Label, ImageData, ObjectAnnotation, MaskData,
        Rectangle, Point, Line, Mask, Polygon,
        Radio, Checklist, Text,
        ClassificationAnnotation, ClassificationAnswer
    )
    from labelbox import MediaType
    from labelbox.data.serialization import NDJsonConverter
    import pandas as pd
    import shutil
    import labelbox.data
    import scipy
    import json
    import uuid
    import time
    import requests

    # Connect to the Labelbox client and define model variables.
    client = Client(api_key)
    EPOCHS = 10

    # Setup training: export classifications from the model run.
    model_run = client.get_model_run(model_run_id)
    client.enable_experimental = True
    data_json = model_run.export_labels(download=True)
    print(data_json)
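    # Each exported entry is a dict; the fields used below are 'Data Split',
    # 'Labeled Data' (the image URL), 'DataRow ID', and the first radio answer
    # under 'Label' -> 'classifications' (its 'value' and 'title').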
"""Separate datarows into folders.""" | |
import requests | |
import os | |
from urllib.parse import unquote | |
def download_and_save_image(url, destination_folder, filename): | |
try: | |
# Decode the URL | |
url = unquote(url) | |
# Ensure destination directory exists | |
if not os.path.exists(destination_folder): | |
os.makedirs(destination_folder) | |
# Start the download process | |
response = requests.get(url, stream=True) | |
# Check if the request was successful | |
if response.status_code == 200: | |
file_path = os.path.join(destination_folder, filename) | |
with open(file_path, 'wb') as file: | |
for chunk in response.iter_content(8192): | |
file.write(chunk) | |
# st.write(f"Image downloaded and saved: {file_path}") | |
# else: | |
# st.write(f"Failed to download the image. Status code: {response.status_code}") | |
except Exception as e: | |
st.write(f"An error occurred: {e}") | |
    BASE_DIR = 'dataset'
    labeldict = {}
    for entry in data_json:
        data_split = entry['Data Split']
        if data_split not in ['training', 'validation']:  # skip the 'test' split for now
            continue
        image_url = f"{entry['Labeled Data']}"
        label = entry['Label']['classifications'][0]['answer']['value']
        labeldict[label] = entry['Label']['classifications'][0]['answer']['title']
        destination_folder = os.path.join(BASE_DIR, data_split, label)
        filename = os.path.basename(image_url)
        download_and_save_image(image_url, destination_folder, filename)
"""#Train Model""" | |
# st.write(labeldict) | |
import tensorflow as tf | |
from tensorflow.keras.preprocessing.image import ImageDataGenerator | |
from tensorflow.keras.applications import MobileNetV2 | |
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D | |
from tensorflow.keras.models import Model | |
from tensorflow.keras.optimizers import Adam | |
TRAIN_DIR = 'dataset/training' | |
VALIDATION_DIR = 'dataset/validation' | |
IMG_HEIGHT, IMG_WIDTH = 224, 224 # default size for MobileNetV2 | |
BATCH_SIZE = 32 | |
train_datagen = ImageDataGenerator( | |
rescale=1./255, | |
rotation_range=20, | |
width_shift_range=0.2, | |
height_shift_range=0.2, | |
shear_range=0.2, | |
zoom_range=0.2, | |
horizontal_flip=True, | |
fill_mode='nearest' | |
) | |
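    # Note: augmentation is applied only to the training generator; the
    # validation generator below just rescales pixels so evaluation stays deterministic.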
    validation_datagen = ImageDataGenerator(rescale=1./255)

    train_ds = train_datagen.flow_from_directory(
        TRAIN_DIR,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='categorical'
    )
    validation_ds = validation_datagen.flow_from_directory(
        VALIDATION_DIR,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='categorical'
    )

    base_model = MobileNetV2(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
                             include_top=False,
                             weights='imagenet')
    # Freeze the base model
    for layer in base_model.layers:
        layer.trainable = False
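    # With the ImageNet backbone frozen, only the new head added below is trained.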
    # Create the custom classification head
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(train_ds.num_classes, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    st.write("Training...")
    history = model.fit(
        train_ds,
        validation_data=validation_ds,
        epochs=EPOCHS
    )
"""Run Inference on Model run Datarows""" | |
st.write('running Inference') | |
import numpy as np | |
import requests | |
from tensorflow.keras.preprocessing import image | |
from PIL import Image | |
from io import BytesIO | |
# Fetch the image from the URL | |
def load_image_from_url(img_url, target_size=(224, 224)): | |
response = requests.get(img_url) | |
img = Image.open(BytesIO(response.content)) | |
img = img.resize(target_size) | |
img_array = image.img_to_array(img) | |
return np.expand_dims(img_array, axis=0) | |
    def make_prediction(img_url):
        # Load and preprocess the image
        img_data = load_image_from_url(img_url)
        img_data = img_data / 255.0  # Normalize the image data to [0, 1]
        # Make predictions
        predictions = model.predict(img_data)
        predicted_class = np.argmax(predictions[0])
        # Retrieve the confidence score (probability) for the predicted class
        confidence = predictions[0][predicted_class]
        # Map the predicted class index back to its label
        class_map = train_ds.class_indices
        inverse_map = {v: k for k, v in class_map.items()}
        predicted_label = inverse_map[predicted_class]
        return predicted_label, confidence
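    # Hypothetical example call:
    # predicted_label, confidence = make_prediction('https://example.com/shirt.jpg')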
    from tensorflow.errors import InvalidArgumentError

    ontology = client.get_ontology(ontology_id)
    label_list = []
    for datarow in model_run.export_labels(download=True):
        try:
            label, confidence = make_prediction(datarow['Labeled Data'])
        except InvalidArgumentError as e:
            print(f"InvalidArgumentError: {e}. Skipping this data row.")
            continue  # Skip to the next data row if an exception occurs
        radio_answer = ClassificationAnswer(
            name=labeldict[label.lower()],
            confidence=confidence)
        radio_prediction = ClassificationAnnotation(
            name=ontology.classifications()[0].instructions,
            value=Radio(
                answer=radio_answer
            ))
        label_prediction = Label(
            data=ImageData(uid=datarow['DataRow ID']),
            annotations=[radio_prediction])
        label_list.append(label_prediction)

    prediction_import = model_run.add_predictions(
        name="prediction_upload_job" + str(uuid.uuid4()),
        predictions=label_list)
    prediction_import.wait_until_done()

    st.write(prediction_import.errors == [])
    if prediction_import.errors == []:
        return "Model trained and inference ran successfully"
    else:
        return prediction_import.errors
st.title("Enter Applicable IDs and keys below") | |
model_run_id = st.text_input("Enter your model run ID:") | |
ontology_id = st.text_input("Enter your ontology ID:") | |
if st.button("Train and run inference"): | |
st.write('Starting Up...') | |
# Check if the key is not empty | |
if api_key + model_run_id + ontology_id: | |
result = train_and_inference(api_key, ontology_id, model_run_id) | |
st.write(result) | |
else: | |
st.warning("Please enter all keys.") | |