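# NOTE: the original first lines read "Spaces: Sleeping Sleeping"; this appears to be
# web-page header text captured along with the script, not part of the program.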
import io
import os

import matplotlib.pyplot as plt
import pandas as pd
import requests
import streamlit as st
from PIL import Image
from transformers import pipeline
#import streamlit as st | |
#import pandas as pd | |
#import matplotlib.pyplot as plt | |
st.title('Playing cards Image Analysis')

# NOTE(review): the bare string literal below is an expression statement, not a
# comment. Presumably Streamlit's "magic" renders it in the app -- confirm
# before converting it to a real comment. Its text is kept unchanged.
'''
This next piece of code will hit GitHub for two csv files
One is the original dataset, broken up into test, train, valid.
The second csv is the test dataset, with the results after the models were run through the API
'''


def _read_remote_csv(url, timeout=30):
    """Download a UTF-8 encoded CSV over HTTP and parse it into a DataFrame.

    Args:
        url: Address of the CSV file.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        requests.RequestException: On a connection problem, a timeout, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here rather than handing an HTML error page to the CSV parser.
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.content.decode('utf-8')))


# Both CSV files are served from the same dataset repository.
_DATASET_BASE_URL = "https://huggingface.co/datasets/rwcuffney/autotrain-data-pick_a_card/raw/main/"

# cards.csv: the full dataset; later code reads its 'labels', 'class index'
# and 'data set' columns.
df_fulldataset = _read_remote_csv(_DATASET_BASE_URL + "cards.csv")

# ML_results.csv: the test rows; later code reads its 'labels' column and one
# prediction column per model name.
df_test = _read_remote_csv(_DATASET_BASE_URL + "ML_results.csv")

# The original script left `df` bound to the last frame it read; keep that.
df = df_test
# Button that restarts the script run from the top.
if st.button('Click me to re-run code', key='RunCode_button'):
    # `st.experimental_rerun` was renamed to `st.rerun` in newer Streamlit
    # releases; call whichever one this installation provides.
    _rerun = getattr(st, 'rerun', None) or st.experimental_rerun
    _rerun()

st.header('Sample of the .csv data:')
# DataFrame.sample raises ValueError when asked for more rows than exist, so
# cap the slider's upper bound (and its default) at the number of rows loaded.
_max_rows = min(8000, len(df_fulldataset))
x = st.slider('Select a value', value=min(10, _max_rows), max_value=_max_rows)
# Show `x` randomly chosen rows of the full dataset.
st.table(df_fulldataset.sample(x))
### HORIZONTAL BAR ###
st.header('Distribution of the playing card images:')

# Number of rows per card label, in reversed label order (presumably so the
# first label is drawn at the top of the horizontal chart).
value_counts = df_fulldataset.groupby('labels')['class index'].count().iloc[::-1]

fig, ax = plt.subplots(figsize=(10, 10))
# Draw on the axes created above instead of pyplot's implicit current axes.
value_counts.plot.barh(ax=ax)

ax.set_title('Value Counts of Labels')
# Horizontal bars: the counts run along x and the labels along y.
ax.set_xlabel('Count')
ax.set_ylabel('Label')

st.pyplot(fig)
# The figure has been rendered; close it so figures do not pile up across reruns.
plt.close(fig)
### PIE CHART ###
st.header('Balance of Train,Valid,Test datasets:')

# Number of rows in each value of the 'data set' column. (The original also
# computed a groupby-based count first, but immediately overwrote it.)
value_counts = df_fulldataset['data set'].value_counts()

fig, ax = plt.subplots(figsize=(5, 5))
# Draw on the axes created above instead of pyplot's implicit current axes;
# each slice is annotated with its percentage to one decimal place.
value_counts.plot.pie(ax=ax, autopct='%1.1f%%')

st.pyplot(fig)
# The figure has been rendered; close it so figures do not pile up across reruns.
plt.close(fig)
from enum import Enum

# Model names, in the order they are offered in the selection widgets.
models_run = [
    'SwinForImageClassification_24',
    'ViTForImageClassification_22',
    'SwinForImageClassification_21',
    'ResNetForImageClassification_23',
    'BeitForImageClassification_25',
]

# Model repository id for each model name (used as the `model=` argument of
# the image-classification pipeline).
pipeline_dict = {
    'SwinForImageClassification_21': "rwcuffney/autotrain-pick_a_card-3726099221",
    'ViTForImageClassification_22': "rwcuffney/autotrain-pick_a_card-3726099222",
    'ResNetForImageClassification_23': "rwcuffney/autotrain-pick_a_card-3726099223",
    # The original line was missing the opening quote of this value.
    'SwinForImageClassification_24': "rwcuffney/autotrain-pick_a_card-3726099224",
    'BeitForImageClassification_25': "rwcuffney/autotrain-pick_a_card-3726099225",
}

# Hosted inference endpoint for each model name. Every URL is the same prefix
# followed by the repository id, so derive it instead of repeating the ids.
API_dict = {
    model_name: f"https://api-inference.huggingface.co/models/{repo_id}"
    for model_name, repo_id in pipeline_dict.items()
}
####Try it out ###
import requests

st.header("Try it out")

# NOTE(review): the bare string literal below is an expression statement, not a
# comment. Presumably Streamlit's "magic" renders it in the app -- confirm
# before converting it to a real comment. Its text is kept unchanged.
'''
Warning: it will error out at first, resubmit a few times.
Each model needs to 'warm up' before they start working.
You can use any image... try test/queen of hearts/4.jpg to see an example that
Got different results with different models
'''

# NOTE(security): an API token is hard-coded in this file and is therefore
# readable by anyone who can see the source. Supply the token through the
# HF_API_TOKEN environment variable instead. The literal remains only as a
# fallback so existing deployments keep working; it should be revoked and
# removed.
_hf_token = os.environ.get("HF_API_TOKEN", "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")

# Request headers used by query() when calling the hosted inference endpoints.
headers = {"Authorization": f"Bearer {_hf_token}"}
def query(filename, api_url, timeout=30):
    """POST a payload to an inference endpoint and return the decoded JSON reply.

    Args:
        filename: Request body, sent as-is. Despite the name, no file is
            opened here; the (commented-out) caller in this script passes the
            uploaded file's bytes.
        api_url: URL of the endpoint to call.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.RequestException: On a connection problem or a timeout.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.post(api_url, headers=headers, data=filename, timeout=timeout)
    # The status code is deliberately not checked: the JSON body is returned
    # whatever the status, as in the original.
    return response.json()
#API_URL = "https://api-inference.huggingface.co/models/rwcuffney/autotrain-pick_a_card-3726099224" | |
##### FORM #####
# NOTE(review): the nesting below was reconstructed because the original
# indentation was lost; confirm that the result is meant to be drawn inside
# the form.
with st.form("api_form"):
    api = st.selectbox('Which model do you want to try?', models_run, key='select_box')
    uploaded_file = st.file_uploader("Choose a file")
    submitted = st.form_submit_button("Submit")

    if submitted:
        if uploaded_file is None:
            # Without this guard, submitting with no file failed with a
            # NameError because `image` was never assigned.
            st.warning('Please choose an image file before submitting.')
        else:
            image = Image.open(uploaded_file)
            # Use a separate name so the imported `pipeline` factory is not
            # overwritten by the object it returns.
            classifier = pipeline(task="image-classification", model=pipeline_dict[api])
            # Map each predicted label to its score.
            prediction = {p["label"]: p["score"] for p in classifier(image)}
            st.write(prediction)
#### FUNCTIONS ####
import sklearn
from sklearn import metrics
import matplotlib.pyplot as plt

# Row labels of the model-comparison table. show_metrics() adds one column per
# model, with the scores listed in this same order.
index = ['accuracy_score', 'Weighted f1', 'Cohen Kappa', 'Matthews']
df_Metrics = pd.DataFrame(index=index)

# Distinct class labels of the test set, sorted. confusion_matrix() orders its
# rows and columns by sorted label, so display labels taken in order of
# appearance could be attached to the wrong rows.
labels = df_test['labels'].sort_values().unique()
### FUNCTION TO SHOW THE METRICS
def show_metrics(test, pred, name):
    """Render classification metrics and a confusion matrix for one model.

    Writes to the Streamlit page, in order: a header, the per-class
    classification report, four summary scores, and a confusion-matrix plot.
    Also stores the four summary scores as column ``name`` of the module-level
    ``df_Metrics`` table.

    Args:
        test: True labels.
        pred: Predicted labels, compared element-wise with ``test``.
        name: Model name, used in the headings and as the ``df_Metrics``
            column name.

    Returns:
        None.
    """
    from sklearn import metrics
    from sklearn.utils.multiclass import unique_labels

    accuracy = metrics.accuracy_score(test, pred)
    weighted_f1 = metrics.f1_score(test, pred, average='weighted')
    cohen_kappa = metrics.cohen_kappa_score(test, pred)
    matthews = metrics.matthews_corrcoef(test, pred)

    st.header(f'Metrics for {name}:')
    report = metrics.classification_report(test, pred, output_dict=True)
    # The report dict is keyed by class; transpose so each class is one row.
    st.dataframe(pd.DataFrame(report).transpose())

    st.write(
        f'Accuracy Score........{accuracy:.4f}\n\n'
        f'Weighted f1 score.....{weighted_f1:.4f}\n\n'
        f'Cohen Kappa...........{cohen_kappa:.4f}\n\n'
        f'Matthews Coefficient..{matthews:.4f}\n\n'
    )

    # Same order as the rows of df_Metrics:
    # accuracy, weighted f1, Cohen kappa, Matthews.
    df_Metrics[name] = [accuracy, weighted_f1, cohen_kappa, matthews]

    # Use one explicit label list for both the matrix and its tick labels so
    # they cannot get out of step (for example when a prediction is a class
    # that never occurs in `test`).
    class_labels = unique_labels(test, pred)
    cfm = metrics.confusion_matrix(test, pred, labels=class_labels)

    st.caption(f'Confusion Matrix: {name}')
    cmd = metrics.ConfusionMatrixDisplay(cfm, display_labels=class_labels)
    fig, ax = plt.subplots(figsize=(15, 15))
    cmd.plot(ax=ax, colorbar=False, values_format='.0f', cmap='Reds')
    # Acts on pyplot's current axes, i.e. the ones created just above.
    plt.xticks(rotation=90)
    st.pyplot(fig)
    # The figure has been rendered; close it so figures do not pile up when
    # several models are shown.
    plt.close(fig)
st.header('Let\'s see how the models performed')

# NOTE(review): the bare string literal below is an expression statement, not a
# comment. Presumably Streamlit's "magic" renders it in the app -- confirm
# before converting it to a real comment. Its text is kept unchanged.
'''
The next part of the code will analyze the full dataset.
Choose all five models to compare them all
'''

##### FORM #####
# NOTE(review): the nesting below was reconstructed because the original
# indentation was lost; confirm that the results are meant to be drawn inside
# the form.
with st.form("my_form"):
    st.write("You can choose from 1 to 5 models")
    selected_options = st.multiselect(
        'Which models would you like to analyze?', models_run)
    submitted = st.form_submit_button("Submit")

    if submitted and not selected_options:
        # Nothing to analyze or plot; say so instead of failing further down.
        st.warning('Please select at least one model.')
    elif submitted:
        st.write('you selected', selected_options)

        ### Show the metrics for each selected model.
        # Each call also adds that model's column to df_Metrics.
        test = df_test['labels']
        for m in selected_options:
            show_metrics(test, df_test[m], m)

        st.header('Metrics for all models:')
        st.table(df_Metrics)

        #### GRAPH THE RESULTS ###
        import seaborn as sns

        # Reshape to long format: one row per (metric, model) pair.
        subset_df = pd.melt(
            df_Metrics[selected_options].reset_index(),
            id_vars='index',
            var_name='Model',
            value_name='Score',
        )

        sns.set_style('whitegrid')
        grid = sns.catplot(
            data=subset_df,
            x='index',
            y='Score',
            hue='Model',
            kind='bar',
            palette='Blues',
            aspect=2,
        )
        # The x axis holds the metric names (the df_Metrics index).
        plt.xlabel('Metric')
        plt.ylabel('Scores')

        fig = grid.figure
        st.pyplot(fig)
        # The figure has been rendered; close it so figures do not pile up
        # across reruns.
        plt.close(fig)