rwcuffney committed on
Commit d7cb2c8
1 parent: 9cd8e9f

Create app.py

Files changed (1)
  1. app.py +234 -0
app.py ADDED
@@ -0,0 +1,234 @@
+ import streamlit as st
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import requests
+ import io
+ from transformers import pipeline
+ from PIL import Image
+
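+ # Streamlit app: explore the playing-cards image dataset and compare the
+ # predictions of several fine-tuned image-classification models.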
+
+ st.title('Playing Cards Image Analysis')
+
+ '''
+ This next piece of code fetches two csv files from the Hugging Face Hub.
+ One is the original dataset, broken up into test, train, and valid splits.
+ The second csv is the test dataset, with the results from running each model through the API.
+ '''
+
+
+ # Download the cards.csv file from the Hugging Face dataset repo
+ url = "https://huggingface.co/datasets/rwcuffney/autotrain-data-pick_a_card/raw/main/cards.csv"
+ download = requests.get(url).content
+
+ # Read the downloaded content and turn it into a pandas data frame
+ df = pd.read_csv(io.StringIO(download.decode('utf-8')))
+ #df = pd.read_csv('playing_cards/cards.csv').sort_values('class index')
+ df_fulldataset = df
+
+ # Download the ML_results.csv file (the test set plus each model's predictions)
+ url = "https://huggingface.co/datasets/rwcuffney/autotrain-data-pick_a_card/raw/main/ML_results.csv"
+ download = requests.get(url).content
+
+ # Read the downloaded content and turn it into a pandas data frame
+ df = pd.read_csv(io.StringIO(download.decode('utf-8')))
+ #df = pd.read_csv('playing_cards/cards.csv').sort_values('class index')
+ df_test = df
+
+
+
+ # Create the re-run button
+ if st.button('Click me to re-run code', key='RunCode_button'):
+     # Re-run the script when the button is clicked
+     st.experimental_rerun()
+
+ st.header('Sample of the .csv data:')
+ x = st.slider('Select how many rows to sample', value=10, max_value=8000)
+ st.table(df_fulldataset.sample(x))
+
+ ### HORIZONTAL BAR ###
+ st.header('Distribution of the playing card images:')
+
+ # Get the value counts of the 'labels' column
+ value_counts = df_fulldataset.groupby('labels')['class index'].count().iloc[::-1]
+
+ fig, ax = plt.subplots(figsize=(10,10))
+
+ # Create a horizontal bar chart of the value counts
+ value_counts.plot.barh(ax=ax)
+ # Set the chart title and axis labels
+ ax.set_title('Value Counts of Labels')
+ ax.set_xlabel('Count')
+ ax.set_ylabel('Label')
+
+ # Show the chart
+ st.pyplot(fig)
+
+
+ ### PIE CHART ###
+ st.header('Balance of Train, Valid, Test datasets:')
+
+ # Get the value counts of the 'data set' column
+ value_counts = df_fulldataset['data set'].value_counts()
+
+ fig, ax = plt.subplots(figsize=(5,5))
+ # Create a pie chart of the value counts
+ value_counts.plot.pie(ax=ax, autopct='%1.1f%%')
+
+ # Show the chart
+ st.pyplot(fig)
+
+
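+ # The five models fine-tuned on the playing-cards dataset; pipeline_dict maps
+ # each display name to its model repo on the Hugging Face Hub.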
+ models_run = ['SwinForImageClassification_24',
+               'ViTForImageClassification_22',
+               'SwinForImageClassification_21',
+               'ResNetForImageClassification_23',
+               'BeitForImageClassification_25']
+
+ pipeline_dict = dict(
+     SwinForImageClassification_21="rwcuffney/autotrain-pick_a_card-3726099221",
+     ViTForImageClassification_22="rwcuffney/autotrain-pick_a_card-3726099222",
+     ResNetForImageClassification_23="rwcuffney/autotrain-pick_a_card-3726099223",
+     SwinForImageClassification_24="rwcuffney/autotrain-pick_a_card-3726099224",
+     BeitForImageClassification_25="rwcuffney/autotrain-pick_a_card-3726099225")
+
+ #### Try it out ###
+
+ st.header("Try it out")
+
+ '''
+ Warning: changing models takes a minute to download the new model.
+
+ You can use any image... try test/queen of hearts/4.jpg to see an example that
+ gets different results from different models.
+ '''
+
+ ##### FORM #####
+
+ with st.form("api_form"):
+     model = st.selectbox('Which model do you want to try?', models_run, key='select_box')
+     uploaded_file = st.file_uploader("Choose a file")
+     if uploaded_file is not None:
+         # Preview the upload and open it as a PIL image
+         st.image(uploaded_file)
+         image = Image.open(uploaded_file)
+
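+     # Note: the transformers image-classification pipeline accepts a PIL image
+     # and returns a list of {'label': ..., 'score': ...} dicts.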
+     submitted = st.form_submit_button("Submit")
+     if submitted:
+         if uploaded_file is None:
+             st.warning('Please upload an image first.')
+         else:
+             clf = pipeline(task="image-classification", model=pipeline_dict[model])
+             def predict(image):
+                 predictions = clf(image)
+                 return {p["label"]: p["score"] for p in predictions}
+             prediction = predict(image)
+             st.write(prediction)
+
+ #### FUNCTIONS ####
+ from sklearn import metrics
+
+ index = ['accuracy_score', 'Weighted f1', 'Cohen Kappa', 'Matthews']
+ df_Metrics = pd.DataFrame(index=index)
+
+ labels = df_test['labels'].unique()
+
+
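+ # show_metrics() compares the ground-truth labels against one model's predictions:
+ # it reports accuracy, weighted F1, Cohen's kappa and the Matthews correlation,
+ # renders the per-class classification report and a confusion matrix,
+ # and appends the scores as a new column of df_Metrics.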
+ ### FUNCTION TO SHOW THE METRICS
+ def show_metrics(test, pred, name):
+
+     my_Name = name
+     my_Accuracy_score = metrics.accuracy_score(test, pred)
+     #my_ROC_AUC_score = roc_auc_score(y, model.predict_proba(X), multi_class='ovr')
+     my_Weighted_f1 = metrics.f1_score(test, pred, average='weighted')
+     my_Cohen_Kappa = metrics.cohen_kappa_score(test, pred)
+     my_Matthews_coefficient = metrics.matthews_corrcoef(test, pred)
+
+     st.header(f'Metrics for {my_Name}:')
+     report = metrics.classification_report(test, pred, output_dict=True)
+     df_report = pd.DataFrame(report).transpose()
+     st.dataframe(df_report)
+     st.write(f'Accuracy Score........{my_Accuracy_score:.4f}\n\n'
+              #f'ROC AUC Score.........{my_ROC_AUC_score:.4f}\n\n'
+              f'Weighted f1 score.....{my_Weighted_f1:.4f}\n\n'
+              f'Cohen Kappa...........{my_Cohen_Kappa:.4f}\n\n'
+              f'Matthews Coefficient..{my_Matthews_coefficient:.4f}\n\n')
+     my_List = [my_Accuracy_score, my_Weighted_f1, my_Cohen_Kappa, my_Matthews_coefficient]
+
+     df_Metrics[my_Name] = my_List
+
+     # Confusion matrix, computed and displayed with a consistent label order
+     cfm = metrics.confusion_matrix(test, pred, labels=labels)
+     st.caption(f'Confusion Matrix: {my_Name}')
+     cmd = metrics.ConfusionMatrixDisplay(cfm, display_labels=labels)
+     fig, ax = plt.subplots(figsize=(15,15))
+     cmd.plot(ax=ax,
+              colorbar=False,
+              values_format='.0f',
+              cmap='Reds')  # or 'tab20'; see color options here https://matplotlib.org/stable/tutorials/colors/colormaps.html
+     plt.xticks(rotation=90)
+     st.pyplot(fig)
+
+
+ st.header('Let\'s see how the models performed')
+
+ '''
+ The next part of the code will analyze the full set of test results.
+ Choose all five models to compare them all.
+ '''
+
+ ##### FORM #####
+
+ with st.form("my_form"):
+     st.write("You can choose from 1 to 5 models")
+
+     selected_options = st.multiselect(
+         'Which models would you like to analyze?', models_run)
+
+     submitted = st.form_submit_button("Submit")
+     if submitted:
+         st.write('You selected', selected_options)
+
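+ # Ground truth lives in the 'labels' column of ML_results.csv;
+ # each model's predictions are stored in a column named after that model.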
+ ### Show the metrics for each selected model:
+ test = df_test['labels']
+
+ #for m in models_run:
+ for m in selected_options:
+     pred = df_test[m]
+     show_metrics(test, pred, m)
+
+ st.header('Metrics for all models:')
+ st.table(df_Metrics)
+
+ #### GRAPH THE RESULTS ###
+ import seaborn as sns
+
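+ # seaborn's catplot expects long-format data: one row per (metric, model, score)
+ # combination, so the bars can be grouped by model within each metric.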
+ # Reshape the dataframe into long format using pd.melt()
+ #subset_df = pd.melt(df_Metrics[['SwinForImageClassification_24',
+ #'ViTForImageClassification_22', 'SwinForImageClassification_21', 'ResNetForImageClassification_23', 'BeitForImageClassification_25']].reset_index(), id_vars='index', var_name='Model', value_name='Score')
+ subset_df = pd.melt(df_Metrics[selected_options].reset_index(), id_vars='index', var_name='Model', value_name='Score')
+
+ sns.set_style('whitegrid')
+ g = sns.catplot(data=subset_df,
+                 x='index',
+                 y='Score',
+                 hue='Model',
+                 kind='bar',
+                 palette='Blues',
+                 aspect=2)
+
+ plt.xlabel('Metric')
+ plt.ylabel('Score')
+
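+ # catplot returns a seaborn FacetGrid; hand its underlying matplotlib figure to Streamlit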
+ fig = g.figure
+ st.pyplot(fig)