rwcuffney committed
Commit
d8791ea
1 Parent(s): 1ffb024

Update app.py

Files changed (1)
  1. app.py +268 -24
app.py CHANGED
@@ -1,38 +1,282 @@
  import streamlit as st
- from datasets import load_dataset
-
- dataset = load_dataset('rwcuffney/pick_a_card_test', batch_size=32, shuffle=True)
-
- from transformers import AutoModelForSequenceClassification
-
- model = AutoModelForSequenceClassification.from_pretrained('rwcuffney/autotrain-pick_a_card-3726099224')
-
- from transformers import AutoTokenizer
-
- tokenizer = AutoTokenizer.from_pretrained('rwcuffney/autotrain-pick_a_card-3726099224')
-
- def preprocess_text(text):
-     encoded = tokenizer(text, padding='max_length', truncation=True, max_length=128, return_tensors='pt')
-     return encoded
-
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
- model.to(device)
- model.eval()
-
- for batch in dataset:
-     # Preprocess the text
-     text = batch['text']
-     inputs = preprocess_text(text)
-     inputs = inputs.to(device)
-
-     # Make predictions
-     with torch.no_grad():
-         outputs = model(**inputs)
-         predicted_classes = torch.argmax(outputs.logits, dim=-1)
-
-     # Print the predicted class labels
-     predicted_labels = [dataset.features['label'].names[i] for i in predicted_classes]
-     st.write(predicted_labels)

  import streamlit as st
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import requests
+ import io
+
+ #import streamlit as st
+ #import pandas as pd
+ #import matplotlib.pyplot as plt
+
+ st.title('Playing Cards Image Analysis')
+
+ #sample slider; feel free to remove:
+ #x = st.slider('Select a value')
+ #st.write(x, 'squared is', x * x)
+
+ '''
+ This next piece of code fetches two CSV files from the Hugging Face Hub.
+ One is the original dataset, broken up into test, train, and valid.
+ The second CSV is the test dataset, with the results after the models were run through the API.
+ '''
+ # Download the csv file from the Hugging Face dataset repo
+ url = "https://huggingface.co/datasets/rwcuffney/autotrain-data-pick_a_card/raw/main/cards.csv"
+ download = requests.get(url).content
+
+ # Read the downloaded content and turn it into a pandas data frame
+ df = pd.read_csv(io.StringIO(download.decode('utf-8')))
+ #df = pd.read_csv('playing_cards/cards.csv').sort_values('class index')
+ df_fulldataset = df
+
+ # Download the csv file from the Hugging Face dataset repo
+ url = "https://huggingface.co/datasets/rwcuffney/autotrain-data-pick_a_card/raw/main/ML_results.csv"
+ download = requests.get(url).content
+
+ # Read the downloaded content and turn it into a pandas data frame
+ df = pd.read_csv(io.StringIO(download.decode('utf-8')))
+ #df = pd.read_csv('playing_cards/cards.csv').sort_values('class index')
+ df_test = df
+
+ # Create the button
+ if st.button('Click me to re-run code', key='RunCode_button'):
+     # Re-run the script when the button is clicked
+     st.experimental_rerun()
+
+ st.header('Sample of the .csv data:')
+ x = st.slider('Select a value', value=10, max_value=8000)
+ st.table(df_fulldataset.sample(x))
+
+ ### HORIZONTAL BAR ###
+
+ st.header('Distribution of the playing card images:')
+
+ # Get the value counts of the 'labels' column
+ value_counts = df_fulldataset.groupby('labels')['class index'].count().iloc[::-1]
+
+ fig, ax = plt.subplots(figsize=(10, 10))
+
+ # Create a bar chart of the value counts
+ ax = value_counts.plot.barh()
+ # Set the chart title and axis labels
+ ax.set_title('Value Counts of Labels')
+ ax.set_xlabel('Count')
+ ax.set_ylabel('Label')
+
+ # Show the chart
+ st.pyplot(fig)
+
+ ### PIE CHART ###
+
+ st.header('Balance of Train, Valid, Test datasets:')
+
+ # Get the value counts of the 'data set' column
+ value_counts = df_fulldataset.groupby('data set')['class index'].count().iloc[::-1]
+ value_counts = df_fulldataset['data set'].value_counts()
+
+ fig, ax = plt.subplots(figsize=(5, 5))
+ # Create a pie chart of the value counts
+ ax = value_counts.plot.pie(autopct='%1.1f%%')
+
+ # Show the chart
+ st.pyplot(fig)
+
+ models_run = ['SwinForImageClassification_24',
+               'ViTForImageClassification_22',
+               'SwinForImageClassification_21',
+               'ResNetForImageClassification_23',
+               'BeitForImageClassification_25']
+
+ from enum import Enum
+
+ API_dict = dict(
+     SwinForImageClassification_21="https://api-inference.huggingface.co/models/rwcuffney/autotrain-pick_a_card-3726099221",
+     ViTForImageClassification_22="https://api-inference.huggingface.co/models/rwcuffney/autotrain-pick_a_card-3726099222",
+     ResNetForImageClassification_23="https://api-inference.huggingface.co/models/rwcuffney/autotrain-pick_a_card-3726099223",
+     SwinForImageClassification_24="https://api-inference.huggingface.co/models/rwcuffney/autotrain-pick_a_card-3726099224",
+     BeitForImageClassification_25="https://api-inference.huggingface.co/models/rwcuffney/autotrain-pick_a_card-3726099225")
+
+ # printing enum member as string
+ #print(Api_URL.ViTForImageClassification_22.value)
+
+ #### TRY IT OUT ####
+ import requests
+
+ st.header("Try it out")
+
+ '''
+ Warning: it will error out at first; resubmit a few times.
+ Each model needs to 'warm up' before it starts working.
+
+ You can use any image... try test/queen of hearts/4.jpg to see an example that
+ got different results with different models.
+ '''
+
+ headers = {"Authorization": "Bearer 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"}
+ def query(filename, api_url):
+     #with open(filename, "rb") as f:
+     #    data = f.read()
+     response = requests.post(api_url, headers=headers, data=filename)
+     return response.json()
+
+ #API_URL = "https://api-inference.huggingface.co/models/rwcuffney/autotrain-pick_a_card-3726099224"
+
+ ##### FORM #####
+
+ with st.form("api_form"):
+     api = st.selectbox('Which model do you want to try?', models_run, key='select_box')
+
+     uploaded_file = st.file_uploader("Choose a file")
+     if uploaded_file is not None:
+         # To read file as bytes:
+         bytes_data = uploaded_file.getvalue()
+         #st.write(bytes_data)
+         st.image(uploaded_file)
+
+     submitted = st.form_submit_button("Submit")
+     if submitted and uploaded_file is not None:
+         st.write(API_dict[api])
+         output = query(bytes_data, API_dict[api])
+
+         prediction = output[0]['label']
+         st.write(f'prediction = {prediction}')
+         st.text(output)
+
+ #### FUNCTIONS ####
+ import sklearn
+ from sklearn import metrics
+ import matplotlib.pyplot as plt
+
+ index = ['accuracy_score', 'Weighted f1', 'Cohen Kappa', 'Matthews']
+ df_Metrics = pd.DataFrame(index=index)
+
+ labels = df_test['labels'].unique()
+
+ ### FUNCTION TO SHOW THE METRICS
+ def show_metrics(test, pred, name):
+     from sklearn import metrics
+
+     my_Name = name
+     my_Accuracy_score = metrics.accuracy_score(test, pred)
+     #my_ROC_AUC_score = roc_auc_score(y, model.predict_proba(X), multi_class='ovr')
+     my_Weighted_f1 = metrics.f1_score(test, pred, average='weighted')
+     my_Cohen_Kappa = metrics.cohen_kappa_score(test, pred)
+     my_Matthews_coefficient = metrics.matthews_corrcoef(test, pred)
+
+     st.header(f'Metrics for {my_Name}:')
+     st.write(f'Accuracy Score........{my_Accuracy_score:.4f}\n\n' \
+              #f'ROC AUC Score.........{my_ROC_AUC_score:.4f}\n\n' \
+              f'Weighted f1 score.....{my_Weighted_f1:.4f}\n\n' \
+              f'Cohen Kappa...........{my_Cohen_Kappa:.4f}\n\n' \
+              f'Matthews Coefficient..{my_Matthews_coefficient:.4f}\n\n')
+     my_List = [my_Accuracy_score, my_Weighted_f1, my_Cohen_Kappa, my_Matthews_coefficient]
+     df_Metrics[my_Name] = my_List
+
+     cfm = metrics.confusion_matrix(test, pred, labels=labels)
+     st.caption(f'Confusion Matrix: {my_Name}')
+     cmd = metrics.ConfusionMatrixDisplay(cfm, display_labels=labels)
+     fig, ax = plt.subplots(figsize=(15, 15))
+     ax = cmd.plot(ax=ax,
+                   colorbar=False,
+                   values_format='.0f',
+                   cmap='Reds')  #='tab20')  # see color options here https://matplotlib.org/stable/tutorials/colors/colormaps.html
+     plt.xticks(rotation=90)
+     st.pyplot(fig)
+
+ st.header('Let\'s see how the models performed')
+
+ '''
+ The next part of the code will analyze the full test dataset.
+ Choose all five models to compare them all.
+ '''
+
+ ##### FORM #####
+
+ with st.form("my_form"):
+     st.write("You can choose from 1 to 5 models")
+
+     selected_options = st.multiselect(
+         'Which models would you like to analyze?', models_run)
+
+     submitted = st.form_submit_button("Submit")
+     if submitted:
+         st.write('You selected', selected_options)
+
+         ### Show the metrics for each model:
+         test = df_test['labels']
+
+         #for m in models_run:
+         for m in selected_options:
+             pred = df_test[m]
+             show_metrics(test, pred, m)
+
+         st.header('Metrics for all models:')
+         st.table(df_Metrics)
+
+         #### GRAPH THE RESULTS ####
+         import seaborn as sns
+
+         # Reshape the dataframe into long format using pd.melt()
+         #subset_df = pd.melt(df_Metrics[['SwinForImageClassification_24',
+         #'ViTForImageClassification_22', 'SwinForImageClassification_21', 'ResNetForImageClassification_23', 'BeitForImageClassification_25']].reset_index(), id_vars='index', var_name='Model', value_name='Score')
+         subset_df = pd.melt(df_Metrics[selected_options].reset_index(), id_vars='index', var_name='Model', value_name='Score')
+
+         sns.set_style('whitegrid')
+         ax = sns.catplot(data=subset_df,
+                          x='index',
+                          y='Score',
+                          hue='Model',
+                          kind='bar',
+                          palette='Blues',
+                          aspect=2)
+
+         plt.xlabel('Metric')
+         plt.ylabel('Score')
+
+         fig = ax.figure
+         st.pyplot(fig)
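
For reference, the four scores that show_metrics() reports come straight from scikit-learn. Below is a minimal, self-contained sketch that reproduces the same calls on made-up labels (not data from this repo), so the metrics can be checked outside the Streamlit app:

from sklearn import metrics

# Toy ground-truth and predicted labels, for illustration only.
test = ['ace of spades', 'queen of hearts', 'queen of hearts', 'two of clubs']
pred = ['ace of spades', 'queen of hearts', 'two of clubs', 'two of clubs']

print(f"Accuracy Score........{metrics.accuracy_score(test, pred):.4f}")
print(f"Weighted f1 score.....{metrics.f1_score(test, pred, average='weighted'):.4f}")
print(f"Cohen Kappa...........{metrics.cohen_kappa_score(test, pred):.4f}")
print(f"Matthews Coefficient..{metrics.matthews_corrcoef(test, pred):.4f}")
print(metrics.confusion_matrix(test, pred))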
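
The "Try it out" form posts the raw image bytes to a hosted Inference API endpoint. A minimal sketch of the same call from a plain Python script, assuming a valid Hugging Face API token (the one below is a placeholder) and a local copy of the test image mentioned above:

import requests

# One of the endpoints listed in API_dict above.
API_URL = "https://api-inference.huggingface.co/models/rwcuffney/autotrain-pick_a_card-3726099224"
headers = {"Authorization": "Bearer <YOUR_HF_API_TOKEN>"}  # placeholder token

def classify(image_path):
    # POST the raw image bytes, exactly as the app's query() helper does
    # with the bytes of the uploaded file.
    with open(image_path, "rb") as f:
        data = f.read()
    response = requests.post(API_URL, headers=headers, data=data)
    return response.json()

# Once the model is warm, the response is a list of {'label', 'score'} dicts,
# which is why the app reads output[0]['label'].
print(classify("test/queen of hearts/4.jpg"))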