santiviquez committed on
Commit
c56be05
1 Parent(s): 404d4af

text classification app

Browse files
Files changed (3) hide show
  1. analysis.csv +0 -0
  2. app.py +90 -0
  3. reference.csv +0 -0
analysis.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ import pandas as pd
4
+ import nannyml as nml
5
+
6
+
7
+ if 'count' not in st.session_state:
8
+ st.session_state.count = 0
9
+
10
+ if 'dissable' not in st.session_state:
11
+ st.session_state.dissable = False
12
+
13
+ def increment_counter():
14
+ st.session_state.count += 1
15
+
16
+ @st.cache_resource
17
+ def get_model(url):
18
+ tokenizer_kwargs = {'padding':True, 'truncation':True, 'max_length':512}
19
+ return pipeline(model=url, **tokenizer_kwargs)
20
+
21
+ rating_classification_model = get_model("NannyML/amazon-reviews-sentiment-bert-base-uncased-6000-samples")
22
+
23
+
24
+ label_mapping = {
25
+ 'LABEL_0': 'Negative',
26
+ 'LABEL_1': 'Neutral',
27
+ 'LABEL_2': 'Positive'
28
+ }
29
+
30
+ review = st.text_input(label='write a review', value='I love this book!')
31
+ single_review_button = st.button(label='Classify Single Review')
32
+ if review and single_review_button:
33
+ rating = rating_classification_model(review)[0]
34
+ label = label_mapping[rating['label']]
35
+ score = rating['score']
36
+ st.write(f"{label} — confidence: {round(score, 2)}")
37
+
38
+
39
+ # # # # # # # #
40
+
41
+ reference_df = pd.read_csv('../reference.csv')
42
+ analysis_df = pd.read_csv('../analysis.csv')
43
+
44
+ reference_df['label'] = reference_df['label'].astype(str)
45
+ reference_df['pred_label'] = reference_df['pred_label'].astype(str)
46
+
47
+ analysis_df['label'] = analysis_df['label'].astype(str)
48
+ analysis_df['pred_label'] = analysis_df['pred_label'].astype(str)
49
+
50
+
51
+ estimator = nml.CBPE(
52
+ y_pred_proba={
53
+ '0': 'pred_proba_label_negative',
54
+ '1': 'pred_proba_label_neutral',
55
+ '2': 'pred_proba_label_positive'},
56
+ y_pred='pred_label',
57
+ y_true='label',
58
+ problem_type='classification_multiclass',
59
+ metrics='f1',
60
+ chunk_size=400,
61
+ )
62
+ estimator.fit(reference_df)
63
+
64
+ calculator = nml.PerformanceCalculator(
65
+ y_pred_proba={
66
+ '0': 'pred_proba_label_negative',
67
+ '1': 'pred_proba_label_neutral',
68
+ '2': 'pred_proba_label_positive'},
69
+ y_true='label',
70
+ y_pred='pred_label',
71
+ problem_type='classification_multiclass',
72
+ metrics=['f1'],
73
+ chunk_size=400,
74
+ )
75
+ calculator.fit(reference_df)
76
+
77
+ multiple_reviews_button = st.button('Estimate Model Performance on 400 Reviews', on_click=increment_counter, disabled=st.session_state.dissable)
78
+
79
+
80
+ if multiple_reviews_button:
81
+ prod_data = analysis_df[0: st.session_state.count * 400]
82
+ results = estimator.estimate(prod_data.drop(columns=['label']))
83
+ realize_results = calculator.calculate(prod_data)
84
+ fig = results.compare(realize_results).plot()
85
+ st.plotly_chart(fig, use_container_width=True, theme=None)
86
+
87
+ st.write(f'Batch {st.session_state.count} / 5')
88
+
89
+ if st.session_state.count >= 5:
90
+ st.session_state.count = 0
reference.csv ADDED
The diff for this file is too large to render. See raw diff