andyqin18 committed on
Commit
a7fbbb7
1 Parent(s): 668f6af
Files changed (2) hide show
  1. app.py +33 -23
  2. milestone3/milestone3.py +29 -13
app.py CHANGED
@@ -18,9 +18,9 @@ def analyze(model_name: str, text: str, top_k=1) -> dict:
18
  return classifier(text)
19
 
20
  # App title
21
- st.title("Sentiment Analysis App - Milestone2")
22
  st.write("This app is to analyze the sentiments behind a text.")
23
- st.write("Currently it uses pre-trained models without fine-tuning.")
24
 
25
  # Model hub
26
  model_descrip = {
@@ -34,25 +34,7 @@ model_descrip = {
34
  Labels: POS; NEU; NEG"
35
  }
36
 
37
- df = pd.read_csv("/milestone3/comp/test_comment.csv")
38
- test_texts = df["comment_text"].values
39
- sample_texts = np.random.choice(test_texts, size=sample_text_num, replace=False)
40
 
41
- init_table_dict = {
42
- "Text": [],
43
- "Highest Toxicity Class": [],
44
- "Highest Score": [],
45
- "Second Highest Toxicity Class": [],
46
- "Second Highest Score": []
47
- }
48
-
49
- for text in sample_texts:
50
- result = analyze(fine_tuned_model, text, top_k=2)
51
- init_table_dict["Text"].append(text[:50])
52
- init_table_dict["Highest Toxicity Class"].append(result[0][0]['label'])
53
- init_table_dict["Highest Score"].append(result[0][0]['score'])
54
- init_table_dict["Second Highest Toxicity Class"].append(result[0][1]['label'])
55
- init_table_dict["Second Highest Score"].append(result[0][1]['score'])
56
 
57
 
58
  user_input = st.text_input("Enter your text:", value="NYU is the better than Columbia.")
@@ -73,10 +55,38 @@ if st.button("Analyze"):
73
  with st.spinner("Hang on.... Analyzing..."):
74
  if user_model == fine_tuned_model:
75
  result = analyze(user_model, user_input, top_k=2)
76
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
- df = pd.DataFrame(init_table_dict)
79
- st.dataframe(df)
80
 
81
  else:
82
  result = analyze(user_model, user_input)
 
18
  return classifier(text)
19
 
20
  # App title
21
+ st.title("Sentiment Analysis App - Milestone3")
22
  st.write("This app is to analyze the sentiments behind a text.")
23
+ st.write("You can choose to use my fine-tuned model or pre-trained models.")
24
 
25
  # Model hub
26
  model_descrip = {
 
34
  Labels: POS; NEU; NEG"
35
  }
36
 
 
 
 
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  user_input = st.text_input("Enter your text:", value="NYU is the better than Columbia.")
 
55
  with st.spinner("Hang on.... Analyzing..."):
56
  if user_model == fine_tuned_model:
57
  result = analyze(user_model, user_input, top_k=2)
58
+ result_dict = {
59
+ "Text": [user_input],
60
+ "Highest Toxicity Class": [result[0][0]['label']],
61
+ "Highest Score": [result[0][0]['score']],
62
+ "Second Highest Toxicity Class": [result[0][1]['label']],
63
+ "Second Highest Score": [result[0][1]['score']]
64
+ }
65
+ st.dataframe(pd.DataFrame(result_dict))
66
+ if st.button("Click to generate ten sample analysis"):
67
+ df = pd.read_csv("milestone3/comp/test_comment.csv")
68
+ test_texts = df["comment_text"].values
69
+ sample_texts = np.random.choice(test_texts, size=sample_text_num, replace=False)
70
+
71
+ init_table_dict = {
72
+ "Text": [],
73
+ "Highest Toxicity Class": [],
74
+ "Highest Score": [],
75
+ "Second Highest Toxicity Class": [],
76
+ "Second Highest Score": []
77
+ }
78
+
79
+ for text in sample_texts:
80
+ result = analyze(fine_tuned_model, text[:50], top_k=2)
81
+ init_table_dict["Text"].append(text[:50])
82
+ init_table_dict["Highest Toxicity Class"].append(result[0][0]['label'])
83
+ init_table_dict["Highest Score"].append(result[0][0]['score'])
84
+ init_table_dict["Second Highest Toxicity Class"].append(result[0][1]['label'])
85
+ init_table_dict["Second Highest Score"].append(result[0][1]['score'])
86
+ st.dataframe(pd.DataFrame(init_table_dict))
87
+ else:
88
+ st.write("(─‿‿─)")
89
 
 
 
90
 
91
  else:
92
  result = analyze(user_model, user_input)
milestone3/milestone3.py CHANGED
@@ -1,19 +1,19 @@
1
- # from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
2
 
3
- # def analyze(model_name: str, text: str, top_k=1) -> dict:
4
- # '''
5
- # Output result of sentiment analysis of a text through a defined model
6
- # '''
7
- # model = AutoModelForSequenceClassification.from_pretrained(model_name)
8
- # tokenizer = AutoTokenizer.from_pretrained(model_name)
9
- # classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, top_k=top_k)
10
- # return classifier(text)
11
 
12
 
13
- # user_input = "Go fuck yourself"
14
- # user_model = "andyqin18/test-finetuned"
15
 
16
- # result = analyze(user_model, user_input, top_k=4)
17
 
18
  # print(result[0][0]['label'])
19
 
@@ -22,4 +22,20 @@ import numpy as np
22
  df = pd.read_csv("milestone3/comp/test_comment.csv")
23
  test_texts = df["comment_text"].values
24
  sample_texts = np.random.choice(test_texts, size=10, replace=False)
25
- print(sample_texts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
2
 
3
+ def analyze(model_name: str, text: str, top_k=1) -> dict:
4
+ '''
5
+ Output result of sentiment analysis of a text through a defined model
6
+ '''
7
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
8
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
9
+ classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, top_k=top_k)
10
+ return classifier(text)
11
 
12
 
13
+ user_input = "Go fuck yourself"
14
+ user_model = "andyqin18/test-finetuned"
15
 
16
+ # result = analyze(user_model, user_input, top_k=2)
17
 
18
  # print(result[0][0]['label'])
19
 
 
22
  df = pd.read_csv("milestone3/comp/test_comment.csv")
23
  test_texts = df["comment_text"].values
24
  sample_texts = np.random.choice(test_texts, size=10, replace=False)
25
+ init_table_dict = {
26
+ "Text": [],
27
+ "Highest Toxicity Class": [],
28
+ "Highest Score": [],
29
+ "Second Highest Toxicity Class": [],
30
+ "Second Highest Score": []
31
+ }
32
+
33
+ for text in sample_texts:
34
+ result = analyze(user_model, text, top_k=2)
35
+ init_table_dict["Text"].append(text[:50])
36
+ init_table_dict["Highest Toxicity Class"].append(result[0][0]['label'])
37
+ init_table_dict["Highest Score"].append(result[0][0]['score'])
38
+ init_table_dict["Second Highest Toxicity Class"].append(result[0][1]['label'])
39
+ init_table_dict["Second Highest Score"].append(result[0][1]['score'])
40
+
41
+ print(init_table_dict)