andyqin18 committed
Commit
8942604
1 Parent(s): aa2cadb
Files changed (2):
  1. README.md +3 -2
  2. app.py +29 -23
README.md CHANGED

```diff
@@ -13,12 +13,13 @@ pinned: false
 
 Hello! This is a project for CS-UY 4613: Artificial Intelligence. I'm providing a step-by-step instruction on finetuning language models for detecting toxic tweets.
 
-# Milestone 2
+# Milestone 3
 
-This milestone includes creating a Streamlit app in HuggingFace for sentiment analysis.
+This milestone includes finetuning a language model in HuggingFace for sentiment analysis.
 
 Link to app: https://huggingface.co/spaces/andyqin18/sentiment-analysis-app
 
+## 1. Space setup
 ## 1. Space setup
 
 After creating a HuggingFace account, we can create our app as a space and choose Streamlit as the space SDK.
```
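The new milestone description points to the finetuning work itself, which is not part of this commit. As a rough sketch of what that step can look like with the HuggingFace `Trainer` (the base model name, the `train.csv` path, and the label columns here are all illustrative assumptions; the label names follow the usual Jigsaw toxic-comment set that the app's "Toxicity Class" wording suggests, not anything confirmed by this diff):

```python
import pandas as pd
from datasets import Dataset
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          Trainer, TrainingArguments)

# Assumed label columns (standard Jigsaw toxic-comment set).
LABELS = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=len(LABELS),
    problem_type="multi_label_classification",  # one score per toxicity class
)

def tokenize(batch):
    enc = tokenizer(batch["comment_text"], truncation=True,
                    padding="max_length", max_length=128)
    # Pack the per-class 0/1 columns into one float vector per example,
    # as required by the multi-label (BCE) loss.
    enc["labels"] = [[float(batch[label][i]) for label in LABELS]
                     for i in range(len(batch["comment_text"]))]
    return enc

# Hypothetical training file with a "comment_text" column plus one column per label.
dataset = Dataset.from_pandas(pd.read_csv("train.csv")).map(tokenize, batched=True)

trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="finetuned-toxic-model", num_train_epochs=1),
    train_dataset=dataset,
)
trainer.train()
```

Saving the result with `trainer.save_model()` and pushing it to the Hub is what would let the Space load the finetuned model by name.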
app.py CHANGED

```diff
@@ -61,31 +61,37 @@ if st.button("Analyze"):
         }
         st.dataframe(pd.DataFrame(result_dict))
 
-    # 10 Sample Table
-    if st.button("Click to generate ten sample analysis"):
-        with st.spinner("Hang on.... Analyzing..."):
-            df = pd.read_csv("milestone3/comp/test_comment.csv")
-            test_texts = df["comment_text"].values
-            sample_texts = np.random.choice(test_texts, size=sample_text_num, replace=False)
+    # 10 Sample Table
+    sample_texts = [
+        "Please stop. If you continue to vandalize Wikipedia, as you did to Homosexuality, you will be blocked from editing.",
+        "knock it off you bloody CWI trot",
+        "No, he is an arrogant, self serving, immature idiot. Get it right.",
+        "to fuck you and ur family",
+        "Search Google, it's listed as 1966 everywhere I've seen, including many PJ related sites.",
+        "That entry made a lot of sense to me. ",
+        "KSchwartz is an annoying person who often smells of rotten fish and burnt animal hair.",
+        "Cool!",
+        "u suck u suck u suck u suck u sucku suck u suck u suck u suck u u suck",
+        "go fuck yourself ...cunt"
+    ]
 
-            init_table_dict = {
-                "Text": [],
-                "Highest Toxicity Class": [],
-                "Highest Score": [],
-                "Second Highest Toxicity Class": [],
-                "Second Highest Score": []
-            }
+    init_table_dict = {
+        "Text": [],
+        "Highest Toxicity Class": [],
+        "Highest Score": [],
+        "Second Highest Toxicity Class": [],
+        "Second Highest Score": []
+    }
 
-            for text in sample_texts:
-                result = analyze(fine_tuned_model, text[:50], top_k=2)
-                init_table_dict["Text"].append(text[:50])
-                init_table_dict["Highest Toxicity Class"].append(result[0][0]['label'])
-                init_table_dict["Highest Score"].append(result[0][0]['score'])
-                init_table_dict["Second Highest Toxicity Class"].append(result[0][1]['label'])
-                init_table_dict["Second Highest Score"].append(result[0][1]['score'])
-            st.dataframe(pd.DataFrame(init_table_dict))
-    else:
-        st.write("( ─ ‿ ‿ ─ )")
+    for text in sample_texts:
+        result = analyze(fine_tuned_model, text[:50], top_k=2)
+        init_table_dict["Text"].append(text[:50])
+        init_table_dict["Highest Toxicity Class"].append(result[0][0]['label'])
+        init_table_dict["Highest Score"].append(result[0][0]['score'])
+        init_table_dict["Second Highest Toxicity Class"].append(result[0][1]['label'])
+        init_table_dict["Second Highest Score"].append(result[0][1]['score'])
+    st.dataframe(pd.DataFrame(init_table_dict))
+    st.write("( ─ ‿ ‿ ─ )")
 
 
 else:
```
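Neither `analyze` nor `fine_tuned_model` is defined in this hunk; both come from earlier in `app.py`. One plausible reading that matches the `result[0][0]['label']` indexing in the loop above is a thin wrapper around a `transformers` text-classification pipeline called with a one-element list, since with `top_k` set the pipeline returns one ranked list of `{'label', 'score'}` dicts per input text. A minimal sketch under that assumption:

```python
from transformers import pipeline

def analyze(model_name: str, text: str, top_k: int = 2):
    """Hypothetical reconstruction of the helper used above; the real
    definition lives earlier in app.py and is not shown in this diff."""
    classifier = pipeline("text-classification", model=model_name, top_k=top_k)
    # Called with a list, the pipeline returns one list of {label, score}
    # dicts per input, so result[0] holds the ranked predictions for `text`.
    return classifier([text])
```

As for the change itself: replacing the `np.random.choice` draw from `milestone3/comp/test_comment.csv` with a fixed list of ten comments makes the sample table deterministic and drops both the CSV read and the nested button/spinner state from the Space.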