Merge pull request #2 from RobCaamano/milestone-2
Browse files- README.md +12 -1
- app.py +27 -0
- requirements.txt +5 -0
README.md
CHANGED
@@ -1 +1,12 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Finetuning Language Models-Toxic Tweets
|
3 |
+
emoji: π
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: indigo
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.17.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
+
# Finetuning_Language_Models-Toxic_Tweets
|
app.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from transformers import AutoTokenizer
import streamlit as st
from transformers import pipeline
from transformers import (
    TFAutoModelForSequenceClassification as AutoModelForSequenceClassification,
)

# Streamlit front-end: score the entered text with a TensorFlow
# sequence-classification model and chart the per-class probabilities.
st.title("Detecting Toxic Tweets")

demo = """I'm so proud of myself for accomplishing my goals today. #motivation #success"""

text = st.text_area("Input text", demo, height=250)

# NOTE(review): this checkpoint is a sentiment model (POSITIVE/NEGATIVE),
# not a toxicity model — confirm, and swap in a toxicity-finetuned
# checkpoint to match the app's stated purpose.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# BUG FIX: "toxicity-analysis" is not a registered pipeline task, so
# pipeline() raised KeyError at startup and the app never loaded.
# "text-classification" is the valid task name (alias: "sentiment-analysis").
# return_all_scores=True keeps one score per class for the bar chart
# (deprecated in newer transformers in favor of top_k=None).
clf = pipeline(
    "text-classification", model=model, tokenizer=tokenizer, return_all_scores=True
)

# Removed the dead `input = tokenizer(text, return_tensors="tf")` line: its
# result was never used, it shadowed the builtin `input`, and it re-tokenized
# the text on every Streamlit rerun for nothing (the pipeline tokenizes
# internally).

if st.button("Submit", type="primary"):
    # With return_all_scores=True, clf(text) yields a list per input; [0]
    # is the list of {"label": ..., "score": ...} dicts for our single text.
    results = clf(text)[0]
    # Map label -> score explicitly instead of dict(d.values()), which
    # silently depended on each dict's key insertion order.
    classes = {d["label"]: d["score"] for d in results}
    st.bar_chart(classes)
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit
opencv-python-headless
numpy
easyocr
Pillow
transformers
tensorflow
|