danielcd99 committed on
Commit
6cae882
0 Parent(s):

Duplicate from danielcd99/Toxicity-detection

Browse files
Files changed (6) hide show
  1. .gitattributes +34 -0
  2. Predict.py +12 -0
  3. README.md +13 -0
  4. Scraper.py +12 -0
  5. app.py +45 -0
  6. requirements.txt +4 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Predict.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def predict_tweet(tweet, pipeline):
    """Classify a single tweet as 0 or 1.

    Args:
        tweet: Text passed to ``pipeline``.
        pipeline: Callable returning a sequence whose first element is a
            mapping with a ``'label'`` key.

    Returns:
        0 when the first result's label is ``'LABEL_0'``, otherwise 1.
    """
    top_result = pipeline(tweet)[0]
    return 0 if top_result['label'] == 'LABEL_0' else 1
7
+
8
def get_predictions(tweets, pipeline):
    """Classify every tweet in ``tweets``.

    Args:
        tweets: Iterable of tweet texts.
        pipeline: Classifier forwarded unchanged to ``predict_tweet``.

    Returns:
        A list with one ``predict_tweet`` result per tweet, in input order.
    """
    return [predict_tweet(text, pipeline) for text in tweets]
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Toxicity Detection
3
+ emoji: 🐠
4
+ colorFrom: indigo
5
+ colorTo: pink
6
+ sdk: streamlit
7
+ sdk_version: 1.17.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: danielcd99/Toxicity-detection
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Scraper.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def check_user_existence(scraper):
    """Placeholder: no existence check is implemented yet.

    Args:
        scraper: Currently unused.

    Returns:
        Always ``None``.
    """
    return None
3
+
4
+
5
def get_tweets(scraper, number_of_tweets):
    """Collect the text of up to ``number_of_tweets`` tweets from ``scraper``.

    Args:
        scraper: Object whose ``get_items()`` returns an iterable of items
            exposing a ``rawContent`` attribute.
        number_of_tweets: Maximum number of tweets to collect.

    Returns:
        A list with the ``rawContent`` of each collected item, in the order
        yielded. The list is shorter than ``number_of_tweets`` (possibly
        empty) when the scraper yields fewer items; it is never ``None``.
    """
    from itertools import islice

    # A negative limit would make islice raise; treat it as "nothing wanted".
    limit = max(number_of_tweets, 0)
    # islice stops pulling from the scraper right after the last wanted item,
    # so no extra item is fetched, and a short feed still returns what was
    # collected instead of falling off the end of the function.
    return [item.rawContent for item in islice(scraper.get_items(), limit)]
12
+
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from snscrape.modules.twitter import TwitterUserScraper
3
+ import pandas as pd
4
+ from Predict import *
5
+ from Scraper import *
6
+ from transformers import pipeline
7
+
8
# Model and pipeline
MODEL_PATH = 'danielcd99/multilanguage-toxicity-classifier'

# Streamlit re-executes this script on every interaction, so an uncached
# loader would rebuild the model on each form submit. Newer Streamlit
# releases expose `st.cache_resource`; older ones (such as the 1.17 SDK) only
# have `st.experimental_singleton`, so pick whichever is available.
_cache_model = getattr(st, "cache_resource", None) or getattr(st, "experimental_singleton")


@_cache_model
def load_pipeline():
    """Build the text-classification pipeline for ``MODEL_PATH``.

    Returns:
        The ``transformers`` pipeline object; cached so repeated script runs
        reuse the same instance instead of reloading the model.
    """
    return pipeline(
        "text-classification",
        model=MODEL_PATH,
    )


pipe = load_pipeline()
19
+
20
+
21
# Title and subtitle
st.title("Toxicity Detection")
st.subheader("This is an app for detecting toxicity in tweets written in portuguese. "
             "Write the name of the user (without @) and select the number of tweets you want to check.")


# User information
with st.form(key='forms'):
    # List items are kept at column 0 of the Markdown text so they render as
    # a bullet list under the heading.
    st.markdown(
        "#### Tweets are classified in:\n"
        "- 0: Harmless\n"
        "- 1: Toxic\n"
    )
    username = st.text_input(label='Username:')
    number_of_tweets = st.selectbox(
        'How many tweets do you want to check?',
        (5, 10, 20, 30))
    submit_button = st.form_submit_button(label='Analyze')

if submit_button:
    # Tolerate surrounding whitespace and a leading "@" typed by habit.
    handle = username.strip().lstrip('@')
    if not handle:
        st.warning("Please enter a username.")
        st.stop()

    try:
        scraper = TwitterUserScraper(handle)
        # Normalise a missing result to an empty list so the emptiness check
        # below covers it.
        tweets = get_tweets(scraper, number_of_tweets) or []
    except Exception as exc:  # UI boundary: report the failure, don't crash
        st.error(f"Could not fetch tweets for '{handle}': {exc}")
        st.stop()

    if not tweets:
        st.info(f"No tweets were found for '{handle}'.")
        st.stop()

    predictions = get_predictions(tweets, pipe)

    st.table(pd.DataFrame({'tweet': tweets, 'toxic': predictions}))
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ snscrape
2
+ numpy
3
+ torch
4
+ transformers