adrianmoses committed on
Commit
ef4cddb
1 Parent(s): 99dc8a3

this works haha

Browse files
Files changed (2) hide show
  1. app.py +103 -2
  2. requirements.txt +5 -0
app.py CHANGED
@@ -1,4 +1,105 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- x = st.slider('Select a value')
4
- st.write(x, 'squared is', x * x)
 
1
  import streamlit as st
2
+ import re
3
+ import torch
4
+ from transformers import AlbertTokenizer, AlbertModel
5
+ import pytorch_lightning as pl
6
+ from huggingface_hub import hf_hub_download
7
+
8
+
9
def download_torch_model():
    """Fetch the classifier checkpoint from the Hugging Face Hub.

    Returns:
        The path returned by ``hf_hub_download`` for
        ``pytorch_hs_model.net`` in the ``adrianmoses/hate-speech-detection``
        repository.
    """
    checkpoint_path = hf_hub_download(
        repo_id="adrianmoses/hate-speech-detection",
        filename="pytorch_hs_model.net",
    )
    # Echo the resolved path to stdout so it shows up in the app logs.
    print(checkpoint_path)
    return checkpoint_path
13
+
14
def load_model():
    """Return the pretrained ``albert-base-v2`` encoder as an ``AlbertModel``."""
    return AlbertModel.from_pretrained("albert-base-v2")
17
+
18
def load_tokenizer():
    """Return the pretrained ``albert-base-v2`` tokenizer (``AlbertTokenizer``)."""
    return AlbertTokenizer.from_pretrained("albert-base-v2")
21
+
22
def clean_tweet(tweet):
    """Strip ``@handle`` mentions (and an optional trailing colon) from *tweet*.

    Only the mention itself is removed; surrounding whitespace is left
    untouched, so ``"@user: hi"`` becomes ``" hi"``.

    Args:
        tweet: The raw text to clean.

    Returns:
        The text with every ``@word`` / ``@word:`` occurrence deleted.
    """
    # NOTE(review): presumably mirrors the preprocessing used at training
    # time, so the pattern is kept exactly as-is -- confirm before changing.
    mention_pattern = re.compile(r'@\w+:?', flags=re.IGNORECASE)
    return mention_pattern.sub("", tweet)
24
+
25
+
26
def tokenize(tweet):
    """Clean *tweet* and encode it with the ALBERT tokenizer.

    Args:
        tweet: Raw input text.

    Returns:
        Whatever the tokenizer call returns for the cleaned text when asked
        for PyTorch tensors (``return_tensors='pt'``), truncated to at most
        64 tokens.
    """
    cleaned_text = clean_tweet(tweet)
    # A tokenizer is loaded on every call via load_tokenizer().
    albert_tokenizer = load_tokenizer()
    encoding_options = {
        "padding": True,
        "truncation": True,
        "max_length": 64,
        "return_tensors": 'pt',
    }
    return albert_tokenizer(cleaned_text, **encoding_options)
30
+
31
+
32
+
33
class HateSpeechClassifier(pl.LightningModule):
    """Encoder model followed by a two-layer classification head.

    The submodule attribute names (``model``, ``l1``, ``l2``) are the key
    prefixes of this module's ``state_dict``; they must stay unchanged for
    previously saved weights to load.
    """

    def __init__(self, albert_model, dropout, hidden_dim, output_dim):
        """Build the classification head on top of *albert_model*.

        Args:
            albert_model: Encoder module called in ``forward``.
            dropout: Dropout probability applied after the first linear layer.
            hidden_dim: Input and output width of the first linear layer
                (presumably the encoder's hidden size -- confirm).
            output_dim: Number of output classes.
        """
        super().__init__()
        self.model = albert_model
        self.l1 = torch.nn.Linear(hidden_dim, hidden_dim)
        self.dropout = torch.nn.Dropout(dropout)
        self.l2 = torch.nn.Linear(hidden_dim, output_dim)
        # NLLLoss pairs with the log_softmax output of forward().
        self.loss = torch.nn.NLLLoss()

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Return per-class log-probabilities for a batch of encoded inputs."""
        encoder_outputs = self.model(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # First element of the encoder output, then index 0 along the second
        # axis (presumably the [CLS] position of the last hidden state).
        first_token = encoder_outputs[0][:, 0]
        hidden = self.l1(first_token)
        hidden = torch.relu(hidden)
        hidden = self.dropout(hidden)
        logits = self.l2(hidden)
        return torch.log_softmax(logits, dim=1)

    def _batch_loss(self, batch):
        """Run the forward pass on *batch* and return the NLL loss."""
        input_ids, attention_masks, token_type_ids, targets = batch
        log_probs = self(input_ids, attention_masks, token_type_ids)
        # Targets are flattened to 1-D before being handed to the loss.
        return self.loss(log_probs, targets.view(-1))

    def training_step(self, batch, batch_idx):
        """Return the loss for one training batch."""
        return self._batch_loss(batch)

    def validation_step(self, batch, batch_idx):
        """Return the loss for one validation batch."""
        return self._batch_loss(batch)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with lr=1e-5."""
        return torch.optim.Adam(self.parameters(), lr=1e-5)
70
+
71
def setup_model():
    """Build the classifier, load its downloaded weights, and return it in eval mode.

    Returns:
        A ``HateSpeechClassifier`` (dropout 0.5, hidden size 768, 2 output
        classes) with the downloaded state dict loaded onto the CPU.
    """
    weights_path = download_torch_model()
    classifier = HateSpeechClassifier(load_model(), 0.5, 768, 2)
    # NOTE(review): torch.load unpickles the file; the checkpoint comes from
    # the Hub repo named in download_torch_model() -- only load trusted files.
    state = torch.load(weights_path, map_location=torch.device('cpu'))
    classifier.load_state_dict(state)
    # eval() switches off dropout for inference.
    classifier.eval()
    return classifier
78
+
79
+
80
# Streamlit page: load the classifier, read one line of text, show a verdict.
# Streamlit re-runs this script from the top on each widget interaction.
model = setup_model()

st.title("Hate Speech Detection")
st.title("Text will be truncated to 64 tokens")

text = st.text_input("Enter text")

# The text box is empty on the first render; classifying an empty string
# would display a verdict for input the user never typed, so skip it.
if text and text.strip():
    encoded_input = tokenize(text)

    # Inference only: no_grad avoids building an autograd graph.
    with torch.no_grad():
        pred = model(
            encoded_input['input_ids'],
            encoded_input['attention_mask'],
            encoded_input['token_type_ids'],
        )

    # pred holds per-class log-probabilities for the single input row;
    # take the index of the largest one as a plain Python int.
    label = pred.argmax(dim=1).item()
    # Class index 1 is treated as "hate speech".
    is_hate_speech = "YES" if label == 1 else "NO"

    st.write(f"Is this hate speech?: {is_hate_speech}")
103
+
104
+
105
 
 
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers==4.12.3
2
+ SentencePiece
3
+ torch
4
+ pytorch-lightning==1.5.0
5
+ huggingface-hub