RealKintaro committed on
Commit
7f9da02
•
1 Parent(s): 8ad8630
Deployment/Bert_medium.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from transformers import AutoModel
3
+ from torch import nn
4
+ import pytorch_lightning as pl
5
+
6
+
7
class MediumBert(pl.LightningModule):
    """Pretrained BERT encoder followed by one linear classification layer.

    The defaults reproduce the original fixed configuration: the
    'asafaya/bert-medium-arabic' encoder and an 18-logit head.
    """

    def __init__(self, model_name='asafaya/bert-medium-arabic', n_classes=18):
        """Build the encoder and its classification head.

        Args:
            model_name: Hugging Face model identifier passed to
                ``AutoModel.from_pretrained``.
            n_classes: Number of output logits produced by ``self.fc``.
        """
        super().__init__()
        self.bert_model = AutoModel.from_pretrained(model_name)
        # Size the head from the encoder's own config instead of a hard-coded
        # 512 so the layer always matches the pooled output width.
        self.fc = nn.Linear(self.bert_model.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        """Return the classification logits for a tokenized batch.

        Args:
            input_ids: Token ids, forwarded unchanged to the encoder.
            attention_mask: Attention mask, forwarded unchanged to the encoder.

        Returns:
            The output of ``self.fc`` applied to element 1 of the encoder
            output.
        """
        out = self.bert_model(input_ids=input_ids, attention_mask=attention_mask)
        # NOTE(review): element 1 is presumably the pooled [CLS] output, as
        # for BertModel -- confirm for the checkpoint that AutoModel resolves.
        pooler = out[1]
        return self.fc(pooler)
Deployment/Dialect_Bert.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+ from transformers import BertModel
3
+ import pytorch_lightning as pl
4
+
5
+ BERT_MODEL_NAME = 'alger-ia/dziribert'
6
class Dialect_Detection(pl.LightningModule):
    """BERT encoder (``BERT_MODEL_NAME``) with a linear dialect classifier."""

    def __init__(self, n_classes):
        """Create the encoder, the linear head and the cross-entropy criterion.

        Args:
            n_classes: Number of logits produced by the classifier layer.
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(BERT_MODEL_NAME)
        hidden_size = self.bert.config.hidden_size
        self.classifier = nn.Linear(hidden_size, n_classes)
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, input_ids, attention_mask, labels=None):
        """Compute logits, and the loss too when ``labels`` is given.

        Returns:
            ``logits`` when ``labels`` is None, otherwise ``(loss, logits)``.
        """
        encoded = self.bert(input_ids, attention_mask)
        logits = self.classifier(encoded.pooler_output)

        if labels is None:
            return logits

        # Labels were supplied: return the loss alongside the logits.
        return self.criterion(logits, labels), logits
Deployment/Offensive_Bert.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch.nn as nn
3
+ from transformers import BertModel
4
+
5
class BertClassifier(nn.Module):
    """AraBERT encoder with a small feed-forward head for binary classification.
    """
    def __init__(self, freeze_bert=False):
        """
        @param    freeze_bert (bool): when True, gradients are disabled for every
                      BERT parameter so only the classifier head is trained;
                      leave False to fine-tune BERT as well.
        """
        super().__init__()
        # Encoder output width, head hidden width, number of labels.
        in_features, hidden_units, n_labels = 768, 50, 2

        # Pretrained encoder.
        self.bert = BertModel.from_pretrained('aubmindlab/bert-base-arabertv02')

        # Head: Linear -> ReLU -> Dropout -> Linear.
        head_layers = [
            nn.Linear(in_features, hidden_units),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_units, n_labels),
        ]
        self.classifier = nn.Sequential(*head_layers)

        # Optionally freeze the encoder weights.
        if freeze_bert:
            for weight in self.bert.parameters():
                weight.requires_grad = False

    def forward(self, input_ids, attention_mask):
        """
        Run BERT and feed the first-token vector to the classifier head.
        @param    input_ids (torch.Tensor): an input tensor with shape (batch_size,
                      max_length)
        @param    attention_mask (torch.Tensor): a tensor that holds attention mask
                      information with shape (batch_size, max_length)
        @return   logits (torch.Tensor): an output tensor with shape (batch_size,
                      num_labels)
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        # Element 0 holds the last hidden states; position 0 along the sequence
        # axis is the `[CLS]` token used for classification.
        hidden_states = outputs[0]
        cls_vector = hidden_states[:, 0, :]

        return self.classifier(cls_vector)
Deployment/__pycache__/Bert_medium.cpython-38.pyc ADDED
Binary file (978 Bytes). View file
 
Deployment/__pycache__/Dialect_Bert.cpython-38.pyc ADDED
Binary file (1.13 kB). View file
 
Deployment/__pycache__/Offensive_Bert.cpython-38.pyc ADDED
Binary file (1.93 kB). View file
 
Deployment/__pycache__/data_cleaning.cpython-38.pyc ADDED
Binary file (3.63 kB). View file
 
Deployment/app.py ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Remove every public (non-underscore) name from the module namespace,
# printing each name as it is deleted.
keys = [name for name in list(globals()) if not name.startswith('_')]

for o in keys:
    print(o)
    del globals()[o]
9
+
10
+ # Import the model classes and the text-cleaning helper from the local modules (e.g. Bert_medium.py)
11
+
12
+ from Bert_medium import MediumBert
13
+ from Offensive_Bert import BertClassifier
14
+ from data_cleaning import cleaning_content
15
+ from Dialect_Bert import Dialect_Detection
16
+
17
+ import torch
18
+ device = torch.device("cpu")
19
+
20
+
21
+ from transformers import BertTokenizer, AutoTokenizer, BertTokenizerFast
22
+ import streamlit as st
23
+
24
+ # file path
25
+ import os
26
+
27
+ path_file = os.path.dirname(os.path.abspath(__file__))
28
+ parent_path = os.path.dirname(path_file)
29
+
30
+ ##########################FUNCTIONS########################
31
+
32
def predict_off(review_text,model,device,tokenizer):
    """Return the index of the highest-scoring class for ``review_text``.

    Args:
        review_text: Text handed to ``tokenizer.encode_plus``.
        model: Callable taking ``(input_ids, attention_mask)`` and returning a
            tensor of scores.
        device: Device the encoded tensors are moved to before the forward pass.
        tokenizer: Object exposing a Hugging Face style ``encode_plus``.

    Returns:
        int: Position of the maximum value in the flattened model output.
    """
    encoded_review = tokenizer.encode_plus(
        review_text,
        max_length=256,
        # Without truncation=True the tokenizer ignores max_length, so an
        # over-long input would reach the model untruncated.
        truncation=True,
        add_special_tokens=True,
        return_token_type_ids=False,
        padding='longest',
        return_attention_mask=True,
        return_tensors='pt',
    )

    input_ids = encoded_review['input_ids'].to(device)
    attention_mask = encoded_review['attention_mask'].to(device)

    # Inference only: switch off dropout and skip gradient bookkeeping so the
    # prediction is deterministic.
    model.eval()
    with torch.no_grad():
        output = model(input_ids, attention_mask)

    # torch.argmax without `dim` works on the flattened tensor, matching the
    # previous numpy argmax() call.
    return int(torch.argmax(output))
55
+ #########################################""
56
def predict_other(review_text,model,device,tokenizer):
    """Return the index of the highest-scoring class for ``review_text``.

    Same flow as the other ``predict_*`` helpers, with a 217-token limit.

    Args:
        review_text: Text handed to ``tokenizer.encode_plus``.
        model: Callable taking ``(input_ids, attention_mask)`` and returning a
            tensor of scores.
        device: Device the encoded tensors are moved to before the forward pass.
        tokenizer: Object exposing a Hugging Face style ``encode_plus``.

    Returns:
        int: Position of the maximum value in the flattened model output.
    """
    encoded_review = tokenizer.encode_plus(
        review_text,
        max_length=217,
        # Without truncation=True the tokenizer ignores max_length, so an
        # over-long input would reach the model untruncated.
        truncation=True,
        add_special_tokens=True,
        return_token_type_ids=False,
        padding='longest',
        return_attention_mask=True,
        return_tensors='pt',
    )

    input_ids = encoded_review['input_ids'].to(device)
    attention_mask = encoded_review['attention_mask'].to(device)

    # Inference only: switch off dropout and skip gradient bookkeeping so the
    # prediction is deterministic.
    model.eval()
    with torch.no_grad():
        output = model(input_ids, attention_mask)

    # torch.argmax without `dim` works on the flattened tensor, matching the
    # previous numpy argmax() call.
    return int(torch.argmax(output))
78
+ #########################"##################
79
+
80
def predict_dialect(review_text,model,device,tokenizer):
    """Return the index of the highest-scoring dialect class for ``review_text``.

    Same flow as the other ``predict_*`` helpers, with a 123-token limit.

    Args:
        review_text: Text handed to ``tokenizer.encode_plus``.
        model: Callable taking ``(input_ids, attention_mask)`` and returning a
            tensor of scores.
        device: Device the encoded tensors are moved to before the forward pass.
        tokenizer: Object exposing a Hugging Face style ``encode_plus``.

    Returns:
        int: Position of the maximum value in the flattened model output.
    """
    encoded_review = tokenizer.encode_plus(
        review_text,
        max_length=123,
        # Without truncation=True the tokenizer ignores max_length, so an
        # over-long input would reach the model untruncated.
        truncation=True,
        add_special_tokens=True,
        return_token_type_ids=False,
        padding='longest',
        return_attention_mask=True,
        return_tensors='pt',
    )

    input_ids = encoded_review['input_ids'].to(device)
    attention_mask = encoded_review['attention_mask'].to(device)

    # Inference only: switch off dropout and skip gradient bookkeeping so the
    # prediction is deterministic.
    model.eval()
    with torch.no_grad():
        output = model(input_ids, attention_mask)

    # torch.argmax without `dim` works on the flattened tensor, matching the
    # previous numpy argmax() call.
    return int(torch.argmax(output))
101
+
102
+
103
+ # Main prediction function
104
+
105
def predict(text,device,offensive_model,offensive_tokenizer,racism_model,misogyny_model,verbalabuse_model,dialect_model,religionhate_model,tokenizer_dialect,other_tokenizer,off_dictionary,racism_dict,misogyny_dict,verbalabuse_dict,dialect_dict,religionhate_dict):
    """Clean ``text`` and run it through the classifiers.

    The offensiveness model always runs first. The racism, verbal-abuse and
    religion-hate models are consulted only when its label is 'offensive';
    otherwise those three fields receive fixed negative labels.

    Returns:
        dict with the keys "Offensiveness", "Dialect", "Misogyny", "Racism",
        "Verbal Abuse" and "Religion Hate", each mapped to a label string.
    """
    text = cleaning_content(text)

    off_label = off_dictionary[predict_off(text, offensive_model, device, offensive_tokenizer)]

    if off_label != 'offensive':
        # Non-offensive text: only misogyny and dialect are predicted.
        misogyny_label = misogyny_dict[predict_other(text, misogyny_model, device, other_tokenizer)]
        dialect_label = dialect_dict[predict_dialect(text, dialect_model, device, tokenizer_dialect)]
        return {
            "Offensiveness": off_label,
            "Dialect": dialect_label,
            "Misogyny": misogyny_label,
            "Racism": "Not_Racism",
            "Verbal Abuse": "Not Verbal Abuse",
            "Religion Hate": "Not Religion Hate",
        }

    # Offensive text: query every specialised model.
    racism_label = racism_dict[predict_other(text, racism_model, device, other_tokenizer)]
    misogyny_label = misogyny_dict[predict_other(text, misogyny_model, device, other_tokenizer)]
    verbal_label = verbalabuse_dict[predict_other(text, verbalabuse_model, device, other_tokenizer)]
    dialect_label = dialect_dict[predict_dialect(text, dialect_model, device, tokenizer_dialect)]
    religion_label = religionhate_dict[predict_other(text, religionhate_model, device, other_tokenizer)]

    return {
        "Offensiveness": off_label,
        "Dialect": dialect_label,
        "Misogyny": misogyny_label,
        "Racism": racism_label,
        "Verbal Abuse": verbal_label,
        "Religion Hate": religion_label,
    }
133
+ ###############################################
134
+
135
+ from geopy.geocoders import Nominatim
136
+ import numpy as np
137
+ import pandas as pd
138
+ import folium
139
+
140
+ geolocator = Nominatim(user_agent="NLP")
141
+
142
def geolocate(country):
    """Look up ``country`` with the module-level geocoder.

    Args:
        country: Query string passed to ``geolocator.geocode``.

    Returns:
        ``(latitude, longitude)`` on success, or ``np.nan`` when the lookup
        raises or yields no result.
    """
    try:
        loc = geolocator.geocode(country)
    except Exception:
        # A bare `except:` would also swallow BaseException-derived signals
        # such as KeyboardInterrupt; only ordinary lookup errors are treated
        # as a missing value here.
        return np.nan

    if loc is None:
        # No match for the query: report a missing value explicitly instead of
        # relying on an AttributeError from `None.latitude`.
        return np.nan

    return (loc.latitude, loc.longitude)
151
+
152
+ # Stream lit app
153
+
154
# ---------------------------------------------------------------------------
# Streamlit app: load every model once per session, then serve predictions.
# ---------------------------------------------------------------------------

st.title("Arabic Hate Speech Detection")

st.write("This app detects hate speech in Arabic dialect text")

st.write("Please enter your text below")

# Checkpoint paths are built component by component with os.path.join;
# hard-coded backslash separators only resolve on Windows.
_MODELS_DIR = os.path.join(parent_path, 'models')


def _load_weights(model, *relative_parts):
    """Load the checkpoint at models/<relative_parts> into ``model`` for inference."""
    checkpoint_path = os.path.join(_MODELS_DIR, *relative_parts)
    # map_location keeps the load working when the checkpoint was saved on
    # another device than the one used here.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model = model.to(device)
    # Inference only: disable dropout so predictions are deterministic.
    model.eval()
    return model


# Load only while the flag is missing or False, so a run that failed part-way
# through loading is retried instead of being treated as already loaded.
if 'Loaded' not in st.session_state or not st.session_state['Loaded']:
    st.markdown('# Loading models')
    st.session_state['Loaded'] = False

    # Offensiveness detection model
    offensive_model = _load_weights(BertClassifier(), 'modelv3.pt')
    offensive_tokenizer = BertTokenizer.from_pretrained('aubmindlab/bert-base-arabertv02', do_lower_case=True)

    st.session_state['Offensive_model'] = offensive_model
    st.session_state['Offensive_tokenizer'] = offensive_tokenizer
    print('Offensive model loaded')
    off_dictionary = {1: 'offensive', 0: 'non_offensive'}
    st.session_state['Offensive_dictionary'] = off_dictionary

    # The four MediumBert classifiers share one tokenizer.
    # NOTE(review): MediumBert emits 18 logits while each label dictionary
    # below has only keys 0 and 1 -- confirm the checkpoints never predict a
    # class index above 1.
    other_tokenizer = AutoTokenizer.from_pretrained("asafaya/bert-medium-arabic")
    st.session_state['Other_tokenizer'] = other_tokenizer

    # Racism
    racism_model = _load_weights(MediumBert(), 'racism', 'racism_arabert.pt')
    racism_dict = {0: 'non_racist', 1: 'racist'}
    st.session_state['Racism_model'] = racism_model
    st.session_state['Racism_dictionary'] = racism_dict
    print('Racism model loaded')

    # Religion hate
    religionhate_model = _load_weights(MediumBert(), 'religion_hate', 'religion_hate_params.pt')
    religionhate_dict = {0: 'Religion Hate', 1: 'Not Religion Hate'}
    st.session_state['Religion_hate_model'] = religionhate_model
    st.session_state['Religion_hate_dictionary'] = religionhate_dict
    print('Religion Hate model loaded')

    # Verbal abuse
    verbalabuse_model = _load_weights(MediumBert(), 'verbal_abuse', 'verbal_abuse_arabert.pt')
    verbalabuse_dict = {0: 'Verbal Abuse', 1: 'Not Verbal Abuse'}
    st.session_state['Verbal_abuse_model'] = verbalabuse_model
    st.session_state['Verbal_abuse_dictionary'] = verbalabuse_dict
    print('Verbal Abuse model loaded')

    # Misogyny
    misogyny_model = _load_weights(MediumBert(), 'misogyny', 'misogyny.pt')
    misogyny_dict = {0: 'misogyny', 1: 'non_misogyny'}
    st.session_state['Misogyny_model'] = misogyny_model
    st.session_state['Misogyny_dictionary'] = misogyny_dict
    print('Misogyny model loaded')

    # Dialect detection model (10 classes, matching dialect_dict below)
    dialect_model = _load_weights(Dialect_Detection(10), 'dialect_classifier.pt')
    st.session_state['Dialect_model'] = dialect_model
    print('Dialect model loaded')

    tokenizer_dialect = BertTokenizerFast.from_pretrained('alger-ia/dziribert')
    st.session_state['Dialect_tokenizer'] = tokenizer_dialect

    dialect_dict = {0: 'lebanon', 1: 'egypt', 2: 'morocco', 3: 'tunisia', 4: 'algeria', 5: 'qatar', 6: 'iraq', 7: 'saudi arabia', 8: 'libya', 9: 'jordan'}
    st.session_state['Dialect_dictionary'] = dialect_dict

    # Set only after everything above succeeded.
    st.session_state['Loaded'] = True
else:
    print('Model already loaded')

text = st.text_area("Enter Text")

if st.button("Predict") and text != '':
    result = predict(text=text, device=device,
                     offensive_model=st.session_state['Offensive_model'],
                     offensive_tokenizer=st.session_state['Offensive_tokenizer'],
                     racism_model=st.session_state['Racism_model'],
                     misogyny_model=st.session_state['Misogyny_model'],
                     verbalabuse_model=st.session_state['Verbal_abuse_model'],
                     dialect_model=st.session_state['Dialect_model'],
                     religionhate_model=st.session_state['Religion_hate_model'],
                     tokenizer_dialect=st.session_state['Dialect_tokenizer'],
                     other_tokenizer=st.session_state['Other_tokenizer'],
                     off_dictionary=st.session_state['Offensive_dictionary'],
                     racism_dict=st.session_state['Racism_dictionary'],
                     misogyny_dict=st.session_state['Misogyny_dictionary'],
                     verbalabuse_dict=st.session_state['Verbal_abuse_dictionary'],
                     dialect_dict=st.session_state['Dialect_dictionary'],
                     religionhate_dict=st.session_state['Religion_hate_dictionary'])

    st.write(result)

    location = geolocate(result['Dialect'])

    # geolocate() returns np.nan rather than a (lat, lon) tuple when the
    # lookup fails; indexing that value would crash the app.
    if isinstance(location, tuple):
        # Map centred on the country of the predicted dialect.
        location = pd.DataFrame({'lat': [location[0]], 'lon': [location[1]]})
        st.map(data=location, zoom=5)
    else:
        st.write('Could not locate the predicted dialect on the map')

elif text == '':
    st.write('Please enter text to predict')
Deployment/data_cleaning.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import string
3
+ import nltk
4
+ nltk.download('stopwords')
5
+
6
+
7
+ arabic_stopwords = set(nltk.corpus.stopwords.words("arabic"))
8
+
9
# Arabic diacritic marks plus the tatweel (kashida) elongation character.
# Written as \u escapes so the pattern survives any re-encoding of this file.
arabic_diacritics = re.compile(r"""
                             \u0651    | # Tashdid
                             \u064e    | # Fatha
                             \u064b    | # Tanwin Fath
                             \u064f    | # Damma
                             \u064c    | # Tanwin Damm
                             \u0650    | # Kasra
                             \u064d    | # Tanwin Kasr
                             \u0652    | # Sukun
                             \u0640      # Tatwil/Kashida
                         """, re.VERBOSE)

# Punctuation stripped during cleaning: Arabic-specific and typographic marks
# first, then ASCII punctuation from the standard library.
arabic_punctuations = (
    '`\u00f7\u00d7\u061b'        # backtick, division sign, multiplication sign, Arabic semicolon
    '<>_()*&^%]['
    '\u0640\u060c'               # tatweel, Arabic comma
    '/:"\u061f.,\'{}~'           # includes the Arabic question mark
    '\u00a6+|!'                  # broken bar
    '\u201d\u2026\u201c\u2013'   # right double quote, ellipsis, left double quote, en dash
    '\u0640'                     # tatweel (listed twice in the original string)
)
english_punctuations = string.punctuation
punctuations = arabic_punctuations + english_punctuations
24
+
25
+
26
def remove_urls (text):
    """Return ``text`` with every ``http(s)://...`` style link removed."""
    url_pattern = r'(https|http)?:\/\/(\w|\.|\/|\?|\=|\&|\%)*\b'
    return re.sub(url_pattern, '', text, flags=re.MULTILINE)
29
+
30
+
31
def remove_emails(text):
    """Return ``text`` with every e-mail address removed.

    The earlier pattern was anchored with ``^`` and ``$``, so it only matched
    an address that filled a whole line; addresses inside a sentence were left
    in place. The unanchored pattern removes them wherever they occur.
    """
    email_pattern = r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+"
    return re.sub(email_pattern, "", text)
34
+
35
+ # def remove_emoji(text):
36
+ # return emoji.get_emoji_regexp().sub(u'', text)
37
+
38
def remove_emoji(data):
    """Return ``data`` with emoji and related symbol code points removed.

    The character class is deliberately broad: besides emoji blocks it spans
    U+24C2 to U+1F251 and every code point from U+10000 upwards.
    """
    emoji_ranges = (
        u"\U0001F600-\U0001F64F",  # emoticons
        u"\U0001F300-\U0001F5FF",  # symbols & pictographs
        u"\U0001F680-\U0001F6FF",  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF",  # flags (iOS)
        u"\U00002500-\U00002BEF",  # box drawing through misc symbols and arrows
        u"\U00002702-\U000027B0",
        u"\U00002702-\U000027B0",
        u"\U000024C2-\U0001F251",
        u"\U0001f926-\U0001f937",
        u"\U00010000-\U0010ffff",
        u"\u2640-\u2642",
        u"\u2600-\u2B55",
        u"\u200d",                 # zero-width joiner
        u"\u23cf",
        u"\u23e9",
        u"\u231a",
        u"\ufe0f",                 # variation selector-16
        u"\u3030",
    )
    emoj = re.compile("[" + "".join(emoji_ranges) + "]+", re.UNICODE)
    return emoj.sub('', data)
60
+
61
# Letter-variant folding used by normalization(). Code points are written as
# \u escapes so the table survives any re-encoding of this file.
_NORMALIZATION_TABLE = str.maketrans({
    '\u0625': '\u0627',  # alef with hamza below  -> bare alef
    '\u0623': '\u0627',  # alef with hamza above  -> bare alef
    '\u0622': '\u0627',  # alef with madda        -> bare alef
    '\u0649': '\u064a',  # alef maksura           -> yeh
    '\u0624': '\u0621',  # waw with hamza         -> hamza
    '\u0626': '\u0621',  # yeh with hamza         -> hamza
    '\u0629': '\u0647',  # teh marbuta            -> heh
    '\u06af': '\u0643',  # gaf                    -> kaf
})


def normalization(text):
    """Fold common Arabic letter variants to a single canonical letter.

    No replacement character is itself a key of the table, so one translate
    pass gives the same result as applying the substitutions one after
    another.

    Args:
        text: String to normalize.

    Returns:
        The string with the variants listed in ``_NORMALIZATION_TABLE``
        replaced; all other characters are left unchanged.
    """
    return text.translate(_NORMALIZATION_TABLE)
69
+
70
def remove_diacritics(text):
    """Return ``text`` with every match of ``arabic_diacritics`` deleted."""
    return arabic_diacritics.sub('', text)
73
+
74
def remove_stopwords(text):
    """Drop whitespace-separated tokens found in ``arabic_stopwords``.

    The surviving tokens are re-joined with single spaces.
    """
    kept_tokens = []
    for token in text.split():
        if token in arabic_stopwords:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)
77
+
78
def cleaning_content(line):
    """Run the full text-cleaning pipeline on one piece of text.

    Steps, in order: flatten newlines, drop e-mail addresses, URLs and emoji,
    drop tokens containing '@', strip a few markup remnants, remove
    punctuation and stop words, normalize letter variants and remove
    diacritics.

    Args:
        line: Text to clean. A float is treated as a missing value.

    Returns:
        The cleaned, stripped string, or None when ``line`` is a float.
    """
    if isinstance(line, float):
        # presumably a NaN from a pandas column -- there is nothing to clean
        return None

    # str.replace returns a new string; the result used to be discarded, which
    # made this step a no-op.
    line = line.replace('\n', ' ')
    line = remove_emails(line)
    line = remove_urls(line)
    line = remove_emoji(line)

    # Any token containing '@' is marked as USERID; the marker is removed
    # again by the replace chain below.
    nline = [w if '@' not in w else 'USERID' for w in line.split()]
    line = ' '.join(nline)
    line = line.replace('RT', '').replace('<LF>', '').replace('<br />','').replace('&quot;', '').replace('<url>', '').replace('USERID', '')

    # Turn every punctuation character into a space. Padding and then deleting
    # punctuation in two passes gave the same result, because
    # remove_stopwords() re-splits on whitespace and re-joins with single
    # spaces.
    line = line.translate(str.maketrans({key: ' ' for key in punctuations}))

    line = remove_stopwords(line)
    line = remove_diacritics(normalization(line))

    return line.strip()
102
+
103
def hasDigits(s):
    """Return True if ``s`` contains an ASCII or Arabic-Indic digit."""
    for char in s:
        code_point = ord(char)
        # 48-57 are ASCII '0'-'9'.
        if 48 <= code_point <= 57:
            return True
        # 1632-1641 are the Arabic-Indic digits U+0660-U+0669.
        if 1632 <= code_point <= 1641:
            return True
    return False
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
  title: NLP Project
3
- emoji: πŸƒ
4
- colorFrom: pink
5
- colorTo: gray
6
  sdk: streamlit
7
  sdk_version: 1.15.2
8
- app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: NLP Project
3
+ emoji: 🚀
4
+ colorFrom: red
5
+ colorTo: purple
6
  sdk: streamlit
7
  sdk_version: 1.15.2
8
+ app_file: Deployment/app.py
9
  pinned: false
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
models/dialect_classifier.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8d9047effcbd6a8f914f074bdfa4aa3898969a22ea5ca0b75346dd5f20bbb66
3
+ size 497881137
models/misogyny/label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43bcfabf857ecea698a3d16884c09a138ef0ffdbae400b6dbe1bcb61153046a2
3
+ size 540
models/misogyny/misogyny.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58cef97517696f0df0ffa7fdbd6492d299e0ecc35c5c3a1ba438cfb9da2e06a5
3
+ size 168617261
models/modelv3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6f54084c52cfb8eb5370194b928a8ac19acffafd4c3b7ad13bb12c90667aeee
3
+ size 541013353
models/offensive_dict.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22d8cb74c4ef29eb32ba61b0bfa173d9427fbb7348edae1e7ac7bcae7622cccb
3
+ size 48
models/offensive_max_len.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d7d259e641cf037cbb2ed3449a27212d015e4481e113817ea8cabc26b65cabd
3
+ size 15
models/racism/Racism_Detector.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8d5876d8d722a32b094fbaf85e008a31659ecf9c799a873f45097fb0156943a
3
+ size 39546896
models/racism/racism_arabert.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8aeaa8895178ba5b65a65aa5fb7e1b6736dfd7d20441fa4a19e5ef722066d9a
3
+ size 168617261
models/racism/racism_arabert_maxlen.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68d53d4b9a2d32c9ed1387fb638a991b45f38f07f8a1c2882018c62616998f21
3
+ size 116
models/racism/racism_dict.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f27c423d7c2becfaecdb8029d1b7835605c622d62bca661099906506791b593
3
+ size 42
models/racism/racismmaxlen.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfffe1ff1f9447c82a655b9d1140e64eb94a731d098c8b34db58d5488de9c0a7
3
+ size 118
models/racism/racismtokenizer.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51d5d54e103e5c6d553e60725ad330ed02258bc1f7538dfb5dc3c00777e2953c
3
+ size 365072
models/religion_hate/religion_hate_params.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfc0eb24605267d5bc8c3f74f96e4c77143510795adc6702b8d865db32a5f8a2
3
+ size 168617261
models/verbal_abuse/verbal_abuse_arabert.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd98c5df09ec6070888c3731975566d5fc62c65d60204bbaa31831840b2f18f1
3
+ size 168617261
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ pytorch-lightning == 1.8.6
2
+ torch == 1.11.0+cu113
3
+ transformers == 4.23.1
4
+ numpy == 1.18.5
5
+ pandas == 1.4.0
6
+ nltk == 3.7
7
+ geopy == 2.3.0