abdulmatinomotoso committed on
Commit
2ad849f
1 Parent(s): dfae088

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import pandas as pd
4
+ import re
5
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
+ import torch
7
+
8
+
9
# Load the spam-comment classifier and its matching tokenizer
model_name = "valurank/distilroberta-spam-comments-detection"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Device used for inference: GPU when one is available, otherwise CPU.
# The model is moved there so its weights and the input tensors share a device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
13
+
14
+
15
+
16
def clean_text(raw_text):
    """Normalize a raw comment before it is tokenized.

    Steps, in order:
      1. Drop every character that cannot be encoded as ASCII.
      2. Replace newlines and tabs with spaces and collapse runs of spaces.
      3. Remove ``Date d/m/yyyy`` stamps and ``h:mm XX XX`` time stamps.
      4. Collapse spaces again and strip the ends, so removing a stamp does
         not leave a double or trailing space behind.

    Args:
        raw_text: The text to clean.

    Returns:
        The cleaned text; an empty string if nothing is left.
    """
    # remove non-ascii characters (e.g. Chinese characters, emoji)
    text = raw_text.encode("ascii", errors="ignore").decode("ascii")

    # Newlines and tabs become spaces. A separate pass for "\n\n" is not
    # needed: once every "\n" is replaced, no "\n\n" can remain.
    text = re.sub(r"[\n\t]", " ", text)

    # Collapse before matching: the date/time patterns expect exactly one
    # whitespace character between their parts.
    text = re.sub(" +", " ", text).strip()

    text = re.sub(r"Date\s\d{1,2}\/\d{1,2}\/\d{4}", "", text)  # remove date
    text = re.sub(r"\d{1,2}:\d{2}\s[A-Z]+\s[A-Z]+", "", text)  # remove time

    # The removals above leave behind the spaces that surrounded the match.
    return re.sub(" +", " ", text).strip()
33
+
34
+
35
# Classify a comment with the spam-detection model and return its label
def get_category(text):
    """Return the most probable label the model assigns to ``text``.

    The text is normalized with ``clean_text``, tokenized with truncation
    enabled, and run through the module-level ``model``. The logits are
    turned into probabilities with a softmax and the label with the highest
    probability is returned.

    Args:
        text: Raw comment text.

    Returns:
        The entry of ``model.config.id2label`` with the highest probability.
    """
    text = clean_text(text)

    input_tensor = tokenizer.encode(text, return_tensors="pt", truncation=True)
    # Send the inputs to whichever device the model's weights live on.
    input_tensor = input_tensor.to(model.device)

    # Inference only: skip building an autograd graph for every request.
    with torch.no_grad():
        logits = model(input_tensor).logits

    # Batch size is 1, so take the first (only) row of probabilities.
    probs = torch.softmax(logits, dim=1)[0].cpu().numpy()

    # Map each label to its probability and pick the most likely one.
    label_scores = {
        label: probs[int(index)]
        for index, label in model.config.id2label.items()
    }
    return max(label_scores, key=label_scores.get)
50
+
51
# Build the Gradio interface: one textbox in, get_category's result out as text
demo = gr.Interface(
    fn=get_category,
    inputs=gr.Textbox(label="Drop your comment here"),
    outputs="text",
    title="Spam comments detection",
)


# Launch the Gradio app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch(debug=True)