Priyanhsu committed on
Commit
e6cdfd2
·
1 Parent(s): ac15073

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -79
app.py CHANGED
@@ -1,84 +1,53 @@
1
- from transformers.pipelines.image_segmentation import Predictions
2
- from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
3
- import unidecode, re, unicodedata
4
- from bs4 import BeautifulSoup
5
- from urllib.request import urlopen
6
- from urllib.parse import urlparse
7
- from sklearn.metrics import confusion_matrix, accuracy_score
8
- import torch.nn.functional as F
9
- import gradio as gr
10
- import torch
 
 
 
 
 
11
 
12
def check_by_title(title):
    """Classify a title as "SFW" or "NSFW" with the DistilBERT checkpoint in ".".

    Args:
        title: Text of the title to classify.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is ``"SFW"`` or ``"NSFW"``;
        ``confidence`` is the list produced by ``tolist()`` on the maximum
        softmax probability per row (one entry for a single title).
    """
    # NOTE: the checkpoint and tokenizer are read from the working directory
    # on every call.
    model = DistilBertForSequenceClassification.from_pretrained(".")
    tokenizer = DistilBertTokenizer.from_pretrained(".")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    encodings = tokenizer.encode_plus(
        title,
        truncation=True,
        padding=True,
        max_length=512,
        return_tensors="pt",
    )
    input_ids = encodings["input_ids"].to(device)
    attention_mask = encodings["attention_mask"].to(device)

    model = model.to(device)
    model.eval()
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits

    probabilities = F.softmax(logits, dim=1)
    confidence_score_title = torch.max(probabilities, dim=1).values.tolist()
    predicted_labels = torch.argmax(logits, dim=1)

    label_mapping = {1: "SFW", 0: "NSFW"}  # 1:True 0:false
    predicted_label_title = label_mapping[predicted_labels.item()]
    return predicted_label_title, confidence_score_title
38
 
39
def check_by_content(normalized_content_with_style):
    """Classify page content as "SFW" or "NSFW" with the DistilBERT checkpoint in ".".

    Args:
        normalized_content_with_style: Content text to classify.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is ``"SFW"`` or ``"NSFW"``;
        ``confidence`` is the list produced by ``tolist()`` on the maximum
        softmax probability per row (one entry for a single input).
    """
    # The model and tokenizer must be loaded here: no module-level ``model``
    # or ``tokenizer`` exists, so referring to them directly raised NameError.
    model = DistilBertForSequenceClassification.from_pretrained(".")
    tokenizer = DistilBertTokenizer.from_pretrained(".")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    encodings = tokenizer.encode_plus(
        normalized_content_with_style,
        truncation=True,
        padding=True,
        max_length=512,
        return_tensors="pt",
    )
    input_ids = encodings["input_ids"].to(device)
    attention_mask = encodings["attention_mask"].to(device)

    model = model.to(device)
    model.eval()
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits

    predicted_labels = torch.argmax(logits, dim=1)
    probabilities = F.softmax(logits, dim=1)
    confidence_scores_content = torch.max(probabilities, dim=1).values.tolist()

    label_mapping = {1: "SFW", 0: "NSFW"}  # 1:True 0:false
    predicted_label_content = label_mapping[predicted_labels.item()]

    return predicted_label_content, confidence_scores_content
 
 
63
 
64
def predict_2(title, normalized_content_with_style):
    """Classify the title and the content and return both results together.

    Returns:
        A flat 4-tuple: title label, title confidence, content label,
        content confidence.
    """
    title_label, title_confidence = check_by_title(title)
    content_label, content_confidence = check_by_content(
        normalized_content_with_style
    )
    return (
        title_label,
        title_confidence,
        content_label,
        content_confidence,
    )
68
 
69
# Gradio UI: two text inputs (title and content) are passed to predict_2, and
# its four return values fill the four output text boxes in order.
demo = gr.Interface(
    fn=predict_2,
    inputs=[
        gr.inputs.Textbox(label="Title", placeholder="Enter title"),
        gr.inputs.Textbox(label="Content", placeholder="enter Content"),
    ],
    outputs= [
        gr.outputs.Textbox(label="Title_prediction"),
        gr.outputs.Textbox(label="Title_confidence_score"),
        gr.outputs.Textbox(label="Content_prediction"),
        gr.outputs.Textbox(label="content_confidence_score"),
        #gr.outputs.Textbox(label="Description").style(show_copy_button=True)
    ],

)
# Launch the interface as soon as this module is executed.
demo.launch()
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.feature_extraction.text import CountVectorizer
4
+ from sklearn.naive_bayes import MultinomialNB
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.metrics import accuracy_score
7
+ from sklearn.metrics import confusion_matrix
8
+ import json
9
+ import string
10
+ import string
11
+ import re
12
+ from nltk.corpus import stopwords
13
+ from nltk.tokenize import word_tokenize
14
+ import gradio as gr
15
+ import joblib
16
 
17
# Load the prediction model from model.bin once, at import time.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only ship a model.bin that comes from a trusted source.
model = joblib.load('model.bin')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
def remove_punctuation(text):
    """Return ``text`` with every character found in ``string.punctuation`` removed."""
    kept_chars = []
    for ch in text:
        if ch in string.punctuation:
            continue
        kept_chars.append(ch)
    return "".join(kept_chars)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
def test_model(text):
    """Preprocess ``text`` and return the module-level ``model``'s prediction.

    Preprocessing steps, in order: lower-case, strip punctuation, strip
    digits, tokenize, drop English stopwords, and re-join with single spaces.
    The result is vectorized and passed to ``model.predict``.

    Args:
        text: Raw input string (supplied by the Gradio text box).

    Returns:
        The first element of ``model.predict(...)`` for the input.

    Raises:
        RuntimeError: If no fitted ``vectorizer`` is defined at module level.
    """
    # This file never assigns ``vectorizer``, so the bare name raised a
    # NameError on every request. Look it up explicitly and fail with an
    # actionable message instead.
    fitted_vectorizer = globals().get("vectorizer")
    if fitted_vectorizer is None:
        raise RuntimeError(
            "No fitted 'vectorizer' is loaded. Load the vectorizer that was "
            "fitted together with model.bin at module level (for example "
            "with joblib.load) before calling test_model()."
        )

    # Convert text to lowercase
    text = text.lower()

    # Remove punctuation
    text = remove_punctuation(text)

    # Remove numbers
    text = re.sub(r'\d+', '', text)

    # Remove stopwords
    # NOTE(review): needs the NLTK 'stopwords' corpus and tokenizer data to be
    # installed; nothing in this file downloads them.
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(text)
    filtered_text = [word for word in tokens if word not in stop_words]

    # Join the filtered tokens back into a string
    preprocessed_text = ' '.join(filtered_text)

    # Vectorize the preprocessed text
    text_vectorized = fitted_vectorizer.transform([preprocessed_text])

    # Make prediction on the vectorized text and return it
    return model.predict(text_vectorized)[0]
49
# Create the Gradio interface: one text input is passed to test_model and its
# return value is shown as text.
iface = gr.Interface(fn=test_model, inputs="text", outputs="text")

# Launch the interface; share=True asks Gradio to also create a public share
# link.
iface.launch(share=True)