Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,84 +1,53 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
from
|
5 |
-
from
|
6 |
-
from
|
7 |
-
from sklearn.metrics import confusion_matrix
|
8 |
-
import
|
9 |
-
import
|
10 |
-
import
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
-
|
13 |
-
model = DistilBertForSequenceClassification.from_pretrained(".")
|
14 |
-
tokenizer = DistilBertTokenizer.from_pretrained(".")
|
15 |
-
test_encodings = tokenizer.encode_plus(
|
16 |
-
title,
|
17 |
-
truncation=True,
|
18 |
-
padding=True,
|
19 |
-
max_length=512,
|
20 |
-
return_tensors="pt"
|
21 |
-
)
|
22 |
-
model1=[]
|
23 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
24 |
-
test_input_ids = test_encodings["input_ids"].to(device)
|
25 |
-
test_attention_mask = test_encodings["attention_mask"].to(device)
|
26 |
-
with torch.no_grad():
|
27 |
-
model1= model.to(device)
|
28 |
-
model1.eval()
|
29 |
-
outputs= model1( test_input_ids, attention_mask=test_attention_mask)
|
30 |
-
logits = outputs.logits
|
31 |
-
predicted_labels = torch.argmax(logits, dim=1)
|
32 |
-
probabilities = F.softmax(logits, dim=1)
|
33 |
-
confidence_score_title = torch.max(probabilities, dim=1).values.tolist()
|
34 |
-
predicted_labels = torch.argmax(outputs.logits, dim=1)
|
35 |
-
label_mapping = {1: "SFW", 0: "NSFW"} # 1:True 0:false
|
36 |
-
predicted_label_title = label_mapping[predicted_labels.item()]
|
37 |
-
return predicted_label_title, confidence_score_title
|
38 |
|
39 |
-
def
|
40 |
-
|
41 |
-
|
42 |
-
truncation=True,
|
43 |
-
padding=True,
|
44 |
-
max_length=512,
|
45 |
-
return_tensors="pt"
|
46 |
-
)
|
47 |
-
model1=[]
|
48 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
49 |
-
test_input_ids = test_encodings["input_ids"].to(device)
|
50 |
-
test_attention_mask = test_encodings["attention_mask"].to(device)
|
51 |
-
with torch.no_grad():
|
52 |
-
model1= model.to(device)
|
53 |
-
model1.eval()
|
54 |
-
outputs= model1( test_input_ids, attention_mask=test_attention_mask)
|
55 |
-
logits = outputs.logits
|
56 |
-
predicted_labels = torch.argmax(logits, dim=1)
|
57 |
-
probabilities = F.softmax(logits, dim=1)
|
58 |
-
confidence_scores_content = torch.max(probabilities, dim=1).values.tolist()
|
59 |
-
label_mapping = {1: "SFW", 0: "NSFW"} # 1:True 0:false
|
60 |
-
predicted_label_content = label_mapping[predicted_labels.item()]
|
61 |
|
62 |
-
|
|
|
|
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
predicted_label_content, confidence_scores_content = check_by_content(normalized_content_with_style)
|
67 |
-
return predicted_label_title, confidence_score_title, predicted_label_content, confidence_scores_content
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
4 |
+
from sklearn.naive_bayes import MultinomialNB
|
5 |
+
from sklearn.model_selection import train_test_split
|
6 |
+
from sklearn.metrics import accuracy_score
|
7 |
+
from sklearn.metrics import confusion_matrix
|
8 |
+
import json
|
9 |
+
import string
|
10 |
+
import string
|
11 |
+
import re
|
12 |
+
from nltk.corpus import stopwords
|
13 |
+
from nltk.tokenize import word_tokenize
|
14 |
+
import gradio as gr
|
15 |
+
import joblib
|
16 |
|
17 |
+
# Deserialize the object stored in 'model.bin'; test_model() later calls
# its .predict() method.
# NOTE(review): joblib.load unpickles arbitrary objects -- only ship a
# model.bin that comes from a trusted source.
model = joblib.load('model.bin')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
def remove_punctuation(text):
    """Return *text* with every character found in ``string.punctuation`` dropped.

    Characters are examined one at a time and the survivors are concatenated
    in their original order.
    """
    kept_chars = []
    for ch in text:
        if ch in string.punctuation:
            continue
        kept_chars.append(ch)
    return "".join(kept_chars)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
+
def test_model(text):
    """Preprocess a raw string and classify it with the module-level ``model``.

    The input is lowercased, stripped of punctuation and of digit runs,
    tokenized, filtered against NLTK's English stopword list, re-joined with
    single spaces, vectorized, and handed to ``model.predict``.

    Args:
        text: The raw string to classify.

    Returns:
        The first element of ``model.predict`` for the single vectorized input.
    """
    # Normalize: lowercase -> drop punctuation -> drop runs of digits.
    cleaned = re.sub(r'\d+', '', remove_punctuation(text.lower()))

    # Drop English stopwords from the tokenized text.
    english_stopwords = set(stopwords.words('english'))
    kept_tokens = [
        token for token in word_tokenize(cleaned)
        if token not in english_stopwords
    ]

    # NOTE(review): `vectorizer` is not defined in the visible part of this
    # file -- confirm it is created or loaded elsewhere; otherwise this line
    # raises NameError.
    features = vectorizer.transform([' '.join(kept_tokens)])

    # A single document was vectorized, so only the first prediction matters.
    return model.predict(features)[0]
|
49 |
+
# Build the Gradio UI: one text input wired to test_model, one text output.
iface = gr.Interface(
    fn=test_model,
    inputs="text",
    outputs="text",
)

# Start serving the app; share=True requests a public share link from Gradio.
iface.launch(share=True)
|