File size: 3,362 Bytes
14b8594
 
 
 
 
 
 
 
 
 
 
860860a
14b8594
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
860860a
14b8594
860860a
14b8594
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
860860a
14b8594
860860a
 
 
 
14b8594
 
 
860860a
 
 
14b8594
860860a
14b8594
 
 
 
860860a
14b8594
860860a
14b8594
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from transformers.pipelines.image_segmentation import Predictions
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
import unidecode, re, unicodedata
from bs4 import BeautifulSoup
from urllib.request import urlopen
from urllib.parse import urlparse
from sklearn.metrics import confusion_matrix, accuracy_score
import torch.nn.functional as F
import gradio as gr
import torch

def check_by_title(title):
    """Classify a title as "SFW" or "NSFW".

    A DistilBERT sequence-classification model and its tokenizer are loaded
    from the current directory (``"."``) on every call.

    Args:
        title: Text of the title to classify.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is ``"SFW"`` (class 1) or
        ``"NSFW"`` (class 0); ``confidence`` is a list holding the softmax
        probability of the predicted class for each input row.
    """
    model = DistilBertForSequenceClassification.from_pretrained(".")
    tokenizer = DistilBertTokenizer.from_pretrained(".")
    encodings = tokenizer.encode_plus(
        title,
        truncation=True,
        padding=True,
        max_length=512,
        return_tensors="pt",
    )

    # Run on the GPU when one is available, otherwise on the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_ids = encodings["input_ids"].to(device)
    attention_mask = encodings["attention_mask"].to(device)
    model = model.to(device)
    model.eval()

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits

    probabilities = F.softmax(logits, dim=1)
    confidence_score_title = torch.max(probabilities, dim=1).values.tolist()
    predicted_labels = torch.argmax(logits, dim=1)

    label_mapping = {1: "SFW", 0: "NSFW"}  # 1: True, 0: False
    predicted_label_title = label_mapping[predicted_labels.item()]
    return predicted_label_title, confidence_score_title

def check_by_content(normalized_content_with_style):
    """Classify page content as "SFW" or "NSFW".

    A DistilBERT sequence-classification model and its tokenizer are loaded
    from the current directory (``"."``) on every call.

    Args:
        normalized_content_with_style: Text of the content to classify.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is ``"SFW"`` (class 1) or
        ``"NSFW"`` (class 0); ``confidence`` is a list holding the softmax
        probability of the predicted class for each input row.
    """
    # Load the model and tokenizer here: no module-level `model` or
    # `tokenizer` exists in this file, so relying on globals would raise
    # NameError.
    model = DistilBertForSequenceClassification.from_pretrained(".")
    tokenizer = DistilBertTokenizer.from_pretrained(".")
    encodings = tokenizer.encode_plus(
        normalized_content_with_style,
        truncation=True,
        padding=True,
        max_length=512,
        return_tensors="pt",
    )

    # Run on the GPU when one is available, otherwise on the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_ids = encodings["input_ids"].to(device)
    attention_mask = encodings["attention_mask"].to(device)
    model = model.to(device)
    model.eval()

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits

    probabilities = F.softmax(logits, dim=1)
    confidence_scores_content = torch.max(probabilities, dim=1).values.tolist()
    predicted_labels = torch.argmax(logits, dim=1)

    label_mapping = {1: "SFW", 0: "NSFW"}  # 1: True, 0: False
    predicted_label_content = label_mapping[predicted_labels.item()]
    return predicted_label_content, confidence_scores_content

def predict_2(title, normalized_content_with_style):
    """Classify a title and its content and return both results together.

    Args:
        title: Title text, passed to ``check_by_title``.
        normalized_content_with_style: Content text, passed to
            ``check_by_content``.

    Returns:
        A 4-tuple ``(title_label, title_confidence, content_label,
        content_confidence)``.
    """
    title_label, title_confidence = check_by_title(title)
    content_label, content_confidence = check_by_content(
        normalized_content_with_style
    )
    return (
        title_label,
        title_confidence,
        content_label,
        content_confidence,
    )

# Gradio UI: the two text inputs (title, content) are passed to predict_2,
# and its four return values fill the four output textboxes in order.
# The top-level gr.Textbox component is used because the gr.inputs /
# gr.outputs namespaces are deprecated in Gradio 3 and removed in Gradio 4.
demo = gr.Interface(
    fn=predict_2,
    inputs=[
        gr.Textbox(label="Title", placeholder="Enter title"),
        gr.Textbox(label="Content", placeholder="enter Content"),
    ],
    outputs=[
        gr.Textbox(label="Title_prediction"),
        gr.Textbox(label="Title_confidence_score"),
        gr.Textbox(label="Content_prediction"),
        gr.Textbox(label="content_confidence_score"),
    ],
)
demo.launch()