Spaces:

CSharpCorner
/

CSharpGrammer

Runtime error

App Files Files Community

Priyanhsu committed on May 30, 2023

Commit

e6cdfd2

1 Parent(s): ac15073

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -79

app.py CHANGED Viewed

@@ -1,84 +1,53 @@
-from transformers.pipelines.image_segmentation import Predictions
-from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
-import unidecode, re, unicodedata
-from bs4 import BeautifulSoup
-from urllib.request import urlopen
-from urllib.parse import urlparse
-from sklearn.metrics import confusion_matrix, accuracy_score
-import torch.nn.functional as F
-import gradio as gr
-import torch
-def check_by_title(title):
-  model = DistilBertForSequenceClassification.from_pretrained(".")
-  tokenizer = DistilBertTokenizer.from_pretrained(".")
-  test_encodings = tokenizer.encode_plus(
-    title,
-    truncation=True,
-  padding=True,
-  max_length=512,
-  return_tensors="pt"
-   )
-  model1=[]
-  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-  test_input_ids = test_encodings["input_ids"].to(device)
-  test_attention_mask = test_encodings["attention_mask"].to(device)
-  with torch.no_grad():
-    model1= model.to(device)
-    model1.eval()
-    outputs= model1( test_input_ids, attention_mask=test_attention_mask)
-    logits = outputs.logits
-    predicted_labels = torch.argmax(logits, dim=1)
-    probabilities = F.softmax(logits, dim=1)
-    confidence_score_title = torch.max(probabilities, dim=1).values.tolist()
-    predicted_labels = torch.argmax(outputs.logits, dim=1)
-  label_mapping = {1: "SFW", 0: "NSFW"} # 1:True 0:false
-  predicted_label_title = label_mapping[predicted_labels.item()]
-  return predicted_label_title, confidence_score_title
-def check_by_content(normalized_content_with_style):
-  test_encodings = tokenizer.encode_plus(
-      normalized_content_with_style,
-      truncation=True,
-      padding=True,
-      max_length=512,
-      return_tensors="pt"
-   )
-  model1=[]
-  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-  test_input_ids = test_encodings["input_ids"].to(device)
-  test_attention_mask = test_encodings["attention_mask"].to(device)
-  with torch.no_grad():
-    model1= model.to(device)
-    model1.eval()
-    outputs= model1( test_input_ids, attention_mask=test_attention_mask)
-    logits = outputs.logits
-    predicted_labels = torch.argmax(logits, dim=1)
-    probabilities = F.softmax(logits, dim=1)
-    confidence_scores_content = torch.max(probabilities, dim=1).values.tolist()
-  label_mapping = {1: "SFW", 0: "NSFW"} # 1:True 0:false
-  predicted_label_content = label_mapping[predicted_labels.item()]
-  return predicted_label_content, confidence_scores_content
-def predict_2(title, normalized_content_with_style):
-    predicted_label_title, confidence_score_title = check_by_title(title)
-    predicted_label_content, confidence_scores_content = check_by_content(normalized_content_with_style)
-    return predicted_label_title, confidence_score_title, predicted_label_content, confidence_scores_content
-demo = gr.Interface(
-  fn=predict_2,
-  inputs=[
-      gr.inputs.Textbox(label="Title", placeholder="Enter title"),
-      gr.inputs.Textbox(label="Content", placeholder="enter Content"),
-      ],
-  outputs= [
-      gr.outputs.Textbox(label="Title_prediction"),
-      gr.outputs.Textbox(label="Title_confidence_score"),
-      gr.outputs.Textbox(label="Content_prediction"),
-      gr.outputs.Textbox(label="content_confidence_score"),
-      #gr.outputs.Textbox(label="Description").style(show_copy_button=True)
-      ],
-)
-demo.launch()

+import numpy as np
+import pandas as pd
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import confusion_matrix
+import json
+import string
+import string
+import re
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+import gradio as gr
+import joblib
+model = joblib.load('model.bin')
+def remove_punctuation(text):
+    punctuation_free = "".join([i for i in text if i not in string.punctuation])
+    return punctuation_free
+def test_model(text):
+    # Convert text to lowercase
+    text = text.lower()
+    # Remove punctuation
+    text =remove_punctuation(text)
+    # Remove numbers
+    text = re.sub(r'\d+', '', text)
+    # Remove stopwords
+    stop_words = set(stopwords.words('english'))
+    tokens = word_tokenize(text)
+    filtered_text = [word for word in tokens if word not in stop_words]
+    # Join the filtered tokens back into a string
+    preprocessed_text = ' '.join(filtered_text)
+    # Vectorize the preprocessed text
+    text_vectorized = vectorizer.transform([preprocessed_text])
+    # Make prediction on the vectorized text
+    prediction = model.predict(text_vectorized)[0]
+    # Return the prediction
+    return prediction
+# Create the Gradio interface
+iface = gr.Interface(fn=test_model, inputs="text", outputs="text")
+# Launch the interface
+iface.launch(share=True)