|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import torch |
|
|
|
class TextDetectionApp:
    """Classify text with DeBERTa, RoBERTa, or a feedforward model stacked on both.

    The two transformer classifiers are loaded from the Hugging Face Hub and the
    feedforward model is loaded from a TorchScript file. The feedforward model
    consumes the four class scores produced by the two transformers.

    NOTE(review): what ``LABEL_0`` / ``LABEL_1`` mean (human vs. generated) is
    defined by the checkpoints, not by this file -- confirm against the model
    cards before presenting the class ids to users.
    """

    DEBERTA_CHECKPOINT = "zeyadusf/deberta-DAIGT-MODELS"
    ROBERTA_CHECKPOINT = "zeyadusf/roberta-DAIGT-kaggle"
    FF_MODEL_PATH = "model_scripted.pt"

    def __init__(
        self,
        deberta_checkpoint=DEBERTA_CHECKPOINT,
        roberta_checkpoint=ROBERTA_CHECKPOINT,
        ff_model_path=FF_MODEL_PATH,
    ):
        """Load the tokenizers, both transformer classifiers and the feedforward model.

        Args:
            deberta_checkpoint (str): Hub id or local path of the DeBERTa classifier.
            roberta_checkpoint (str): Hub id or local path of the RoBERTa classifier.
            ff_model_path (str): Path of the TorchScript feedforward model. A
                relative path is resolved against the current working directory.
        """
        self.deberta_tokenizer = AutoTokenizer.from_pretrained(deberta_checkpoint)
        self.deberta_model = AutoModelForSequenceClassification.from_pretrained(deberta_checkpoint)

        self.roberta_tokenizer = AutoTokenizer.from_pretrained(roberta_checkpoint)
        self.roberta_model = AutoModelForSequenceClassification.from_pretrained(roberta_checkpoint)

        self.ff_model = torch.jit.load(ff_model_path)

    @staticmethod
    def _logits(tokenizer, model, text):
        """Tokenize ``text`` and return the classifier's raw logits.

        The forward pass runs under ``torch.no_grad()`` because this app only
        does inference; tracking gradients would just waste memory.
        """
        encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            return model(**encoded).logits

    @staticmethod
    def _label_scores(logits):
        """Convert the first row of ``logits`` into ``{"label", "score"}`` dicts."""
        probabilities = torch.softmax(logits, dim=1)[0]
        return [
            {"label": f"LABEL_{index}", "score": probability.item()}
            for index, probability in enumerate(probabilities)
        ]

    def api_huggingface(self, text):
        """Generate class scores with the DeBERTa and RoBERTa models.

        Args:
            text (str): The input text to classify.

        Returns:
            tuple: ``(roberta_predictions, deberta_predictions)``. Each element is
            a list of ``{"label": "LABEL_<i>", "score": float}`` dicts holding the
            softmax scores, ordered by class index.
        """
        deberta_predictions = self._label_scores(
            self._logits(self.deberta_tokenizer, self.deberta_model, text)
        )
        roberta_predictions = self._label_scores(
            self._logits(self.roberta_tokenizer, self.roberta_model, text)
        )
        # RoBERTa first: generate_ff_input unpacks the tuple in this order.
        return roberta_predictions, deberta_predictions

    def generate_ff_input(self, models_results):
        """Build the feedforward model's input from the transformer scores.

        Args:
            models_results (tuple): ``(roberta_predictions, deberta_predictions)``
                as returned by ``api_huggingface``. Each element must hold at
                least two dicts with ``"label"`` and ``"score"`` keys.

        Returns:
            torch.Tensor: A ``float32`` tensor of shape ``(1, 4)`` laid out as
            ``[roberta_0, roberta_1, deberta_0, deberta_1]``, where ``*_0`` is
            the ``LABEL_0`` score.

        Raises:
            ValueError: If either prediction list is malformed. Failing here
                gives a clear message instead of a shape error later inside
                the feedforward model.
        """
        roberta, deberta = models_results
        features = []
        for model_name, predictions in (("RoBERTa", roberta), ("DeBERTa", deberta)):
            try:
                first, second = predictions[0], predictions[1]
                # The entries may arrive sorted by score rather than by class
                # index, so make sure the LABEL_0 score always comes first.
                if first["label"] != "LABEL_0":
                    first, second = second, first
                features.extend((first["score"], second["score"]))
            except (IndexError, KeyError, TypeError) as err:
                raise ValueError(
                    f"{model_name} results must contain two entries with "
                    f"'label' and 'score' keys, got {predictions!r}"
                ) from err

        return torch.tensor(features, dtype=torch.float32).view(1, -1)

    def detect_text(self, text):
        """Score ``text`` with the feedforward model.

        Args:
            text (str): The input text to score.

        Returns:
            float: Element ``[0][0]`` of the feedforward model's output.
            NOTE(review): its scale and direction are defined by the
            TorchScript model, not by this file.
        """
        features = self.generate_ff_input(self.api_huggingface(text))
        with torch.no_grad():
            score = self.ff_model(features)[0][0].item()
        # Return the local value so concurrent requests cannot read each
        # other's result; the attribute is kept for existing readers.
        self.output = score
        return score

    def classify_text(self, text, model_choice):
        """Classify ``text`` with the selected model.

        Args:
            text (str): The input text to classify.
            model_choice (str): The model to use ('DeBERTa', 'RoBERTa', or 'Feedforward').

        Returns:
            str: A human-readable result, or an explanatory message when the
            model choice is invalid or the text is empty.
        """
        if model_choice not in ("DeBERTa", "RoBERTa", "Feedforward"):
            return "Invalid model selection."

        # Empty or whitespace-only text has nothing to classify.
        if text is None or (isinstance(text, str) and not text.strip()):
            return "Please enter some text to classify."

        if model_choice == "Feedforward":
            detection_score = self.detect_text(text)
            return f"Feedforward Detection Score: {detection_score}"

        if model_choice == "DeBERTa":
            tokenizer, model = self.deberta_tokenizer, self.deberta_model
        else:
            tokenizer, model = self.roberta_tokenizer, self.roberta_model

        predicted_class_id = self._logits(tokenizer, model, text).argmax().item()
        return f"{model_choice} Prediction: Class {predicted_class_id}"
|
|
|
|
|
|
|
# Load every model once at start-up; the Gradio callback reuses this instance.
app = TextDetectionApp()

# Input widgets: free text plus the choice of which model scores it.
text_input = gr.Textbox(lines=2, placeholder="Enter your text here...")
model_selector = gr.Radio(
    choices=["DeBERTa", "RoBERTa", "Feedforward"],
    label="Model Choice",
)

# classify_text receives the widget values in the order listed in `inputs`.
iface = gr.Interface(
    fn=app.classify_text,
    inputs=[text_input, model_selector],
    outputs="text",
    title="Text Classification with Multiple Models",
    description="Classify text using DeBERTa, RoBERTa, or a custom Feedforward model.",
)

# Start the web UI.
iface.launch()
|
|