# Hugging Face Space: STron007/Fake-News-Detection-Demo (status: Running)
# File size: 8,455 Bytes

import gradio as gr
import onnxruntime as ort
from transformers import RobertaTokenizer, ViTImageProcessor
from PIL import Image
import numpy as np
import torch
import os
import time
import logging

# Logging: configure the root logger once at import time. Each record shows
# timestamp, logger name, level and message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Pretrained preprocessors, loaded once at import and reused for every request:
# the ViT image processor prepares pixel inputs, the RoBERTa tokenizer prepares text.
vit_processor = ViTImageProcessor.from_pretrained(
    "google/vit-base-patch16-224-in21k"
)
tokenizer = RobertaTokenizer.from_pretrained(
    "roberta-base"
)

# Path of the exported ONNX model, resolved relative to the working directory.
model_path = "./multimodal_model.onnx"
try:
    # Check up front so a missing file produces an explicit, readable error.
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"ONNX model not found at {model_path}")

    logger.info(f"Loading ONNX model from {model_path}")

    sess_options = ort.SessionOptions()
    # NOTE(review): 0 appears to be onnxruntime's most verbose severity level;
    # confirm this is intended outside of debugging.
    sess_options.log_severity_level = 0
    ort_session = ort.InferenceSession(
        model_path,
        providers=['CPUExecutionProvider'],
        sess_options=sess_options,
    )
    logger.info("ONNX model loaded successfully")

    # Record the model's declared inputs/outputs for the startup log.
    input_names = []
    input_shapes = {}
    for node in ort_session.get_inputs():
        input_names.append(node.name)
        input_shapes[node.name] = node.shape
    output_names = [node.name for node in ort_session.get_outputs()]

    logger.info(f"Model inputs: {input_names} with shapes {input_shapes}")
    logger.info(f"Model outputs: {output_names}")

except Exception as load_error:
    # Log for the Space's console, then abort startup: the app cannot run
    # without a model.
    logger.error(f"Error loading ONNX model: {load_error}")
    raise

# Class names in model-output order: predict_news indexes this list with the
# argmax of the softmaxed logits.
labels = [
    "Real",
    "Real Text with fake image",
    "Fake",
]

def softmax(x):
    """Compute softmax values for each set of scores in x.

    Args:
        x: Array-like of scores. For input with two or more dimensions the
            softmax is taken along axis 1 (one distribution per row); a 1-D
            input is treated as a single set of scores.

    Returns:
        numpy.ndarray with the same shape as ``x`` whose entries along the
        normalised axis sum to 1.
    """
    x = np.asarray(x)
    # Keep the original row-wise behaviour for batched input, but also accept
    # a plain score vector instead of raising an axis error.
    axis = 1 if x.ndim > 1 else 0
    # Subtracting the maximum leaves the result unchanged mathematically while
    # preventing overflow in exp() for large scores.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)

def image_with_prediction(img, label, confidence):
    """Return a copy of the image with an overlay showing the prediction.

    A 40-pixel semi-transparent black bar is composited over the bottom of
    the copy and the text ``"<label>: <confidence>"`` (confidence formatted
    with ``.1%``) is drawn in white, horizontally centred on that bar. The
    input image itself is not modified.

    Args:
        img: PIL image to annotate.
        label: Text naming the predicted class.
        confidence: Numeric score rendered with the ``.1%`` format.

    Returns:
        A new PIL image. Inputs whose mode is neither "RGB" nor "RGBA" are
        converted to "RGB" first.
    """
    from PIL import Image, ImageDraw, ImageFont

    bar_height = 40

    # The caption is drawn with an (R, G, B) fill, which single-band modes such
    # as "L" or "1" reject. Normalise those to RGB; RGB/RGBA inputs are copied
    # unchanged. Both paths return a new image, so `img` is never mutated.
    if img.mode in ("RGB", "RGBA"):
        img_copy = img.copy()
    else:
        img_copy = img.convert("RGB")
    draw = ImageDraw.Draw(img_copy)

    width, height = img_copy.size

    # Passing the overlay as its own mask makes paste() honour its alpha
    # channel, giving a translucent rather than opaque bar.
    overlay = Image.new('RGBA', (width, bar_height), (0, 0, 0, 150))
    img_copy.paste(overlay, (0, height - bar_height), overlay)

    text = f"{label}: {confidence:.1%}"

    # Arial may not be installed on the host; fall back to PIL's built-in font.
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    # draw.textlength is missing on older Pillow releases; fall back to
    # font.getsize when available, otherwise to a fixed estimate.
    try:
        text_width = draw.textlength(text, font=font)
    except AttributeError:
        text_width = font.getsize(text)[0] if hasattr(font, 'getsize') else 200

    text_position = ((width - text_width) // 2, height - 35)
    draw.text(text_position, text, fill=(255, 255, 255), font=font)

    return img_copy

def predict_news(text, image):
    """Classify a text/image pair with the ONNX model and build the UI outputs.

    Args:
        text: Headline or article text from the textbox; may be None or blank.
        image: PIL image from the image component; may be None.

    Returns:
        A 3-tuple ``(scores, annotated_image, html)``:

        * ``scores`` maps every entry of ``labels`` to a float probability
          (all zeros when input validation fails or an error occurs);
        * ``annotated_image`` is the result of ``image_with_prediction`` for
          the winning class, or None on validation failure/error;
        * ``html`` is the message or HTML snippet for the analysis panel.

    Exceptions raised while preprocessing or running the model are logged
    with their traceback and reported through the returned message instead of
    propagating.
    """
    from html import escape  # function-scope import; used only for the error message

    # One all-zero mapping shared by every non-success return path.
    zero_scores = {label: 0.0 for label in labels}

    if text is None or not text.strip():
        return zero_scores, None, "Please enter some text to analyze."

    if image is None:
        return zero_scores, None, "Please upload an image to analyze."

    try:
        logger.info(f"Processing text: {text[:50]}...")
        logger.info(f"Processing image size: {image.size}")

        # Process text input: pad/truncate to exactly 80 tokens, NumPy output.
        # Calling the tokenizer directly replaces the deprecated encode_plus.
        inputs = tokenizer(
            text,
            add_special_tokens=True,
            return_tensors='np',
            max_length=80,
            truncation=True,
            padding='max_length',
        )

        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']

        logger.info(f"Input IDs shape: {input_ids.shape}")
        logger.info(f"Attention mask shape: {attention_mask.shape}")

        # Process image input
        image_processed = vit_processor(images=image, return_tensors="np")["pixel_values"]
        logger.info(f"Processed image shape: {image_processed.shape}")

        # Map each declared model input to its tensor by name substring.
        # An input that matches nothing is reported explicitly rather than
        # being left out and surfacing later as an opaque runtime failure.
        ort_inputs = {}
        unmatched = []
        for input_meta in ort_session.get_inputs():
            input_name = input_meta.name
            lowered = input_name.lower()
            if 'ids' in lowered:
                ort_inputs[input_name] = input_ids
            elif 'mask' in lowered:
                ort_inputs[input_name] = attention_mask
            elif 'image' in lowered:
                ort_inputs[input_name] = image_processed
            else:
                unmatched.append(input_name)
        if unmatched:
            raise ValueError(f"Unrecognized ONNX model inputs: {unmatched}")

        logger.info(f"ONNX input keys: {list(ort_inputs.keys())}")

        # Run inference; perf_counter is monotonic, so the measured duration
        # cannot be skewed by wall-clock adjustments.
        start_time = time.perf_counter()
        logger.info("Starting inference")
        outputs = ort_session.run(None, ort_inputs)
        inference_time = time.perf_counter() - start_time
        logger.info(f"Inference completed in {inference_time:.3f}s")

        # Process model outputs
        logits = outputs[0]
        logger.info(f"Raw output shape: {logits.shape}, values: {logits}")

        probs = softmax(logits)[0]
        logger.info(f"Probabilities: {probs}")

        pred_idx = int(np.argmax(probs))
        confidence = float(probs[pred_idx])

        # Colours are hex values because the template appends "15" as an alpha
        # suffix (#RRGGBBAA); appended to a colour *name* it is invalid CSS.
        # NOTE(review): the ** markers below are Markdown and are presumably
        # shown literally inside the HTML output — confirm and adjust if so.
        if pred_idx == 1:
            color = "#ffa500"  # orange
            message = f"This content appears to be **REAL TEXT WITH FAKE IMAGE** with {confidence:.1%} confidence."
        elif pred_idx == 2:
            color = "#ff0000"  # red
            message = f"This content appears to contain **FAKE** with {confidence:.1%} confidence."
        else:
            color = "#008000"  # green
            message = f"This content appears to be **REAL** with {confidence:.1%} confidence."

        analysis = f"""
        <div style='text-align: center; padding: 10px; background-color: {color}15; border-radius: 5px; margin-top: 10px;'>
            <span style='font-size: 18px; color: {color}; font-weight: bold;'>{message}</span>
            <p>Inference time: {inference_time:.3f} seconds</p>
        </div>
        """

        result = {label: float(probs[i]) for i, label in enumerate(labels)}

        interpretation = image_with_prediction(image, labels[pred_idx], confidence)

        return result, interpretation, analysis

    except Exception as e:
        # UI boundary: log with traceback and show the failure to the user.
        logger.exception(f"Error during analysis: {str(e)}")
        # The message is rendered by an HTML component, so escape it.
        return zero_scores, None, f"Error during analysis: {escape(str(e))}"

# Sample inputs for the demo: each row is [headline text, image URL], in the
# same order as the interface's text and image inputs.
examples = [
    [
        "COVID-19 vaccine causes severe side effects in 80% of recipients",
        "https://images.unsplash.com/photo-1605289982774-9a6fef564df8?q=80&w=1000&auto=format&fit=crop",
    ],
    [
        "Scientists discover new species of deep-sea fish",
        "https://images.unsplash.com/photo-1524704796725-9fc3044a58b2?q=80&w=1000&auto=format&fit=crop",
    ],
]
    
# Assemble the Gradio UI. Components are created in display order:
# header, input/output columns, examples, footer.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header
    gr.Markdown(
        """
        # 📰 Fake News Detector (RoBERTa + ViT)
        
        This multimodal AI system analyzes both text and images to detect potentially fake news content.
        Upload an image and enter a news headline to see if the combination is likely to be real or fake news.
        """
    )

    with gr.Row():
        # Left column: user inputs and the trigger button
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                lines=3,
                label="News Headline / Text",
                placeholder="Enter the news headline or text here...",
            )
            image_input = gr.Image(label="Associated Image", type="pil")

            analyze_btn = gr.Button("Analyze Content", variant="primary")

        # Right column: the three values returned by predict_news
        with gr.Column(scale=1):
            label_output = gr.Label(label="Prediction Probabilities")
            image_output = gr.Image(label="Visual Analysis", type="pil")
            analysis_html = gr.HTML(label="Analysis")

    # The examples widget and the button share the same wiring.
    demo_inputs = [text_input, image_input]
    demo_outputs = [label_output, image_output, analysis_html]

    gr.Examples(
        fn=predict_news,
        examples=examples,
        inputs=demo_inputs,
        outputs=demo_outputs,
        cache_examples=True,
    )

    # Footer describing the model
    gr.Markdown(
        """
        This system combines:
        - **RoBERTa**: Analyzes the textual content
        - **ViT**: Processes the image data
        - **Multimodal Fusion**: Combines both signals to make a prediction
        
        The model was trained on the Fakeddit dataset containing real and fake news pairs with both text and images.
        """
    )

    analyze_btn.click(
        fn=predict_news,
        inputs=demo_inputs,
        outputs=demo_outputs,
    )

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    logger.info("Starting Gradio application")
    demo.launch()