Spaces:

group9-dsailab
/

multimodal_misinfo_detector

Sleeping

App Files Community

rajyalakshmijampani committed on Oct 28

Commit

b758e97

1 Parent(s): bd95f0c

gradio fixes

Browse files

Files changed (1) hide show

app.py +108 -60

app.py CHANGED Viewed

@@ -1,72 +1,120 @@
 import gradio as gr
-import torch, zipfile, os, tempfile, requests
 from PIL import Image
-# --- Download helper ---
-def download_file_from_google_drive(url, dest):
-    if os.path.exists(dest):  # cached
-        return dest
-    print(f"⬇️ Downloading from {url}")
-    r = requests.get(url, allow_redirects=True)
-    with open(dest, "wb") as f:
-        f.write(r.content)
-    return dest
-# --- Text model loader ---
-def load_text_model():
-    zip_path = download_file_from_google_drive(
-        "https://drive.google.com/uc?export=download&id=1Sf2DoVaYBqBcdvonf6GJpo_bLWATSgeq",
-        "text_model.zip")
-    with zipfile.ZipFile(zip_path, 'r') as z:
-        z.extractall("text_model")
-    # example loading — replace with your own
-    from transformers import AutoTokenizer, AutoModelForSequenceClassification
-    tokenizer = AutoTokenizer.from_pretrained("text_model")
-    model = AutoModelForSequenceClassification.from_pretrained("text_model")
-    return tokenizer, model
-# --- Image model loader ---
-def load_image_model():
-    path = download_file_from_google_drive(
-        "https://drive.google.com/uc?export=download&id=19xRLjNtGWty9loc0_6LPjIYOl-EIf2bm",
-        "image_model.pth")
-    model = torch.load(path, map_location="cpu")
-    model.eval()
-    return model
-# Lazy caching
-tokenizer, text_model, image_model = None, None, None
-# --- Text classification ---
 def classify_text(claim):
-    global tokenizer, text_model
-    if tokenizer is None:
-        tokenizer, text_model = load_text_model()
-    # (Fake retrieval for now)
-    evidences = ["Evidence 1", "Evidence 2", "Evidence 3"]
-    inp = claim + " " + " ".join(evidences)
-    inputs = tokenizer(inp, return_tensors="pt", truncation=True)
-    out = text_model(**inputs).logits
-    label = out.argmax(-1).item()
-    label = "REAL" if label == 1 else "FAKE"
-    return f"{label}\n\nTop evidences:\n" + "\n".join(evidences)
-# --- Image classification ---
 def classify_image(img):
-    global image_model
-    if image_model is None:
-        image_model = load_image_model()
-    # preprocess and predict (dummy)
-    # You’ll replace this with your transforms + inference
-    return "REAL" if torch.rand(1).item() > 0.5 else "FAKE"
-# --- Gradio UI ---
-demo = gr.Interface(
-    fn=[classify_text, classify_image],
-    inputs=[gr.Textbox(label="Enter claim text"), gr.Image(type="pil", label="Upload image")],
-    outputs=[gr.Textbox(label="Text Result"), gr.Textbox(label="Image Result")],
-    title="Text & Image Real/Fake Classifier"
-)
-if __name__ == "__main__":
-    demo.launch()

+import os
+import torch
+import zipfile
+import requests
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from PIL import Image
+from io import BytesIO
+# -------------------
+# Utility: Download from Google Drive
+# -------------------
+def download_from_drive(drive_url, dest_path):
+    if os.path.exists(dest_path):
+        print(f"✅ Found {dest_path}, skipping download.")
+        return dest_path
+    print(f"⬇️ Downloading {drive_url} ...")
+    file_id = drive_url.split("id=")[-1].split("&")[0]
+    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
+    response = requests.get(download_url)
+    with open(dest_path, "wb") as f:
+        f.write(response.content)
+    print(f"✅ Saved to {dest_path}")
+    return dest_path
+# -------------------
+# Download models (modify these links!)
+# -------------------
+TEXT_MODEL_ZIP_URL = "https://drive.google.com/uc?export=download&id=1WUB7JzrhWXFBFFsKn6PAKh_4F3410NPZ"
+IMAGE_MODEL_URL = "https://drive.google.com/uc?export=download&id=1WUB7JzrhWXFBFFsKn6PAKh_4F3410NPZ"
+os.makedirs("models", exist_ok=True)
+# Text model
+zip_path = download_from_drive(TEXT_MODEL_ZIP_URL, "models/text_model.zip")
+if not os.path.exists("models/text_model"):
+    with zipfile.ZipFile(zip_path, "r") as zip_ref:
+        zip_ref.extractall("models/text_model")
+tokenizer = AutoTokenizer.from_pretrained("models/text_model")
+text_model = AutoModelForSequenceClassification.from_pretrained("models/text_model")
+# Image model
+image_model_path = download_from_drive(IMAGE_MODEL_URL, "models/image_model.pth")
+image_model = torch.load(image_model_path, map_location=torch.device("cpu"))
+image_model.eval()
+# -------------------
+# Tavily evidence retrieval (mocked if no key)
+# -------------------
+TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
+def get_top3_evidence(claim):
+    if not TAVILY_API_KEY:
+        return ["Tavily API key not set. Using dummy evidences."]
+    try:
+        response = requests.post(
+            "https://api.tavily.com/search",
+            headers={"Authorization": f"Bearer {TAVILY_API_KEY}"},
+            json={"query": claim, "num_results": 3},
+        )
+        data = response.json()
+        results = [r["content"] for r in data.get("results", [])][:3]
+        return results
+    except Exception as e:
+        return [f"Error getting evidence: {str(e)}"]
+# -------------------
+# Text classification
+# -------------------
 def classify_text(claim):
+    evidences = get_top3_evidence(claim)
+    full_input = claim + " " + " ".join(evidences)
+    inputs = tokenizer(full_input, return_tensors="pt", truncation=True, padding=True)
+    outputs = text_model(**inputs)
+    preds = torch.softmax(outputs.logits, dim=1)
+    label = torch.argmax(preds).item()
+    label_str = "REAL" if label == 1 else "FAKE"
+    explanation = f"Based on the retrieved evidences and model prediction, this claim is **{label_str}**."
+    return f"Prediction: {label_str}\n\nTop Evidences:\n" + "\n".join(evidences) + f"\n\nExplanation:\n{explanation}"
+# -------------------
+# Image classification
+# -------------------
 def classify_image(img):
+    if img is None:
+        return "Please upload an image."
+    transform = torch.nn.Sequential(
+        torch.nn.Identity()  # 👈 replace with actual transforms if needed
+    )
+    img_tensor = torch.tensor(
+        [list(img.resize((224, 224)).getdata())], dtype=torch.float32
+    ).view(1, 224, 224, 3).permute(0, 3, 1, 2) / 255.0
+    with torch.no_grad():
+        output = image_model(img_tensor)
+        preds = torch.softmax(output, dim=1)
+        label = torch.argmax(preds).item()
+        label_str = "REAL" if label == 1 else "FAKE"
+    return f"Prediction: {label_str}\n\nExplanation: The image model classifies this as {label_str.lower()} based on learned patterns."
+# -------------------
+# UI Layout (Gradio)
+# -------------------
+with gr.Blocks() as demo:
+    gr.Markdown("# 🧠 Multimodal Misinformation Detector")
+    with gr.Tab("Text Detector"):
+        claim = gr.Textbox(label="Enter Claim")
+        text_output = gr.Textbox(label="Model Output", lines=8)
+        text_button = gr.Button("Classify Claim")
+        text_button.click(classify_text, inputs=claim, outputs=text_output)
+    with gr.Tab("Image Detector"):
+        img_input = gr.Image(type="pil", label="Upload Image")
+        img_output = gr.Textbox(label="Model Output", lines=6)
+        img_button = gr.Button("Classify Image")
+        img_button.click(classify_image, inputs=img_input, outputs=img_output)
+demo.launch()