rajyalakshmijampani committed
Commit 8d0a810 · 1 Parent(s): b4e73b5

image classifier update

Files changed (2)
  1. app.py +62 -26
  2. requirements.txt +0 -1
app.py CHANGED
@@ -1,11 +1,11 @@
 import os
 import torch
+from torch import nn
 import json
 import requests
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
-from PIL import Image
-from io import BytesIO
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline, CLIPProcessor, CLIPModel
+from collections import OrderedDict
 import wikipedia
 import wikipediaapi
 import re
@@ -14,7 +14,24 @@ from sklearn.metrics.pairwise import cosine_similarity
 from tavily import TavilyClient
 from huggingface_hub import InferenceClient
 
+class CLIPImageClassifier(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+        self.classifier = nn.Sequential(
+            nn.Linear(self.clip.config.vision_config.hidden_size, 256),
+            nn.ReLU(),
+            nn.Dropout(0.5),
+            nn.Linear(256, 1),
+            nn.Sigmoid()
+        )
+
+    def forward(self, pixel_values):
+        feats = self.clip.vision_model(pixel_values=pixel_values).pooler_output
+        return self.classifier(feats)
+
 text_classifier = None
+image_classifier = None
 TAVILY_KEY = None
 GOOGLE_KEY = None
 HF_TOKEN = None
@@ -23,6 +40,7 @@ embed_model = SentenceTransformer("all-MiniLM-L6-v2")
 explain_model = "meta-llama/Llama-3.1-8B-Instruct"
 text_model = "rajyalakshmijampani/fever_finetuned_deberta"
 wiki = wikipediaapi.Wikipedia(language='en', user_agent='fact-checker/1.0')
+image_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
 def get_text_classifier():
     global text_classifier
@@ -32,6 +50,29 @@ def get_text_classifier():
     text_classifier = pipeline("text-classification", model=seq_clf, tokenizer=tokenizer)
     return text_classifier
 
+def get_image_classifier():
+    global image_classifier
+    if image_classifier is None:
+        url = "https://huggingface.co/rajyalakshmijampani/finetuned_clip/resolve/main/best_clip_finetuned_classifier.pth"
+        path = "best_clip_finetuned_classifier.pth"
+
+        if not os.path.exists(path):
+            r = requests.get(url)
+            with open(path, "wb") as f:
+                f.write(r.content)
+
+        image_classifier = CLIPImageClassifier()
+        state = torch.load(path, map_location="cpu")
+        clean_state = OrderedDict(
+            (k[7:], v) if k.startswith("module.") else (k, v)
+            for k, v in state.items()
+        )
+        image_classifier.load_state_dict(clean_state, strict=False)
+        image_classifier.eval()
+        return image_classifier
+
+    return image_classifier
+
 def _rank_sentences(claim, sentences, top_k=4):
     if not sentences: return []
     emb_c = embed_model.encode([claim])
@@ -115,7 +156,7 @@ def get_evidence_sentences(claim, k=3):
     evid = [e for e in evid if len(e.strip()) > 10]
     return (evid or ["Error: No relevant evidence found."])[:k]
 
-# --- Classification Function ---
+# --- Text Classification Function ---
 def classify_text(claim, hf_token, tavily_key, google_key):
 
     global HF_TOKEN, TAVILY_KEY, GOOGLE_KEY
@@ -184,24 +225,19 @@ def classify_text(claim, hf_token, tavily_key, google_key):
     return formatted_output.strip()
 
 
-# -------------------
-# Image classification
-# -------------------
-def classify_image(img):
-    if img is None:
-        return "Please upload an image."
-    transform = torch.nn.Sequential(
-        torch.nn.Identity() # 👈 replace with actual transforms if needed
-    )
-    img_tensor = torch.tensor(
-        [list(img.resize((224, 224)).getdata())], dtype=torch.float32
-    ).view(1, 224, 224, 3).permute(0, 3, 1, 2) / 255.0
-    with torch.no_grad():
-        output = image_model(img_tensor)
-    preds = torch.softmax(output, dim=1)
-    label = torch.argmax(preds).item()
-    label_str = "REAL" if label == 1 else "FAKE"
-    return f"Prediction: {label_str}\n\nExplanation: The image model classifies this as {label_str.lower()} based on learned patterns."
+# ---- Image Classification Function ----
+def classify_image(image):
+    global image_processor
+    classifier = get_image_classifier()
+    try:
+        inputs = image_processor(images=image.convert("RGB"), return_tensors="pt")["pixel_values"]
+        with torch.no_grad():
+            output = classifier(inputs)
+        p = output.item()
+        label = "Fake" if p > 0.5 else "Real"
+        return f"**Prediction:** {label}\n**Confidence score:** {p:.2f}"
+    except Exception as e:
+        return f"Error: {e}"
 
 # -------------------
 # UI Layout (Gradio)
@@ -219,9 +255,9 @@ with gr.Blocks() as demo:
 
         with gr.Column(scale=1): # Right half — user token inputs
             gr.Markdown("## Enter your API keys")
-            hf_token = gr.Textbox(label="Hugging Face Token", type="password", value = "Required")
-            tavily_key = gr.Textbox(label="Tavily API Key", type="password", value = "Required")
-            google_key = gr.Textbox(label="Google Fact Check API Key", type="password", value = "Required")
+            hf_token = gr.Textbox(label="Hugging Face Token 🔴", type="password")
+            tavily_key = gr.Textbox(label="Tavily API Key 🔴", type="password")
+            google_key = gr.Textbox(label="Google Fact Check API Key 🔴", type="password")
 
     # Enable button when all fields filled
     def enable_button(hf, tavily, google):
@@ -240,7 +276,7 @@ with gr.Blocks() as demo:
 
     with gr.Tab("Image Detector"):
        img_input = gr.Image(type="pil", label="Upload Image")
-        img_output = gr.Textbox(label="Model Output", lines=8)
+        img_output = gr.Markdown(label="Model Output", value="Results will appear here...")
        img_button = gr.Button("Classify Image")
        img_button.click(classify_image, inputs=img_input, outputs=img_output)
 
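A note on the OrderedDict cleanup inside get_image_classifier(): checkpoints saved from an nn.DataParallel-wrapped model carry a "module." prefix on every parameter key, which a plain module cannot load. A minimal sketch of the same pattern, with a toy nn.Linear standing in for the real classifier:

from torch import nn
from collections import OrderedDict

net = nn.Linear(4, 2)
wrapped = nn.DataParallel(net)     # registers net as the submodule "module"
state = wrapped.state_dict()       # keys: "module.weight", "module.bias"

# Strip the 7-character "module." prefix so the unwrapped module accepts the weights
clean = OrderedDict(
    (k[7:], v) if k.startswith("module.") else (k, v)
    for k, v in state.items()
)
net.load_state_dict(clean)         # loads without missing/unexpected keys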
 
 
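For reference, a minimal sketch of the new inference path outside the Gradio UI. It assumes the checkpoint has already been downloaded (get_image_classifier() above does that on first call), that CLIPImageClassifier is imported or copied from app.py, and it uses a hypothetical test file photo.jpg:

import torch
from PIL import Image
from transformers import CLIPProcessor

processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
model = CLIPImageClassifier()      # class defined in app.py above
state = torch.load("best_clip_finetuned_classifier.pth", map_location="cpu")
model.load_state_dict(state, strict=False)   # strip "module." prefixes first if present
model.eval()

pixels = processor(images=Image.open("photo.jpg").convert("RGB"),
                   return_tensors="pt")["pixel_values"]
with torch.no_grad():
    p = model(pixels).item()       # sigmoid output in [0, 1]
print("Fake" if p > 0.5 else "Real", f"(score: {p:.2f})")   # same 0.5 threshold as classify_image()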
 
requirements.txt CHANGED
@@ -2,7 +2,6 @@ gradio
 torch
 transformers
 requests
-Pillow
 wikipedia-api
 wikipedia
 sentence-transformers
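Dropping Pillow from requirements.txt should be safe here: app.py no longer imports PIL directly, and gradio itself depends on Pillow, which keeps gr.Image(type="pil") working.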