Commit 26212b8
Parent(s): 6ff8947

hf interface for mistral

Browse files: app.py (+20 -51) · requirements.txt (+2 -1)
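In short: the commented-out Google Drive model downloads and the local Mistral text-generation pipeline are removed; explanations are now generated remotely through huggingface_hub's InferenceClient, which adds an HF_TOKEN secret and one new dependency.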
app.py
CHANGED

@@ -2,7 +2,7 @@ import os
 import torch
 import requests
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 from PIL import Image
 from io import BytesIO
 import wikipedia

@@ -11,46 +11,19 @@ import re
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 from tavily import TavilyClient
-
-# # Download from Google Drive
-# def download_from_drive(drive_url, dest_path):
-#     gdown.download(drive_url, dest_path, quiet=False)
-#     return
-
-# # Download models
-# TEXT_MODEL_ZIP_URL = "https://drive.google.com/uc?export=download&id=1Sf2DoVaYBqBcdvonf6GJpo_bLWATSgeq"
-# IMAGE_MODEL_URL = "https://drive.google.com/uc?export=download&id=19xRLjNtGWty9loc0_6LPjIYOl-EIf2bm"
-
-# os.makedirs("models", exist_ok=True)
-
-# # Text model
-# if not os.path.exists("models/text_model"):
-#     print("Downloading and extracting text model...")
-#     zip_path = "models/text_model.zip"
-#     download_from_drive(TEXT_MODEL_ZIP_URL, zip_path)
-#     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-#         zip_ref.extractall("models/text_model")
-# else:
-#     print("Text model already exists.")
-
-# # Image model
-# if not os.path.exists("models/image_model.pth"):
-#     print("Downloading image model...")
-#     pth_path = "models/image_model.pth"
-#     download_from_drive(IMAGE_MODEL_URL, pth_path)
-# else:
-#     print("Image model already exists.")
+from huggingface_hub import InferenceClient

 text_classifier = None
-text_explainer = None
+TAVILY_KEY = os.getenv("TAVILY_API_KEY")
+GOOGLE_KEY = os.getenv("GOOGLE_FC_KEY")
+HF_TOKEN = os.getenv("HF_TOKEN")

 embed_model = SentenceTransformer("all-MiniLM-L6-v2")
 explain_model = "mistralai/Mistral-7B-Instruct-v0.2"
 text_model = "rajyalakshmijampani/fever_finetuned_deberta"

+inf_client = InferenceClient(token=HF_TOKEN)
 wiki = wikipediaapi.Wikipedia(language='en', user_agent='fact-checker/1.0')
-TAVILY_KEY = os.getenv("TAVILY_API_KEY")
-GOOGLE_KEY = os.getenv("GOOGLE_FC_KEY")
 tavily = TavilyClient(api_key=TAVILY_KEY)

 def get_text_classifier():

@@ -61,14 +34,6 @@ def get_text_classifier():
     text_classifier = pipeline("text-classification", model=seq_clf, tokenizer=tokenizer)
     return text_classifier

-def get_text_explainer():
-    global text_explainer
-    if text_explainer is None:
-        tokenizer = AutoTokenizer.from_pretrained(explain_model)
-        clm = AutoModelForCausalLM.from_pretrained(explain_model)
-        text_explainer = pipeline("text-generation", model=clm, tokenizer=tokenizer, max_new_tokens=150, temperature=0.5, repetition_penalty=1.2)
-    return text_explainer
-
 def _rank_sentences(claim, sentences, top_k=4):
     if not sentences: return []
     emb_c = embed_model.encode([claim])

@@ -130,30 +95,34 @@ def get_evidence_sentences(claim, k=3):
     return (evid or ["Error: No relevant evidence found."])[:k]

 # --- Classification Function ---
-def classify_text(claim):
-    text_classifier = get_text_classifier()
-    text_explainer = get_text_explainer()
+def classify_text(claim):
+    classifier = get_text_classifier()
     evidences = get_evidence_sentences(claim)
     evidence_text = " ".join(evidences)

     # Step 1: FEVER classification
     text = f"claim: {claim} evidence: {evidence_text}"
-    result = text_classifier(text, truncation=True, max_length=512, return_all_scores=True)[0]
+    result = classifier(text, truncation=True, max_length=512, return_all_scores=True)[0]
     top_label = sorted(result, key=lambda x: x["score"], reverse=True)[0]["label"]
     label_str = "REAL" if top_label == "LABEL_0" else "FAKE"

     # Step 2: Mistral explanation generation
     prompt = f"""
-You are a fact-checking assistant.
+You are a reliable fact-checking assistant.
 Claim: {claim}
 Evidence: {chr(10).join(f"- {e}" for e in evidences)}
 The model predicts this claim is {label_str}.
-Write a short explanation of why this classification makes sense.
+Write a short, clear explanation of why this classification makes sense.
+If the evidence clearly contradicts the label, correct the label in your explanation.
 """
-    generation = text_explainer(prompt)
-    explanation = generation[0]["generated_text"].strip()
-
-    return f"Prediction: {label_str}\n\nExplanation:\n{explanation}"
+    messages = [
+        {"role": "system", "content": "You are a reliable fact-checking assistant."},
+        {"role": "user", "content": prompt},
+    ]
+    completion = inf_client.chat_completion(model=explain_model, messages=messages, max_tokens=256, temperature=0.3)
+    explanation = completion.choices[0].message.content.strip()
+
+    return f"Prediction: {label_str}\n\nExplanation:\n{explanation}"

 # -------------------
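For reference, here is the new remote-generation path from this commit in isolation, as a minimal sketch. It assumes a valid HF_TOKEN with access to the hosted mistralai/Mistral-7B-Instruct-v0.2 endpoint; the helper name `explain_with_mistral` and the example claim are hypothetical, while `explain_model`, `inf_client`, and the call parameters mirror the diff above.

```python
import os
from huggingface_hub import InferenceClient

# Mirrors the diff: hosted Mistral model plus a client authenticated via HF_TOKEN.
explain_model = "mistralai/Mistral-7B-Instruct-v0.2"
inf_client = InferenceClient(token=os.getenv("HF_TOKEN"))

def explain_with_mistral(claim, label_str, evidences):
    # Build the same style of prompt that classify_text() assembles above.
    evidence_block = "\n".join(f"- {e}" for e in evidences)
    prompt = (
        f"Claim: {claim}\n"
        f"Evidence:\n{evidence_block}\n"
        f"The model predicts this claim is {label_str}.\n"
        "Write a short, clear explanation of why this classification makes sense."
    )
    messages = [
        {"role": "system", "content": "You are a reliable fact-checking assistant."},
        {"role": "user", "content": prompt},
    ]
    # chat_completion returns an OpenAI-style response object.
    completion = inf_client.chat_completion(
        model=explain_model, messages=messages, max_tokens=256, temperature=0.3
    )
    return completion.choices[0].message.content.strip()

if __name__ == "__main__":
    print(explain_with_mistral(
        "The Eiffel Tower is in Berlin.",
        "FAKE",
        ["The Eiffel Tower is a wrought-iron lattice tower in Paris, France."],
    ))
```

Compared with the removed `get_text_explainer()`, this trades local GPU memory for a network call: the 7B model no longer has to fit on the Space's hardware.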
requirements.txt
CHANGED

@@ -7,4 +7,5 @@ wikipedia-api
 wikipedia
 sentence-transformers
 scikit-learn
-tavily-python
+tavily-python
+huggingface-hub
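The new `huggingface-hub` entry provides `InferenceClient`. A quick pre-flight check (a hypothetical snippet, not part of the repo) to run before launching the Space:

```python
# Confirms the new dependency is importable and that the token the app
# reads via os.getenv("HF_TOKEN") is actually set in the environment.
import os
import huggingface_hub

print("huggingface_hub version:", huggingface_hub.__version__)
assert os.getenv("HF_TOKEN"), "HF_TOKEN is missing; add it as a Space secret."
```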