Commit
·
8fc155a
1
Parent(s):
7455224
Include Mistral explanation generation
Browse files
app.py
CHANGED
|
@@ -42,7 +42,11 @@ from tavily import TavilyClient
|
|
| 42 |
# print("Image model already exists.")
|
| 43 |
|
| 44 |
text_classifier = None
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
wiki = wikipediaapi.Wikipedia(language='en', user_agent='fact-checker/1.0')
|
| 47 |
TAVILY_KEY = os.getenv("TAVILY_API_KEY")
|
| 48 |
GOOGLE_KEY = os.getenv("GOOGLE_FC_KEY")
|
|
@@ -52,15 +56,15 @@ def get_text_classifier():
|
|
| 52 |
global text_classifier
|
| 53 |
if text_classifier is None:
|
| 54 |
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
text_classifier = pipeline("text-classification", model=
|
| 58 |
return text_classifier
|
| 59 |
|
| 60 |
def _rank_sentences(claim, sentences, top_k=4):
|
| 61 |
if not sentences: return []
|
| 62 |
-
emb_c =
|
| 63 |
-
emb_s =
|
| 64 |
sims = cosine_similarity(emb_c, emb_s)[0]
|
| 65 |
ranked = [s for s, _ in sorted(zip(sentences, sims), key=lambda x: x[1], reverse=True)]
|
| 66 |
return ranked[:top_k]
|
|
@@ -119,34 +123,34 @@ def get_evidence_sentences(claim, k=3):
|
|
| 119 |
|
| 120 |
# --- Classification Function ---
|
| 121 |
def classify_text(claim, text_classifier):
|
| 122 |
-
|
| 123 |
-
evidences = get_evidence_sentences(claim)
|
| 124 |
-
print(evidences)
|
| 125 |
-
if not evidences or "Error" in evidences[0]:
|
| 126 |
-
return f"Prediction: Unknown\n\nTop Evidences:\n{evidences[0]}\n\nExplanation:\nUnable to retrieve reliable evidences."
|
| 127 |
-
|
| 128 |
-
# --- Prepare model input ---
|
| 129 |
evidence_text = " ".join(evidences)
|
| 130 |
-
text = f"Claim: {claim}\nEvidence: {evidence_text}"
|
| 131 |
|
| 132 |
-
#
|
| 133 |
-
|
| 134 |
result = text_classifier(text, truncation=True, max_length=512, return_all_scores=True)[0]
|
| 135 |
top_label = sorted(result, key=lambda x: x["score"], reverse=True)[0]["label"]
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
|
| 152 |
# -------------------
|
|
|
|
| 42 |
# --- Module-level configuration and shared resources ---

# Lazily-built text-classification pipeline; populated by get_text_classifier().
text_classifier = None

# Sentence embedder used to rank evidence sentences against a claim.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
# Instruction-tuned LLM used to generate human-readable explanations.
explain_model = "mistralai/Mistral-7B-Instruct-v0.2"
# FEVER-fine-tuned DeBERTa checkpoint used for claim classification.
text_model = "rajyalakshmijampani/fever_finetuned_deberta"

# Wikipedia client for evidence retrieval.
wiki = wikipediaapi.Wikipedia(language='en', user_agent='fact-checker/1.0')
# API keys come from the environment; each is None when the variable is unset.
TAVILY_KEY = os.getenv("TAVILY_API_KEY")
GOOGLE_KEY = os.getenv("GOOGLE_FC_KEY")
|
|
|
|
| 56 |
def get_text_classifier():
    """Return the shared FEVER text-classification pipeline, building it on first use.

    The transformers import and model download are deferred until the first
    call; subsequent calls reuse the cached module-level ``text_classifier``.
    """
    global text_classifier
    if text_classifier is not None:
        return text_classifier

    # Deferred import keeps module import time low until classification is needed.
    from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

    tok = AutoTokenizer.from_pretrained(text_model)
    clf = AutoModelForSequenceClassification.from_pretrained(text_model)
    text_classifier = pipeline("text-classification", model=clf, tokenizer=tok)
    return text_classifier
|
| 63 |
|
| 64 |
def _rank_sentences(claim, sentences, top_k=4):
    """Return up to *top_k* sentences most similar to *claim* (cosine similarity)."""
    if not sentences:
        return []

    claim_vec = embed_model.encode([claim])
    sent_vecs = embed_model.encode(sentences)
    scores = cosine_similarity(claim_vec, sent_vecs)[0]

    # Stable sort of indices by descending score — same ordering as sorting
    # (sentence, score) pairs on the score.
    order = sorted(range(len(sentences)), key=lambda i: scores[i], reverse=True)
    return [sentences[i] for i in order[:top_k]]
|
|
|
|
| 123 |
|
| 124 |
# --- Classification Function ---
def classify_text(claim, text_classifier):
    """Classify *claim* as REAL/FAKE against retrieved evidence and explain the verdict.

    Pipeline:
      1. Retrieve evidence sentences for the claim.
      2. FEVER classification with the fine-tuned DeBERTa pipeline.
      3. Explanation generation with the Mistral instruct model.

    Parameters:
      claim: the claim text to fact-check.
      text_classifier: an optional pre-built classification pipeline; when None
        the shared lazy classifier from get_text_classifier() is used.
        (The previous version unconditionally shadowed this parameter.)

    Returns a formatted string containing prediction, top evidences, and explanation.
    """
    # Fix: honor a caller-supplied classifier instead of always overwriting it.
    if text_classifier is None:
        text_classifier = get_text_classifier()

    evidences = get_evidence_sentences(claim)
    # Restored guard: retrieval may fail or return an error message as its first item;
    # without this check the error text itself would be classified as evidence.
    if not evidences or "Error" in evidences[0]:
        detail = evidences[0] if evidences else "No evidence found."
        return (f"Prediction: Unknown\n\nTop Evidences:\n{detail}"
                "\n\nExplanation:\nUnable to retrieve reliable evidences.")

    evidence_text = " ".join(evidences)

    # Step 1: FEVER classification.
    text = f"claim: {claim} evidence: {evidence_text}"
    result = text_classifier(text, truncation=True, max_length=512, return_all_scores=True)[0]
    top_label = max(result, key=lambda x: x["score"])["label"]
    label_str = "REAL" if top_label == "LABEL_0" else "FAKE"

    # Step 2: Mistral explanation generation. Cache the pipeline on the function
    # object so the 7B model is loaded once, not rebuilt on every call.
    explain_pipe = getattr(classify_text, "_explain_pipe", None)
    if explain_pipe is None:
        # Local import mirrors get_text_classifier(); `pipeline` is not imported
        # at module level in this file.
        from transformers import pipeline
        explain_pipe = pipeline(
            "text-generation", model=explain_model, tokenizer=explain_model,
            max_new_tokens=150, temperature=0.5, repetition_penalty=1.2,
        )
        classify_text._explain_pipe = explain_pipe

    evidence_bullets = "\n".join(f"- {e}" for e in evidences)
    prompt = f"""
You are a fact-checking assistant.
Claim: {claim}
Evidence:
{evidence_bullets}
The model predicts this claim is {label_str}.
Write a clear, human-readable explanation of why this classification makes sense, correcting the label if the evidence clearly contradicts it.
"""

    # Keep only the generated continuation after the last "Evidence:" echo.
    expl = explain_pipe(prompt)[0]["generated_text"].split("Evidence:")[-1].strip()

    # Fix: the previous backslash string-continuation leaked source indentation
    # into the user-facing output.
    return (f"Prediction: {label_str}\n\nTop Evidences:\n"
            + evidence_bullets
            + f"\n\nExplanation:\n{expl}")
|
| 154 |
|
| 155 |
|
| 156 |
# -------------------
|