aubynsamuel05 committed
Commit c8dcdea · 1 Parent(s): 8e1f84c

swap all-mpnet-base-v2 for MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli

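Context for the swap: all-mpnet-base-v2 is a sentence-transformers embedding model, so the old scorer could only measure topical closeness between claim and evidence via cosine similarity; the new checkpoint is a natural-language-inference classifier that separates entailment from contradiction. The removed module is not part of this diff, but it presumably looked something like this minimal sketch (the body below is an assumption, not the project's actual code):

# Hypothetical reconstruction of the removed scorer, assuming sentence-transformers
from sentence_transformers import SentenceTransformer, util

_embedder = SentenceTransformer("all-mpnet-base-v2")

def calculate_semantic_similarity(claim: str, sentences: str) -> float:
    # Cosine similarity of the two embeddings, roughly in [-1, 1]
    embeddings = _embedder.encode([claim, sentences], convert_to_tensor=True)
    return float(util.cos_sim(embeddings[0], embeddings[1]))

Cosine similarity scores a claim and its explicit negation as near-identical, which is exactly the failure mode an NLI entailment model addresses.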
Files changed (2)
  1. deploy/main/claim_verifier.py +12 -2
  2. nli_checks.py +116 -0
deploy/main/claim_verifier.py CHANGED
@@ -11,7 +11,9 @@ import string
 from deploy.utils.general_utils import TRUSTED_DOMAINS, SUSPICIOUS_DOMAINS
 from deploy.utils.content_extractor import extract_content
 from deploy.utils.url_filter import _is_corrupted_pdf_content, _is_pdf_or_download_url
-from semantic_similarity import calculate_semantic_similarity
+from nli_checks import advanced_claim_verifier_native
+
+# from semantic_similarity import calculate_semantic_similarity
 
 warnings.filterwarnings("ignore")
 
@@ -211,7 +213,15 @@ class ClaimVerifier:
     def _semantic_similarity_with_sentences(self, claim: str, sentences: str) -> float:
         """Calculate entailment scores and return the best one."""
         try:
-            score = calculate_semantic_similarity(claim, sentences)
+            # score = calculate_semantic_similarity(claim, sentences)
+            result = advanced_claim_verifier_native(claim, sentences)
+            print(f"Support Score: {result['support_score']:.4f}")
+            print(f"Overall Assessment: {result['prediction']} ✅")
+            print("-" * 30)
+            print("Most Relevant Evidence Found:")
+            print(f"-> \"{result['relevant_chunk']}\"")
+            print("\n" + "=" * 50 + "\n")
+            score = result["support_score"]
         except Exception as e:
             logging.error(f"Error analyzing sentence: {e}")
         return score
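Note that the method's return value changes meaning here: a cosine similarity (roughly [-1, 1], higher means more related) becomes the NLI model's entailment probability ([0, 1]) for the best evidence chunk, so any downstream thresholds need recalibrating. A hedged sketch of how a caller might bucket the new score (bucket_support and the 0.7/0.3 cutoffs are illustrative assumptions, not part of the commit):

def bucket_support(score: float) -> str:
    # Illustrative cutoffs only; tune against labelled claim/evidence pairs
    if score >= 0.7:
        return "supported"
    if score <= 0.3:
        return "unsupported"  # low entailment; could be neutral or contradicted
    return "inconclusive"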
nli_checks.py ADDED
@@ -0,0 +1,116 @@
+import os
+import torch
+import numpy as np
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+model_name = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
+model.eval()
+
+
+def advanced_claim_verifier_native(claim: str, evidence: str) -> dict:
+    """
+    Verifies a claim against potentially long evidence using chunking and a
+    powerful NLI model with the native transformers library.
+
+    Args:
+        claim (str): The claim to be verified (hypothesis).
+        evidence (str): The evidence text, which can be long (premise).
+
+    Returns:
+        dict: A dictionary containing the final support score, the predicted label,
+            and the most relevant evidence chunk.
+    """
+    if not evidence or not claim:
+        return {
+            "support_score": 0.5,  # Neutral default
+            "prediction": "Neutral",
+            "relevant_chunk": "N/A",
+        }
+
+    # CHUNKING STRATEGY: treat each blank-line-separated paragraph as one premise
+    chunks = evidence.split('\n\n')
+    chunks = [chunk.strip() for chunk in chunks if chunk.strip()]
+
+    if not chunks:
+        return {
+            "support_score": 0.5,
+            "prediction": "Neutral",
+            "relevant_chunk": "N/A",
+        }
+
+
+    with torch.no_grad():
+        inputs = tokenizer(chunks, [claim] * len(chunks), truncation=True, padding=True, return_tensors="pt").to(device)
+
+        outputs = model(**inputs)
+
+        # Convert logits to probabilities using softmax
+        probabilities = torch.softmax(outputs.logits, dim=-1)
+
+        # Convert to a NumPy array for easier handling
+        scores = probabilities.cpu().numpy()
+
+    entailment_scores = scores[:, 0]  # index 0 is "entailment" in this checkpoint's label order
+
+    # Find the chunk with the highest entailment score
+    best_chunk_idx = np.argmax(entailment_scores)
+
+    # The final support score is the highest probability of entailment found
+    final_support_score = entailment_scores[best_chunk_idx]
+
+    # Determine the final label based on the highest probability for that best chunk
+    final_prediction_idx = np.argmax(scores[best_chunk_idx])
+
+    label_map = ["Supported", "Neutral", "Contradicted"]
+    final_prediction_label = label_map[final_prediction_idx]
+
+    most_relevant_chunk = chunks[best_chunk_idx]
+
+    return {
+        "support_score": float(final_support_score),
+        "prediction": final_prediction_label,
+        "relevant_chunk": most_relevant_chunk,
+    }
+
+
+if __name__ == "__main__":
+    claim_to_verify = "The company's new 'QuantumLeap' chip is expected to double processing speeds."
+
+    long_evidence = """
+    A press release today from Innovate Corp announced the 'QuantumLeap' processor. CEO Jane Doe stated, "We are thrilled to unveil this technology. Our internal benchmarks show that the QuantumLeap chip doubles the processing speed of our previous generation, a major milestone for the industry."
+
+    The announcement was met with cautious optimism. Analyst John Smith from TechAdvisory noted, "While the claims are impressive, we've seen bold promises before. Real-world performance will be the true test." He also pointed out that the chip's power consumption remains a concern for mobile applications, which was not addressed in the release.
+
+    The new processor will first be available in Innovate Corp's high-end desktop line, slated for a Q4 release. Broader availability has not yet been announced.
+    """
+
+    print(f"\nVERIFYING CLAIM: '{claim_to_verify}'\n")
+
+    result = advanced_claim_verifier_native(claim_to_verify, long_evidence)
+
+    print(f"Final Support Score: {result['support_score']:.4f}")
+    print(f"Overall Assessment: {result['prediction']} ✅")
+    print("-" * 30)
+    print("Most Relevant Evidence Found:")
+    print(f"-> \"{result['relevant_chunk']}\"")
+
+    print("\n" + "=" * 50 + "\n")
+
+    # Example for contradiction
+    claim_contradicted = "Implementing a four-day workweek increases employee productivity"
+    evidence_contradicted = "A trial of the four-day workweek led to employees completing fewer tasks overall, with managers noting a decline in project delivery speed and more frequent missed deadlines"
+
+    print(f"VERIFYING CLAIM: '{claim_contradicted}'\n")
+    result_contra = advanced_claim_verifier_native(claim_contradicted, evidence_contradicted)
+
+    print(f"Final Support Score: {result_contra['support_score']:.4f}")
+    print(f"Overall Assessment: {result_contra['prediction']} ❌")
+    print("-" * 30)
+    print("Most Relevant Evidence Found:")
+    print(f"-> \"{result_contra['relevant_chunk']}\"")
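The CHUNKING STRATEGY above relies on blank-line paragraph breaks, so evidence extracted without them arrives as one oversized chunk and gets silently cut off by truncation=True. A token-window alternative is sketched below (an assumption, not part of the commit: chunk_by_tokens and the 384/128 window sizes are illustrative, chosen to stay under the checkpoint's usual 512-token training length, and it assumes it sits in nli_checks.py next to the module-level tokenizer):

def chunk_by_tokens(text: str, max_tokens: int = 384, stride: int = 128) -> list:
    # Overlapping token windows so long, unbroken evidence still fits the model
    ids = tokenizer(text, add_special_tokens=False)["input_ids"]
    step = max_tokens - stride  # overlap keeps sentences from being cut at window edges
    chunks = []
    for start in range(0, max(len(ids), 1), step):
        window = ids[start:start + max_tokens]
        if window:
            chunks.append(tokenizer.decode(window, skip_special_tokens=True))
        if start + max_tokens >= len(ids):
            break
    return chunks

Swapping this in for evidence.split('\n\n') would leave the rest of the scoring loop unchanged.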