| """ |
| Cache Generator for HallucinationGuard-Env |
| ========================================== |
| Run this ONCE on your PC to download all datasets and save them as cache files. |
| Then upload the cache/ folder to your HF Space. |
| |
| Usage: |
| pip install datasets |
| python generate_cache.py |
| |
| Output: |
| server/cache/squad_2000.json |
| server/cache/trivia_qa_2000.json |
| server/cache/halueval_1000.json |
| ... etc (one file per dataset) |
| |
| After this finishes, upload everything: |
| python -c "from huggingface_hub import HfApi; api = HfApi(); api.upload_folder(folder_path='.', repo_id='SamSankar/hallucination-guard-env', repo_type='space', ignore_patterns=['__pycache__', '*.pyc']); print('Upload complete!')" |
| """ |
|
|
| import json |
| import os |
| import sys |
| import time |
| from pathlib import Path |
|
|
| try: |
| from datasets import load_dataset as hf_load |
| except ImportError: |
| print("Run: pip install datasets") |
| sys.exit(1) |
|
|
# Cache files are written to <directory of this script>/server/cache.
# The directory (and any missing parents) is created as soon as the module
# is loaded, so later code can write into it without checking.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
CACHE_DIR = os.path.join(_SCRIPT_DIR, "server", "cache")
Path(CACHE_DIR).mkdir(exist_ok=True, parents=True)
|
|
# Row cap requested for each dataset, keyed by the dataset's short name.
# Insertion order is the order in which the datasets are processed.
DATASETS = dict(
    squad=10_000,
    trivia_qa=10_000,
    halueval=5_000,
    truthful_qa=817,
    hotpotqa=10_000,
    boolq=9_427,
    faithdial=10_000,
    fever=10_000,
    arc=3_370,
    openbookqa=4_957,
    ms_marco=10_000,
    coqa=7_199,
    nq_open=8_000,
    commonsense_qa=8_000,
    winogrande=5_000,
)
| |
|
|
def load_squad(cap):
    """Build example records from the SQuAD train split.

    Requests the split slice ``train[:cap]``. A row is kept only when it has
    a non-empty first answer text and a non-empty context; the context is cut
    to 1500 characters.

    Args:
        cap: Upper bound used in the split slice.

    Returns:
        list[dict]: One example dict per kept row; ids are ``squad_<row index>``.
    """
    rows = hf_load("squad", split=f"train[:{cap}]")
    examples = []
    for idx, row in enumerate(rows):
        answer_texts = row.get("answers", {}).get("text", [])
        first_answer = answer_texts[0] if answer_texts else ""
        if first_answer and row.get("context"):
            examples.append({
                "question": row["question"],
                "context": row["context"][:1500],
                "answer": first_answer,
                "id": f"squad_{idx}",
                "source": "squad",
                "difficulty": "intermediate",
                "category": "reading_comprehension",
                "hallucination_type": None,
                "entities": [],
                "metadata": {},
            })
    return examples
|
|
def load_trivia_qa(cap):
    """Build example records from TriviaQA (``rc.wikipedia`` config, train split).

    The context is the first entry of ``entity_pages["wiki_context"]``, cut to
    1500 characters. The answer is the first normalized alias, falling back to
    the answer's ``value``. Rows without a usable context or answer are
    skipped.

    Args:
        cap: Upper bound used in the split slice ``train[:cap]``.

    Returns:
        list[dict]: One example dict per kept row; ids are
        ``triviaqa_<row index>``.
    """
    ds = hf_load("trivia_qa", "rc.wikipedia", split=f"train[:{cap}]")
    out = []
    for i, item in enumerate(ds):
        pages = item.get("entity_pages", {})
        wiki = pages.get("wiki_context", []) if isinstance(pages, dict) else []
        if isinstance(wiki, list):
            # An empty list means "no context". Stringifying it would give
            # the truthy text "[]" and the row would be kept with a bogus
            # context instead of being skipped.
            ctx = wiki[0] if wiki else ""
        else:
            # Non-list value: keep it as text, but treat None/"" as missing
            # rather than turning it into "None".
            ctx = str(wiki) if wiki else ""
        if not ctx:
            continue

        answer_field = item.get("answer", {})
        aliases = answer_field.get("normalized_aliases", [])
        answer = aliases[0] if aliases else answer_field.get("value", "")
        if not answer:
            continue

        out.append({
            "question": item["question"],
            "context": ctx[:1500],
            "answer": str(answer),
            "id": f"triviaqa_{i}",
            "source": "trivia_qa",
            "difficulty": "intermediate",
            "category": "trivia",
            "hallucination_type": None,
            "entities": [],
            "metadata": {},
        })
    return out
|
|
def load_halueval(cap):
    """Build example records from the ``qa`` config of ``pminervini/HaluEval``.

    Requests the split slice ``data[:cap]``. The context is read from
    ``knowledge`` (or ``context`` when that key is absent) and the answer from
    ``right_answer`` (or ``answer``). Rows without a question or an answer are
    skipped; the context is stringified and cut to 1500 characters.

    Args:
        cap: Upper bound used in the split slice.

    Returns:
        list[dict]: One example dict per kept row; ``hallucination_type`` is
        copied from the row when present, otherwise ``None``.
    """
    rows = hf_load("pminervini/HaluEval", "qa", split=f"data[:{cap}]")
    examples = []
    for idx, row in enumerate(rows):
        question = row.get("question", "")
        fallback_context = row.get("context", "")
        context = row.get("knowledge", fallback_context)
        fallback_answer = row.get("answer", "")
        right_answer = row.get("right_answer", fallback_answer)
        if question and right_answer:
            examples.append({
                "question": question,
                "context": str(context)[:1500],
                "answer": str(right_answer),
                "id": f"halueval_{idx}",
                "source": "halueval",
                "difficulty": "advanced",
                "category": "hallucination_detection",
                "hallucination_type": row.get("hallucination_type"),
                "entities": [],
                "metadata": {},
            })
    return examples
|
|
def load_truthful_qa(cap):
    """Build example records from TruthfulQA (``generation`` config).

    Loads the whole ``validation`` split and stops after ``cap`` rows. The
    context is the space-joined ``correct_answers`` (or the question itself
    when there are none), cut to 1500 characters; the answer is
    ``best_answer``. Rows without a best answer are skipped.

    Args:
        cap: Maximum number of rows to read from the split.

    Returns:
        list[dict]: One example dict per kept row; ids are
        ``truthfulqa_<row index>``.
    """
    rows = hf_load("truthful_qa", "generation", split="validation")
    examples = []
    for idx, row in enumerate(rows):
        if idx >= cap:
            break
        best_answer = row.get("best_answer", "")
        correct_answers = row.get("correct_answers", [])
        if correct_answers:
            context = " ".join(correct_answers)
        else:
            context = row.get("question", "")
        if not best_answer:
            continue
        examples.append({
            "question": row["question"],
            "context": context[:1500],
            "answer": best_answer,
            "id": f"truthfulqa_{idx}",
            "source": "truthful_qa",
            "difficulty": "expert",
            "category": "factuality",
            "hallucination_type": None,
            "entities": [],
            "metadata": {},
        })
    return examples
|
|
def load_hotpotqa(cap):
    """Build example records from HotpotQA (``fullwiki`` config, train split).

    The context is assembled as ``"<title>: <sentences joined by spaces>"`` for
    each title/sentence-list pair, joined by spaces and cut to 1500
    characters. Rows missing a question, answer or context are skipped.

    Args:
        cap: Upper bound used in the split slice ``train[:cap]``.

    Returns:
        list[dict]: One example dict per kept row; ids are
        ``hotpotqa_<row index>``.
    """
    rows = hf_load("hotpot_qa", "fullwiki", split=f"train[:{cap}]")
    examples = []
    for idx, row in enumerate(rows):
        question = row.get("question", "")
        answer = row.get("answer", "")
        titles = row.get("context", {}).get("title", [])
        sentence_lists = row.get("context", {}).get("sentences", [])
        paragraphs = [
            f"{title}: {' '.join(sentences)}"
            for title, sentences in zip(titles, sentence_lists)
        ]
        context = " ".join(paragraphs)[:1500]
        if not (question and answer and context):
            continue
        examples.append({
            "question": question,
            "context": context,
            "answer": str(answer),
            "id": f"hotpotqa_{idx}",
            "source": "hotpotqa",
            "difficulty": "advanced",
            "category": "multi_hop_reasoning",
            "hallucination_type": None,
            "entities": [],
            "metadata": {},
        })
    return examples
|
|
def load_boolq(cap):
    """Build yes/no example records from the ``google/boolq`` train split.

    The passage (cut to 1500 characters) is the context and the row's
    ``answer`` flag becomes the string ``"yes"`` or ``"no"``. Rows without a
    question or a passage are skipped.

    Args:
        cap: Upper bound used in the split slice ``train[:cap]``.

    Returns:
        list[dict]: One example dict per kept row; ids are
        ``boolq_<row index>``.
    """
    rows = hf_load("google/boolq", split=f"train[:{cap}]")
    examples = []
    for idx, row in enumerate(rows):
        question = row.get("question", "")
        passage = row.get("passage", "")
        if not (question and passage):
            continue
        if row.get("answer", False):
            verdict = "yes"
        else:
            verdict = "no"
        examples.append({
            "question": question,
            "context": passage[:1500],
            "answer": verdict,
            "id": f"boolq_{idx}",
            "source": "boolq",
            "difficulty": "beginner",
            "category": "yes_no_qa",
            "hallucination_type": None,
            "entities": [],
            "metadata": {},
        })
    return examples
|
|
def load_faithdial(cap):
    """Build dialogue example records tagged ``"faithdial"``.

    NOTE(review): despite the name and source tag, the rows are read from
    ``Anthropic/hh-rlhf`` (train split), not from a FaithDial dataset.
    Confirm this substitution is intended.

    The ``chosen`` transcript is split on ``"Human:"``. Every piece that also
    contains ``"Assistant:"`` yields a question/answer pair, and the last
    non-empty pair in the transcript is the one kept. The context is the first
    800 characters of the transcript; question and answer are cut to 200 and
    400 characters.

    Args:
        cap: Upper bound used in the split slice ``train[:cap]``.

    Returns:
        list[dict]: One example dict per kept row; ids are
        ``faithdial_<row index>``.
    """
    rows = hf_load("Anthropic/hh-rlhf", split="train[:%d]" % cap)
    examples = []
    for idx, row in enumerate(rows):
        transcript = row.get("chosen", "")
        if not transcript:
            continue

        last_question, last_answer = "", ""
        # Pieces produced by splitting on "Human:" cannot contain "Human:"
        # themselves, so the text after "Assistant:" is the full reply.
        for turn in transcript.split("Human:")[1:]:
            human_text, marker, assistant_text = turn.partition("Assistant:")
            if not marker:
                continue
            human_text = human_text.strip()
            assistant_text = assistant_text.strip()
            if human_text and assistant_text:
                last_question, last_answer = human_text, assistant_text

        if not (last_question and last_answer):
            continue

        record = {
            "question": last_question[:200],
            "context": transcript[:800],
            "answer": last_answer[:400],
            "id": "faithdial_%d" % idx,
            "source": "faithdial",
            "difficulty": "advanced",
            "category": "hallucination_detection",
            "hallucination_type": None,
            "entities": [],
            "metadata": {},
        }
        examples.append(record)
    return examples
|
|
def load_fever(cap):
    """Build FEVER-style fact-verification examples from the SNLI train split.

    NOTE(review): despite the name and the ``"fever"`` source tag, the rows
    are read from ``stanfordnlp/snli``. Each premise/hypothesis pair is
    rephrased as a verification question and the NLI label is translated to a
    FEVER verdict.

    SNLI encodes its labels as 0 = entailment, 1 = neutral,
    2 = contradiction, and -1 for pairs without a gold label. They map to
    SUPPORTS, NOT ENOUGH INFO and REFUTES respectively. Rows with a missing
    or unrecognised label, or with an empty premise or hypothesis, are
    skipped.

    Args:
        cap: Upper bound used in the split slice ``train[:cap]``.

    Returns:
        list[dict]: One example dict per kept row; ids are
        ``fever_<row index>``.
    """
    # Neutral (1) is "not enough info" and contradiction (2) is "refutes";
    # swapping these two would mislabel two thirds of the classes.
    verdict_by_label = {
        0: "SUPPORTS",
        1: "NOT ENOUGH INFO",
        2: "REFUTES",
    }

    ds = hf_load("stanfordnlp/snli", split=f"train[:{cap}]")
    out = []
    for i, item in enumerate(ds):
        premise = item.get("premise", "")
        hypothesis = item.get("hypothesis", "")
        raw_label = item.get("label", -1)
        if not premise or not hypothesis or raw_label is None:
            continue
        # -1 (no gold label) and anything unexpected is absent from the map.
        label = verdict_by_label.get(int(raw_label))
        if label is None:
            continue
        out.append({
            "question": f"Does the premise support or refute this hypothesis? Hypothesis: {hypothesis}",
            "context": f"Premise: {premise}",
            "answer": label,
            "id": f"fever_{i}",
            "source": "fever",
            "difficulty": "advanced",
            "category": "fact_verification",
            "hallucination_type": None,
            "entities": [],
            "metadata": {},
        })
    return out
|
|
|
|
def load_arc(cap):
    """Build science-exam example records from ARC-Challenge.

    Reads the ``train``, ``validation`` and ``test`` splits of
    ``allenai/ai2_arc`` (config ``ARC-Challenge``) in that order until ``cap``
    examples have been collected. The context lists the answer choices and
    the answer is the text of the choice whose label equals ``answerKey``.
    Rows without a question or a matching choice are skipped.

    A split that fails to load or parse is reported on stderr and skipped;
    the examples collected so far are kept.

    Args:
        cap: Maximum number of examples to return.

    Returns:
        list[dict]: At most ``cap`` example dicts; ids are ``arc_<position>``.

    Raises:
        RuntimeError: If no example could be collected and at least one split
            failed, so the caller does not mistake a total failure for an
            empty dataset.
    """
    out = []
    last_error = None
    for split in ("train", "validation", "test"):
        if len(out) >= cap:
            # Already full: do not download the remaining splits.
            break
        try:
            ds = hf_load("allenai/ai2_arc", "ARC-Challenge", split=split)
            for item in ds:
                if len(out) >= cap:
                    break
                q = item.get("question", "")
                choices = item.get("choices", {})
                ans_key = item.get("answerKey", "")
                labels = choices.get("label", [])
                texts = choices.get("text", [])
                ctx = "Choices: " + " | ".join(
                    f"{l}: {t}" for l, t in zip(labels, texts)
                )
                answer = next(
                    (t for l, t in zip(labels, texts) if l == ans_key), ""
                )
                if not q or not answer:
                    continue
                out.append({
                    "question": q,
                    "context": ctx,
                    "answer": answer,
                    "id": f"arc_{len(out)}",
                    "source": "arc",
                    "difficulty": "advanced",
                    "category": "science_exam",
                    "hallucination_type": None,
                    "entities": [],
                    "metadata": {},
                })
        except Exception as exc:
            # Best effort across splits, but never silently: say what was lost.
            last_error = exc
            print(f"[arc] split '{split}' skipped: {exc}", file=sys.stderr)
            continue

    if not out and last_error is not None:
        raise RuntimeError("no ARC split could be loaded") from last_error
    return out
|
|
def load_openbookqa(cap):
    """Build science-fact example records from the OpenBookQA train split.

    The ``additional`` config of ``allenai/openbookqa`` is used because it is
    the one that carries the ``fact1`` column read below; the ``main`` config
    has no such column, which leaves every context with an empty core fact.

    The context is ``"Core fact: <fact1> | Choices: <label>: <text> | ..."``
    cut to 1500 characters (the core-fact prefix is omitted when the row has
    no fact). The answer is the text of the choice whose label equals
    ``answerKey``. Rows without a question or a matching choice are skipped.

    Args:
        cap: Upper bound used in the split slice ``train[:cap]``.

    Returns:
        list[dict]: One example dict per kept row; ids are
        ``openbookqa_<row index>``.
    """
    ds = hf_load("allenai/openbookqa", "additional", split=f"train[:{cap}]")
    out = []
    for i, item in enumerate(ds):
        q = item.get("question_stem", "")
        choices = item.get("choices", {})
        ans_key = item.get("answerKey", "")
        labels = choices.get("label", [])
        texts = choices.get("text", [])
        fact = item.get("fact1", "")

        choices_text = "Choices: " + " | ".join(
            f"{l}: {t}" for l, t in zip(labels, texts)
        )
        # Avoid emitting a dangling "Core fact:  | ..." when no fact exists.
        ctx = f"Core fact: {fact} | {choices_text}" if fact else choices_text

        answer = next((t for l, t in zip(labels, texts) if l == ans_key), "")
        if not q or not answer:
            continue
        out.append({
            "question": q,
            "context": ctx[:1500],
            "answer": answer,
            "id": f"openbookqa_{i}",
            "source": "openbookqa",
            "difficulty": "intermediate",
            "category": "science_facts",
            "hallucination_type": None,
            "entities": [],
            "metadata": {},
        })
    return out
|
|
def load_ms_marco(cap):
    """Build web-search QA example records from MS MARCO (``v2.1``, train).

    The context is every ``passage_text`` joined by spaces and cut to 1500
    characters; the answer is the first entry of ``answers``. Rows are skipped
    when the query, context or answer is empty, or when the answer is the
    literal ``"No Answer Present."``.

    Args:
        cap: Upper bound used in the split slice ``train[:cap]``.

    Returns:
        list[dict]: One example dict per kept row; ids are
        ``msmarco_<row index>``.
    """
    rows = hf_load("microsoft/ms_marco", "v2.1", split=f"train[:{cap}]")
    examples = []
    for idx, row in enumerate(rows):
        query = row.get("query", "")

        passages = row.get("passages", {})
        if isinstance(passages, dict):
            passage_texts = passages.get("passage_text", [])
        else:
            passage_texts = []
        context = " ".join(passage_texts)[:1500] if passage_texts else ""

        answers = row.get("answers", [])
        first_answer = answers[0] if answers else ""

        if not (query and context and first_answer):
            continue
        if first_answer == "No Answer Present.":
            continue

        examples.append({
            "question": query,
            "context": context,
            "answer": str(first_answer),
            "id": f"msmarco_{idx}",
            "source": "ms_marco",
            "difficulty": "intermediate",
            "category": "web_search_qa",
            "hallucination_type": None,
            "entities": [],
            "metadata": {},
        })
    return examples
|
|
def load_coqa(cap):
    """Build conversational QA example records from the CoQA train split.

    Only the first question of each story is used, paired with the first
    ``input_text`` answer; the story (cut to 1500 characters) is the context.
    Rows with an empty story, question list, answer list, first question or
    first answer are skipped.

    Args:
        cap: Upper bound used in the split slice ``train[:cap]``.

    Returns:
        list[dict]: One example dict per kept row; ids are
        ``coqa_<row index>``.
    """
    rows = hf_load("stanfordnlp/coqa", split=f"train[:{cap}]")
    examples = []
    for idx, row in enumerate(rows):
        story = row.get("story", "")
        questions = row.get("questions", [])
        answers = row.get("answers", {})
        if isinstance(answers, dict):
            answer_texts = answers.get("input_text", [])
        else:
            answer_texts = []
        if not (story and questions and answer_texts):
            continue

        # Both sequences are known to be non-empty at this point.
        first_question = questions[0]
        first_answer = answer_texts[0]
        if not (first_question and first_answer):
            continue

        examples.append({
            "question": str(first_question),
            "context": story[:1500],
            "answer": str(first_answer),
            "id": f"coqa_{idx}",
            "source": "coqa",
            "difficulty": "intermediate",
            "category": "conversational_qa",
            "hallucination_type": None,
            "entities": [],
            "metadata": {},
        })
    return examples
|
|
|
|
def load_nq_open(cap):
    """Build open-domain QA example records from the ``nq_open`` train split.

    The dataset rows are used without a passage, so the context is the fixed
    prompt ``"Answer based on your knowledge: "`` followed by the question.
    The answer is the first entry of the row's ``answer`` list; rows without
    a question or an answer are skipped.

    Args:
        cap: Upper bound used in the split slice ``train[:cap]``.

    Returns:
        list[dict]: One example dict per kept row; ids are
        ``nq_open_<row index>``.
    """
    rows = hf_load("nq_open", split="train[:%d]" % cap)
    examples = []
    for idx, row in enumerate(rows):
        question = row.get("question", "")
        answer_list = row.get("answer", [])
        first_answer = answer_list[0] if answer_list else ""
        if question and first_answer:
            record = {
                "question": question,
                "context": "Answer based on your knowledge: " + question,
                "answer": str(first_answer),
                "id": "nq_open_%d" % idx,
                "source": "nq_open",
                "difficulty": "intermediate",
                "category": "open_domain_qa",
                "hallucination_type": None,
                "entities": [],
                "metadata": {},
            }
            examples.append(record)
    return examples
|
|
|
|
def load_commonsense_qa(cap):
    """Build multiple-choice example records from CommonsenseQA (train split).

    The context lists every choice as ``"<label>: <text>"`` separated by
    ``" | "``; the answer is the text of the choice whose label equals
    ``answerKey``. Rows without a question or a matching choice are skipped.

    Args:
        cap: Upper bound used in the split slice ``train[:cap]``.

    Returns:
        list[dict]: One example dict per kept row; ids are
        ``csqa_<row index>``.
    """
    rows = hf_load("tau/commonsense_qa", split="train[:%d]" % cap)
    examples = []
    for idx, row in enumerate(rows):
        question = row.get("question", "")
        choices = row.get("choices", {})
        if isinstance(choices, dict):
            labels = choices.get("label", [])
            texts = choices.get("text", [])
        else:
            labels, texts = [], []
        answer_key = row.get("answerKey", "")

        # Materialise the pairs once; they feed both the context and the
        # answer lookup.
        labelled = list(zip(labels, texts))
        context = "Choices: " + " | ".join("%s: %s" % pair for pair in labelled)
        answer = ""
        for label, text in labelled:
            if label == answer_key:
                answer = text
                break

        if not (question and answer):
            continue
        record = {
            "question": question,
            "context": context,
            "answer": answer,
            "id": "csqa_%d" % idx,
            "source": "commonsense_qa",
            "difficulty": "intermediate",
            "category": "commonsense_reasoning",
            "hallucination_type": None,
            "entities": [],
            "metadata": {},
        }
        examples.append(record)
    return examples
|
|
|
|
def load_winogrande(cap):
    """Build fill-in-the-blank example records from WinoGrande (``winogrande_xl``).

    The answer is ``option1`` when the row's ``answer`` field stringifies to
    ``"1"`` (also the default when the field is absent) and ``option2``
    otherwise. Rows with an empty sentence or an empty selected option are
    skipped.

    Args:
        cap: Upper bound used in the split slice ``train[:cap]``.

    Returns:
        list[dict]: One example dict per kept row; ids are
        ``winogrande_<row index>``.
    """
    rows = hf_load("allenai/winogrande", "winogrande_xl", split="train[:%d]" % cap)
    examples = []
    for idx, row in enumerate(rows):
        sentence = row.get("sentence", "")
        first_option = row.get("option1", "")
        second_option = row.get("option2", "")
        if str(row.get("answer", "1")) == "1":
            chosen = first_option
        else:
            chosen = second_option
        if not (sentence and chosen):
            continue
        context = "Sentence: %s Options: 1: %s | 2: %s" % (
            sentence,
            first_option,
            second_option,
        )
        record = {
            "question": "Which option correctly fills the blank? " + sentence,
            "context": context,
            "answer": chosen,
            "id": "winogrande_%d" % idx,
            "source": "winogrande",
            "difficulty": "intermediate",
            "category": "commonsense_reasoning",
            "hallucination_type": None,
            "entities": [],
            "metadata": {},
        }
        examples.append(record)
    return examples
|
|
|
|
|
|
# Loader function for each dataset short name. Every loader takes the row cap
# as its only argument and returns a list of example dicts.
LOADERS = dict(
    squad=load_squad,
    trivia_qa=load_trivia_qa,
    halueval=load_halueval,
    truthful_qa=load_truthful_qa,
    hotpotqa=load_hotpotqa,
    boolq=load_boolq,
    faithdial=load_faithdial,
    fever=load_fever,
    arc=load_arc,
    openbookqa=load_openbookqa,
    ms_marco=load_ms_marco,
    coqa=load_coqa,
    nq_open=load_nq_open,
    commonsense_qa=load_commonsense_qa,
    winogrande=load_winogrande,
)
|
|
def _write_json_atomic(path, payload):
    """Serialise ``payload`` as JSON to ``path`` without leaving partial files.

    The data is written to ``<path>.tmp`` and then moved over ``path`` with
    ``os.replace``, so an interrupted run never leaves a truncated file under
    the final cache name.
    """
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as fh:
            json.dump(payload, fh)
        os.replace(tmp_path, path)
    finally:
        # Only still present when the dump or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def main():
    """Download every dataset in ``DATASETS`` and cache it as a JSON file.

    For each ``(name, cap)`` pair the file ``<CACHE_DIR>/<name>_<cap>.json``
    is reused when it exists and parses as JSON; otherwise the matching
    loader from ``LOADERS`` is called and its result is written to that file.

    A dataset whose loader raises, or returns no examples, is reported as
    failed and no cache file is written for it. The other datasets are still
    processed. A summary, including the list of failed datasets, is printed
    at the end.
    """
    total = 0
    failed = []
    print(f"\n{'='*55}")
    print(" HallucinationGuard Cache Generator")
    print(f" Output: {CACHE_DIR}")
    print(f"{'='*55}\n")

    for ds_name, cap in DATASETS.items():
        cache_file = os.path.join(CACHE_DIR, f"{ds_name}_{cap}.json")

        if os.path.exists(cache_file):
            try:
                with open(cache_file, encoding="utf-8") as f:
                    existing = json.load(f)
            except (OSError, ValueError) as e:
                # A truncated or corrupt file must not abort the whole run;
                # fall through and rebuild it (JSONDecodeError is a ValueError).
                print(f" ⚠️ {ds_name}: cache unreadable ({e}) — re-downloading")
            else:
                print(f" ✅ {ds_name}: already cached ({len(existing)} examples) — skipping")
                total += len(existing)
                continue

        print(f" Downloading {ds_name} ({cap} examples)...", end=" ", flush=True)
        t0 = time.time()
        try:
            examples = LOADERS[ds_name](cap)
            if examples:
                _write_json_atomic(cache_file, examples)
        except Exception as e:
            # Top-level boundary: report and move on to the next dataset.
            print(f"❌ Failed: {e}")
            failed.append(ds_name)
            continue

        if not examples:
            # Caching an empty list would make every later run skip this
            # dataset as "already cached".
            print("❌ Failed: loader returned no examples")
            failed.append(ds_name)
            continue

        elapsed = time.time() - t0
        total += len(examples)
        print(f"✅ {len(examples)} examples saved ({elapsed:.0f}s)")

    print(f"\n{'='*55}")
    print(f" Done! Total examples cached: {total:,}")
    print(f" Cache location: {CACHE_DIR}")
    if failed:
        print(f" Failed datasets (re-run to retry): {', '.join(failed)}")
    print("\n Now upload to HF Space:")
    print(" python -c \"from huggingface_hub import HfApi; api = HfApi(); api.upload_folder(folder_path='.', repo_id='SamSankar/hallucination-guard-env', repo_type='space', ignore_patterns=['__pycache__', '*.pyc']); print('Upload complete!')\"")
    print(f"{'='*55}\n")


if __name__ == "__main__":
    main()
|
|