File size: 705 Bytes
beb5479
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from datasets import load_dataset
import pandas as pd, os

def load_retail_dataset(
    n_samples: int = 200,
    output_path: str = "datasets/retail_sample.jsonl",
) -> pd.DataFrame:
    """
    Build a small retail/e-commerce QA sample and save it as JSON Lines.

    Loads the first ``n_samples`` rows of the "amazon_polarity" train split
    with ``datasets.load_dataset``, turns every row into a question/answer
    pair and writes the pairs to ``output_path`` (one JSON record per line).
    No synthetic fallback is generated: if loading fails, the exception
    raised by ``load_dataset`` propagates to the caller.

    Args:
        n_samples: Number of leading rows to take from the train split.
            Must be at least 1.
        output_path: Destination ``.jsonl`` file. Missing parent
            directories are created.

    Returns:
        A DataFrame with the columns "question" and "answer".

    Raises:
        ValueError: If ``n_samples`` is smaller than 1.
    """
    # An empty slice would yield a frame without the "title"/"content"
    # columns and fail later with an unhelpful KeyError.
    if n_samples < 1:
        raise ValueError(f"n_samples must be >= 1, got {n_samples}")

    dataset = load_dataset("amazon_polarity", split=f"train[:{n_samples}]")
    df = pd.DataFrame(dataset)

    # The review title is phrased as the question, the review body is
    # used verbatim as the answer.
    df["question"] = "Customer asks about this review: " + df["title"]
    df["answer"] = df["content"]
    sample = df[["question", "answer"]]

    # dirname is "" for a bare file name; os.makedirs("") would raise.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    sample.to_json(output_path, orient="records", lines=True)
    print(f"✅ Saved {output_path}")
    return sample

# Script entry point: build and save the sample only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    load_retail_dataset()