# Norah / download_dataset.py
# Visdom9's picture
# Pushing fine-tuned Norah model
# 3254881
from datasets import load_dataset
# Fetch the "train" split of OpenAssistant/oasst1 and, in the same expression,
# drop every row whose "lang" field is not "fr" (French only).
dataset = load_dataset(
    "OpenAssistant/oasst1",
    split="train",
).filter(lambda record: record["lang"] == "fr")

# Sanity check: show the first remaining row so the result can be inspected.
print("Example conversation from dataset:")
print(dataset[0])