"""Upload the plaintiff and minutes samples to an Argilla instance.

Two datasets are logged:

* ``plaintiff_sample`` -- text classification records built from
  ``data/labelled_plaintiffs.csv`` (its ``filed_by`` column becomes ``text``).
* ``minutes_sample`` -- token classification records built from
  ``data/minutes.csv`` (its ``description`` column becomes ``text``), with a
  ``tokens`` column produced by the spaCy tokenizer.

The Argilla endpoint and key can be overridden through the
``ARGILLA_API_URL`` and ``ARGILLA_API_KEY`` environment variables; when they
are unset the values below are used.
"""

import os

import argilla as rg
import pandas as pd
import spacy
from datasets import Dataset

# Configuration
rg.init(
    api_url=os.environ.get(
        "ARGILLA_API_URL", "https://brancengregory-demo-argilla.hf.space"
    ),
    api_key=os.environ.get("ARGILLA_API_KEY", "team.apikey"),
)

# Plaintiffs
plaintiffs = pd.read_csv("data/labelled_plaintiffs.csv")
# Argilla reads the record content from a column named "text".
plaintiffs = plaintiffs.rename(columns={"filed_by": "text"})
plaintiff_records = rg.read_pandas(plaintiffs, task="TextClassification")
rg.log(plaintiff_records, "plaintiff_sample")

# Minutes
minutes = Dataset.from_csv("data/minutes.csv").rename_column("description", "text")

nlp = spacy.load("en_core_web_trf")


def tokenize(row):
    """Return ``{"tokens": [...]}`` with the spaCy token texts of ``row["text"]``.

    Only the tokenizer is run (``nlp.make_doc``): the rest of the pipeline
    (transformer, tagger, parser, ...) does not affect ``token.text`` and
    would only slow the mapping down.
    """
    return {"tokens": [token.text for token in nlp.make_doc(row["text"])]}


minutes = minutes.map(tokenize)
minutes_records = rg.read_datasets(minutes, task="TokenClassification")
rg.log(minutes_records, "minutes_sample")