Spaces:

argilla
/

synthetic-data-generator

Running

feat: Add support for textcat

adc79ce 3 months ago

1.21 kB

	from typing import List

	import pandas as pd

	# Example free-text dataset descriptions. By content, entry 0 (customer
	# reviews) and entry 1 (news articles) appear to pair up by position with
	# the entries of DEFAULT_DATASETS and DEFAULT_SYSTEM_PROMPTS.
	DEFAULT_DATASET_DESCRIPTIONS: List[str] = [
	    "A dataset covering customer reviews for an e-commerce website.",
	    "A dataset covering news articles about various topics.",
	]

	# Two tiny example frames, each with a "text" and a "label" column.
	# The first holds one string label per row; the second holds a list of
	# labels per row (several categories for one text).
	DEFAULT_DATASETS: List[pd.DataFrame] = [
	    pd.DataFrame.from_dict(columns)
	    for columns in (
	        # Customer-review sample: a single label per text.
	        {
	            "text": [
	                "I love the product! It's amazing and I'll buy it again.",
	                "The product was okay, but I wouldn't buy it again.",
	            ],
	            "label": ["positive", "negative"],
	        },
	        # News-article sample: a list of labels per text.
	        {
	            "text": [
	                "Yesterday, the US stock market had a significant increase.",
	                "New research suggests that the Earth is not a perfect sphere.",
	            ],
	            "label": [["economy", "politics"], ["science", "environment"]],
	        },
	    )
	]

	# Example classification instructions: one asking for a positive/negative
	# decision on a customer review, one asking for one or more categories
	# for a news article.
	DEFAULT_SYSTEM_PROMPTS: List[str] = [
	    "Classify the following customer review as positive or negative.",
	    "Classify the following news article into one or more categories.",
	]


	def generate_pipeline_code(
	    system_prompt: str, labels: List[str], multi_label: bool
	) -> str:
	    """Return pipeline source code as a string.

	    This is currently a placeholder: the returned text is a fixed template
	    and none of the arguments influence it.

	    Args:
	        system_prompt: Instruction text for the classification task
	            (not used yet).
	        labels: Label names for the task (not used yet).
	        multi_label: Whether a text may receive several labels
	            (not used yet).

	    Returns:
	        The template text, which starts with a newline.
	    """
	    # The literal below is emitted verbatim, so its lines deliberately do
	    # not follow the indentation of the surrounding function body.
	    return """
	from distilabel import Distilabel

	#### PIPELINE CODE HERE
	"""