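# Page residue from the hosting site's file viewer, kept as comments so the module parses:
#   last commit: "feat: Add support for textcat" (adc79ce) by davidberenstein1957
#   viewer links: raw / history blame; file size: 1.21 kB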
from typing import List
import pandas as pd
# Example free-text dataset descriptions. They are listed in the same order
# as the sample frames in DEFAULT_DATASETS and the prompts in
# DEFAULT_SYSTEM_PROMPTS (reviews first, news second).
# NOTE(review): presumably consumed index-aligned with those lists - confirm
# against the caller.
DEFAULT_DATASET_DESCRIPTIONS = [
    # Sentiment-style example.
    "A dataset covering customer reviews for an e-commerce website.",
    # Topic-style example.
    "A dataset covering news articles about various topics.",
]
# Tiny sample frames, each with a "text" column and a "label" column.
# The first frame holds one string label per row; the second holds a list of
# labels per row.
DEFAULT_DATASETS = [
    # Customer reviews: one label per text.
    pd.DataFrame(
        {
            "text": [
                "I love the product! It's amazing and I'll buy it again.",
                "The product was okay, but I wouldn't buy it again.",
            ],
            "label": ["positive", "negative"],
        }
    ),
    # News articles: several labels per text.
    pd.DataFrame(
        {
            "text": [
                "Yesterday, the US stock market had a significant increase.",
                "New research suggests that the Earth is not a perfect sphere.",
            ],
            "label": [
                ["economy", "politics"],
                ["science", "environment"],
            ],
        }
    ),
]
# Example classification instructions, in the same order as the entries of
# DEFAULT_DATASET_DESCRIPTIONS and DEFAULT_DATASETS (reviews first, news
# second).
DEFAULT_SYSTEM_PROMPTS = [
    # Two-way sentiment prompt.
    "Classify the following customer review as positive or negative.",
    # Multi-category topic prompt.
    "Classify the following news article into one or more categories.",
]
def generate_pipeline_code(
    system_prompt: str, labels: List[str], multi_label: bool
) -> str:
    """Return the pipeline code snippet for a text-classification setup.

    The current implementation is a stub: it returns a fixed placeholder
    snippet and does not read any of its arguments.

    Args:
        system_prompt: Classification instruction; currently unused.
        labels: Candidate label names; currently unused.
        multi_label: Whether several labels may apply per text; currently
            unused.

    Returns:
        The placeholder snippet: a leading newline, an import line, a
        ``#### PIPELINE CODE HERE`` marker line, and a trailing newline.
    """
    # The snippet is assembled from explicit pieces so that the function body
    # can be indented normally without leaking indentation into the returned
    # text.
    # NOTE(review): the import line inside the snippet is placeholder text -
    # confirm that `distilabel` actually exposes `Distilabel` before showing
    # this to users.
    return (
        "\n"
        "from distilabel import Distilabel\n"
        "#### PIPELINE CODE HERE\n"
    )