from typing import List

import pandas as pd


# Short natural-language descriptions of the built-in example datasets.
# Entry 0 describes customer reviews, entry 1 describes news articles.
DEFAULT_DATASET_DESCRIPTIONS = [
    "A dataset covering customer reviews for an e-commerce website.",
    "A dataset covering news articles about various topics.",
]


# Tiny example datasets, each a DataFrame with a "text" and a "label" column.
DEFAULT_DATASETS = [
    # Single-label example: every row carries exactly one string label.
    pd.DataFrame.from_dict(
        {
            "text": [
                "I love the product! It's amazing and I'll buy it again.",
                "The product was okay, but I wouldn't buy it again.",
            ],
            "label": ["positive", "negative"],
        }
    ),
    # Multi-label example: every row carries a list of string labels.
    pd.DataFrame.from_dict(
        {
            "text": [
                "Yesterday, the US stock market had a significant increase.",
                "New research suggests that the Earth is not a perfect sphere.",
            ],
            "label": [["economy", "politics"], ["science", "environment"]],
        }
    ),
]


# Example classification instructions: entry 0 targets customer reviews
# (positive/negative), entry 1 targets news articles (one or more categories).
DEFAULT_SYSTEM_PROMPTS = [
    "Classify the following customer review as positive or negative.",
    "Classify the following news article into one or more categories.",
]


def generate_pipeline_code(
    system_prompt: str, labels: List[str], multi_label: bool
) -> str:
    """Return the source-code template for a distilabel pipeline.

    The current implementation is a placeholder: it returns a fixed
    template and does not yet interpolate any of its arguments.

    Args:
        system_prompt: Classification instruction (currently unused).
        labels: Candidate label names (currently unused).
        multi_label: Whether several labels may apply (currently unused).

    Returns:
        The template text: a leading blank line, an import line, a blank
        line and a placeholder marker, ending with a newline.
    """
    # NOTE(review): the import inside the template is emitted verbatim;
    # confirm it matches distilabel's public API before filling in the body.
    # The template is spelled out line by line so its exact whitespace is
    # explicit and independent of this function's indentation.
    return (
        "\n"
        "from distilabel import Distilabel\n"
        "\n"
        "#### PIPELINE CODE HERE\n"
    )