Spaces:
Sleeping
Sleeping
# pip install "distilabel[vllm] @ git+https://github.com/argilla-io/distilabel.git@develop"
# pip install flash-attn --no-build-isolation
# huggingface-cli login
| import time | |
| from distilabel.pipeline import Pipeline | |
| from distilabel.steps import KeepColumns, LoadHubDataset | |
| from distilabel.steps.tasks import PrometheusEval | |
| from distilabel.llms import TransformersLLM | |
if __name__ == "__main__":
    # Script entry point: build a three-step distilabel pipeline
    # (load -> Prometheus evaluation -> column selection), run it, report the
    # elapsed time and push the resulting distiset to the Hugging Face Hub.

    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()

    with Pipeline(name="prometheus") as pipeline:
        # Read the "test" split of the Hub dataset; the output mapping exposes
        # its `prompt`/`completion` columns as `instruction`/`generation`.
        load_dataset = LoadHubDataset(
            name="load_dataset",
            repo_id="HuggingFaceH4/instruction-dataset",
            split="test",
            output_mappings={"prompt": "instruction", "completion": "generation"},
        )

        # Evaluate each generation with Prometheus 2 (7B) in absolute mode
        # against the "factual-validity" rubric, without a reference answer.
        # The chat template places the first two messages inside a single
        # [INST] ... [/INST] block.
        task = PrometheusEval(
            name="task",
            llm=TransformersLLM(
                model="prometheus-eval/prometheus-7b-v2.0",
                chat_template="[INST] {{ messages[0]['content'] }}\n{{ messages[1]['content'] }}[/INST]",
            ),
            mode="absolute",
            rubric="factual-validity",
            reference=False,
            num_generations=1,
            group_generations=False,
        )

        # Keep only the columns of interest in the final dataset.
        keep_columns = KeepColumns(
            name="keep_columns",
            columns=["instruction", "generation", "feedback", "result", "model_name"],
        )

        # Wire the steps together: load -> evaluate -> select columns.
        load_dataset >> task >> keep_columns  # type: ignore

    # Generation settings are supplied as runtime parameters, keyed by the
    # evaluation step's name.
    distiset = pipeline.run(
        parameters={
            task.name: {  # type: ignore
                "llm": {
                    "generation_kwargs": {
                        "max_new_tokens": 1024,
                        "temperature": 0.7,
                    },
                },
            },
        },
    )

    elapsed = time.perf_counter() - start_time
    print(f"--- {elapsed} seconds ---")

    # Upload only when the run actually returned a distiset.
    if distiset is not None:
        distiset.push_to_hub("instruction-dataset-prometheus")