# magpie / app.py
import gradio as gr
import transformers
import torch
import json
from transformers import AutoTokenizer
import os
from huggingface_hub import login
import spaces
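
# Authenticate with the Hugging Face Hub so the gated Llama 3 weights can be downloaded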
HF_TOKEN = os.getenv("HF_TOKEN")
login(HF_TOKEN)
# Load the model
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, add_special_tokens=True)
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
)
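# The pipeline is created once at import time; the @spaces.GPU decorator below
# handles GPU allocation when this app runs on a ZeroGPU Space.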
# Load the model configuration
with open("model_configs.json", "r") as f:
    model_configs = json.load(f)

model_config = model_configs[model_id]
# Pre-query template used to elicit a user instruction from the model
extract_input = model_config["extract_input"]
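# model_configs.json is assumed to map model ids to per-model settings; for
# Llama-3-Instruct the pre-query template is expected to look roughly like
# "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n" (illustrative;
# the exact string is whatever the config file provides).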
@spaces.GPU
def generate_instruction_response():
    """Generate a synthetic (instruction, response) pair using the Magpie approach."""
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    # Step 1: prompt the model with only the pre-query template; the model
    # completes it with a user instruction.
    instruction = pipeline(
        extract_input,
        max_new_tokens=2048,
        eos_token_id=terminators,
        do_sample=True,
        temperature=1,
        top_p=1,
    )
    # Keep only the first line of the completion as the sanitized instruction.
    sanitized_instruction = instruction[0]["generated_text"][
        len(extract_input) :
    ].split("\n")[0]

    # Step 2: wrap the generated instruction in the chat template and let the
    # model produce the assistant response.
    response_template = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{sanitized_instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"""
    response = pipeline(
        response_template,
        max_new_tokens=2048,
        eos_token_id=terminators,
        do_sample=True,
        temperature=1,
        top_p=1,
    )

    user_message = sanitized_instruction
    assistant_response = response[0]["generated_text"][len(response_template) :]
    return user_message, assistant_response
title = "Magpie demo"
description = """
This Gradio demo allows you to explore the approach outlined in the Magpie paper. "Magpie is a data synthesis pipeline that generates high-quality alignment data. Magpie does not rely on prompt engineering or seed questions. Instead, it directly constructs instruction data by prompting aligned LLMs with a pre-query template for sampling instructions." Essentially, instead of prompting the model with a question or a starting query, this approach relies on the pre-query template of the model to generate instructions. Essentially, you are giving the model only the template up to the point where a user instruction would start, and then the model generates the instruction and the response.
In this demo, you can see how the model generates a user instruction and a model response.
You can learn more about the approach [in the paper](https://huggingface.co/papers/2406.08464).
"""
# Create the Gradio interface
iface = gr.Interface(
    fn=generate_instruction_response,
    inputs=[],
    outputs=[
        gr.Text(label="Generated User Instruction"),
        gr.Text(label="Generated Model Response"),
    ],
    title=title,
    description=description,
)
# Launch the app
iface.launch(debug=True)