"""Gradio demo: a pirate-themed chatbot backed by the Zephyr-7B-beta model."""

import re

import gradio as gr
import torch
from transformers import pipeline

# Built once at import time so every request reuses the same pipeline object.
pipe = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

SYSTEM_PROMPT = "You are a pirate chatbot who always responds with Arr!"

# Fixed head of every prompt: the system turn followed by the opening of the
# user turn. Each turn is closed with "</s>", as in the chat layout shown on
# the Zephyr model card.
instruction = f"<|system|>\n{SYSTEM_PROMPT}</s>\n<|user|>\n"

# Role markers / end-of-turn token. They are removed from user text so that a
# user cannot close their own turn and inject a fake system or assistant turn.
_CONTROL_TOKENS = re.compile(r"<\|(?:system|user|assistant)\|>|</s>")


def _build_prompt(user_prompt):
    """Return the full chat prompt for *user_prompt*, ready for generation.

    The prompt ends with an open ``<|assistant|>`` turn so the model answers
    as the assistant instead of continuing the user's message.
    """
    user_text = _CONTROL_TOKENS.sub("", user_prompt or "").strip()
    return f"{instruction}{user_text}</s>\n<|assistant|>\n"


def infer(user_prompt):
    """Generate the chatbot's reply to *user_prompt*.

    Args:
        user_prompt: Text entered by the user; ``None`` is treated as empty.

    Returns:
        The newly generated text only, with surrounding whitespace removed.
    """
    prompt = _build_prompt(user_prompt)
    print(f"PROMPT: {prompt}")
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        # Ask for the completion only. Stripping the echoed prompt with a
        # regex afterwards is fragile: it fails when the expected marker is
        # missing and cuts at the wrong place when the user text contains it.
        return_full_text=False,
    )
    print(outputs)
    return outputs[0]["generated_text"].strip()


gr.Interface(
    fn=infer,
    inputs=[gr.Textbox()],
    outputs=[gr.Textbox()],
).queue().launch()