from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# from transformers import pipeline
import torch
import gradio as gr
from huggingface_hub import InferenceClient

# BART model fine-tuned to expand a short role (e.g. "Chef") into a detailed "Act as ..." prompt.
tokenizer = AutoTokenizer.from_pretrained("Kaludi/chatgpt-gpt4-prompts-bart-large-cnn-samsum")
model = AutoModelForSeq2SeqLM.from_pretrained(
    "Kaludi/chatgpt-gpt4-prompts-bart-large-cnn-samsum", from_tf=True
)

# Zephyr served through the Hugging Face Inference API.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
# pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-alpha", torch_dtype=torch.bfloat16, device_map="auto")


def generateZep(inputuno):
    prompt = inputuno
    # promptdos = inputdos

    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=3556,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=42,
    )

    # Step 1: expand the short role into a full prompt with the BART model.
    batch = tokenizer(prompt, return_tensors="pt")
    generated_ids = model.generate(batch["input_ids"])
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    new_prompt = decoded[0]

    # messages = [
    #     {"role": "system", "content": str(new_prompt)},
    #     {"role": "user", "content": str(promptdos)},
    # ]

    # Step 2: stream Zephyr's completion of the expanded prompt, yielding partial
    # output so the Gradio textbox updates as tokens arrive.
    formatted_prompt = f"[INST] {new_prompt} [/INST]"
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )

    output = ""
    for response in stream:
        output += response.token.text
        yield output


# Interface
input_prompt = gr.Textbox(label="Act as: ", value="Chef")
# input_promptdos = gr.Textbox(label="Prompt: ", value="Recipe for ham croquettes")
output_component = gr.Textbox(label="Output: ")
examples = [["photographer"], ["developer"], ["teacher"], ["human resources staff"], ["recipe for ham croquettes"]]
description = ""

PerfectGPT = gr.Interface(
    fn=generateZep,
    inputs=input_prompt,
    outputs=output_component,
    examples=examples,
    title="🗿 PerfectGPT v1 🗿",
    description=description,
)

PerfectGPT.launch()