Locutusque's picture
Update app.py
8bd462e verified
raw
history blame
2.83 kB
import gradio as gr
from transformers import pipeline, AutoTokenizer, TextIteratorStreamer
import torch
import spaces
from threading import Thread
import os
@spaces.GPU
def load_model(model_name):
    """Build a text-generation pipeline for ``model_name`` on the GPU.

    Args:
        model_name: Hub repository id (or local path) of the model to load.

    Returns:
        A ``transformers`` text-generation pipeline placed on CUDA in
        bfloat16.

    Notes:
        * ``trust_remote_code=True`` lets the model repository ship and run
          its own modelling code; only point this at repositories you trust.
        * The access token is read from the ``token`` environment variable.
          When the variable is unset, ``None`` is passed so public models
          still load anonymously instead of failing with a ``KeyError``.
    """
    return pipeline(
        "text-generation",
        model=model_name,
        device_map="cuda",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        token=os.environ.get("token"),
    )
@spaces.GPU()
def generate(
    model_name,
    user_input,
    temperature=0.4,
    top_p=0.95,
    top_k=50,
    max_new_tokens=256,
    repetition_penalty=1.0,
):
    """Stream a completion for ``user_input`` from the selected model.

    This is a generator: after every chunk produced by the model it yields
    the full text generated so far, so a UI can redraw the output box
    incrementally.

    Args:
        model_name: Model id passed to ``load_model``.
        user_input: The user's prompt text.
        temperature: Sampling temperature. A value of 0 (or below) switches
            to greedy decoding instead of sampling.
        top_p: Nucleus-sampling probability mass (used only when sampling).
        top_k: Top-k cutoff (used only when sampling).
        max_new_tokens: Upper bound on the number of generated tokens.
        repetition_penalty: Penalty applied to repeated tokens; 1.0 leaves
            the distribution untouched.

    Yields:
        str: The accumulated generated text (prompt excluded).
    """
    pipe = load_model(model_name)

    # "M4-ai/tau-1.8B" receives the raw prompt; every other model gets the
    # prompt wrapped in a ChatML-style system/user/assistant template.
    if model_name == "M4-ai/tau-1.8B":
        prompt = user_input
    else:
        prompt = f"<|im_start|>system\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.<|im_end|>\n<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"

    # The streamer must decode with the tokenizer of the pipeline in use.
    streamer = TextIteratorStreamer(
        pipe.tokenizer,
        timeout=240.0,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generation_kwargs = dict(
        text_inputs=prompt,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        num_beams=1,
        repetition_penalty=repetition_penalty,
        eos_token_id=pipe.tokenizer.eos_token_id,
    )
    if temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
        )
    else:
        # Sampling with a non-positive temperature is rejected by
        # transformers; the error would surface in the worker thread and
        # leave the streamer waiting for its timeout. Decode greedily instead.
        generation_kwargs["do_sample"] = False

    # Generation runs in a background thread so this generator can consume
    # the streamer while tokens are still being produced.
    worker = Thread(target=pipe, kwargs=generation_kwargs)
    worker.start()

    text = ""
    for chunk in streamer:
        text += chunk
        yield text

    # The streamer is exhausted, so the pipeline call is finishing up; wait
    # for it so the thread does not outlive this request.
    worker.join()
# Hub repositories offered in the "Model" dropdown; the first one is preselected.
model_choices = [
    "M4-ai/Hercules-Mini-1.8B",
    "Locutusque/Hyperion-3.0-Mistral-7B-DPO",
    "Locutusque/OpenCerebrum-1.5-Mistral-11B-Evolved-beta",
    "M4-ai/tau-1.8B",
    "Locutusque/OpenCerebrum-1.5-Mistral-7b-v0.2-alpha",
    "Locutusque/Hercules-4.0-Mistral-v0.2-7B",
    "Locutusque/Hercules-3.1-Mistral-7B",
]

# What are the best options?
# The input components are listed in the same order as generate()'s
# positional parameters: model, prompt, temperature, top_p, top_k, max tokens.
g = gr.Interface(
    fn=generate,
    inputs=[
        gr.components.Dropdown(
            label="Model",
            choices=model_choices,
            value=model_choices[0],
            interactive=True,
        ),
        gr.components.Textbox(
            label="Prompt",
            lines=2,
            value="Write me a Python program that calculates the factorial of a given number.",
        ),
        gr.components.Slider(label="Temperature", minimum=0, maximum=1, value=0.4),
        gr.components.Slider(label="Top p", minimum=0, maximum=1, value=0.95),
        gr.components.Slider(label="Top k", minimum=0, maximum=100, step=1, value=50),
        gr.components.Slider(label="Max tokens", minimum=1, maximum=2048, step=1, value=1024),
    ],
    outputs=[gr.Textbox(label="Output", lines=10)],
    title="Locutusque's Language Models",
    description=(
        "Try out Locutusque's (or other's) language models here! "
        "Credit goes to Mediocreatmybest for this space. "
        "You may also find some experimental preview models that have not been made public here."
    ),
    concurrency_limit=1,
)

g.launch(max_threads=4)