Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,224 Bytes
3f2900f b87f04a 3f2900f 3d0fb66 2d26215 3f2900f 3d0fb66 b87f04a 9dd7f05 888022c b87f04a 5545870 f80709d b87f04a 63c8ee6 b87f04a 3739aec b87f04a 3f2900f b87f04a 760514e b87f04a ad9f68c 3f2900f b87f04a 80f2b5c 03b4c46 b87f04a 3f2900f c7e5485 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch
import spaces
import os
# Cache of already-built pipelines, keyed by model name, so switching back to
# a previously used model does not re-download / re-initialize the weights.
_PIPELINE_CACHE = {}

@spaces.GPU
def load_model(model_name):
    """Return a text-generation pipeline for ``model_name``.

    The pipeline is created once per model and memoized in
    ``_PIPELINE_CACHE``; subsequent calls return the cached instance.
    """
    if model_name not in _PIPELINE_CACHE:
        _PIPELINE_CACHE[model_name] = pipeline(
            "text-generation",
            model=model_name,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
            # .get() avoids a KeyError when the "token" secret is absent
            # (e.g. running locally); token=None means anonymous Hub access.
            token=os.environ.get("token"),
        )
    return _PIPELINE_CACHE[model_name]
@spaces.GPU()
def generate(
    model_name,
    user_input,
    temperature=0.4,
    top_p=0.95,
    top_k=50,
    max_new_tokens=256,
):
    """Run sampled text generation with the selected model.

    Returns the full generated text (prompt included) from the first
    pipeline result.
    """
    text_gen = load_model(model_name)
    # Original note: "Set tokenize correctly. Otherwise ticking the box
    # breaks it." — presumably tau-1.8B expects a raw prompt while the other
    # models were tuned on the ChatML template; confirm against model cards.
    is_raw_prompt_model = model_name == "M4-ai/tau-1.8B"
    prompt = (
        user_input
        if is_raw_prompt_model
        else f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
    )
    results = text_gen(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
    )
    return results[0]["generated_text"]
# Models offered in the dropdown; the first entry is the default selection.
model_choices = [
    "Locutusque/OpenCerebrum-1.0-7B-beta",
    "M4-ai/NeuralReyna-Mini-1.8B-v0.2",
    "Locutusque/Hyperion-3.0-Mistral-7B-DPO",
    "Locutusque/Hyperion-3.0-Mistral-7B-alpha",
    "M4-ai/tau-1.8B",
    "Locutusque/Hercules-4.0-Mistral-v0.2-7B",
    "Locutusque/Hercules-2.5-Mistral-7B",
    "M4-ai/tau-0.5B",
]

# What are the best options?
_inputs = [
    gr.components.Dropdown(
        choices=model_choices,
        label="Model",
        value=model_choices[0],
        interactive=True,
    ),
    gr.components.Textbox(
        lines=2,
        label="Prompt",
        value="Write me a Python program that calculates the factorial of a given number.",
    ),
    gr.components.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
    gr.components.Slider(minimum=0, maximum=1, value=0.95, label="Top p"),
    gr.components.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
    gr.components.Slider(minimum=1, maximum=2048, step=1, value=1024, label="Max tokens"),
]

g = gr.Interface(
    fn=generate,
    inputs=_inputs,
    outputs=[gr.Textbox(lines=10, label="Output")],
    title="Locutusque's Language Models",
    description="Try out Locutusque's (or other's) language models here! Credit goes to Mediocreatmybest for this space. You may also find some experimental preview models that have not been made public here.",
    concurrency_limit=1,
)

g.launch(max_threads=4)
|