File size: 4,761 Bytes
2bc99a0 927b5de 2bc99a0 b723b47 a3c3064 fc09aa4 af27f87 63a0917 3d2716e 2bc99a0 5628f77 9b2a1cf a3c3064 5628f77 31689b5 2bc99a0 9bc49ef 0d5c130 9bc49ef 31689b5 0d5c130 63a0917 b9faabf 9bc49ef 2bc99a0 aa9a886 a3c3064 2bc99a0 aa9a886 2bc99a0 b9faabf 5ab0bbc a3c3064 2bc99a0 a3c3064 b9faabf 5628f77 b9faabf 34c221f b9faabf 5ab0bbc b9faabf 5628f77 8de5029 c23b550 97a4588 c5ff21b c23b550 97a4588 927b5de 97a4588 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import os
import math
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr
import sentencepiece
from tokenization_xgen import XgenTokenizer
# Text shown at the top of the demo page (Markdown is allowed in the description).
title = "Welcome to 🙋🏻♂️Tonic's🌷Tulu Chat!"
description = "[allenai/tulu-2-dpo-7b](https://huggingface.co/allenai/tulu-2-dpo-7b) and larger Tulu-2 models are Instruct Llama Finetunes using the [mistralai/Mistral-7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) recipe. You can use [allenai/tulu-2-13b](https://huggingface.co/allenai/tulu-2-13b) here via API using Gradio by scrolling down and clicking Use 'Via API' or privately by [cloning this space on huggingface](https://huggingface.co/spaces/Tonic1/TuluDemo?duplicate=true) See also the large model here : [allenai/tulu-2-dpo-70b](https://huggingface.co/allenai/tulu-2-dpo-70b) . [Join my active builders' server on discord](https://discord.gg/VqTxc76K3u). Let's build together!."

# Allocator tuning for the CUDA caching allocator; must be set before the
# first CUDA allocation happens (i.e. before the model is loaded below).
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Single source of truth for the checkpoint: the tokenizer and the model are
# both loaded from `model_name` so they can never point at different repos.
model_name = "allenai/tulu-2-dpo-13b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
class TuluChatBot:
    """Single-turn chat wrapper around a causal LM and its tokenizer.

    The bot keeps one mutable system message, builds a Tulu-style prompt
    (``<|system|>`` / ``<|user|>`` / ``<|assistant|>`` sections, each tag
    followed by a newline) and returns only the newly generated text.
    """

    def __init__(self, model, tokenizer, system_message="You are 🌷Tulu, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."):
        """Store the model, the tokenizer and the initial system message.

        Args:
            model: Object exposing ``.device`` and ``.generate(...)``.
            tokenizer: Callable returning ``input_ids`` / ``attention_mask``
                and exposing ``.decode(...)``.
            system_message: System prompt used until it is replaced; it is
                also remembered as the fallback for blank updates.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.system_message = system_message
        # Remembered so that a blank update restores it instead of erasing it.
        self._default_system_message = system_message

    def set_system_message(self, new_system_message):
        """Replace the system message.

        A blank value (``None``, empty or whitespace-only) restores the
        message given at construction time, so an empty UI textbox does not
        wipe the default persona.
        """
        if new_system_message and new_system_message.strip():
            self.system_message = new_system_message
        else:
            self.system_message = self._default_system_message

    def format_prompt(self, user_message):
        """Return the full prompt string for one user turn.

        The system section is omitted when the system message is empty. The
        prompt always ends with ``<|assistant|>`` plus a newline, which is
        where the model is expected to continue.
        """
        sections = []
        if self.system_message:
            sections.append(f"<|system|>\n{self.system_message}\n")
        sections.append(f"<|user|>\n{user_message}\n<|assistant|>\n")
        return "".join(sections)

    def predict(self, user_message, temperature, max_new_tokens, top_p, repetition_penalty, do_sample):
        """Generate a reply to ``user_message`` and return it as text.

        Args:
            user_message: The user's turn, inserted verbatim into the prompt.
            temperature: Sampling temperature (only used when ``do_sample``).
            max_new_tokens: Upper bound on generated tokens, prompt excluded.
            top_p: Nucleus-sampling threshold (only used when ``do_sample``).
            repetition_penalty: Penalty forwarded to ``generate``.
            do_sample: ``True`` for sampling, ``False`` for greedy decoding.

        Returns:
            The decoded continuation only; the prompt is not echoed back.
        """
        prompt = self.format_prompt(user_message)
        encoded = self.tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
        target_device = self.model.device
        input_ids = encoded["input_ids"].to(target_device)
        attention_mask = encoded["attention_mask"].to(target_device)
        prompt_length = input_ids.shape[1]

        generation_kwargs = {
            "attention_mask": attention_mask,
            # Sliders may deliver floats; generate() expects an integer budget.
            "max_new_tokens": int(max_new_tokens),
            "repetition_penalty": repetition_penalty,
            "do_sample": do_sample,
        }
        if do_sample:
            # Sampling-only knobs: passing them in greedy mode has no effect
            # and only triggers configuration warnings.
            generation_kwargs["temperature"] = temperature
            generation_kwargs["top_p"] = top_p

        output_ids = self.model.generate(input_ids, **generation_kwargs)

        # generate() returns prompt + continuation; keep the continuation only.
        new_token_ids = output_ids[0][prompt_length:]
        return self.tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
def gradio_predict(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample):
    """Gradio callback: apply the system prompt and return the bot's reply.

    Args:
        user_message: Text from the "Your Message" box.
        system_message: Text from the system-prompt box; forwarded to the
            shared bot via ``set_system_message``.
        max_new_tokens, temperature, top_p, repetition_penalty: Values of the
            advanced sliders; only honoured when ``do_sample`` is true.
        do_sample: State of the "Advanced" checkbox. When false, the slider
            values are replaced by fixed defaults and decoding is greedy.

    Returns:
        The string returned by ``Tulu_bot.predict``.
    """
    Tulu_bot.set_system_message(system_message)
    if not do_sample:
        # "Advanced" is off: ignore whatever the (hidden) sliders hold and
        # fall back to the same defaults the sliders start with.
        # The token budget was previously assigned to an unused `max_length`
        # local, so it never took effect.
        max_new_tokens = 1269
        temperature = 1.2
        top_p = 0.9
        # NOTE(review): was 0.9, which is below the slider's minimum of 1.0
        # and rewards repetition; aligned with the slider default of 1.9.
        repetition_penalty = 1.9
    return Tulu_bot.predict(user_message, temperature, max_new_tokens, top_p, repetition_penalty, do_sample)
# Shared bot instance used by the Gradio callback.
Tulu_bot = TuluChatBot(model, tokenizer)

with gr.Blocks(title=title) as demo:
    # Page header: the title and description defined at the top of the file.
    gr.Markdown(f"# {title}")
    gr.Markdown(description)

    with gr.Row():
        user_message = gr.Textbox(label="Your Message", lines=3)
        system_message = gr.Textbox(label="Introduce a Character Here or Set a Scene (system prompt)", lines=2)

    with gr.Row():
        do_sample = gr.Checkbox(label="Advanced", value=False)

    # Advanced settings live in an accordion that stays hidden until the
    # "Advanced" checkbox is ticked (visibility is wired up just below).
    with gr.Accordion("Advanced Settings", open=False, visible=False) as advanced_settings:
        with gr.Row():
            max_new_tokens = gr.Slider(label="Max new tokens", value=1269, minimum=550, maximum=3200, step=1)
            temperature = gr.Slider(label="Temperature", value=1.2, minimum=0.05, maximum=4.0, step=0.05)
            top_p = gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.01, maximum=0.99, step=0.05)
            repetition_penalty = gr.Slider(label="Repetition penalty", value=1.9, minimum=1.0, maximum=2.0, step=0.05)

    # Component visibility must be changed through an event listener that
    # returns an update; it cannot be bound to a lambda at construction time.
    do_sample.change(
        lambda checked: gr.update(visible=checked),
        inputs=do_sample,
        outputs=advanced_settings,
    )

    submit_button = gr.Button("Submit")
    output_text = gr.Textbox()

    def process(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample):
        """Forward the current UI values to ``gradio_predict`` unchanged."""
        return gradio_predict(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample)

    submit_button.click(
        process,
        inputs=[user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample],
        outputs=output_text,
    )

demo.launch()