# NOTE: the next three lines are residue from the web file viewer this source
# was copied from (size banner, per-line commit hashes, line-number gutter).
# File size: 3,850 Bytes
# ca5433e 6ebba65 2682883 9c95fcd 0256a46 2682883 ca5433e 51c40d0 1015c50 2682883 9c95fcd 9441e83 9c95fcd 1015c50 4d0d5e0 ea82e95 755e1ba 2682883 ea82e95 2682883 d53c674 9c95fcd 2682883 755e1ba 51c40d0 2682883 ea82e95 755e1ba 2682883 755e1ba 3e08e8e 2682883 755e1ba ea82e95 755e1ba 2682883 f6df3e8 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
import gradio as gr
import sentencepiece
from tokenization_yi import YiTokenizer
# Hub repository the causal-LM weights are pulled from.
model_id = "larryvrh/Yi-6B-200K-Llamafied"

# Directory the tokenizer files are read from (the app's working directory).
tokenizer_path = "./"

# CUDA caching-allocator hint; see the PyTorch CUDA memory-management notes
# for the exact meaning of `max_split_size_mb`.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:120"
# Markdown/HTML banner rendered at the top of the page via gr.Markdown.
# The heading line must not end with a quote character (it would be rendered
# literally), and the badge link is not wrapped in any <h3>, so no closing
# </h3> tag belongs here.
DESCRIPTION = """
# 👋🏻Welcome to 🙋🏻♂️Tonic's🧑🏻🚀YI-200K🚀
You can use this Space to test out the current model [larryvrh/Yi-6B-200K-Llamafied](https://huggingface.co/larryvrh/Yi-6B-200K-Llamafied) a "Llamified" version of [01-ai/Yi-6B-200k](https://huggingface.co/01-ai/Yi-6B-200k) based on [01-ai/Yi-34B](https://huggingface.co/01-ai/Yi-34B)
You can also use 🧑🏻🚀YI-200K🚀 by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic1/YiTonic?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community on 👻Discord: [Discord](https://discord.gg/nXx5wbX9) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
"""
# Load the tokenizer from the local tokenizer files. (A previous line loaded a
# full causal-LM from this directory into `tokenizer` and was immediately
# overwritten; it only cost time/memory or failed outright, so it is gone.)
tokenizer = YiTokenizer.from_pretrained(tokenizer_path)

# Load the model weights. The repository id is the first *positional*
# parameter of `from_pretrained` (`pretrained_model_name_or_path`); passing it
# as `model_id=` leaves that required argument missing and raises TypeError.
# NOTE(security): trust_remote_code=True lets code shipped in the Hub
# repository run locally -- keep `model_id` pointed at a trusted repo.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True,
)
def predict(
    message: str,
    max_new_tokens: int = 4056,
    temperature: float = 3.5,
    top_p: float = 0.9,
    top_k: int = 800,
    do_sample: bool = False,
) -> list[tuple[str, str]]:
    """Generate a completion for ``message`` and return it as chatbot history.

    Args:
        message: Raw user prompt; surrounding whitespace is stripped.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.
        temperature: Sampling temperature; only forwarded when ``do_sample``
            is true.
        top_p: Nucleus-sampling threshold; only forwarded when ``do_sample``
            is true.
        top_k: Top-k sampling cutoff; only forwarded when ``do_sample`` is
            true.
        do_sample: When true, sample using the three parameters above;
            otherwise let ``generate`` run without sampling.

    Returns:
        A one-element list holding a ``(user_message, bot_response)`` pair,
        or an empty list when ``message`` is empty or whitespace-only.
    """
    prompt = (message or "").strip()
    if not prompt:
        # Nothing to generate from; avoid calling the model on empty input.
        return []

    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    prompt_length = input_ids.shape[-1]

    generation_kwargs = {
        # Equivalent to max_length = prompt length + max_new_tokens.
        # UI sliders may deliver floats, so coerce to int.
        "max_new_tokens": int(max_new_tokens),
        "pad_token_id": tokenizer.eos_token_id,
        "do_sample": bool(do_sample),
    }
    if do_sample:
        # Sampling knobs have no effect without sampling, so only forward
        # them when sampling is actually enabled.
        generation_kwargs.update(
            temperature=float(temperature),
            top_p=float(top_p),
            top_k=int(top_k),
        )

    response_ids = model.generate(input_ids, **generation_kwargs)

    # Drop the echoed prompt tokens; decode only the newly generated tail.
    response = tokenizer.decode(
        response_ids[0, prompt_length:], skip_special_tokens=True
    )

    # The chatbot renders each pair as (user, bot); the old ("bot", response)
    # pair showed the literal word "bot" as the user's message.
    return [(prompt, response)]
# Build the Gradio page. The source arrived with all indentation stripped and
# a stray "|" after demo.launch(); the nesting below is reconstructed so that
# every component and the click handler are created inside the Blocks context.
with gr.Blocks(theme='ParityError/Anime') as demo:
    gr.Markdown(DESCRIPTION)

    # Prompt entry, submit button and the conversation display.
    with gr.Group():
        textbox = gr.Textbox(placeholder='Enter your message here', label='Your Message', lines=2)
        submit_button = gr.Button('Submit', variant='primary')
        chatbot = gr.Chatbot(label='TonicYi-6B-200K')

    # Generation parameters, collapsed by default.
    with gr.Accordion(label='Advanced options', open=False):
        max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=8000)
        temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=1.2)
        top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
        top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=900)
        # This checkbox feeds predict's `do_sample` argument.
        do_sample_checkbox = gr.Checkbox(label='Disable for faster inference', value=False)

    # The order of `inputs` must match predict's positional parameters.
    submit_button.click(
        fn=predict,
        inputs=[textbox, max_new_tokens, temperature, top_p, top_k, do_sample_checkbox],
        outputs=chatbot,
    )

demo.launch()