import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

import sentencepiece  # noqa: F401  (required by the Yi tokenizer; the import doubles as a dependency check)
from tokenization_yi import YiTokenizer  # local tokenizer, kept as an alternative (see below)


# Cap the CUDA allocator's split size to reduce memory fragmentation during long-context generation.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:120'

model_id = "01-ai/Yi-6B-200K"
tokenizer_path = "./"
eos_token_id = 7  # id of Yi's end-of-turn token, <|im_end|>

DESCRIPTION = """
# 👋🏻Welcome to 🙋🏻‍♂️Tonic's🧑🏻‍🚀YI-200K🚀
You can use this Space to test the current model, [01-ai/Yi-6B-200k](https://huggingface.co/01-ai/Yi-6B-200k), a "🦙Llamified" version based on [01-ai/Yi-34B](https://huggingface.co/01-ai/Yi-34B).
You can also use 🧑🏻‍🚀YI-200K🚀 by cloning this Space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic1/YiTonic?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
Join us: 🌟TeamTonic🌟 is always making cool demos! Join our active builders'🛠️ community on 👻Discord: [Discord](https://discord.gg/nXx5wbX9), on 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer), and on 🌐Github: [Polytonic](https://github.com/tonic-ai), where you can contribute to 🌟 [PolyGPT](https://github.com/tonic-ai/polygpt-alpha).
"""

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# tokenizer = YiTokenizer.from_pretrained(tokenizer_path)  # local alternative
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True)
# Yi ends each chat turn with <|im_end|>; point both the tokenizer and the model config
# at it so generation stops at the end of the assistant turn.
tokenizer.eos_token_id = eos_token_id
model.config.eos_token_id = eos_token_id
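
# Sanity check for the id-7 assumption above (verify against the model's tokenizer
# config if unsure):
#   >>> tokenizer.convert_ids_to_tokens(7)
#   '<|im_end|>'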

def format_prompt(user_message, system_message="You are YiTonic, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and follow ethical guidelines and promote positive behavior."):
    # Standard ChatML layout: system turn, user turn, then an open assistant turn for generation.
    prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant\n"
    return prompt
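
# Illustrative output of the template above (a minimal sketch, not captured from a live run):
#   >>> print(format_prompt("Hello!", system_message="Be brief."))
#   <|im_start|>system
#   Be brief.<|im_end|>
#   <|im_start|>user
#   Hello!<|im_end|>
#   <|im_start|>assistant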

def predict(message, system_message, max_new_tokens=4056, temperature=1.2, top_p=0.9, top_k=40, do_sample=False):
    # Parameter order must mirror the `inputs` list wired to submit_button.click below.
    formatted_prompt = format_prompt(message, system_message)

    input_ids = tokenizer.encode(formatted_prompt, return_tensors='pt')
    input_ids = input_ids.to(model.device)

    response_ids = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        no_repeat_ngram_size=9,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=do_sample,  # the sampling parameters above are ignored when this is False
    )

    # Decode only the newly generated tokens, then trim anything after the end-of-turn marker.
    response = tokenizer.decode(response_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
    truncate_str = "<|im_end|>"
    if truncate_str in response:
        response = response.split(truncate_str)[0]

    # gr.Chatbot expects (user, bot) pairs, so echo the user's message alongside the reply.
    return [(message, response)]
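
# Example call outside the UI (illustrative; assumes the model above loaded successfully):
#   pairs = predict("What does the 200K in Yi-6B-200K refer to?", "You are a concise assistant.")
#   print(pairs[0][1])
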
with gr.Blocks(theme='ParityError/Anime') as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Group():
        textbox = gr.Textbox(placeholder='Your Message Here', label='Your Message', lines=2)
        system_prompt = gr.Textbox(placeholder='Provide a System Prompt In The First Person', label='System Prompt', lines=2, value="You are YiTonic, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.")

    with gr.Group():
        chatbot = gr.Chatbot(label='TonicYi-6B-200K-🧠🤯')

    with gr.Group():
        submit_button = gr.Button('Submit', variant='primary')

    with gr.Accordion(label='Advanced options', open=False):
        max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=4056)
        temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=1.2)
        top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
        top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=40)
        do_sample_checkbox = gr.Checkbox(label='Sample (uncheck for faster greedy decoding)', value=True)

    submit_button.click(
        fn=predict,
        inputs=[textbox, system_prompt, max_new_tokens, temperature, top_p, top_k, do_sample_checkbox],
        outputs=chatbot
    )

demo.launch()
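
# Optional deployment tweak (an assumption, not part of the original Space): enable
# request queuing so long generations don't block concurrent users:
#   demo.queue().launch()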