File size: 3,850 Bytes
ca5433e
6ebba65
2682883
 
 
9c95fcd
0256a46
2682883
ca5433e
51c40d0
1015c50
2682883
9c95fcd
 
9441e83
9c95fcd
 
 
1015c50
 
 
 
4d0d5e0
ea82e95
 
755e1ba
2682883
 
 
 
 
 
 
 
 
ea82e95
2682883
 
d53c674
 
9c95fcd
2682883
 
 
755e1ba
 
51c40d0
2682883
 
ea82e95
755e1ba
2682883
755e1ba
3e08e8e
2682883
755e1ba
 
ea82e95
755e1ba
2682883
 
f6df3e8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
import gradio as gr
import sentencepiece
from tokenization_yi import YiTokenizer


# PyTorch CUDA caching-allocator setting. Per the PyTorch CUDA memory
# management notes, max_split_size_mb stops the allocator from splitting
# blocks larger than the given size (here 120 MB) to limit fragmentation.
os.environ.update(PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:120")

# Checkpoint identifier handed to AutoModelForCausalLM.from_pretrained.
model_id = "larryvrh/Yi-6B-200K-Llamafied"
# Location the tokenizer is loaded from (the current directory).
tokenizer_path = "./"

# Markdown banner rendered at the top of the demo page via gr.Markdown.
# Markup fixes relative to the previous text: the stray double quote at the
# end of the heading and the unmatched closing </h3> tag after the
# "Duplicate Space" badge were removed; all other content is unchanged.
DESCRIPTION = """
# 👋🏻Welcome to 🙋🏻‍♂️Tonic's🧑🏻‍🚀YI-200K🚀
You can use this Space to test out the current model [larryvrh/Yi-6B-200K-Llamafied](https://huggingface.co/larryvrh/Yi-6B-200K-Llamafied) a "Llamified" version of [01-ai/Yi-6B-200k](https://huggingface.co/01-ai/Yi-6B-200k) based on [01-ai/Yi-34B](https://huggingface.co/01-ai/Yi-34B)
You can also use 🧑🏻‍🚀YI-200K🚀 by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic1/YiTonic?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community on 👻Discord: [Discord](https://discord.gg/nXx5wbX9) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
"""

# Tokenizer: the custom YiTokenizer class (imported from tokenization_yi),
# loaded from tokenizer_path. The earlier extra
# AutoModelForCausalLM.from_pretrained(tokenizer_path) call was removed: it
# tried to load a whole model into `tokenizer` only to be overwritten on the
# next line.
tokenizer = YiTokenizer.from_pretrained(tokenizer_path)

# Model: the checkpoint id must be passed positionally, because
# from_pretrained's first parameter is `pretrained_model_name_or_path`;
# passing it as `model_id=` leaves that required argument missing.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True,
)

def predict(message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800, do_sample=False):
    """Generate a completion for ``message`` and return it as chat history.

    Args:
        message: Prompt text; leading/trailing whitespace is stripped.
        max_new_tokens: Maximum number of tokens to generate after the prompt.
        temperature: Sampling temperature; only forwarded when ``do_sample``
            is true.
        top_p: Nucleus-sampling threshold; only forwarded when ``do_sample``
            is true.
        top_k: Top-k cutoff; only forwarded when ``do_sample`` is true.
        do_sample: When true, sample from the distribution; otherwise the
            sampling parameters are left out of the ``generate`` call.

    Returns:
        A list with a single ``(user_message, bot_message)`` pair, the pair
        format rendered by ``gr.Chatbot``. An empty list is returned when the
        message is empty or whitespace-only.
    """
    prompt = (message or "").strip()
    if not prompt:
        # Nothing to condition on: skip the model call entirely.
        return []

    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
    prompt_length = input_ids.shape[-1]

    generation_kwargs = {
        # UI sliders may deliver floats; generate() needs an integer budget.
        "max_new_tokens": int(max_new_tokens),
        "pad_token_id": tokenizer.eos_token_id,
        "do_sample": bool(do_sample),
    }
    if do_sample:
        # Sampling knobs only have an effect when sampling is enabled.
        generation_kwargs.update(
            temperature=float(temperature),
            top_p=float(top_p),
            top_k=int(top_k),
        )

    response_ids = model.generate(input_ids, **generation_kwargs)

    # Drop the echoed prompt tokens so only the newly generated text is decoded.
    response = tokenizer.decode(response_ids[0, prompt_length:], skip_special_tokens=True)

    # First element is the user's turn, second the model's reply.
    return [(prompt, response)]
    

# Build the Gradio page. Components are created inside nested context
# managers, so the statement order below is kept exactly as written.
with gr.Blocks(theme='ParityError/Anime') as demo:
    # Banner text from the module-level DESCRIPTION string.
    gr.Markdown(DESCRIPTION)
    # Main interaction area: prompt box, submit button and chat display.
    with gr.Group():
        textbox = gr.Textbox(placeholder='Enter your message here', label='Your Message', lines=2)
        submit_button = gr.Button('Submit', variant='primary')
        chatbot = gr.Chatbot(label='TonicYi-6B-200K')

    # Generation settings, in an accordion created with open=False.
    with gr.Accordion(label='Advanced options', open=False):
        max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=8000)
        temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=1.2)
        top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
        top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=900)
        # This checkbox's value is passed to predict as `do_sample` (last
        # entry of the inputs list below); it starts unchecked.
        do_sample_checkbox = gr.Checkbox(label='Disable for faster inference', value=False )

    # Clicking Submit calls predict; the inputs list is in the same order as
    # predict's parameters, and the return value is sent to the chatbot.
    submit_button.click(
        fn=predict,
        inputs=[textbox, max_new_tokens, temperature, top_p, top_k, do_sample_checkbox],
        outputs=chatbot
    )

# Start the app at import/run time (no __main__ guard in this script).
demo.launch()