import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
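
# NOTE: load_in_8bit below additionally requires the bitsandbytes package,
# and device_map requires accelerate.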

model_name = "zirui3/gpt_1.4B_oa_instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
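
# Pin every GPT-NeoX submodule to GPU 0 so Accelerate keeps the whole model
# on a single device instead of sharding it across GPUs.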
chip_map = {
    "gpt_neox.embed_in": 0,
    "gpt_neox.layers": 0,
    "gpt_neox.final_layer_norm": 0,
    "embed_out": 0,
}

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=chip_map,
    torch_dtype=torch.float16,
    load_in_8bit=True,  # int8 quantization via bitsandbytes
)
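

# One chat step: takes the raw user text plus the running token-id history,
# generates a reply, and returns (user, bot) message pairs for gr.Chatbot
# together with the updated history.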
def predict(input, history=None, max_new_tokens=500):
    if history is None:
        history = []

    # Wrap the user turn in the "User: ... / Chip: ..." template the model
    # was instruction-tuned on.
    text = "User: " + input + "\n\nChip: "
    new_user_input_ids = tokenizer(text, return_tensors="pt").input_ids

    # Append the new turn to the accumulated conversation tokens (torch.cat
    # ignores the empty history tensor on the first turn) and move the batch
    # onto the model's device.
    bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
    bot_input_ids = bot_input_ids.to(model.device)

    generated_ids = model.generate(
        bot_input_ids,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_p=0.95,
        temperature=0.5,
        top_k=4,
        penalty_alpha=0.6,  # contrastive-search knob; has no effect while do_sample=True
        repetition_penalty=1.03,
        num_return_sequences=1,
    )

    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    history = generated_ids.tolist()

    # The decoded transcript alternates user/bot turns separated by blank
    # lines; pair them up for gr.Chatbot.
    turns = response.split("\n\n")
    response_pairs = [(turns[i], turns[i + 1]) for i in range(0, len(turns) - 1, 2)]
    return response_pairs, history
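

# Minimal Blocks UI: a Chatbot display, a hidden State holding the token-id
# history, and a textbox that submits on Enter.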
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    state = gr.State([])

    with gr.Row():
        txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter").style(container=False)

    # Pressing Enter routes the textbox value and history state through
    # predict() and back into the chat window and the state.
    txt.submit(predict, [txt, state], [chatbot, state])

if __name__ == "__main__":
    demo.launch()  # serves on http://127.0.0.1:7860 by default