# -*- coding: utf-8 -*-
"""llama3-chatbot.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/135s6JfFHtKhcOcp7xB3b6v6FOYLenQEM
"""

import transformers
import torch

# Load the pre-quantized 4-bit Llama 3 8B Instruct model as a text-generation pipeline.
model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={
        "torch_dtype": torch.float16,
        "quantization_config": {"load_in_4bit": True},
        "low_cpu_mem_usage": True,
    },
)

# Quick sanity check: run one exchange outside the UI.
messages = [
    {"role": "system", "content": "You are a helpful assistant!"},
    {"role": "user", "content": "Hey how are you doing today?"},
]

# Render the conversation with the model's chat template.
prompt = pipeline.tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Stop on either the regular EOS token or Llama 3's <|eot_id|> end-of-turn token.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)

# Strip the prompt so only the newly generated completion is printed.
print(outputs[0]["generated_text"][len(prompt):])

# Gradio chat UI around the same pipeline.
import gradio as gr

# Reset the model-side conversation for the chat app.
messages = []


def add_text(history, text):
    # Append the user turn to the visible chat history and to the
    # model-side message list, then return "" to clear the textbox.
    global messages
    history = history + [[text, ""]]
    messages = messages + [{"role": "user", "content": text}]
    return history, ""


def generate(history):
    global messages
    # Rebuild the prompt from the full conversation so far.
    prompt = pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    outputs = pipeline(
        prompt,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    response_msg = outputs[0]["generated_text"][len(prompt):]
    # Keep the assistant turn in the conversation so later prompts include it.
    messages = messages + [{"role": "assistant", "content": response_msg}]
    # Stream the reply to the chat window one character at a time.
    for char in response_msg:
        history[-1][1] += char
        yield history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(value=[], elem_id="chatbot")
    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Enter text and press enter",
        )
    # On submit: record the user turn, then stream the model's reply.
    txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        generate, inputs=[chatbot], outputs=chatbot
    )

demo.queue()
demo.launch(debug=True)