Spaces:

mkthoma
/

Phi2_Chatbot

Runtime error

File size: 3,000 Bytes

91be14d
 
 
 
 
281c8a9
953d7e3
91be14d
7c2adcf
91be14d
7c2adcf
 
 
 
 
91be14d
7c2adcf
 
 
 
 
 
6ea445d
 
7c2adcf
6ea445d
 
 
7c2adcf
 
 
6ea445d
 
7c2adcf
91be14d
 
475af39
91be14d
7c2adcf
 
 
 
 
 
 
91be14d
7c2adcf
 
91be14d
 
7c2adcf
 
 
 
 
 
91be14d

import gradio as gr
from transformers import pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer

# Default value for the "Max new tokens" slider; change it to the number of
# new tokens you want to generate by default.
num_new_tokens = 200

# Location of the fine-tuned Phi-2 checkpoint handed to from_pretrained().
model_path = "finetuned_phi2"

# Model and tokenizer are loaded once at import time and shared by every request.
# NOTE(review): trust_remote_code=True executes code shipped with the checkpoint;
# only point model_path at a source you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)

# Markdown header rendered at the top of the page via gr.Markdown(DESCRIPTION).
# The backslash right after the opening quotes keeps the text from starting
# with a blank line.
DESCRIPTION = """\
# 🧑🏽‍💻Microsoft Phi2 Chatbot🤖
This Space demonstrates model [Microsoft Phi2 2.7B](https://huggingface.co/microsoft/phi-2), a model with 2.78B parameters fine-tuned for chat instructions. Feel free to play with it, or duplicate to run generations without a queue! If you want to run your own service, you can also [deploy the model on Inference Endpoints](https://huggingface.co/inference-endpoints).
🔎 For more details about the finetuning, take a look at the [GitHub](https://github.com/mkthoma/llm_finetuning) code.
"""

# Markdown footer rendered at the bottom of the page via gr.Markdown(LICENSE);
# links to the license of the base model.
LICENSE = """
As a derivate work of [Microsoft Phi2 2.7B](https://huggingface.co/microsoft/phi-2), this demo is governed by the original [license](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE).
"""

_text_generator = None  # text-generation pipeline, built lazily and reused across requests


def _get_text_generator():
    """Return the shared text-generation pipeline, creating it on first use."""
    global _text_generator
    if _text_generator is None:
        _text_generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
    return _text_generator


def generate(question, context, max_new_tokens = 200, temperature = 0.6):
    """Generate the chatbot's reply to ``question``.

    Args:
        question: The user's message; it is inserted into the instruction prompt.
        context: Accepted but not used. Presumably the chat history that
            gr.ChatInterface passes as the second positional argument --
            TODO confirm against the installed gradio version.
        max_new_tokens: Maximum number of tokens to generate beyond the prompt.
        temperature: Sampling temperature. A positive value enables sampling;
            zero or None falls back to greedy decoding.

    Returns:
        The newly generated text only, without the prompt.
    """
    system_message = "You are a question answering chatbot. Provide a clear and detailed explanation"
    prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n {question} [/INST]" # replace the command here with something relevant to your task

    generation_kwargs = {
        # Sliders may deliver floats; generation expects an integer budget.
        # Passing max_new_tokens directly avoids tokenizing the prompt a second
        # time just to derive a max_length.
        "max_new_tokens": int(max_new_tokens),
        # Ask the pipeline for the continuation only instead of stripping the
        # prompt with str.replace, which silently fails when the decoded text
        # does not match the prompt character-for-character.
        "return_full_text": False,
    }
    if temperature and temperature > 0:
        # Temperature is only honoured when sampling is enabled; without
        # do_sample=True decoding is greedy and the value is ignored.
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = float(temperature)
    else:
        generation_kwargs["do_sample"] = False

    result = _get_text_generator()(prompt, **generation_kwargs)
    return result[0]['generated_text']


# Chat window widget; the two avatar paths are the user and bot logos.
bbchatbot = gr.Chatbot(
    avatar_images=["logo/user logo.png", "logo/bot logo.png"],
    bubble_full_width=False,
    show_label=False,
    show_copy_button=True,
    likeable=True,
)

# Sample questions offered in the UI.
examples = [
    ["What is a large language model?"],
    ["How to calm down a person?"],
    ["What is artificial intelligence?"],
    ["How to write a good resume?"],
]

# Extra controls handed to generate() after its first two arguments, in this
# order: max_new_tokens, temperature.
additional_inputs = [
    gr.Slider(label="Max new tokens", minimum=100, maximum=2048, step=50, value=num_new_tokens),
    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6),
]

# Built here and rendered later inside the page layout.
chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=additional_inputs,
    chatbot=bbchatbot,
    title="",
    examples=examples,
)

# Page layout, top to bottom: description, duplicate button, chat UI, license.
demo = gr.Blocks(css="style.css")
with demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(
        value="Duplicate for private use",
        elem_id="duplicate-button",
    )
    chat_interface.render()
    gr.Markdown(LICENSE)

# Enable request queuing, then start the server without the API docs page.
demo.queue()
demo.launch(show_api=False)