import gradio as gr
import spaces
from typing import List, Tuple
from huggingface_hub import InferenceClient
from transformers import pipeline

# Initialize Model
# pipe = pipeline("text-generation", model="tiiuae/falcon-40b-instruct", trust_remote_code=True)

# NOTE: the original streaming client was left commented out (see respond()
# below), so `stream` was undefined. As a sketch, we assume the hosted
# Inference API via huggingface_hub's InferenceClient, whose streamed
# responses expose the same .token.text / .details fields the loop relies on.
# Swap in your own backend as needed.
client = InferenceClient("tiiuae/falcon-40b-instruct")


def format_chat_prompt(message: str, chat_history: List[Tuple[str, str]], instruction: str) -> str:
    """
    Formats a chat prompt for a conversational AI model by incorporating the
    system instruction, previous chat history, and the new user message.

    Args:
        message (str): The latest user message to be included in the prompt.
        chat_history (List[Tuple[str, str]]): A list of tuples where each tuple
            represents a previous conversation turn, with the user's message
            and the assistant's response.
        instruction (str): A system-level instruction that guides the
            assistant's behavior (e.g., helpful, formal, or humorous).

    Returns:
        str: The formatted chat prompt that includes the instruction, chat
            history, and new user message.
    """
    prompt = f"System: {instruction}"
    for turn in chat_history:
        user_message, bot_message = turn
        prompt = f"{prompt}\nUser: {user_message}\nAssistant: {bot_message}"
    prompt = f"{prompt}\nUser: {message}\nAssistant:"
    return prompt


def respond(message, chat_history, instruction, temperature=0.7):
    formatted_prompt = format_chat_prompt(message, chat_history, instruction)
    # Add the new user turn with an empty bot reply for streaming to fill in.
    chat_history = chat_history + [[message, ""]]
    # stop_sequences keep the model from generating the user's next turn.
    # Original, commented-out call kept for reference:
    # stream = client.generate_stream(formatted_prompt,
    #                                 max_new_tokens=1024,
    #                                 stop_sequences=["\nUser:", "<|endoftext|>"],
    #                                 temperature=temperature)
    stream = client.text_generation(formatted_prompt,
                                    max_new_tokens=1024,
                                    stop_sequences=["\nUser:", "<|endoftext|>"],
                                    temperature=temperature,
                                    stream=True,
                                    details=True)

    acc_text = ""
    # Stream the tokens into the chat history as they arrive.
    for idx, response in enumerate(stream):
        text_token = response.token.text

        # A populated `details` field signals the end of generation.
        if response.details:
            return

        # Strip the leading space the model emits on its first token.
        if idx == 0 and text_token.startswith(" "):
            text_token = text_token[1:]

        acc_text += text_token
        # Append the accumulated text to the in-progress bot turn.
        last_turn = list(chat_history.pop(-1))
        last_turn[-1] += acc_text
        chat_history = chat_history + [last_turn]
        yield "", chat_history
        acc_text = ""


####### GRADIO APP #######
title = """