import gradio as gr
import spaces
from typing import List, Tuple
from huggingface_hub import InferenceClient
from transformers import pipeline

# Initialize Model
# pipe = pipeline("text-generation", model="tiiuae/falcon-40b-instruct", trust_remote_code=True)

# NOTE: the original streaming client was left commented out (see respond()
# below), so `stream` was undefined. As a sketch, we assume the hosted
# Inference API via huggingface_hub's InferenceClient, whose streamed
# responses expose the same .token.text / .details fields the loop relies on.
# Swap in your own backend as needed.
client = InferenceClient("tiiuae/falcon-40b-instruct")


def format_chat_prompt(message: str, chat_history: List[Tuple[str, str]], instruction: str) -> str:
    """
    Formats a chat prompt for a conversational AI model by incorporating the
    system instruction, previous chat history, and the new user message.

    Args:
        message (str): The latest user message to be included in the prompt.
        chat_history (List[Tuple[str, str]]): A list of tuples where each tuple
            represents a previous conversation turn, with the user's message
            and the assistant's response.
        instruction (str): A system-level instruction that guides the
            assistant's behavior (e.g., helpful, formal, or humorous).

    Returns:
        str: The formatted chat prompt that includes the instruction, chat
            history, and new user message.
    """
    prompt = f"System: {instruction}"
    for turn in chat_history:
        user_message, bot_message = turn
        prompt = f"{prompt}\nUser: {user_message}\nAssistant: {bot_message}"
    prompt = f"{prompt}\nUser: {message}\nAssistant:"
    return prompt


def respond(message, chat_history, instruction, temperature=0.7):
    formatted_prompt = format_chat_prompt(message, chat_history, instruction)
    # Add the new user turn with an empty bot reply for streaming to fill in.
    chat_history = chat_history + [[message, ""]]
    # stop_sequences keep the model from generating the user's next turn.
    # Original, commented-out call kept for reference:
    # stream = client.generate_stream(formatted_prompt,
    #                                 max_new_tokens=1024,
    #                                 stop_sequences=["\nUser:", "<|endoftext|>"],
    #                                 temperature=temperature)
    stream = client.text_generation(formatted_prompt,
                                    max_new_tokens=1024,
                                    stop_sequences=["\nUser:", "<|endoftext|>"],
                                    temperature=temperature,
                                    stream=True,
                                    details=True)

    acc_text = ""
    # Stream the tokens into the chat history as they arrive.
    for idx, response in enumerate(stream):
        text_token = response.token.text

        # A populated `details` field signals the end of generation.
        if response.details:
            return

        # Strip the leading space the model emits on its first token.
        if idx == 0 and text_token.startswith(" "):
            text_token = text_token[1:]

        acc_text += text_token
        # Append the accumulated text to the in-progress bot turn.
        last_turn = list(chat_history.pop(-1))
        last_turn[-1] += acc_text
        chat_history = chat_history + [last_turn]
        yield "", chat_history
        acc_text = ""


####### GRADIO APP #######
title = """