Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| # No separate import is needed for `gr.OAuthToken`: it comes from the `gradio` import above and | |
| # is used to annotate `respond`'s `hf_token` parameter so Gradio handles the OAuth token automatically. | |
| # The model ID is defined once here and reused for the inference client and the UI description. | |
| MODEL_ID = "E5K7/eshalskoibito" | |
def respond(
    message,
    history: list[dict[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    hf_token: gr.OAuthToken | None,
):
    """Stream a chat completion for ``message`` from the Hugging Face Inference API.

    Args:
        message: The user's latest message.
        history: Previous conversation turns; each turn is a dict from which
            the ``"role"`` and ``"content"`` keys are forwarded to the model.
        system_message: The system prompt placed at the start of the conversation.
        max_tokens: The maximum number of new tokens to generate.
        temperature: The sampling temperature.
        top_p: The top-p value for nucleus sampling.
        hf_token: The Hugging Face OAuth token of the logged-in user, or
            ``None`` when nobody is logged in.

    Yields:
        str: The response text accumulated so far, emitted each time a
        streamed chunk adds new content.

    Raises:
        gr.Error: If ``hf_token`` is ``None`` (the user is not logged in).
    """
    # The parameter is annotated as optional so that this guard (and its
    # message) is what an anonymous user sees; with a non-optional
    # `gr.OAuthToken` annotation the function is never called with None.
    if hf_token is None:
        raise gr.Error("You must log in to use the chatbot!")

    # One client per request, authenticated with the caller's own token.
    client = InferenceClient(token=hf_token.token, model=MODEL_ID)

    # Conversation layout: system prompt, prior turns, then the new user message.
    # Only "role" and "content" are copied from each turn; any other keys
    # present in a history entry are not forwarded.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in history
    )
    messages.append({"role": "user", "content": message})

    response = ""
    stream = client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )
    for chunk in stream:
        # A chunk may carry no choices or an empty delta; skip those so the
        # UI is only updated when the text actually changes.
        if chunk.choices and chunk.choices[0].delta.content:
            response += chunk.choices[0].delta.content
            yield response
# Extra controls for the chat UI. They are listed in the same order as the
# corresponding parameters of `respond` (system_message, max_tokens,
# temperature, top_p).
_system_message_box = gr.Textbox(
    label="System message",
    info="Define the persona and behavior of the chatbot.",
    value="You are a friendly Chatbot.",
)
_max_tokens_slider = gr.Slider(
    label="Max new tokens",
    info="The maximum number of tokens to generate in the response.",
    minimum=1,
    maximum=2048,
    step=1,
    value=512,
)
_temperature_slider = gr.Slider(
    label="Temperature",
    info="Controls the randomness of the output. Higher values lead to more creative responses.",
    minimum=0.1,
    maximum=4.0,
    step=0.1,
    value=0.7,
)
_top_p_slider = gr.Slider(
    label="Top-p (nucleus sampling)",
    info="Filters out low-probability tokens. Lower values make the response more focused.",
    minimum=0.1,
    maximum=1.0,
    step=0.05,
    value=0.95,
)

# The chat interface itself: `respond` produces the replies, and the page
# title/description tell the user which model they are talking to.
chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    title="Eshalskoibito Chatbot",
    description=f"Interact with the model: **{MODEL_ID}**",
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    # Store chat history locally in the user's browser.
    save_history=True,
)
# Page layout: a sidebar containing the Hugging Face login button, plus the
# chat interface defined above. Logging in via the button is what makes an
# OAuth token available to the chatbot function.
demo = gr.Blocks()
with demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()