# Import the Gradio library for creating the web interface
import gradio as gr
# Import the InferenceClient from huggingface_hub to interact with the language model
from huggingface_hub import InferenceClient

# --- Configuration Constants ---
# Define the maximum number of tokens the model should generate in a single response
FIXED_MAX_TOKENS = 99999  # Note: deliberately very high; many endpoints clamp or reject values beyond the model's context window, and typical settings are much lower (e.g., 512, 1024, 2048, 4096)
# --- Initialize the InferenceClient ---
# For custom OpenAI-compatible APIs, initialize the InferenceClient with the base URL.
# Because this base URL points at a dedicated endpoint, the served model is implied by
# the endpoint itself, so no `model` argument is passed to chat_completion below.
API_BASE_URL = "https://gtjr14qdt3vjwgdj.us-east-1.aws.endpoints.huggingface.cloud"  # Base URL for the custom API
try:
    # Initialize the client with the base URL of your API.
    # If your API requires an authentication token, you might need to pass it here,
    # e.g., client = InferenceClient(base_url=API_BASE_URL, token="YOUR_API_TOKEN"),
    # or ensure it's set as an environment variable if the client/API supports that.
    client = InferenceClient(base_url=API_BASE_URL)
    print(f"InferenceClient initialized with base_url: {API_BASE_URL}")
except Exception as e:
    print(f"Error initializing InferenceClient with base_url '{API_BASE_URL}': {e}")
    # Re-raise so the failure is visible immediately instead of continuing with an unusable client
    raise RuntimeError(
        "Could not initialize InferenceClient. "
        f"Please check the API base URL ('{API_BASE_URL}') and ensure the server is accessible. "
        f"Error: {e}"
    )
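
# If the endpoint enforces authentication, the token can come from the environment
# instead of being hard-coded. A minimal sketch (the HF_API_TOKEN variable name is
# an assumption, not something this script defines):
#
#     import os
#     client = InferenceClient(base_url=API_BASE_URL, token=os.environ.get("HF_API_TOKEN"))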

# --- Core Chatbot Logic ---
def respond(message, history):
    """
    Process the user's message and the chat history to generate a response
    from the language model using the custom API.

    Args:
        message (str): The latest message from the user.
        history (list of lists): A list where each inner list contains a pair of
                                 [user_message, ai_message].

    Yields:
        str: The accumulated response so far, re-yielded as each new token arrives
             (for streaming).
    """
    # Initialize the messages list
    messages = []
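    # To give the bot a fixed persona, a system message could be seeded here first.
    # A sketch (the prompt wording is purely illustrative, not from the original script):
    # messages.append({"role": "system", "content": "You are Xortron7, a helpful AI assistant."})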
    # Append past interactions from the history to the messages list;
    # this provides context to the language model
    for user_message, ai_message in history:
        if user_message:  # Ensure there's a user message
            messages.append({"role": "user", "content": user_message})
        if ai_message:  # Ensure there's an AI message
            messages.append({"role": "assistant", "content": ai_message})
    # Append the current user's message to the messages list
    messages.append({"role": "user", "content": message})
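    # At this point `messages` has the standard chat-completion shape, e.g.
    # (illustrative data, not produced by the script):
    # [
    #     {"role": "user", "content": "Hi"},
    #     {"role": "assistant", "content": "Hello! How can I help?"},
    #     {"role": "user", "content": "<current message>"},
    # ]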
    # Initialize an empty string to accumulate the streamed response
    response_text = ""

    try:
        # Make a streaming call to the chat completions endpoint.
        # No `model` parameter is needed: the dedicated endpoint behind
        # API_BASE_URL determines which model serves the request.
        stream = client.chat_completion(
            messages=messages,            # The conversation history and current message
            max_tokens=FIXED_MAX_TOKENS,  # Maximum tokens for the response
            stream=True,                  # Enable streaming for token-by-token output
        )
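        # Sampling parameters such as `temperature` or `top_p` could also be passed
        # to chat_completion; they are omitted above so the endpoint's defaults apply.
        # A sketch (values are illustrative, assuming the backing API honors them):
        # stream = client.chat_completion(messages=messages, max_tokens=FIXED_MAX_TOKENS,
        #                                 stream=True, temperature=0.7, top_p=0.95)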
        for chunk in stream:
            # Only act on chunks that actually carry content; the exact chunk
            # structure can vary based on the model/endpoint
            if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content is not None:
                token = chunk.choices[0].delta.content  # Extract the token from the chunk
                response_text += token                  # Append the token to the response string
                yield response_text                     # Yield the accumulated response so far (streaming UI update)
    except Exception as e:
        # If any error occurs during the API call, surface it in the chat window
        error_message = f"An error occurred during model inference: {e}"
        print(error_message)  # Also print to console for debugging
        yield error_message

# --- Gradio Interface Definition ---
# URL for the header image
header_image_path = "https://cdn-uploads.huggingface.co/production/uploads/6540a02d1389943fef4d2640/j61iZTDaK9g0UW3aWGwWi.gif"

# Ko-fi widget script
kofi_script = """
<script src='https://storage.ko-fi.com/cdn/scripts/overlay-widget.js'></script>
<script>
  kofiWidgetOverlay.draw('sonnydesorbo', {
    'type': 'floating-chat',
    'floating-chat.donateButton.text': 'Support me',
    'floating-chat.donateButton.background-color': '#00b9fe',
    'floating-chat.donateButton.text-color': '#fff'
  });
</script>
"""

# Create a Gradio Blocks layout for more control over the interface.
# theme=gr.themes.Soft() applies a soft visual theme, and head=kofi_script
# injects the Ko-fi widget into the <head> of the HTML page.
with gr.Blocks(theme=gr.themes.Soft(), head=kofi_script) as demo:
    # Display an image at the top of the chatbot interface
    gr.Image(
        value=header_image_path,  # Source of the image
        label="Chatbot Header",   # Label text (not shown due to show_label=False)
        show_label=False,         # Hide the label text
        interactive=False,        # Make the image non-interactive
        height=100,               # Set the height of the image
        elem_id="chatbot-logo"    # Assign an HTML ID for potential CSS styling
    )
    # Create the chat interface component
    gr.ChatInterface(
        fn=respond,          # The function to call when a message is sent
        chatbot=gr.Chatbot(  # Configure the chatbot display area
            height=500       # Set the height of the chat history display
        ),
        # Additional parameters for ChatInterface can be added here, e.g.:
        # title="Xortron7 Chat",
        # description="Chat with Xortron7, your AI assistant.",
        # examples=["Hello!", "What is Gradio?"],
        # retry_btn=None,              # Removes the retry button (Gradio 4.x)
        # undo_btn="Delete Previous",  # Customizes the undo button (Gradio 4.x)
        # clear_btn="Clear Chat",      # Customizes the clear button (Gradio 4.x)
    )
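
# Streaming handlers rely on Gradio's queue. It is enabled by default in Gradio 4+,
# but on older versions it may need to be switched on explicitly (a sketch):
# demo.queue()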

# --- Application Entry Point ---
if __name__ == "__main__":
    # Launch the Gradio web server.
    # show_api=False disables the API documentation page;
    # share=False prevents creating a public Gradio link (for local development).
    try:
        demo.launch(show_api=False, share=False)
    except NameError as ne:
        # Defensive: 'demo' (or 'client') may be undefined if initialization failed
        # in a way that was not caught above
        print(f"Gradio demo could not be launched. 'client' might not have been initialized: {ne}")
    except RuntimeError as re:
        # Note: the RuntimeError raised during client initialization happens at module
        # load, so execution normally never reaches launch in that case; this handler
        # is purely defensive
        print(f"Gradio demo could not be launched due to an error during client initialization: {re}")
    except Exception as e:
        print(f"An unexpected error occurred when trying to launch the Gradio demo: {e}")