import gradio as gr
from huggingface_hub import InferenceClient
import os  # Used to read the token from the environment

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# !! REPLACE THIS WITH YOUR HUGGING FACE MODEL ID !!
MODEL_ID = "drwlf/PsychoQwen14b"

# It's recommended to read HF_TOKEN from the environment/Space secrets
HF_TOKEN = os.getenv("HF_TOKEN")

# Initialize the client; handle a potentially missing token
client = None
if not HF_TOKEN:
    print("Warning: HF_TOKEN secret not found. Cannot initialize InferenceClient.")
    # The respond function below surfaces this case to the user
else:
    try:
        client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
        print("InferenceClient initialized successfully.")
    except Exception as e:
        print(f"Error initializing InferenceClient: {e}")
        # Client remains None
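
# Optional sanity check (a hedged sketch, not part of the original app):
# uncomment to confirm the endpoint answers a one-off, non-streaming request
# before wiring it into the UI.
# if client:
#     reply = client.chat_completion(
#         [{"role": "user", "content": "ping"}],
#         max_tokens=8,
#     )
#     print(reply.choices[0].message.content)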
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,  # Added top_k parameter
):
    """
    Generator function to stream responses from the HF Inference API.
    """
    if not client:
        yield "Error: Inference Client not initialized. Check the HF_TOKEN secret."
        return

    if not message or not message.strip():
        yield "Please enter a message."
        return
    # Rebuild the full conversation in the chat-completion message format
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})
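    # Illustrative shape of the payload after one prior exchange:
    # [{"role": "system", "content": "<system message>"},
    #  {"role": "user", "content": "<earlier question>"},
    #  {"role": "assistant", "content": "<earlier answer>"},
    #  {"role": "user", "content": "<current message>"}]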
response = "" | |
stream = None # Initialize stream variable | |
# Handle Top-K value (API often expects None to disable, not 0) | |
top_k_val = top_k if top_k > 0 else None | |
# Debugging: Print parameters being sent | |
print(f"--- Sending Request ---") | |
print(f"Model: {MODEL_ID}") | |
print(f"Messages: {messages}") | |
print(f"Max Tokens: {max_tokens}, Temp: {temperature}, Top-P: {top_p}, Top-K: {top_k_val}") | |
print(f"-----------------------") | |
    try:
        # Note: `top_k` is a text-generation parameter; some versions of
        # huggingface_hub's chat_completion() do not accept it and will raise
        # a TypeError. Drop the argument if your installed version rejects it.
        stream = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k_val,  # Pass the adjusted top_k value
        )
        for message_chunk in stream:
            # Check for choices and a delta with content before accessing
            if (
                hasattr(message_chunk, "choices")
                and len(message_chunk.choices) > 0
                and hasattr(message_chunk.choices[0], "delta")
                and message_chunk.choices[0].delta
                and hasattr(message_chunk.choices[0].delta, "content")
            ):
                token = message_chunk.choices[0].delta.content
                if token:  # Ensure the token is not None or empty
                    response += token
                    # print(token, end="")  # Debugging the stream locally
                    yield response
    except Exception as e:
        print(f"Error during chat completion: {e}")
        yield f"Sorry, an error occurred: {e}"
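
# Minimal smoke test for respond() (a hedged sketch; run manually in a REPL,
# assumes HF_TOKEN is set and the model is reachable). The last yielded value
# is the fully accumulated response.
# last = ""
# for last in respond("Hello!", [], "You are a helpful assistant.", 64, 0.7, 0.95, 0):
#     pass
# print(last)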
""" | |
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface | |
""" | |
demo = gr.ChatInterface(
    respond,
    chatbot=gr.Chatbot(height=500),  # Set the chatbot height
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly psychotherapy AI capable of thinking.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),  # Max of 2.0 reflects common usage
        gr.Slider(
            minimum=0.05,  # Top-P is usually kept above 0
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P (nucleus sampling)",
        ),
        # Added Top-K slider
        gr.Slider(
            minimum=0,  # 0 disables Top-K
            maximum=100,  # Common range; adjust if needed
            value=0,  # Default to disabled
            step=1,
            label="Top-K (0 = disabled)",
        ),
    ],
    title="PsychoQwen Chat",
    description=f"Chat with {MODEL_ID}. Adjust generation parameters below.",
    # The original passed retry_btn="Retry", undo_btn="Undo", clear_btn="Clear Chat".
    # These kwargs were removed from gr.ChatInterface in Gradio 5 and raise a
    # TypeError there (a likely cause of this Space's runtime error). Restore
    # them only if the Space pins Gradio 4.x.
)
# --- Launch the app directly ---
# No `if __name__ == "__main__":` guard is needed in a Space
demo.queue().launch(debug=True)  # debug=True surfaces logs in the Space
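
# To run locally (a sketch; the entry filename `app.py` is the usual Spaces
# convention and is assumed here, pin package versions as appropriate):
#   pip install gradio huggingface_hub
#   HF_TOKEN=hf_xxx python app.py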