Spaces:
Running
Running
File size: 4,035 Bytes
e8bac0f cde7a7b e8bac0f cde7a7b bcac619 83746e4 6ab04f4 83746e4 cde7a7b 83746e4 e8bcde6 83746e4 6bda5d8 bcac619 83746e4 7826a10 a8032bb cde7a7b a8032bb 7826a10 a8032bb cde7a7b a8032bb a6549b1 a8032bb a6549b1 83746e4 cde7a7b a6549b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import gradio as gr
import aiohttp
import os
import json
from collections import deque
# Hugging Face API token, read once from the environment at import time.
TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
if not TOKEN:
    raise ValueError("API token is not set. Please set the HUGGINGFACE_API_TOKEN environment variable.")

# Confirm the token was picked up without echoing any of its characters:
# printing the head and tail of a secret puts credential material in the logs,
# and for a token of ten characters or fewer the two slices reveal all of it.
print("API Token: loaded")  # Check API token

# Rolling window of the ten most recent (user message, assistant reply) pairs.
# It lives at module level, so every caller in this process shares it.
memory = deque(maxlen=10)
async def test_api():
    """Send one authenticated GET to the model endpoint and print the reply body.

    This is a startup diagnostic only. A connection failure or timeout is
    reported on stdout instead of being raised, so an unreachable API does not
    stop the caller from going on to launch the UI.
    """
    # Imported locally: at module level asyncio is only imported inside the
    # ``__main__`` guard, so it is not guaranteed to be in scope here.
    import asyncio

    url = "https://api-inference.huggingface.co/models/mistralai/Mistral-Nemo-Instruct-2407"
    headers = {"Authorization": f"Bearer {TOKEN}"}
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as response:
                print(f"Test API response: {await response.text()}")
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        print(f"Test API request failed: {e}")
def _stream_line_text(raw_line):
    """Return the reply text carried by one line of the streamed response, or ""."""
    line = raw_line.decode("utf-8").strip()
    # Streamed completions arrive as server-sent events: "data: {json}".
    if line.startswith("data:"):
        line = line[len("data:"):].strip()
    # Blank separator lines and the end-of-stream marker carry no text.
    if not line or line == "[DONE]":
        return ""
    try:
        event = json.loads(line)
    except json.JSONDecodeError:
        return ""
    if not isinstance(event, dict):
        return ""
    choices = event.get("choices")
    if not choices:
        return ""
    # Streamed chunks put the text under "delta"; a non-streamed body uses
    # "message". Accept either so both shapes are handled.
    part = choices[0].get("delta") or choices[0].get("message") or {}
    return part.get("content") or ""


async def respond(
    message,
    history: list[tuple[str, str]],
    system_message="AI Assistant Role",
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    """Stream a chat completion for ``message``, yielding the reply as it grows.

    Args:
        message: The new user message.
        history: Accepted for the caller's signature but not read; the
            conversation context comes from the module-level ``memory``.
        system_message: Text appended to the built-in language instruction to
            form the system prompt.
        max_tokens: Forwarded as ``max_tokens`` in the request payload.
        temperature: Forwarded as ``temperature`` in the request payload.
        top_p: Forwarded as ``top_p`` in the request payload.

    Yields:
        The accumulated reply text after each received piece, or a single
        user-facing error/apology string when no reply could be produced.

    NOTE(review): ``memory`` is shared by every caller in the process, so
    concurrent conversations see each other's turns. Confirm whether that is
    intended.
    """
    system_prefix = "System: Respond in the same language as the input (English, Korean, Chinese, Japanese, etc.)."
    # Separate the two sentences; they were previously glued together.
    full_system_message = f"{system_prefix} {system_message}"

    # Build the context on a bounded copy so the shared history is only
    # touched once a reply exists. A failed request then cannot leave an
    # unanswered user turn behind in ``memory``.
    turns = deque(memory, maxlen=memory.maxlen)
    turns.append((message, None))

    messages = [{"role": "system", "content": full_system_message}]
    for user_text, assistant_text in turns:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    headers = {
        "Authorization": f"Bearer {TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "mistralai/Mistral-Nemo-Instruct-2407",
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "messages": messages,
        "stream": True,
    }

    # Bound before the request so the bookkeeping after the try block never
    # hits an unassigned name when the request itself fails.
    response_text = ""
    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                "https://api-inference.huggingface.co/v1/chat/completions",
                headers=headers,
                json=payload,
            ) as response:
                print(f"Response status: {response.status}")
                if response.status != 200:
                    error_text = await response.text()
                    print(f"Error response: {error_text}")
                    yield "An API response error occurred. Please try again."
                    return
                # Iterating the body yields it line by line.
                async for raw_line in response.content:
                    piece = _stream_line_text(raw_line)
                    if piece:
                        response_text += piece
                        yield response_text
        if not response_text:
            yield "I apologize, but I couldn't generate a response. Please try again."
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        print(f"Exception occurred: {str(e)}")
        yield f"An error occurred: {str(e)}"

    # Record the turn only when there is reply text to pair with the message.
    if response_text:
        memory.append((message, response_text))
async def chat(message, history, system_message, max_tokens, temperature, top_p):
    """Relay every partial reply produced by ``respond`` for the same arguments."""
    stream = respond(message, history, system_message, max_tokens, temperature, top_p)
    async for partial_reply in stream:
        yield partial_reply
# Look of the chat page: a named theme plus a CSS rule that hides the footer.
theme = "Nymbo/Nymbo_Theme"
css = """
footer {
visibility: hidden;
}
"""

# Chat front end driven by ``chat``. The extra inputs are listed in the same
# order as chat()'s parameters after (message, history): system message,
# max tokens, temperature, top-p.
demo = gr.ChatInterface(
    fn=chat,
    theme=theme,
    css=css,
    additional_inputs=[
        gr.Textbox(label="System message", value="AI Assistant Role"),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
    ],
)
# Script entry point: run the one-off API check, then serve the chat UI.
if __name__ == "__main__":
    import asyncio

    asyncio.run(test_api())  # Run API test
    # The stray trailing "|" that followed this call was a syntax error
    # (a binary operator with no right-hand operand) and has been removed.
    demo.queue().launch(max_threads=20)