Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException | |
| from pydantic import BaseModel | |
| from huggingface_hub import InferenceClient | |
| from typing import List, Tuple, Optional | |
# FastAPI application instance
app = FastAPI()

# Module-level mutable state shared across requests.
# NOTE(review): plain globals are not safe under concurrent workers —
# confirm this service runs single-process, or move this state into a
# dependency/object.
client = None                                   # InferenceClient, set by initialize_model
system_message = "You are a friendly Chatbot."  # system prompt for every conversation
max_tokens = 1024                               # generation cap per reply
temperature = 0.7                               # sampling temperature
top_p = 0.95                                    # nucleus-sampling mass
history = []                                    # list of (user_msg, assistant_msg) tuples
total_tokens = 0                                # approximate running token count
# Request payload for the chat endpoint.
class ChatRequest(BaseModel):
    # Text of the user's message.
    message: str
    # When True, the dialogue history is cleared instead of generating a reply.
    reset_history: bool = False
# Request payload for (re)initializing the model and generation settings.
# Defaults mirror the module-level startup configuration.
class InitializeRequest(BaseModel):
    model_name: str = "Qwen/Qwen2.5-Coder-32B-Instruct"
    system_message: str = "You are a friendly Chatbot."
    max_tokens: int = 1024
    temperature: float = 0.7
    top_p: float = 0.95
def initialize_model(
    model_name: str,
    sys_message: str,
    tokens: int,
    temp: float,
    top_p_value: float,
) -> None:
    """Create the inference client and store generation settings globally.

    Args:
        model_name: Hugging Face model repo id passed to ``InferenceClient``.
        sys_message: System prompt prepended to every conversation.
        tokens: Maximum number of tokens to generate per reply.
        temp: Sampling temperature.
        top_p_value: Nucleus-sampling probability mass.
    """
    global client, system_message, max_tokens, temperature, top_p
    client = InferenceClient(model_name)
    system_message = sys_message
    max_tokens = tokens
    temperature = temp
    top_p = top_p_value
# Route that (re)initializes the model with new settings.
# BUG FIX: the handler was never registered with the app — without a
# @app.post decorator FastAPI returns 404 for this endpoint.
# NOTE(review): path "/initialize" reconstructed from the handler name —
# confirm against the intended API.
@app.post("/initialize")
async def initialize(request: InitializeRequest):
    """Reconfigure the inference client and generation parameters.

    Returns:
        A JSON status message on success.

    Raises:
        HTTPException: 500 with the underlying error text on failure.
    """
    try:
        initialize_model(
            model_name=request.model_name,
            sys_message=request.system_message,
            tokens=request.max_tokens,
            temp=request.temperature,
            top_p_value=request.top_p,
        )
        return {"status": "Model initialized successfully."}
    except Exception as e:
        # Chain the cause so the original traceback is preserved in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
# Route that handles one chat turn (or clears the history).
# FIXES: registered the handler with @app.post (it was never attached to
# the app); guarded against an uninitialized client; skipped None stream
# deltas — streamed chunks may carry delta.content == None, which would
# crash `response += token`; and let HTTPException pass through instead of
# being re-wrapped by the broad except.
@app.post("/chat")
async def chat(request: ChatRequest):
    """Generate an assistant reply for ``request.message``.

    Maintains the global dialogue history and an approximate token counter.
    Set ``reset_history=True`` to clear both instead of generating.

    Raises:
        HTTPException: 500 when the model is not initialized or the
            completion fails.
    """
    global history, total_tokens
    if client is None:
        raise HTTPException(status_code=500, detail="Model is not initialized.")
    try:
        # Clear the conversation state if requested.
        if request.reset_history:
            history = []
            total_tokens = 0
            return {"response": "History cleared.", "total_tokens": total_tokens}

        # Build the message list: system prompt first...
        messages = [{"role": "system", "content": system_message}]
        # ...then prior dialogue turns...
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        # ...and finally the current user message.
        messages.append({"role": "user", "content": request.message})

        # Stream the completion and accumulate the reply text.
        response = ""
        for message in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = message.choices[0].delta.content
            if token:  # delta.content can be None on control chunks
                response += token

        # Record the turn; token count is approximate (whitespace split).
        history.append((request.message, response))
        total_tokens += len(response.split())

        return {"response": response, "total_tokens": total_tokens}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# Route that reports the current dialogue state.
# BUG FIX: added the missing @app.get decorator — the handler was never
# registered with the app. Path "/status" reconstructed from the handler
# name; confirm against the intended API.
@app.get("/status")
async def get_status():
    """Return the dialogue history and the approximate total token count."""
    return {
        "history": history,
        "total_tokens": total_tokens,
    }
# Initialize the model once at import time, using the same defaults as
# InitializeRequest.
initialize_model(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    sys_message="You are a friendly Chatbot.",
    tokens=1024,
    temp=0.7,
    top_p_value=0.95,
)
# Entry point for local development runs.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)