EmoCube committed on
Commit
ab9cd81
·
verified ·
1 Parent(s): 9ac7b7c

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +84 -0
main.py CHANGED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request
2
+ from pydantic import BaseModel
3
+ from typing import List, Optional, Literal
4
+ from gradio_client import Client
5
+ import uvicorn
6
+ import time
7
+ import uuid
8
+
9
+ # ==== Gradio client initialization ====
10
+ gr_client = Client("Nymbo/Serverless-TextGen-Hub")
11
+
12
+ # ==== Function that queries the model ====
13
def ask(user_prompt, system_prompt, model, *, max_tokens=512, temperature=0.7, top_p=0.95):
    """Send a single-turn prompt to the Gradio ``/bot`` endpoint.

    Args:
        user_prompt: Text of the user turn; sent as a one-entry chat history
            whose reply slot is ``None``.
        system_prompt: System message, forwarded as ``system_msg``.
        model: Model identifier, forwarded as both ``custom_model`` and
            ``selected_model``.
        max_tokens: Generation length limit. ``None`` falls back to 512.
        temperature: Sampling temperature. ``None`` falls back to 0.7.
        top_p: Nucleus-sampling threshold. ``None`` falls back to 0.95.

    Returns:
        Whatever ``gr_client.predict`` returns for ``/bot``.
        NOTE(review): presumably the generated text; confirm against the
        Space's API, since a chatbot endpoint may return the history instead.
    """
    # Callers holding Optional[...] request fields may pass None; keep the
    # historical defaults in that case instead of sending null upstream.
    if max_tokens is None:
        max_tokens = 512
    if temperature is None:
        temperature = 0.7
    if top_p is None:
        top_p = 0.95

    return gr_client.predict(
        history=[[user_prompt, None]],
        system_msg=system_prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        freq_penalty=0,
        seed=-1,
        custom_model=model,
        search_term="",
        selected_model=model,
        api_name="/bot",
    )
28
+
29
+ # ==== FastAPI application ====
30
+ app = FastAPI()
31
+
32
+ # ==== Request/response models ====
33
class Message(BaseModel):
    """One chat turn inside the ``messages`` array of a chat request."""

    # Author of the turn; restricted to these three literal values.
    role: Literal["user", "assistant", "system"]
    # Plain-text body of the turn.
    content: str
36
+
37
class ChatRequest(BaseModel):
    """Request body accepted by ``POST /v1/chat/completions``."""

    # Model identifier chosen by the client.
    model: str
    # Conversation turns, in the order the client sent them.
    messages: List[Message]
    # Sampling temperature; 0.7 when the client omits it.
    temperature: Optional[float] = 0.7
    # Nucleus-sampling threshold; 0.95 when the client omits it.
    top_p: Optional[float] = 0.95
    # Generation length limit; 512 when the client omits it.
    max_tokens: Optional[int] = 512
    # Further parameters can be added here when needed.
44
+
45
@app.post("/v1/chat/completions")
async def chat_completion(request: ChatRequest):
    """Answer a chat request with an OpenAI-style ``chat.completion`` payload.

    Only the most recent ``user`` message and the first ``system`` message
    (or a default system prompt) are sent to the model; other turns are not
    forwarded.

    Args:
        request: Validated request body.

    Returns:
        A dict shaped like an OpenAI chat completion, or a 400 JSON response
        ``{"error": ...}`` when the request has no non-empty user message.
    """
    # Imported locally so this handler brings its own dependencies into scope.
    from fastapi.concurrency import run_in_threadpool
    from fastapi.responses import JSONResponse

    # Latest user turn wins; the first system turn (if any) sets the system prompt.
    user_msg = next(
        (m.content for m in reversed(request.messages) if m.role == "user"),
        None,
    )
    system_msg = next(
        (m.content for m in request.messages if m.role == "system"),
        "You are a helpful AI assistant.",
    )

    if not user_msg:
        # Report the client error with a 4xx status instead of HTTP 200 so
        # API clients can detect the failure; the body shape is unchanged.
        return JSONResponse(
            status_code=400,
            content={"error": "User message not found."},
        )

    # ask() performs a blocking network call; run it in a worker thread so
    # the event loop keeps serving other requests in the meantime.
    # NOTE(review): request.temperature / top_p / max_tokens are not
    # forwarded to ask() by this handler.
    assistant_reply = await run_in_threadpool(ask, user_msg, system_msg, request.model)

    # Build the OpenAI-style response envelope.
    return {
        "id": f"chatcmpl-{uuid.uuid4().hex[:12]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": request.model,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": assistant_reply,
                },
                "finish_reason": "stop",
            }
        ],
        "usage": {
            # Token counts are placeholders; nothing is measured here.
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        },
    }
81
+
82
+ # ==== Server startup ====
83
if __name__ == "__main__":
    from pathlib import Path

    # reload=True requires an import string rather than the app object.
    # Derive the module name from this file so the string always points at
    # the module that actually defines `app`, whatever the file is called.
    uvicorn.run(f"{Path(__file__).stem}:app", host="0.0.0.0", port=7860, reload=True)