Raju2024 committed on
Commit
852bc31
·
verified ·
1 Parent(s): 6d628c6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# OpenAI-compatible proxy: forwards /v1/chat/completions requests to a
# HuggingFace Space (CohereLabs/command-a-vision) via gradio_client.
import asyncio
import time
import uuid
import json

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse
from gradio_client import Client

# ASGI application instance served by uvicorn/gunicorn.
app = FastAPI()

# HuggingFace Space
# NOTE(review): constructing the Client at import time performs a network
# handshake with the Space — startup fails if the Space is unreachable.
client = Client("CohereLabs/command-a-vision")
14
+
15
+
16
# call gradio safely
def call_gradio(message, max_tokens=12800, temperature=0.1, top_p=0.9):
    """Forward one prompt to the Space's /chat endpoint and block for the reply.

    Best-effort wrapper: any upstream failure is logged and converted into a
    plain error string so the caller always receives text, never an exception.
    """
    try:
        pending = client.submit(
            message=message,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            api_name="/chat",
        )
        # .result() blocks until the remote job completes.
        return pending.result()
    except Exception as err:
        print("Gradio API error:", err)
        return "Error: upstream model failed."
35
+
36
+
37
def format_openai_response(content):
    """Wrap *content* in an OpenAI chat.completion response envelope.

    Produces a single-choice, non-streaming payload with a fresh random id
    and the current Unix timestamp.
    """
    choice = {
        "index": 0,
        "message": {"role": "assistant", "content": content},
        "finish_reason": "stop",
    }
    return {
        "id": f"chatcmpl-{uuid.uuid4().hex}",
        "object": "chat.completion",
        "created": int(time.time()),
        # NOTE(review): model label does not match the backing Space
        # (command-a-vision) — presumably kept for client compatibility.
        "model": "minimax-text-01",
        "choices": [choice],
    }
54
+
55
+
56
@app.post("/v1/chat/completions")
async def chat(request: Request):
    """OpenAI-compatible chat completions endpoint.

    Reads an OpenAI-style JSON body, forwards the last message's content to
    the upstream Space, and returns either a complete chat.completion payload
    or a simulated SSE token stream (word-by-word chunks) when "stream" is true.
    """
    body = await request.json()

    messages = body.get("messages", [])
    stream = body.get("stream", False)

    max_tokens = body.get("max_tokens", 12800)
    temperature = body.get("temperature", 0.1)
    top_p = body.get("top_p", 0.9)

    # Fix: an empty messages list previously raised IndexError (HTTP 500);
    # reject it explicitly with an OpenAI-style 400 error instead.
    if not messages:
        return JSONResponse(
            {
                "error": {
                    "message": "messages must be a non-empty list",
                    "type": "invalid_request_error",
                }
            },
            status_code=400,
        )

    # A missing "content" key degrades to an empty prompt rather than a 500.
    user_message = messages[-1].get("content", "")

    # normal response
    if not stream:
        # Fix: call_gradio blocks on the remote job; run it in a worker
        # thread so the event loop stays free to serve other requests.
        result = await asyncio.to_thread(
            call_gradio, user_message, max_tokens, temperature, top_p
        )
        return JSONResponse(format_openai_response(result))

    # streaming response
    async def generate():
        result = await asyncio.to_thread(
            call_gradio, user_message, max_tokens, temperature, top_p
        )

        # Fix: OpenAI streams share one id/created across all chunks of a
        # completion; previously every chunk minted a fresh uuid.
        completion_id = f"chatcmpl-{uuid.uuid4().hex}"
        created = int(time.time())

        for word in result.split(" "):
            chunk = {
                "id": completion_id,
                "object": "chat.completion.chunk",
                "created": created,
                "model": "minimax-text-01",
                "choices": [
                    {
                        "delta": {"content": word + " "},
                        "index": 0,
                        "finish_reason": None,
                    }
                ],
            }
            yield f"data: {json.dumps(chunk)}\n\n"
            # Small delay so clients perceive incremental token arrival.
            await asyncio.sleep(0.02)

        # Terminal chunk: empty delta with finish_reason "stop"
        # (now carries created/model like the content chunks).
        end_chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": "minimax-text-01",
            "choices": [
                {
                    "delta": {},
                    "index": 0,
                    "finish_reason": "stop",
                }
            ],
        }
        yield f"data: {json.dumps(end_chunk)}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(generate(), media_type="text/event-stream")