Raju2024 committed on
Commit
852bc31
·
verified ·
1 Parent(s): 6d628c6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# OpenAI-compatible proxy: forwards /v1/chat/completions requests to a
# HuggingFace Space (CohereLabs/command-a-vision) via gradio_client.
import asyncio
import time
import uuid
import json

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse
from gradio_client import Client

# ASGI application instance served by uvicorn/gunicorn.
app = FastAPI()

# HuggingFace Space
# NOTE(review): constructing the Client at import time performs a network
# handshake with the Space — startup fails if the Space is unreachable.
client = Client("CohereLabs/command-a-vision")
14
+
15
+
16
# call gradio safely
def call_gradio(message, max_tokens=12800, temperature=0.1, top_p=0.9):
    """Forward one prompt to the Space's /chat endpoint and block for the reply.

    Best-effort wrapper: any upstream failure is logged and converted into a
    plain error string so the caller always receives text, never an exception.
    """
    try:
        pending = client.submit(
            message=message,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            api_name="/chat",
        )
        # .result() blocks until the remote job completes.
        return pending.result()
    except Exception as err:
        print("Gradio API error:", err)
        return "Error: upstream model failed."
35
+
36
+
37
def format_openai_response(content):
    """Wrap *content* in an OpenAI chat.completion response envelope.

    Produces a single-choice, non-streaming payload with a fresh random id
    and the current Unix timestamp.
    """
    choice = {
        "index": 0,
        "message": {"role": "assistant", "content": content},
        "finish_reason": "stop",
    }
    return {
        "id": f"chatcmpl-{uuid.uuid4().hex}",
        "object": "chat.completion",
        "created": int(time.time()),
        # NOTE(review): model label does not match the backing Space
        # (command-a-vision) — presumably kept for client compatibility.
        "model": "minimax-text-01",
        "choices": [choice],
    }
54
+
55
+
56
@app.post("/v1/chat/completions")
async def chat(request: Request):
    """OpenAI-compatible chat completions endpoint.

    Reads an OpenAI-style JSON body, forwards the last message's content to
    the upstream Space, and returns either a complete chat.completion payload
    or a simulated SSE token stream (word-by-word chunks) when "stream" is true.
    """
    body = await request.json()

    messages = body.get("messages", [])
    stream = body.get("stream", False)

    max_tokens = body.get("max_tokens", 12800)
    temperature = body.get("temperature", 0.1)
    top_p = body.get("top_p", 0.9)

    # Fix: an empty messages list previously raised IndexError (HTTP 500);
    # reject it explicitly with an OpenAI-style 400 error instead.
    if not messages:
        return JSONResponse(
            {
                "error": {
                    "message": "messages must be a non-empty list",
                    "type": "invalid_request_error",
                }
            },
            status_code=400,
        )

    # A missing "content" key degrades to an empty prompt rather than a 500.
    user_message = messages[-1].get("content", "")

    # normal response
    if not stream:
        # Fix: call_gradio blocks on the remote job; run it in a worker
        # thread so the event loop stays free to serve other requests.
        result = await asyncio.to_thread(
            call_gradio, user_message, max_tokens, temperature, top_p
        )
        return JSONResponse(format_openai_response(result))

    # streaming response
    async def generate():
        result = await asyncio.to_thread(
            call_gradio, user_message, max_tokens, temperature, top_p
        )

        # Fix: OpenAI streams share one id/created across all chunks of a
        # completion; previously every chunk minted a fresh uuid.
        completion_id = f"chatcmpl-{uuid.uuid4().hex}"
        created = int(time.time())

        for word in result.split(" "):
            chunk = {
                "id": completion_id,
                "object": "chat.completion.chunk",
                "created": created,
                "model": "minimax-text-01",
                "choices": [
                    {
                        "delta": {"content": word + " "},
                        "index": 0,
                        "finish_reason": None,
                    }
                ],
            }
            yield f"data: {json.dumps(chunk)}\n\n"
            # Small delay so clients perceive incremental token arrival.
            await asyncio.sleep(0.02)

        # Terminal chunk: empty delta with finish_reason "stop"
        # (now carries created/model like the content chunks).
        end_chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": "minimax-text-01",
            "choices": [
                {
                    "delta": {},
                    "index": 0,
                    "finish_reason": "stop",
                }
            ],
        }
        yield f"data: {json.dumps(end_chunk)}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(generate(), media_type="text/event-stream")