Update models/text/deepinfra/main.py
Browse files- models/text/deepinfra/main.py +77 -51
models/text/deepinfra/main.py
CHANGED
@@ -4,7 +4,6 @@ import asyncio
|
|
4 |
import json
|
5 |
|
6 |
class OFFDeepInfraAPI:
|
7 |
-
|
8 |
headers = {
|
9 |
'Accept-Language': 'en-US,en;q=0.9,ja;q=0.8',
|
10 |
'Connection': 'keep-alive',
|
@@ -26,70 +25,97 @@ class OFFDeepInfraAPI:
|
|
26 |
self.base_url = "https://api.deepinfra.com/v1/openai/chat/completions"
|
27 |
|
28 |
def get_model_list(self):
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
32 |
|
33 |
async def generate(self, json_data: dict):
|
|
|
34 |
json_data['stream_options'] = {
|
35 |
'include_usage': True,
|
36 |
'continuous_usage_stats': True,
|
37 |
}
|
|
|
38 |
chunk_id = "chipling-deepinfraoff-" + "".join(random.choices("0123456789abcdef", k=32))
|
39 |
created = int(asyncio.get_event_loop().time())
|
40 |
-
|
|
|
41 |
|
42 |
try:
|
43 |
async with httpx.AsyncClient(timeout=None) as client:
|
44 |
async with client.stream(
|
45 |
"POST",
|
46 |
-
|
47 |
headers=OFFDeepInfraAPI.headers,
|
48 |
json=json_data
|
49 |
-
) as
|
50 |
-
|
51 |
-
|
52 |
-
async for line in request_ctx.aiter_lines():
|
53 |
-
if line:
|
54 |
-
if line.startswith('0:'):
|
55 |
-
# Clean up the text and properly escape JSON characters
|
56 |
-
text = line[2:].strip()
|
57 |
-
if text.startswith('"') and text.endswith('"'):
|
58 |
-
text = text[1:-1]
|
59 |
-
text = text.replace('\\n', '\n').replace('\\', '')
|
60 |
-
|
61 |
-
response = {
|
62 |
-
"id": chunk_id,
|
63 |
-
"object": "chat.completion.chunk",
|
64 |
-
"created": created,
|
65 |
-
"model": json_data.get("model", "deepseek-r1-distill-llama-70b"),
|
66 |
-
"choices": [{
|
67 |
-
"index": 0,
|
68 |
-
"text": text,
|
69 |
-
"logprobs": None,
|
70 |
-
"finish_reason": None
|
71 |
-
}],
|
72 |
-
"usage": None
|
73 |
-
}
|
74 |
-
yield f"data: {json.dumps(response)}\n\n"
|
75 |
-
total_tokens += 1
|
76 |
-
elif line.startswith('d:'):
|
77 |
-
final = {
|
78 |
-
"id": chunk_id,
|
79 |
-
"object": "chat.completion.chunk",
|
80 |
-
"created": created,
|
81 |
-
"model": json_data.get("model", "deepseek-r1-distill-llama-70b"),
|
82 |
-
"choices": [],
|
83 |
-
"usage": {
|
84 |
-
"prompt_tokens": len(messages),
|
85 |
-
"completion_tokens": total_tokens,
|
86 |
-
"total_tokens": len(messages) + total_tokens
|
87 |
-
}
|
88 |
-
}
|
89 |
-
yield f"data: {json.dumps(final)}\n\n"
|
90 |
-
yield "data: [DONE]\n\n"
|
91 |
return
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
except Exception as e:
|
95 |
-
yield f"data: [Connection error: {str(e)}]\n\n"
|
|
|
4 |
import json
|
5 |
|
6 |
class OFFDeepInfraAPI:
|
|
|
7 |
headers = {
|
8 |
'Accept-Language': 'en-US,en;q=0.9,ja;q=0.8',
|
9 |
'Connection': 'keep-alive',
|
|
|
25 |
self.base_url = "https://api.deepinfra.com/v1/openai/chat/completions"
|
26 |
|
27 |
def get_model_list(self):
    """Return the DeepInfra model identifiers supported by this provider."""
    supported_models = [
        'meta-llama/Llama-3.3-70B-Instruct-Turbo',
        'deepseek-ai/DeepSeek-R1-Turbo',
        'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
        'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
    ]
    return supported_models
34 |
|
35 |
async def generate(self, json_data: dict):
    """Stream an OpenAI-style chat completion from DeepInfra as SSE strings.

    Args:
        json_data: OpenAI-compatible chat-completion request payload.  It is
            mutated in place to force streaming with continuous usage stats.

    Yields:
        ``"data: ..."`` SSE lines: one chunk per content/finish delta, a
        final usage chunk, and a terminating ``"data: [DONE]"`` marker.
        Transport errors are reported in-band as ``"data: [...]"`` strings
        rather than raised to the caller.
    """
    json_data['stream'] = True  # upstream must stream for aiter_lines()
    json_data['stream_options'] = {
        'include_usage': True,
        'continuous_usage_stats': True,
    }

    chunk_id = "chipling-deepinfraoff-" + "".join(random.choices("0123456789abcdef", k=32))
    # NOTE(review): loop.time() is a monotonic clock, not epoch seconds, so
    # "created" is not a real Unix timestamp — confirm consumers don't rely on it.
    created = int(asyncio.get_event_loop().time())
    total_completion_tokens = 0
    model_name = json_data.get("model", "unknown")

    try:
        async with httpx.AsyncClient(timeout=None) as client:
            async with client.stream(
                "POST",
                self.base_url,
                headers=OFFDeepInfraAPI.headers,
                json=json_data,
            ) as response:
                if response.status_code != 200:
                    yield f"data: [Unexpected status code: {response.status_code}]\n\n"
                    return

                async for line in response.aiter_lines():
                    if not line or not line.startswith("data:"):
                        continue

                    data_str = line.removeprefix("data:").strip()
                    if data_str == "[DONE]":
                        yield "data: [DONE]\n\n"
                        return

                    try:
                        data = json.loads(data_str)
                    except json.JSONDecodeError:
                        continue  # skip malformed keep-alive / partial lines

                    # BUG FIX: with include_usage, usage-only chunks arrive
                    # with "choices": [] — the old data["choices"][0] raised
                    # IndexError, escaped the JSONDecodeError handler, and
                    # aborted the stream as a spurious "Connection error".
                    choices = data.get("choices") or []
                    if choices:
                        delta = choices[0].get("delta") or {}
                        # "content" may be present but null; normalize to "".
                        content = delta.get("content") or ""
                        finish_reason = choices[0].get("finish_reason")

                        if content or finish_reason:
                            transformed = {
                                "id": chunk_id,
                                "object": "chat.completion.chunk",
                                "created": created,
                                "choices": [{
                                    "index": 0,
                                    "text": content,
                                    "logprobs": None,
                                    "finish_reason": finish_reason,
                                    "delta": {
                                        "token_id": None,
                                        "role": delta.get("role", "assistant"),
                                        "content": content,
                                        "tool_calls": delta.get("tool_calls"),
                                    },
                                }],
                                "model": model_name,
                                "usage": None,
                            }
                            yield f"data: {json.dumps(transformed)}\n\n"

                    # Track the running completion-token count from usage stats.
                    usage = data.get("usage")
                    if usage:
                        total_completion_tokens = usage.get(
                            "completion_tokens", total_completion_tokens
                        )

                # Upstream closed without [DONE]: still emit a final usage
                # chunk so clients get token accounting, then terminate.
                final = {
                    "id": chunk_id,
                    "object": "chat.completion.chunk",
                    "created": created,
                    "choices": [],
                    "model": model_name,
                    "usage": {
                        "prompt_tokens": 0,  # prompt size unknown at this layer
                        "completion_tokens": total_completion_tokens,
                        "total_tokens": total_completion_tokens,
                    },
                }
                yield f"data: {json.dumps(final)}\n\n"
                yield "data: [DONE]\n\n"
    except Exception as e:
        # Surface transport failures in-band; callers consume SSE, not raises.
        yield f"data: [Connection error: {str(e)}]\n\n"