chipling committed on
Commit
f6ab7af
·
verified ·
1 Parent(s): 8f054a9

Update models/text/deepinfra/main.py

Browse files
Files changed (1) hide show
  1. models/text/deepinfra/main.py +77 -51
models/text/deepinfra/main.py CHANGED
@@ -4,7 +4,6 @@ import asyncio
4
  import json
5
 
6
  class OFFDeepInfraAPI:
7
-
8
  headers = {
9
  'Accept-Language': 'en-US,en;q=0.9,ja;q=0.8',
10
  'Connection': 'keep-alive',
@@ -26,70 +25,97 @@ class OFFDeepInfraAPI:
26
  self.base_url = "https://api.deepinfra.com/v1/openai/chat/completions"
27
 
28
  def get_model_list(self):
29
- models = ['meta-llama/Llama-3.3-70B-Instruct-Turbo', 'deepseek-ai/DeepSeek-R1-Turbo', 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B', 'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B']
30
- return models
31
-
 
 
 
32
 
33
  async def generate(self, json_data: dict):
 
34
  json_data['stream_options'] = {
35
  'include_usage': True,
36
  'continuous_usage_stats': True,
37
  }
 
38
  chunk_id = "chipling-deepinfraoff-" + "".join(random.choices("0123456789abcdef", k=32))
39
  created = int(asyncio.get_event_loop().time())
40
- total_tokens = 0
 
41
 
42
  try:
43
  async with httpx.AsyncClient(timeout=None) as client:
44
  async with client.stream(
45
  "POST",
46
- "https://api.deepinfra.com/v1/openai/chat/completions",
47
  headers=OFFDeepInfraAPI.headers,
48
  json=json_data
49
- ) as request_ctx:
50
- print(request_ctx.status_code)
51
- if request_ctx.status_code == 200:
52
- async for line in request_ctx.aiter_lines():
53
- if line:
54
- if line.startswith('0:'):
55
- # Clean up the text and properly escape JSON characters
56
- text = line[2:].strip()
57
- if text.startswith('"') and text.endswith('"'):
58
- text = text[1:-1]
59
- text = text.replace('\\n', '\n').replace('\\', '')
60
-
61
- response = {
62
- "id": chunk_id,
63
- "object": "chat.completion.chunk",
64
- "created": created,
65
- "model": json_data.get("model", "deepseek-r1-distill-llama-70b"),
66
- "choices": [{
67
- "index": 0,
68
- "text": text,
69
- "logprobs": None,
70
- "finish_reason": None
71
- }],
72
- "usage": None
73
- }
74
- yield f"data: {json.dumps(response)}\n\n"
75
- total_tokens += 1
76
- elif line.startswith('d:'):
77
- final = {
78
- "id": chunk_id,
79
- "object": "chat.completion.chunk",
80
- "created": created,
81
- "model": json_data.get("model", "deepseek-r1-distill-llama-70b"),
82
- "choices": [],
83
- "usage": {
84
- "prompt_tokens": len(messages),
85
- "completion_tokens": total_tokens,
86
- "total_tokens": len(messages) + total_tokens
87
- }
88
- }
89
- yield f"data: {json.dumps(final)}\n\n"
90
- yield "data: [DONE]\n\n"
91
  return
92
- else:
93
- yield f"data: [Unexpected status code: {request_ctx.status_code}]\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  except Exception as e:
95
- yield f"data: [Connection error: {str(e)}]\n\n"
 
4
  import json
5
 
6
  class OFFDeepInfraAPI:
 
7
  headers = {
8
  'Accept-Language': 'en-US,en;q=0.9,ja;q=0.8',
9
  'Connection': 'keep-alive',
 
25
  self.base_url = "https://api.deepinfra.com/v1/openai/chat/completions"
26
 
27
  def get_model_list(self):
28
+ return [
29
+ 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
30
+ 'deepseek-ai/DeepSeek-R1-Turbo',
31
+ 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
32
+ 'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B'
33
+ ]
34
 
35
  async def generate(self, json_data: dict):
36
+ json_data['stream'] = True # Ensure stream is enabled
37
  json_data['stream_options'] = {
38
  'include_usage': True,
39
  'continuous_usage_stats': True,
40
  }
41
+
42
  chunk_id = "chipling-deepinfraoff-" + "".join(random.choices("0123456789abcdef", k=32))
43
  created = int(asyncio.get_event_loop().time())
44
+ total_completion_tokens = 0
45
+ model_name = json_data.get("model", "unknown")
46
 
47
  try:
48
  async with httpx.AsyncClient(timeout=None) as client:
49
  async with client.stream(
50
  "POST",
51
+ self.base_url,
52
  headers=OFFDeepInfraAPI.headers,
53
  json=json_data
54
+ ) as response:
55
+ if response.status_code != 200:
56
+ yield f"data: [Unexpected status code: {response.status_code}]\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  return
58
+
59
+ async for line in response.aiter_lines():
60
+ if not line or not line.startswith("data:"):
61
+ continue
62
+
63
+ data_str = line.removeprefix("data:").strip()
64
+ if data_str == "[DONE]":
65
+ yield "data: [DONE]\n\n"
66
+ return
67
+
68
+ try:
69
+ data = json.loads(data_str)
70
+ delta = data["choices"][0].get("delta", {})
71
+ content = delta.get("content", "")
72
+ finish_reason = data["choices"][0].get("finish_reason", None)
73
+
74
+ if content or finish_reason:
75
+ transformed = {
76
+ "id": chunk_id,
77
+ "object": "chat.completion.chunk",
78
+ "created": created,
79
+ "choices": [{
80
+ "index": 0,
81
+ "text": content,
82
+ "logprobs": None,
83
+ "finish_reason": finish_reason,
84
+ "delta": {
85
+ "token_id": None,
86
+ "role": delta.get("role", "assistant"),
87
+ "content": content,
88
+ "tool_calls": delta.get("tool_calls"),
89
+ }
90
+ }],
91
+ "model": model_name,
92
+ "usage": None
93
+ }
94
+ yield f"data: {json.dumps(transformed)}\n\n"
95
+
96
+ # Update usage stats
97
+ usage = data.get("usage")
98
+ if usage:
99
+ total_completion_tokens = usage.get("completion_tokens", total_completion_tokens)
100
+
101
+ except json.JSONDecodeError:
102
+ continue
103
+
104
+ # Final usage chunk
105
+ final = {
106
+ "id": chunk_id,
107
+ "object": "chat.completion.chunk",
108
+ "created": created,
109
+ "choices": [],
110
+ "model": model_name,
111
+ "usage": {
112
+ "prompt_tokens": 0,
113
+ "completion_tokens": total_completion_tokens,
114
+ "total_tokens": total_completion_tokens
115
+ }
116
+ }
117
+ yield f"data: {json.dumps(final)}\n\n"
118
+ yield "data: [DONE]\n\n"
119
+
120
  except Exception as e:
121
+ yield f"data: [Connection error: {str(e)}]\n\n"