Commit d2b20f2 by Hansimov
1 Parent(s): 489b65b

:gem: [Feature] Support no-stream mode with dict response

apis/chat_api.py CHANGED
@@ -64,19 +64,24 @@ class ChatAPIApp:
         streamer = MessageStreamer(model=item.model)
         composer = MessageComposer(model=item.model)
         composer.merge(messages=item.messages)
-        event_source_response = EventSourceResponse(
-            streamer.chat(
-                prompt=composer.merged_str,
-                temperature=item.temperature,
-                max_new_tokens=item.max_tokens,
-                stream=item.stream,
-                yield_output=True,
-            ),
-            media_type="text/event-stream",
-            ping=2000,
-            ping_message_factory=lambda: ServerSentEvent(**{"comment": ""}),
+        # streamer.chat = stream_chat_mock
+
+        stream_response = streamer.chat_response(
+            prompt=composer.merged_str,
+            temperature=item.temperature,
+            max_new_tokens=item.max_tokens,
         )
-        return event_source_response
+        if item.stream:
+            event_source_response = EventSourceResponse(
+                streamer.chat_return_generator(stream_response),
+                media_type="text/event-stream",
+                ping=2000,
+                ping_message_factory=lambda: ServerSentEvent(**{"comment": ""}),
+            )
+            return event_source_response
+        else:
+            data_response = streamer.chat_return_dict(stream_response)
+            return data_response
 
     def setup_routes(self):
         for prefix in ["", "/v1"]:
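
Note: the handler now branches on `item.stream`: streaming requests still get an `EventSourceResponse`, while non-streaming requests receive a single JSON dict from `chat_return_dict`. A minimal client sketch of both modes (the endpoint path, port, and model name below are illustrative assumptions, not part of this commit):

```python
import requests

# Hypothetical local endpoint; route and port are assumed for illustration.
API_URL = "http://127.0.0.1:8000/v1/chat/completions"
payload = {
    "model": "mixtral-8x7b",  # assumed model name
    "messages": [{"role": "user", "content": "Hello!"}],
}

# No-stream mode: one dict built by chat_return_dict()
resp = requests.post(API_URL, json={**payload, "stream": False})
print(resp.json()["choices"][0]["message"]["content"])

# Stream mode: server-sent events fed by chat_return_generator()
with requests.post(API_URL, json={**payload, "stream": True}, stream=True) as resp:
    for line in resp.iter_lines():
        if line:
            print(line.decode("utf-8"))
```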
messagers/message_outputer.py CHANGED
@@ -7,20 +7,22 @@ class OpenaiStreamOutputer:
     * https://platform.openai.com/docs/api-reference/chat/create
     """
 
-    def data_to_string(self, data={}, content_type=""):
-        data_str = f"{json.dumps(data)}"
-
-        return data_str
-
-    def output(self, content=None, content_type="Completions") -> str:
-        data = {
+    def __init__(self):
+        self.default_data = {
             "created": 1700000000,
             "id": "chatcmpl-hugginface",
             "object": "chat.completion.chunk",
-            # "content_type": content_type,
+            # "content_type": "Completions",
             "model": "hugginface",
             "choices": [],
         }
+
+    def data_to_string(self, data={}, content_type=""):
+        data_str = f"{json.dumps(data)}"
+        return data_str
+
+    def output(self, content=None, content_type="Completions") -> str:
+        data = self.default_data.copy()
         if content_type == "Role":
             data["choices"] = [
                 {
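
Note: `dict.copy()` is shallow, so copies of `self.default_data` share the nested `"choices"` list. The code stays correct because `output()` (and `chat_return_dict` below) reassign `data["choices"]` instead of mutating it in place. A standalone sketch of the pitfall this avoids:

```python
template = {"model": "hugginface", "choices": []}

safe = template.copy()
safe["choices"] = [{"delta": {"content": "Hi"}}]  # rebinding: template untouched
assert template["choices"] == []

unsafe = template.copy()
unsafe["choices"].append("oops")  # mutates the list shared with template
assert template["choices"] == ["oops"]  # template is now polluted
```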
mocks/stream_chat_mocker.py CHANGED
@@ -2,10 +2,11 @@ import time
 from utils.logger import logger
 
 
-def stream_chat_mock():
-    for i in range(8):
+def stream_chat_mock(*args, **kwargs):
+    logger.note(msg=str(args) + str(kwargs))
+    for i in range(10):
         content = f"W{i+1} "
-        time.sleep(1.5)
+        time.sleep(0.1)
         logger.mesg(content, end="")
         yield content
     logger.mesg("")
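
Note: accepting `*args, **kwargs` lets the mock stand in for the real chat call regardless of signature, as in the commented-out `streamer.chat = stream_chat_mock` line in apis/chat_api.py. A quick way to exercise it on its own (assuming the repo root is on `sys.path`):

```python
from mocks.stream_chat_mocker import stream_chat_mock

# Yields "W1 " ... "W10 " with a 0.1 s delay each, emulating a token stream.
for token in stream_chat_mock("fake prompt", temperature=0.01):
    print(token, end="", flush=True)
print()
```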
networks/message_streamer.py CHANGED
@@ -31,13 +31,11 @@ class MessageStreamer:
         content = data["token"]["text"]
         return content
 
-    def chat(
+    def chat_response(
         self,
         prompt: str = None,
         temperature: float = 0.01,
         max_new_tokens: int = 8192,
-        stream: bool = True,
-        yield_output: bool = False,
     ):
         # https://huggingface.co/docs/api-inference/detailed_parameters?code=curl
         # curl --proxy http://<server>:<port> https://api-inference.huggingface.co/models/<org>/<model_name> -X POST -d '{"inputs":"who are you?","parameters":{"max_new_token":64}}' -H 'Content-Type: application/json' -H 'Authorization: Bearer <HF_TOKEN>'
@@ -60,24 +58,57 @@ class MessageStreamer:
                 "max_new_tokens": max_new_tokens,
                 "return_full_text": False,
             },
-            "stream": stream,
+            "stream": True,
         }
         logger.back(self.request_url)
         enver.set_envs(proxies=True)
-        stream = requests.post(
+        stream_response = requests.post(
             self.request_url,
             headers=self.request_headers,
             json=self.request_body,
             proxies=enver.requests_proxies,
-            stream=stream,
+            stream=True,
         )
-        status_code = stream.status_code
+        status_code = stream_response.status_code
         if status_code == 200:
             logger.success(status_code)
         else:
             logger.err(status_code)
 
-        for line in stream.iter_lines():
+        return stream_response
+
+    def chat_return_dict(self, stream_response):
+        # https://platform.openai.com/docs/guides/text-generation/chat-completions-response-format
+        final_output = self.message_outputer.default_data.copy()
+        final_output["choices"] = [
+            {
+                "index": 0,
+                "finish_reason": "stop",
+                "message": {
+                    "role": "assistant",
+                    "content": "",
+                },
+            }
+        ]
+        logger.back(final_output)
+
+        for line in stream_response.iter_lines():
+            if not line:
+                continue
+            content = self.parse_line(line)
+
+            if content.strip() == "</s>":
+                logger.success("\n[Finished]")
+                break
+            else:
+                logger.back(content, end="")
+                final_output["choices"][0]["message"]["content"] += content
+
+        return final_output
+
+    def chat_return_generator(self, stream_response):
+        is_finished = False
+        for line in stream_response.iter_lines():
             if not line:
                 continue
 
@@ -86,12 +117,15 @@
             if content.strip() == "</s>":
                 content_type = "Finished"
                 logger.success("\n[Finished]")
+                is_finished = True
             else:
                 content_type = "Completions"
                 logger.back(content, end="")
 
-            if yield_output:
-                output = self.message_outputer.output(
-                    content=content, content_type=content_type
-                )
-                yield output
+            output = self.message_outputer.output(
+                content=content, content_type=content_type
+            )
+            yield output
+
+        if not is_finished:
+            yield self.message_outputer.output(content="", content_type="Finished")
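
Note: `chat_response` now always asks the endpoint for a streamed HTTP response, and the two new consumers decide how to expose it: `chat_return_dict` drains the stream into one OpenAI-style completion dict, while `chat_return_generator` re-emits each token as an SSE chunk and appends a closing "Finished" event if no `</s>` arrived. A minimal sketch of that split over any iterable of SSE lines (hypothetical helpers that mirror, but do not reuse, the class above; assumes TGI-style `data:{"token":{"text":...}}` lines and Python 3.9+ for `str.removeprefix`):

```python
import json


def parse_token(line: bytes) -> str:
    # TGI-style SSE line: b'data:{"token":{"text":"Hi"}, ...}' -> "Hi"
    payload = line.decode("utf-8").removeprefix("data:").strip()
    return json.loads(payload)["token"]["text"]


def drain_to_dict(lines) -> dict:
    # No-stream path: fold every token into one chat.completion-style dict.
    content = ""
    for line in lines:
        if not line:
            continue
        token = parse_token(line)
        if token.strip() == "</s>":  # end-of-sequence marker
            break
        content += token
    return {
        "choices": [
            {
                "index": 0,
                "finish_reason": "stop",
                "message": {"role": "assistant", "content": content},
            }
        ]
    }


def reemit_as_chunks(lines):
    # Stream path: yield one chunk per token, plus a closing chunk
    # if the upstream never sent the end-of-sequence marker.
    finished = False
    for line in lines:
        if not line:
            continue
        token = parse_token(line)
        if token.strip() == "</s>":
            finished = True
            break
        yield json.dumps({"choices": [{"index": 0, "delta": {"content": token}}]})
    if not finished:
        yield json.dumps({"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]})
```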