Hansimov committed on
Commit e820e51
1 Parent(s): b96cef7

:pencil: [Doc] Readme for features, and comments for references

Files changed (2)
  1. README.md +5 -7
  2. networks/message_streamer.py +4 -2
README.md CHANGED
@@ -8,23 +8,21 @@ app_port: 23333
 ---
 
 ## HF-LLM-API
-API for LLM inference in Huggingface spaces.
-
+Huggingface LLM Inference API in OpenAI message format.
 
 ## Features
 
 ✅ Implemented:
 
 - Support Models
-  - `mixtral-8x7b`
+  - `mixtral-8x7b`, `mistral-7b`
 - Support OpenAI API format
   - Can use api endpoint via official `openai-python` package
-- Support stream response
-- Support infinite-round chat
-- Support Docker deployment
+- Stream response
+- Docker deployment
 
 🔨 In progress:
-- [ ] Support more models
+- [x] Support more models
 
 ## Run API service
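The feature list above says the service speaks the OpenAI API format, streams responses, and can be consumed with the official `openai-python` package. A minimal client sketch, assuming the service runs locally on the `app_port: 23333` from the README front matter, exposes an OpenAI-compatible `/v1` route, and registers the model id `mixtral-8x7b` (base URL, route, key handling, and model id are assumptions, not shown in this commit):

```python
# Hedged sketch: consuming the OpenAI-format endpoint with `openai-python`.
# The base_url, /v1 route, api_key handling, and model id are assumptions.
from openai import OpenAI

client = OpenAI(
    base_url="http://127.0.0.1:23333/v1",  # app_port 23333 from the front matter
    api_key="sk-xxx",  # placeholder; the service may not validate keys
)

# "Stream response": iterate over chunks as the server emits them
stream = client.chat.completions.create(
    model="mixtral-8x7b",
    messages=[{"role": "user", "content": "who are you?"}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
```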
networks/message_streamer.py CHANGED
@@ -22,6 +22,7 @@ class MessageStreamer:
         else:
             self.model = "default"
         self.model_fullname = self.MODEL_MAP[self.model]
+        self.message_outputer = OpenaiStreamOutputer()
 
     def parse_line(self, line):
         line = line.decode("utf-8")
@@ -38,11 +39,11 @@
         stream: bool = True,
         yield_output: bool = False,
     ):
-        # https://huggingface.co/docs/text-generation-inference/conceptual/streaming#streaming-with-curl
+        # https://huggingface.co/docs/api-inference/detailed_parameters?code=curl
+        # curl --proxy http://<server>:<port> https://api-inference.huggingface.co/models/<org>/<model_name> -X POST -d '{"inputs":"who are you?","parameters":{"max_new_token":64}}' -H 'Content-Type: application/json' -H 'Authorization: Bearer <HF_TOKEN>'
         self.request_url = (
             f"https://api-inference.huggingface.co/models/{self.model_fullname}"
         )
-        self.message_outputer = OpenaiStreamOutputer()
         self.request_headers = {
             "Content-Type": "application/json",
         }
@@ -51,6 +52,7 @@
         # class InferenceClient > def text_generation()
         # huggingface_hub/inference/_text_generation.py:
         # class TextGenerationRequest > param `stream`
+        # https://huggingface.co/docs/text-generation-inference/conceptual/streaming#streaming-with-curl
         self.request_body = {
             "inputs": prompt,
             "parameters": {